User:Connel MacKenzie/custom.js/ajaxtranslinks.js

/*
 * ajaxtranslinks
 *
 * For every link in the "Translations" section of a main-namespace entry,
 * fetch the raw wikitext of the linked page and restyle the link according to
 * whether that page has a level-2 section for the translation's language:
 *
 *   className ''              page exists and has a section for the language
 *   className 'new partlynew' page exists but has no section for the language
 *   className 'redirect'      page exists but is a redirect
 *   className 'dunno'         set briefly while a fetched page is being scanned
 *
 * Relies on the MediaWiki globals wgScript, wgServer, wgNamespaceNumber,
 * wgTitle and wgArticlePath, and on a browser DOM.
 */

// TODO doesn't handle interwikis
// TODO handles self links but not unlinked words
// TODO selflinked language name causes breakage
// TODO Serbian Latin subentries may wrongly match Latin!
// TODO handle cases like: *Hungarian: tönköly búza
// TODO missing : after language name can cause breakage with subentries
// TODO handle: Finnish: jargon (1, 2), ammattikieli (1, 2), slangi (2), mongerrus (3)
// TODO handle subentries which are not sublanguages: Chinese and Japanese entries at "Mongolian"
// TODO handle trad. and simpl. in some Chinese entries
// TODO Cantonese and Mandarin as sublangs can match each other
// TODO handle wikified sublanguages
// TODO handle translations that have both a main entry and subentries (corn German)

/**
 * Issue an asynchronous GET request.
 *
 * @param {string} url - address to fetch
 * @param {function(XMLHttpRequest)} on200 - called on status 200 with a non-empty body
 * @param {function()} on404 - called on status 404, or on status 200 with an empty body
 */
function ajax(url, on200, on404) {
  var req = window.XMLHttpRequest ? new XMLHttpRequest() : new ActiveXObject('Microsoft.XMLHTTP');
  req.open('GET', url, true);
  req.onreadystatechange = function () {
    if (req.readyState != 4) {
      return;
    }
    if (req.status == 200) {
      // TODO handle xmlhttprequest cache bug in some msie versions
      if (req.responseText == '') {
        on404();
      } else {
        on200(req);
      }
    } else if (req.status == 404) {
      on404();
    }
    // Any other status is deliberately ignored; the link is left as it was.
  };
  req.send(null);
}

/** Escape every regular-expression metacharacter in `text`. */
function escape_regexp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Turn a page title into a value safe to place after "title=" in a query
 * string: every space becomes an underscore and the result is URL-encoded.
 */
function encode_title(term) {
  return encodeURIComponent(String(term).replace(/ /g, '_'));
}

/**
 * Extract a language label from the text that precedes a translation:
 * the part before the first colon, with surrounding whitespace removed.
 * Text without a colon is returned trimmed.
 */
function label_from_text(text) {
  var beforeColon = /^([^:]*):/.exec(text);
  var label = beforeColon ? beforeColon[1] : text;
  return label.replace(/^\s+|\s+$/g, '');
}

// Alternative spellings that should match each other's section headings.
var LANG_SYNONYMS = {
  'Guaraní': 'Guarani',
  'Guarani': 'Guaraní',
  'Scots Gaelic': 'Scottish Gaelic',
  'Scottish Gaelic': 'Scots Gaelic',
  'Slovak': 'Slovakian',
  'Slovakian': 'Slovak',
  'Slovene': 'Slovenian',
  'Slovenian': 'Slovene',
  'Tupinambá': 'Tupinamba',
  'Tupinamba': 'Tupinambá'
};

/**
 * Build the regex alternation of section names accepted for a translation.
 * Names taken from the page are escaped so that characters such as "(" in a
 * language name are matched literally.
 */
function lang_alternatives(lang, sublang) {
  var l = escape_regexp(lang);
  var langs = l;

  // Ancient Greek
  if (sublang) {
    var s = escape_regexp(sublang);
    langs += '|' + s + '|' + s + ' ' + l + '|' + l + ' \\(' + s + '\\)|' + l + ', ' + s;
  }

  if (lang == 'Chinese') {
    langs += '|Mandarin|Cantonese';
  } else if (lang == 'Mandarin' || lang == 'Cantonese') {
    langs += '|Chinese';
  } else if (/(CJKV?|Chinese) [cC]haracters/.test(lang)) {
    // CJKV/Han characters
    langs += '|Translingual';
  } else if (Object.prototype.hasOwnProperty.call(LANG_SYNONYMS, lang)) {
    // Other synonyms
    langs += '|' + escape_regexp(LANG_SYNONYMS[lang]);
  }
  return langs;
}

/**
 * Report whether the page text is a redirect. Only the first non-blank line
 * is inspected, so a numbered definition such as "# redirect traffic" further
 * down the page is not mistaken for one.
 */
function is_redirect(lines) {
  for (var i = 0; i < lines.length; i++) {
    if (/\S/.test(lines[i])) {
      return /^\s*#\s*redirect/i.test(lines[i]);
    }
  }
  return false;
}

/**
 * Scan raw wikitext for a level-2 heading naming the language and update the
 * anchor's className, title and (when a heading is found and the href has no
 * fragment yet) href accordingly.
 *
 * @param {Node} list_item - the translation list item (currently unused)
 * @param {HTMLAnchorElement} anchor - link to restyle
 * @param {string} term - title of the linked page
 * @param {string} lang - language name
 * @param {?string} sublang - sub-language name, if any
 * @param {string} page - raw wikitext of the linked page
 */
function parse(list_item, anchor, term, lang, sublang, page) {
  var lines = page.split('\n');
  var langs = lang_alternatives(lang, sublang);
  var rx1 = new RegExp('^==\\s*(' + langs + ')\\s*==');          // ==German==
  var rx2 = new RegExp('^==\\s*\\[\\[(' + langs + ')]]\\s*==');  // ==[[German]]==
  var foundlang = null;
  var r;

  anchor.className = 'dunno';
  anchor.title = 'Looking...';

  if (is_redirect(lines)) {
    // page exists but is a redirect
    anchor.className = 'redirect';
    anchor.title = term + ' exists but is a redirect';
    return;
  }

  for (var i = 0; i < lines.length && foundlang == null; i++) {
    r = lines[i].match(rx1);
    if (r != null) {
      foundlang = r[1];
    }
    r = lines[i].match(rx2);
    if (r != null) {
      foundlang = r[1];
    }
  }

  if (foundlang != null) {
    // page exists and has an entry for this language
    anchor.className = '';
    anchor.title = term + ' exists in ' + foundlang;
    if (anchor.href.indexOf('#') == -1) {
      // TODO unicode in lang names breaks: Guaraní -> #Guaran%C3%AD but #Guaran.C3.AD
      anchor.href += '#' + foundlang;
    }
  } else {
    // page exists but has no entry for this language
    anchor.className = 'new partlynew';
    anchor.title = term + ' exists but not in ' + (sublang ? lang + ' (' + sublang + ')' : lang);
  }
}

/**
 * Fetch the raw wikitext of `term` and hand it to parse() for this anchor.
 * A missing page (404 or empty body) leaves the anchor untouched.
 */
function lookup_langs(list_item, anchor, term, lang, sublang) {
  function on200(req) {
    parse(list_item, anchor, term, lang, sublang, req.responseText);
  }
  function on404() {
    // nothing to do: the link keeps its original appearance
  }
  var url = wgScript + '?title=' + encode_title(term) + '&action=raw';
  ajax(url, on200, on404);
}

/** Walk `n` siblings forward from `node`; returns null if the chain ends early. */
function nth_next_sibling(node, n) {
  var current = node;
  for (var i = 0; i < n && current; i++) {
    current = current.nextSibling;
  }
  return current || null;
}

/**
 * Find the first h4 (then h5) inside `bc` whose last span reads "Translations".
 * Headings without a span, or whose span is empty, are skipped.
 */
function find_translations_heading(bc) {
  for (var hl = 4; hl <= 5; hl++) { // heading level: check h4 and h5 only
    var hs = bc.getElementsByTagName('h' + hl.toString());
    for (var i = 0; i < hs.length; i++) {
      var spans = hs[i].getElementsByTagName('span');
      // 1st or 2nd span depending on whether edit link exists
      var last = spans.length ? spans[spans.length - 1] : null;
      if (last && last.firstChild && last.firstChild.nodeValue == 'Translations') {
        return hs[i];
      }
    }
  }
  return null;
}

/**
 * Locate the <li> elements of the translations section for the current site.
 * Returns null when the section cannot be found.
 */
function find_translation_items() {
  var t;

  // TODO on en.wikt it has no id so we must check all h4 and h5 tags
  if (wgServer == 'http://en.wiktionary.org') {
    var bc = document.getElementById('bodyContent');
    var trans_h = bc != null ? find_translations_heading(bc) : null;
    if (trans_h == null) {
      return null;
    }

    t = nth_next_sibling(trans_h, 2);
    // Skip an rfc-trans notice or a ";subheading" definition list.
    // (A bold <p> subheading used to be skipped here too; that case is disabled.)
    if (t && ((t.tagName == 'DIV' && t.className == 'rfc-trans') || t.tagName == 'DL')) {
      t = nth_next_sibling(t, 2);
    }
    if (t == null) {
      return null;
    }

    if (t.tagName == 'TABLE' || t.tagName == 'UL') {
      return t.getElementsByTagName('li');
    }
    if (t.childNodes && t.childNodes[3]) {
      t = t.childNodes[3].childNodes[1];
      if (t && t.getElementsByTagName) {
        return t.getElementsByTagName('li');
      }
    }
    return null;
  }

  if (wgServer == 'http://wiktionarydev.leuksman.com') {
    t = document.getElementById('Translations');
    if (t != null && t.parentNode && t.parentNode.parentNode) {
      return t.parentNode.parentNode.getElementsByTagName('li');
    }
  }
  return null;
}

/** Process one translation list item: classify each link and any self link. */
function annotate_list_item(li) {
  var first = li.firstChild;
  if (!first) {
    return;
  }

  var subentries = li.getElementsByTagName('dl'); // Chinese & Serbian mostly
  var links = li.getElementsByTagName('a');
  var strongs = li.getElementsByTagName('strong');
  var lang = null;
  var offset = 0;

  if (first.nodeType == 3) {
    // plain lang label
    // TODO handle missing colon especially with sublanguages following
    lang = label_from_text(first.nodeValue);
  } else if (first.tagName == 'SPAN' && first.className == 'ttbc') {
    // ttbc
    lang = first.firstChild ? first.firstChild.nodeValue : null;
  } else if (links[0]) {
    // wikified lang label: the first link is the language, not a translation
    lang = links[0].firstChild ? links[0].firstChild.nodeValue : null;
    offset = 1;
  }

  if (typeof lang != 'string' || lang == '') {
    return; // no usable language label; nothing can be looked up
  }

  // Inline sublanguage ("Greek, Ancient"): split once so that every link in
  // this item, not only the first, is looked up with the sublanguage.
  var inline_sublang = null;
  var split = lang.match(/(.*?), (.*)/);
  if (split != null) {
    lang = split[1];
    inline_sublang = split[2];
  }

  // each red or blue link for this language
  for (var j = offset; j < links.length; j++) {
    var anchor = links[j];
    var term = anchor.title ? anchor.title
      : (anchor.firstChild ? anchor.firstChild.nodeValue : null);
    if (!term) {
      continue; // e.g. an image link with no title: no page name to look up
    }

    var sublang = inline_sublang;
    if (sublang == null && subentries.length != 0) {
      // multiline sublanguages
      var p = anchor.parentNode;
      if (p && p.tagName == 'SPAN') { // KUchar etc template
        p = p.parentNode;
      }
      if (p && p.firstChild && p.firstChild.nodeValue) {
        sublang = label_from_text(p.firstChild.nodeValue);
      }
    }

    if (term.indexOf(':') != -1) {
      anchor.title = term + ' is on another wiki';
    } else if (anchor.className.match(/\bnew\b/)) {
      anchor.title = term + ' does not exist at all';
    } else {
      lookup_langs(li, anchor, term, lang, sublang);
    }
  }

  // each self link for this language (should never be more than one)
  if (strongs.length != 0) {
    var strong = strongs[0];
    var self_anchor = document.createElement('a');
    self_anchor.appendChild(document.createTextNode(wgTitle));
    // Function replacer: "$" sequences inside the title must stay literal.
    self_anchor.href = wgArticlePath.replace('$1', function () { return wgTitle; });
    strong.parentNode.replaceChild(self_anchor, strong);
    // TODO do we ever need sublang here?
    lookup_langs(li, self_anchor, wgTitle, lang, null);
  }
}

/**
 * Entry point: on a main-namespace page, annotate every link in the
 * translations section. Does nothing in other namespaces or when the
 * section cannot be located.
 */
function ajaxtranslinks() {
  if (wgNamespaceNumber != 0) {
    return;
  }

  // find the translations section
  var lis = find_translation_items();
  if (lis == null) {
    return;
  }

  // each language in the translation table
  for (var i = 0; i < lis.length; i++) {
    annotate_list_item(lis[i]);
  }
}

// Run once the DOM is ready. The guard keeps the file loadable where jQuery's
// "$" is not defined.
if (typeof $ === 'function') {
  $( ajaxtranslinks );
}