// User:Conrad.Irwin/parser.js

/**
 * VERSION 1.0α <- You have been warned.
 *
 * First divides the page by language (~100% accurately)
 * Then tries to reconnect homonym specific information. (~80% at the moment)
 *
 * Should present this nicely and very customisably to the user
 * May eventually also output information in a computer readable format
 *
 * Things that need doing,
 *   Split categories across languages
 *   Recognise (senses) manually marked up (i.e. with italics instead of
 *   the usual sense markup)
 *
 * Basic file layout intentions (i.e. probably not stuck to)
 *   Init section
 *   Functions for splitting the dom
 *   Functions for language tabbing
 *   Generic functions for homonym parsing/tabbing
 *   User preference functions
 *   Site preference settings [section headers and handlers]
 *   DOM extensions
 *   Hacked in stylesheets
 *
 * Paper View
 *   Requested FEATURES 'alternative spellings'
 *                      'noun gender'
 *                      'turn off languages'
 */
var wtp = {};

// Store information about the headings that may be found:
//   wtHeaders['pos'] is an array of part of speech headings
//   wtHeaders['breaks'] is an array of headers that separate homonyms.
wtp.header = {};

// Store handlers for specific headings,
// e.g. wtHandler['Translations'] handles any translation sections.
wtp.handler = {};

// A named object of arrays, each array providing keys for the object, that
// stores the order of the removed DOM nodes. Keys are of the form "h1-h2-h3",
// e.g. "English-Noun-Translations".
wtp.layout = {};

// Stores the DOM nodes in sections referencable by their layout key, and
// headings (-Head-English-Noun).
wtp.page = {};

// Stores peoples preferences from wherever they come...
wtp.prefs = {};

// An array holding the current heading structure,
// i.e. ['English','Noun','Translation'].
wtp.pos = [];

// Contains the layout and page keys, normally pos.join('-').
wtp.cPos = '-Top';

// Store the first bits (everything before the first heading).
wtp.page[wtp.cPos] = document.createElement("div");

// An array of {}s (one per definition line, see wtp_registerHomonym).
wtp.homonym = [];

// Abbreviations used by the paper dictionary view.
wtp.ppAbbrevs = {
    'noun': 'n.',
    'verb': 'vb.',
    'countable': '[C]',
    'uncountable': '[U]',
    'transitive': '[T]',
    'intransitive': '[I]',
    'adjective': 'adj.',
    'adverb': 'adv.'
};

// An array of {}s (one per gloss-tagged section, see wtp_registerHomonymSection).
wtp.gloss = [];

// Sets (see wtp_registerSet) and the bits still waiting to be attached to
// them (see wtp_registerSetBit).
wtp.set = [];
wtp.bit = [];

/**
 * Initialise the parser, and see what people want us to do.
 *
 * Loads the preferences, takes a copy of the page content, splits it into
 * sections and registers the "Toggle Sections" and "Paper Dictionary" views.
 * If anything throws while parsing, the page title is flagged and the
 * unchanged view is restored (when its tab exists).
 *
 * @returns {boolean|undefined} false when no content node is available,
 *          otherwise nothing.
 */
function wtp_init() {
    wtp_loadPrefs();

    // Read the entry
    var bc = parsers_getContentNode();
    if (!bc) return false;

    // Just in case
    try {
        // Parse
        wtps_splitDom(bc);
        wtp_createLanguageTabs();
        // Allow language view
        parsers_registerView(wtp.languageOutput,
            "Toggle Sections", wtp_showLanguageView);
        // Create other sections
        wtp_matchHomonyms();

        parsers_registerView(document.createElement('div'), "Paper Dictionary",
            wtp_createDictionaryView);
    } catch (e) {
        // Keep the failure diagnosable instead of swallowing it silently.
        if (typeof console !== 'undefined' && console && typeof console.error === 'function') {
            console.error(e);
        }
        document.title += " (Parser Failed: Notify User:Conrad.Irwin if nescessary.)";
        var unchangedTab = document.getElementById('ParserTab-Unchanged');
        if (unchangedTab) parsers_tabParser(unchangedTab);
    }
}

/*
 * Functions for splitting the DOM: the page is split into sections by
 * heading, assuming that each heading element is a direct child of the
 * bodyContent node.
 */

/**
 * Split the DOM into sections by heading, assuming that each heading element
 * is a direct child of the given node.
 *
 * Every child is removed from `div`. Headings start a new section (see
 * wtps_changeHead); everything else is either dropped (site chrome, the table
 * of contents, nodes without text) or appended to the current section.
 * The ids tested here carry a trailing "C" because the content node was
 * produced by cloneNode(), which suffixes ids.
 *
 * @param {Node} div copy of the page content; it is emptied.
 * @returns {boolean} always true.
 */
function wtps_splitDom(div) {
    var nod;
    while ((nod = div.childNodes[0])) {
        div.removeChild(nod);
        var nn = nod.nodeName.toUpperCase();
        if (/^H[0-9]$/.test(nn)) {
            // The site sub-heading is dropped, every other heading opens a section.
            if (nod.getAttribute('id') !== 'siteSubC') wtps_changeHead(nod);
        } else if (nn === 'DIV') {
            var nid = nod.getAttribute('id');
            if (nid === 'catlinksC') {
                nod.setAttribute('id', 'catlinks'); //TODO get the style out of the main page
                wtp.page['-Categories'] = nod;      //TODO Deal with these properly
            } else if (nod.className === 'printfooter' ||
                       nid === 'siteNoticeC' ||
                       nid === 'contentSubC' ||
                       nid === 'jump-to-navC') {
                // Site chrome: already detached above, nothing more to do.
            } else {
                wtps_includeElement(nod);
            }
        } else if ((nn === 'TABLE' && nod.getAttribute('id') === 'tocC') ||
                   !containsText(nod)) {
            // Table of contents or a node without any text: dropped.
        } else {
            wtps_includeElement(nod);
        }
    }
    return true;
}

/**
 * Change the active section for the parser.
 *
 * Updates wtp.pos / wtp.cPos from the heading, records the new key in the
 * parent's layout list and creates the <div> that will collect the section
 * content. Headings without an "mw-headline" child are treated as content.
 *
 * @param {Element} hd heading element (H1..H9).
 */
function wtps_changeHead(hd) {
    var level = Number(hd.nodeName.replace(/^H/i, "")) - 2;
    var span = getChildByClass(hd, "mw-headline");

    if (!span) {
        // Doubt this ever happens, but you never know :)
        wtps_includeElement(hd);
        return;
    }

    // An H1 would otherwise write wtp.pos[-1].
    if (level < 0) level = 0;

    // Remove current status
    while (wtp.pos.length > level && wtp.pos.length > 0) {
        wtp.pos.pop();
    }
    // A skipped heading level (H2 followed by H4) would leave a hole in
    // wtp.pos and produce keys such as "English--Noun"; attach it directly
    // below the current heading instead.
    if (level > wtp.pos.length) level = wtp.pos.length;

    // Get parent heading
    var oPos = wtp.pos.join('-');
    if (oPos === '') oPos = '-Top';
    // Ensure parent can be laid out
    if (!wtp.layout[oPos]) wtp.layout[oPos] = [];
    // Move into child heading
    wtp.pos[level] = extractText(span).replace(/[ \s\t\r\n]+/g, '_');
    wtp.cPos = wtp.pos.join('-');
    wtp.layout[oPos].push(wtp.cPos);
    // Set up everything for parsing the child heading
    var section = document.createElement("div");
    section.setAttribute("id", wtp.cPos);
    section.className = wtp.pos[level];
    wtp.page[wtp.cPos] = section;
    wtp.page['-Head-' + wtp.cPos] = hd;
}

/**
 * Include an element in the current parser section.
 *
 * @param {Node} el node to append to wtp.page[wtp.cPos].
 */
function wtps_includeElement(el) {
    wtp.page[wtp.cPos].appendChild(el);
}

/**
 * Make a click on a language tab call wtp_tabHandle for that tab.
 * Private helper of wtp_createLanguageTabs.
 *
 * @param {Element} tab the tab element.
 */
function wtps_bindLanguageTab(tab) {
    var onClick = function () { wtp_tabHandle(tab, "Language"); };
    if (tab.addEventListener) {
        tab.addEventListener('click', onClick, false);
    } else if (tab.attachEvent) {
        tab.attachEvent('onclick', onClick); // old IE
    }
}

/**
 * Create the language tabs and the language view output node.
 * Each tab toggles the className of its language section.
 *
 * Builds wtp.languageOutput (top matter, tab bar, one hidden section per
 * language, categories) and wtp.tabs from wtp.layout / wtp.page.
 *
 * @returns {boolean} always true.
 */
function wtp_createLanguageTabs() {
    // Create the display nodes
    wtp.languageOutput = document.createElement("div");
    wtp.languageOutput.setAttribute("id", "LanguageOutput");
    wtp.tabs = document.createElement("div");
    wtp.tabs.className = "LanguageTabs";
    wtp.languageOutput.appendChild(wtp.page['-Top']);
    wtp.languageOutput.appendChild(wtp.tabs);

    // For each language...
    var languages = wtp.layout['-Top'] || [];
    for (var i = 0; i < languages.length; i++) {
        var lng = languages[i];

        // Create language section
        var sect = document.createElement('div');
        sect.setAttribute('id', '-Language-' + lng);
        sect.className = "LanguageSectionHidden";
        sect.appendChild(wtp.page['-Head-' + lng]);
        sect.appendChild(wtp.page[lng]);
        wtp_recursiveAppendChild(sect, wtp.layout[lng]);
        wtp.languageOutput.appendChild(sect);

        // Create language tab
        var tab = document.createElement('span');
        tab.setAttribute('id', '-LanguageTab-' + lng);
        tab.className = "LanguageTabHidden";
        tab.appendChild(document.createTextNode(lng.replace(/_/g, ' ')));
        wtps_bindLanguageTab(tab);
        wtp.tabs.appendChild(tab);
    }

    if (wtp.page['-Categories']) {
        wtp.languageOutput.appendChild(wtp.page['-Categories']);
    }
    return true;
}

/**
 * A hack to try and notice when people click on #Language links on this page.
 *
 * Compares the URL fragment with wtp.currentLinkLanguage and switches to the
 * matching language tab; then re-schedules itself every 100ms. The pending
 * timer is tracked so that repeated starts do not pile up polling chains.
 */
function wtp_checkLanguageLinks() {
    var hlng = document.location.href.replace(/[^#]+#?/, '');
    if (hlng.length && hlng !== wtp.currentLinkLanguage && wtp.page[hlng]) {
        var ntab = document.getElementById('-LanguageTab-' + hlng);
        if (ntab) {
            wtp_tabHandle(ntab, "Language");
            wtp.currentLinkLanguage = hlng;
        }
    }
    if (wtp.linkTimer) window.clearTimeout(wtp.linkTimer);
    wtp.linkTimer = window.setTimeout(wtp_checkLanguageLinks, 100);
}

/**
 * Used by wtp_createLanguageTabs to recursively append all subsections
 * (heading first, then content) of a language section into one element.
 *
 * @param {Node} dest element receiving the nodes.
 * @param {string[]|undefined} lay layout keys to append; nothing happens
 *        when it is missing.
 */
function wtp_recursiveAppendChild(dest, lay) {
    if (!lay) return;
    for (var i = 0; i < lay.length; i++) {
        var key = lay[i];
        dest.appendChild(wtp.page['-Head-' + key]);
        dest.appendChild(wtp.page[key]);
        wtp_recursiveAppendChild(dest, wtp.layout[key]);
    }
}

/*
 * Category handling: an attempt to get the categories to split by language too.
 */
/**
 * In an attempt to get the categories to split by language too.
 * Unfinished: the loop body was never written, so this currently does nothing.
 *
 * @param {Node} div node expected to have a child with class "catlinks".
 */
function wtp_handleCategories(div) {
    var p = getChildByClass(div, "catlinks");
    if (!p) return;
    for (var i = 0; i < p.childNodes.length; i++) {
        // TODO: assign each category link to a language section.
    }
}

/**
 * This handles a click on the language elements (and, via `type`, on Set and
 * Homonym tabs); at the moment it switches between them, but it should be
 * possible to define a toggle behaviour too.
 *
 * The section id is derived from the tab id by replacing "-<type>Tab-" with
 * "-<type>-". Clicking the tab that is already shown hides it again, unless
 * `showonly` is set.
 *
 * @param {Element} el the tab element.
 * @param {string} type "Language", "Homonym" or "Set".
 * @param {boolean} [showonly] when true nothing is hidden, the section is
 *        only made visible.
 */
function wtp_tabHandle(el, type, showonly) {
    var tabPrefix = "-" + type + "Tab-";
    var sectPrefix = "-" + type + "-";
    var id = el.getAttribute('id').replace(tabPrefix, sectPrefix);
    var sect = document.getElementById(id);
    var show = Boolean(sect);

    // Check to see if we are already displaying a section
    if (el.parentNode) {
        var otab = getChildByClass(el.parentNode, type + "TabShown");
        if (otab) {
            var oid = otab.getAttribute('id').replace(tabPrefix, sectPrefix);
            // What to do on a double click: toggle off unless asked to show only.
            // (The original assigned `showonly` to `sect` and then set a
            // property on that boolean.)
            if (oid === id) show = show && Boolean(showonly);
            var osect = document.getElementById(oid);
            if (osect && !showonly) {
                // Hide old section
                otab.className = type + "TabHidden";
                osect.className = type + "SectionHidden";
            }
        }
    }

    // Display the new section
    if (show) {
        wtp['current' + type + 'Tab'] = el;
        sect.className = type + "SectionShown";
        el.className = type + "TabShown";
    }
}

/**
 * Called when the language view is selected; it ensures at least one
 * language is showing and starts watching the URL fragment.
 *
 * @param {Node} parserNode the view node (unused).
 * @param {Node} parserTab the view tab (unused).
 */
function wtp_showLanguageView(parserNode, parserTab) {
    if (!wtp.currentLanguageTab) wtp.currentLanguageTab = wtp.tabs.firstChild;
    // A page without language headings has no tab to show.
    if (wtp.currentLanguageTab) {
        wtp_tabHandle(wtp.currentLanguageTab, "Language", true);
    }
    wtp.currentLinkLanguage = document.location.href.replace(/[^#]+#?/, '');
    // Start the polling only once, not on every activation of the view.
    if (!wtp.linkPollStarted) {
        wtp.linkPollStarted = true;
        window.setTimeout(wtp_checkLanguageLinks, 100);
    }
}

/**
 * Create a <span> with a class and a single text child.
 * Private helper of the dictionary view.
 */
function wtp_dictSpan(className, text) {
    var span = document.createElement('span');
    span.className = className;
    span.appendChild(document.createTextNode(String(text)));
    return span;
}

/**
 * Create the italic part-of-speech span for a homonym, abbreviated through
 * wtp.ppAbbrevs when possible. Private helper of the dictionary view.
 */
function wtp_dictPosSpan(hnym) {
    var ps = hnym.pos.title.replace(/_/g, ' ').replace(/.+-/, '').toLowerCase();
    if (Object.prototype.hasOwnProperty.call(wtp.ppAbbrevs, ps)) {
        var abbr = wtp_dictSpan("dictPos", wtp.ppAbbrevs[ps]);
        abbr.setAttribute('title', ps);
        return abbr;
    }
    return wtp_dictSpan("dictPos", ps);
}

/**
 * Copy the content of one definition line into paragraph `p`.
 * Private helper of the dictionary view.
 *
 * NOTE(review): the middle of this loop was lost when the page was extracted
 * (everything between "while(k<" and "sns.length>0"). The handling of known
 * grammar labels, of the original "ib-*" bracket spans and of other elements
 * is a reconstruction; confirm against the on-wiki source.
 */
function wtp_dictAppendDefinition(p, li) {
    for (var j = 0; j < li.childNodes.length; j++) {
        var on = li.childNodes[j];
        var nn = on.nodeName.toUpperCase();
        var sns = [];

        if (nn.indexOf('#') === 0) {
            p.appendChild(cloneNode(on)); // Include text nodes
            continue;
        }
        if (nn === 'DL' || nn === 'OL' || nn === 'UL') {
            // don't include example sentences (however they have been put in)
        } else if (nn === 'DIV') {
            // don't include parsed stuff or other floaty boxy thingys
        } else if (nn === 'SPAN') {
            if (on.className === 'ib-content') {
                sns = extractText(on).split(/ *, */);
                var k = 0;
                while (k < sns.length) {
                    if (sns[k] === '') {
                        sns.splice(k, 1);
                    } else if (Object.prototype.hasOwnProperty.call(wtp.ppAbbrevs, sns[k])) {
                        // Known grammar labels become abbreviations.
                        var grammar = wtp_dictSpan('dictGrammar', wtp.ppAbbrevs[sns[k]]);
                        grammar.setAttribute('title', sns[k]);
                        p.appendChild(grammar);
                        sns.splice(k, 1);
                    } else {
                        k += 1;
                    }
                }
            } else if (typeof on.className === 'string' && on.className.indexOf('ib-') === 0) {
                // Original brackets/commas around the labels: re-created below.
            } else {
                p.appendChild(cloneNode(on));
            }
        } else {
            p.appendChild(cloneNode(on));
        }

        if (sns.length > 0) {
            p.appendChild(wtp_dictSpan('ib-bracket', '('));
            p.appendChild(wtp_dictSpan('ib-contents', sns.join(',')));
            p.appendChild(wtp_dictSpan('ib-bracket', ')'));
        }
    }
}

/**
 * Tries to convert the matched up homonyms to a standard dictionary like entry
 * much less feature-full! But hopefully nice??
 * IS EN.WIKT specific as it needs to parse the PoS section in more detail.
 *
 * One <h3> per language, one paragraph per etymology; within a paragraph the
 * parts of speech are separated by " ● " and the senses by "; ".
 *
 * @param {Node} node the (initially empty) view node to fill.
 * @param {Node} parserTab the view tab (unused).
 */
function wtp_createDictionaryView(node, parserTab) {
    if (node.childNodes[0]) return; // We have already run.

    node.className = "DictionaryView";

    var lng;
    var pos;
    var ety;
    var p;
    var etyc = 1;
    var firstNumber = null; // hidden "1" of the current language, shown on demand

    for (var i = 0; i < wtp.homonym.length; i++) {
        var hnym = wtp.homonym[i];

        if (lng !== hnym.language) {
            // Check we are adding it to the right language
            lng = hnym.language;
            var h = document.createElement('h3');
            h.appendChild(document.createTextNode(lng.replace(/_/g, ' ')));
            node.appendChild(h);
            p = document.createElement('p');
            node.appendChild(p);
            // Write the bold word at the start
            p.appendChild(wtp_dictSpan("dictHomonym", wgTitle));
            // Add a hidden etymology count (will be shown if necessary)
            ety = hnym.etyTitle;
            etyc = 1;
            firstNumber = wtp_dictSpan("dictOnlyHomonymNumber", etyc);
            firstNumber.setAttribute('id', "-HiddenFhn-" + lng + etyc);
            p.appendChild(firstNumber);
            // Add the PoS in italics
            p.appendChild(wtp_dictPosSpan(hnym));
            pos = hnym.pos.title;
        } else if (hnym.etyTitle !== ety) {
            // start a new paragraph
            p = document.createElement('p');
            node.appendChild(p);
            etyc += 1;
            ety = hnym.etyTitle;
            // Write the bold word at the start
            p.appendChild(wtp_dictSpan("dictHomonym", wgTitle));
            // Add homonym number
            p.appendChild(wtp_dictSpan("dictHomonymNumber", etyc));
            // Add the PoS in italics
            p.appendChild(wtp_dictPosSpan(hnym));
            // Remember it: otherwise the next sense of the same part of
            // speech would print the separator and the PoS a second time.
            pos = hnym.pos.title;
            // Show the first number
            if (firstNumber) {
                firstNumber.className = "dictHomonymNumber";
                firstNumber = null;
            }
        } else if (hnym.pos.title !== pos) {
            wtp_addPreviousFullstop(node.childNodes[node.childNodes.length - 1]);
            pos = hnym.pos.title;
            // Add the PoS separator
            p.appendChild(wtp_dictSpan("dictPosSep", ' ● '));
            // Add the PoS in italics
            p.appendChild(wtp_dictPosSpan(hnym));
        } else {
            wtp_removeLastPunctuation(node.childNodes[node.childNodes.length - 1]);
            // put a sense separator in, could be something better
            p.appendChild(wtp_dictSpan("dictSenseSep", '; '));
        }

        wtp_dictAppendDefinition(p, hnym.node);
    }
}

/**
 * Attempts to remove a last [.,;:!?] from within or before the given node.
 *
 * Descends into the last child; whitespace/punctuation-only text nodes met
 * on the way are removed and the search continues with their previous sibling.
 *
 * @param {Node|null|undefined} node where to start looking.
 * @returns {boolean} true when a punctuation mark was removed.
 */
function wtp_removeLastPunctuation(node) {
    if (!node) return false;

    if (node.childNodes && node.childNodes.length) {
        if (wtp_removeLastPunctuation(node.childNodes[node.childNodes.length - 1])) {
            return true;
        }
        return wtp_removeLastPunctuation(node.previousSibling);
    }

    if (node.nodeValue) {
        var trailing = /[\.,;:!\?][ \n\r\s]*$/;
        if (trailing.test(node.nodeValue)) {
            node.nodeValue = node.nodeValue.replace(trailing, '');
            return true;
        }
        if (!containsText(node)) {
            var ret = wtp_removeLastPunctuation(node.previousSibling);
            if (node.parentNode) node.parentNode.removeChild(node);
            return ret;
        }
    }
    return false;
}

/**
 * Attempts to add a fullstop to the previous node value, providing no
 * punctuation is already there.
 *
 * The full stop goes before a closing double quote and replaces trailing
 * whitespace. (The original pattern carried the `g` flag, which also matches
 * the empty string at the end of the input and therefore appended two full
 * stops whenever the text ended in whitespace or a quote.)
 *
 * @param {Node|null|undefined} node where to start looking.
 * @returns {boolean} true when the text now ends in punctuation.
 */
function wtp_addPreviousFullstop(node) {
    if (!node) return false;

    if (node.childNodes && node.childNodes.length) {
        if (wtp_addPreviousFullstop(node.childNodes[node.childNodes.length - 1])) {
            return true;
        }
        return wtp_addPreviousFullstop(node.previousSibling);
    }

    if (node.nodeValue) {
        if (/[\.,;:!\?][ \n\r\s]*$/.test(node.nodeValue)) {
            return true;
        }
        if (containsText(node)) {
            node.nodeValue = node.nodeValue.replace(/(\")?[ \n\r\s]*$/, '.$1');
            return true;
        }
        var ret = wtp_addPreviousFullstop(node.previousSibling);
        if (node.parentNode) node.parentNode.removeChild(node);
        return ret;
    }
    return false;
}

/*
 * User preference functions: extract user preferences from the environment;
 * this should eventually be able to get cookies and predefined js variables,
 * and we may even get a nice preference setter.
 */
/**
 * Build the stylesheet used by the parser views. Private helper of
 * wtp_loadPrefs. The Language, Set and Homonym tab families share one set of
 * rules.
 *
 * @returns {string} CSS text.
 */
function wtp_buildStyle() {
    var rules = ['#LanguageOutput { background-color: #F8F8F8 }'];
    var families = ['Language', 'Set', 'Homonym'];
    for (var i = 0; i < families.length; i++) {
        var f = families[i];
        rules.push(
            '.' + f + 'SectionHidden { display:none; }',
            '.' + f + 'SectionShown { display:block; }',
            '.' + f + 'TabHidden{ background-color: #BBB }',
            '.' + f + 'TabShown{ background-color: #EEE }',
            '.' + f + 'Tabs { line-height: 31px; margin: 10px }',
            '.' + f + 'Tabs>span { padding:0px; padding-left: 3px; padding-right: 3px;' +
                ' border: 1px solid #000; white-space:pre; margin-left: -1px; }'
        );
    }
    rules.push(
        '.ParserViewHidden{ display: none; }',
        '.ParserViewShown{ display: block; }',
        '.ParserTabHidden { color: #0000FF }',
        '.ParserTabShown{ color: #000000; font-weight: bold; }',
        // Was "#FFFFF" (five digits), which browsers discard as invalid.
        // NOTE(review): white is assumed to be the intended colour.
        '.UnrecognisedSection{ background-color: #FFFFFF }',
        '.UnrecognisedSectionTitle{ color: #FF0000}',
        '.Unparsable{ color: #EE3333 }',
        '.DictionaryView{ }',
        '.dictHomonym{ font-weight: bold; margin-right: 2px; }',
        '.dictPos{ font-style: italic; margin-right: 2px; }',
        '.dictPosSep{ font-weight: bold; /*font-style: italic;*/ }',
        '.dictHomonymNumber{ font-weight: bold; display:inline; margin-right: 2px; }',
        '.dictOnlyHomonymNumber{ display: none; }',
        '.dictGrammar{ font-style: italic; margin-right: 2px; font-size:90%; }'
    );
    return rules.join('\n');
}

/**
 * Extract user preferences from the environment; this should eventually
 * be able to get cookies, and predefined js variables, and we may even get
 * a nice preference setter.
 *
 * Fills wtp.prefs from the optional globals wtpNoParser,
 * wtpNoSplitLanguages and wtpNoJoinHomonyms, then installs the stylesheet.
 * The stylesheet is added as a <style> element: appending to head.innerHTML
 * re-parses the whole head, and the old document.write fallback replaces the
 * page when called after loading.
 */
function wtp_loadPrefs() {
    // i.e. turn off completely
    var parserForced = typeof wtpNoParser !== "undefined" && wtpNoParser == false;
    var isArticleView = typeof wgIsArticle !== "undefined" && wgIsArticle == true &&
        typeof wgNamespaceNumber !== "undefined" && wgNamespaceNumber == 0 &&
        typeof wgAction !== "undefined" && wgAction == "view";
    if (parserForced || isArticleView) {
        wtp.prefs['SplitDom'] = true;
        // Whether to display the language tabs at the top
        wtp.prefs['TabLanguages'] = (typeof wtpNoSplitLanguages === "undefined");
        // Whether to try and put related homonyms together
        wtp.prefs['MatchHomonyms'] = (typeof wtpNoJoinHomonyms === "undefined");
    }

    var css = wtp_buildStyle();
    var styleEl = document.createElement('style');
    styleEl.setAttribute('type', 'text/css');
    if (styleEl.styleSheet) {
        styleEl.styleSheet.cssText = css; // old IE
    } else {
        styleEl.appendChild(document.createTextNode(css));
    }
    var head = document.getElementsByTagName('head')[0];
    (head || document.documentElement).appendChild(styleEl);
}

/*************** Homonym Matching ***************/

/**
 * Parse the page and try and match the scrambled homonyms together.
 * Does it by parsing all the sections according to the handlers defined by
 * their headings (wtp.section maps a heading to a handler name).
 *
 * A trailing number or roman numeral ("Etymology 2", "Etymology II") is
 * ignored when looking up the heading. The original tested [IVX] after
 * lower-casing, so roman numerals were never stripped.
 */
function wtp_matchHomonyms() {
    var languages = wtp.layout['-Top'];
    if (!languages) return;

    for (var l = 0; l < languages.length; l++) {
        var lang = languages[l];
        if (!wtp.layout[lang]) continue;

        var flat = wtp_flattenLayout(lang);
        for (var i = 0; i < flat.length; i++) {
            var key = flat[i];
            var sect = key.replace(/.+-/, '').toLowerCase()
                .replace(/[ _]([0-9]*|[ivx]*)$/, '');
            var handler = sect ? wtp.handler[wtp.section[sect]] : null;
            if (typeof handler === 'function') {
                handler(wtp.page[key], key);
            } else {
                wtp.page[key].className = "UnrecognisedSection";
                wtp.page['-Head-' + key].className = "UnrecognisedSectionTitle";
            }
        }
        wtp_matchCurrent();
    }
}

/**
 * Converts the tree structure of wtp.layout into a flat array for looping
 * (each section is followed by its subsections).
 *
 * @param {string} lang layout key to start from.
 * @returns {string[]} keys of all sections below `lang`.
 */
function wtp_flattenLayout(lang) {
    var output = [];
    var children = wtp.layout[lang];
    if (children) {
        for (var i = 0; i < children.length; i++) {
            output.push(children[i]);
            output = output.concat(wtp_flattenLayout(children[i]));
        }
    }
    return output;
}

/**
 * Tries to match homonyms to glosses and otherwise deal with sections.
 * Sections can either be attached to "homonyms", "part of speech" or
 * "language".
 *
 * Registers one definition line. It belongs to the most recently registered
 * set, so wtp_registerSet must have been called first.
 *
 * @param {Node} node the definition's node.
 * @param {string} definition its text.
 * @returns {Object} the new entry of wtp.homonym.
 */
function wtp_registerHomonym(node, definition) {
    var currentSet = wtp.set[wtp.set.length - 1];
    var entry = {
        'match': definition.toLowerCase().split(/\W+/).sort(),
        'node': node,
        'text': definition,
        'pos': currentSet,
        'language': currentSet.title.replace(/-.+/, '')
    };
    wtp.homonym.push(entry);
    return entry;
}

/**
 * Register a section that is tagged with a gloss and should be attached to
 * the homonym whose definition matches that gloss best.
 *
 * @param {Node} node the section's node.
 * @param {string} gloss the gloss text.
 * @param {string} title layout key of the section it came from.
 */
function wtp_registerHomonymSection(node, gloss, title) {
    wtp.gloss.push({
        'match': gloss.toLowerCase().split(/\W+/).sort(),
        'node': node,
        'gloss': gloss,
        'title': title,
        'language': title.replace(/-.+/, '')
    });
}

/**
 * Register a set: a node (the part of speech handler passes its section)
 * that collects the bits registered with wtp_registerSetBit.
 */
function wtp_registerSet(node, title) {
    wtp.set.push({ 'node': node, 'title': title });
}

/**
 * Register a bit: a section waiting to be attached to the matching sets the
 * next time wtp_matchCurrent runs.
 */
function wtp_registerSetBit(node, title) {
    wtp.bit.push({ 'node': node, 'title': title });
}

/**
 * Move `node` below `div` as a hidden section and add a tab for it in front
 * of `div` inside `tabs`. Private helper of wtp_matchCurrent.
 *
 * @param {Element} tabs tab bar (contains `div`).
 * @param {Element} div holder of the sections.
 * @param {Element} node section to move.
 * @param {string} type "Homonym" or "Set" (class/id prefix for wtp_tabHandle).
 * @param {string} idSuffix shared tail of the tab id and the section id.
 * @param {string} label tab caption.
 * @param {string} headKey layout key whose heading is removed from the page.
 */
function wtp_attachTabbedSection(tabs, div, node, type, idSuffix, label, headKey) {
    var tab = document.createElement('span');
    tab.className = type + "TabHidden";
    tab.appendChild(document.createTextNode(label));
    tab.setAttribute('id', '-' + type + 'Tab-' + idSuffix);
    tabs.insertBefore(tab, div);

    node.setAttribute('id', '-' + type + '-' + idSuffix);
    node.className = type + "SectionHidden";
    if (node.parentNode) node.parentNode.removeChild(node);
    div.appendChild(node);

    var head = wtp.page['-Head-' + headKey];
    if (head && head.parentNode) head.parentNode.removeChild(head);

    var onClick = function () { wtp_tabHandle(tab, type); };
    if (tab.addEventListener) {
        tab.addEventListener('click', onClick, false);
    } else if (tab.attachEvent) {
        tab.attachEvent('onclick', onClick); // old IE
    }
}

/**
 * Create a tab bar (<div class="...Tabs"> holding the section <div>) at the
 * end of `owner.node` and remember it on `owner`. Private helper of
 * wtp_matchCurrent.
 */
function wtp_createTabBar(owner, className) {
    owner.tabs = document.createElement('div');
    owner.tabs.className = className;
    owner.div = document.createElement('div');
    owner.tabs.appendChild(owner.div);
    owner.node.appendChild(owner.tabs);
}

/**
 * This is called when it is obvious that all sections associated with a
 * set of homonyms have been found, on en.wikt when a new "Etymology" section
 * starts, for example.
 *
 * 1. Every pending gloss is given to the homonym with the best score; a
 *    gloss that scores nowhere becomes a set bit.
 * 2. The matched glosses become tabs below their homonym.
 * 3. The pending bits become tabs below the sets they belong to.
 *
 * NOTE(review): parts of steps 1 and 3 were lost when the page was extracted
 * (the loop headers up to the first ">" comparison). Skipping homonyms and
 * sets that an earlier call already finished, and the per-set tab bar, are a
 * reconstruction; confirm against the on-wiki source.
 */
function wtp_matchCurrent() {
    // First match the homonyms to the glosses
    var glo;
    while ((glo = wtp.gloss.pop())) {
        var best = -1;
        var highest = 0;
        // Find the highest definition score for each gloss
        for (var i = 0; i < wtp.homonym.length; i++) {
            var cand = wtp.homonym[i];
            if (cand.done) continue; // its tabs have already been built
            var score = wtp_glossScore(glo.match, cand.match);
            var previous = cand.score ? cand.score[glo.title] : 0;
            if (score > highest && (!previous || score > previous)) {
                highest = score;
                best = i;
            }
        }
        if (best > -1) {
            var winner = wtp.homonym[best];
            if (!winner.score) winner.score = {};
            if (!winner.gloss) winner.gloss = {};
            // Try again with the gloss we are displacing. (The original
            // overwrote the whole score map with a number, so this never ran.)
            var displaced = winner.gloss[glo.title];
            if (displaced && displaced !== glo) wtp.gloss.push(displaced);
            winner.score[glo.title] = highest;
            winner.gloss[glo.title] = glo;
        } else {
            // Add it to general sections
            wtp_registerSetBit(glo.node, glo.title);
        }
    }

    // Then create the gloss tabs
    for (var h = 0; h < wtp.homonym.length; h++) {
        var def = wtp.homonym[h];
        if (def.done) continue;
        def.done = true;
        for (var gc in def.gloss) {
            if (!Object.prototype.hasOwnProperty.call(def.gloss, gc)) continue;
            var matched = def.gloss[gc];
            // Only definitions that received something get a tab bar.
            if (!def.tabs) wtp_createTabBar(def, "HomonymTabs");
            wtp_attachTabbedSection(
                def.tabs, def.div, matched.node, "Homonym",
                gc + '-' + h + '-' + matched.title,
                matched.title.replace(/.+-/, '').replace(/_/g, ' '),
                matched.title
            );
        }
    }

    // Then add the set bits to each set (Etymology to Noun etc.)
    for (var s = 0; s < wtp.set.length; s++) {
        var set = wtp.set[s];
        if (set.done) continue;
        set.done = true;
        var setDepth = set.title.replace(/[^-]/g, '').length;
        for (var b = 0; b < wtp.bit.length; b++) {
            var bit = wtp.bit[b];
            var bitDepth = bit.title.replace(/[^-]/g, '').length;
            if (bitDepth > setDepth && bit.title.indexOf(set.title) < 0) {
                continue; // subsection of some other set: shouldn't match
            }
            if (!set.tabs) {
                wtp_createTabBar(set, "SetTabs");
                set.usedTitles = {};
            }
            // Number repeated titles so that the ids stay unique.
            var title = bit.title;
            if (Object.prototype.hasOwnProperty.call(set.usedTitles, title)) {
                var n = 1;
                while (Object.prototype.hasOwnProperty.call(set.usedTitles, bit.title + '_' + n)) {
                    n += 1;
                }
                title = bit.title + '_' + n;
            }
            set.usedTitles[title] = true;
            // NOTE(review): a bit that matches several sets is moved each
            // time, so only the last set keeps the section itself.
            wtp_attachTabbedSection(
                set.tabs, set.div, bit.node, "Set",
                s + '-' + title,
                title.replace(/.+-/, '').replace(/_/g, ' '),
                bit.title
            );
        }
    }
    wtp.bit = [];
}

/**
 * This function tries to work out a score for the closeness of match between
 * two probably very different sorted arrays of strings.
 * It also edits the arrays as it goes to make the answers better: words of
 * one or two letters found in both arrays are removed from both.
 *
 * @param {string[]} glo sorted words of the gloss (modified).
 * @param {string[]} def sorted words of the definition (modified).
 * @returns {number} 0 when no word longer than two letters is shared.
 */
function wtp_glossScore(glo, def) {
    var gc = 0;
    var dc = 0;
    var score = 0.0;
    while (gc < glo.length && dc < def.length) {
        if (glo[gc] === def[dc]) {
            if (glo[gc].length > 2) {
                // Letter modifier should be tweaked
                score += 1.0 + (glo[gc].length / 2);
                gc += 1;
                dc += 1;
            } else {
                // Remove short words
                def.splice(dc, 1);
                glo.splice(gc, 1);
            }
        } else if (glo[gc] < def[dc]) {
            gc += 1;
        } else {
            dc += 1;
        }
    }
    return score;
}

/****************** DOM Helpers *****************/

/*
 * containsText(node) is a W3C DOM compliant cross-browser way of testing
 * whether node.innerText is empty, though it may well be slower (no
 * benchmarking); however it is almost always faster than comparing
 * extractText(node) with "".
 */
/**
 * containsText(node) is a W3C DOM compliant cross-browser way of asking
 * whether node.innerText holds any letters, though it may well be slower (no
 * benchmarking); however it is almost always faster than testing
 * extractText(node).
 *
 * Only letters count: digits, punctuation and whitespace are "no text".
 * Besides ASCII letters every character from U+00C0 upwards counts, except
 * the punctuation/symbol blocks U+2000-U+2BFF and the private use area. The
 * original counted ASCII letters only, so Cyrillic, Greek, CJK, ... content
 * was treated as empty and dropped.
 *
 * @param {Node} el node to inspect (comment nodes never contain text).
 * @returns {boolean} true when the node or a descendant has a letter.
 */
function containsText(el) {
    var letters = /[a-zA-Z\u00C0-\u1FFF\u2C00-\uDFFF\uF900-\uFFDC]/;
    var kids = el.childNodes || [];
    for (var i = 0; i < kids.length; i++) {
        var nod = kids[i];
        if (nod.nodeName.toUpperCase() === '#TEXT') {
            if (nod.nodeValue && letters.test(nod.nodeValue)) return true;
        } else if (nod.nodeName.indexOf('#') !== 0) {
            if (containsText(nod)) return true;
        }
    }
    if (el.nodeName && el.nodeName.toUpperCase() === '#COMMENT') return false;
    return Boolean(el.nodeValue && letters.test(el.nodeValue));
}

/**
 * extractText(node) is a W3C DOM compliant cross-browser way of
 * saying (node.innerText) though it may well be slower (no benchmarking).
 *
 * @param {Node} el node whose descendants are read.
 * @returns {string} concatenated text of all descendant text nodes;
 *          comments are skipped.
 */
function extractText(el) {
    var output = "";
    var kids = el.childNodes || [];
    for (var i = 0; i < kids.length; i++) {
        var nod = kids[i];
        if (nod.nodeName.toUpperCase() === '#TEXT') {
            output += nod.nodeValue;
        } else if (nod.nodeName.indexOf('#') !== 0) {
            output += extractText(nod);
        }
    }
    return output;
}

/**
 * node.getChildByClass is essentially document.getElementById but locally:
 * returns the first direct child carrying the class.
 *
 * The class is matched as one of the child's space separated class names, so
 * class="ib-content qualifier-content" is found when asking for either name.
 *
 * @param {Node} node parent to search (may be missing).
 * @param {string} clsname class name to look for.
 * @returns {Node|undefined} the child, or undefined when there is none.
 */
function getChildByClass(node, clsname) {
    if (!node) return;
    for (var i = 0; i < node.childNodes.length; i++) {
        var cls = node.childNodes[i].className;
        if (typeof cls === 'string' &&
            (' ' + cls + ' ').indexOf(' ' + clsname + ' ') > -1) {
            return node.childNodes[i];
        }
    }
}

/**
 * Copies nodes from one place to another, avoiding id repetition and script
 * attached event handlers: ids get a "C" appended, display/visibility are
 * dropped from inline styles and every other attribute is copied as is.
 * Nodes that are neither elements nor text (comments) become an empty text
 * node.
 *
 * @param {Node} onode node to copy.
 * @returns {Node} the deep copy.
 */
function cloneNode(onode) {
    if (onode.nodeType === 3) {
        return document.createTextNode(onode.nodeValue);
    }
    if (onode.nodeType !== 1) {
        // A comment.
        return document.createTextNode('');
    }

    var ret = document.createElement(onode.nodeName);
    if (onode.className) ret.className = onode.className;
    for (var a = 0; a < onode.attributes.length; a++) {
        var attr = onode.attributes[a];
        if (attr.name === 'id') {
            ret.setAttribute(attr.name, attr.value + 'C');
        } else if (attr.name === 'style') {
            // `g`: drop both properties when both are set.
            ret.style.cssText = onode.style.cssText
                .replace(/(display|visibility)\s*:[^;]+/g, '');
        } else {
            ret.setAttribute(attr.name, attr.value);
        }
    }
    for (var c = 0; c < onode.childNodes.length; c++) {
        ret.appendChild(cloneNode(onode.childNodes[c]));
    }
    return ret;
}

/**************** en.wikt specifics **************/

/**
 * Each possible title (at any level) and which handler it should use.
 * Keys are lower-cased headings with "_" for spaces; values name an entry of
 * wtp.handler.
 */
wtp.section = {
    //From User:AutoFormat/Headers thanks Ullman!!
    //English POS
    'noun': 'pos', 'noun_form': 'pos', 'noun_phrase': 'pos', 'proper_noun': 'pos',
    'prenoun': 'pos',
    'verb': 'pos', 'verb_form': 'pos', 'verb_phrase': 'pos',
    'preverb': 'pos',
    'transitive_verb': 'pos', 'intransitive_verb': 'pos',
    'adjective': 'pos', 'adjective_form': 'pos', 'adjective_phrase': 'pos',
    'adverb': 'pos', 'adverb_phrase': 'pos',
    'pronoun': 'pos', 'conjunction': 'pos', 'contraction': 'pos',
    'interjection': 'pos', 'article': 'pos', 'preposition': 'pos',
    'prefix': 'pos', 'suffix': 'pos', 'affix': 'pos', 'infix': 'pos',
    'idiom': 'pos', 'phrase': 'pos',
    'acronym': 'pos', 'abbreviation': 'pos', 'initialism': 'pos',
    'symbol': 'pos', 'letter': 'pos',
    'numeral': 'pos', 'ordinal_numeral': 'pos', 'cardinal_numeral': 'pos',
    'number': 'pos', 'ordinal_number': 'pos', 'cardinal_number': 'pos',
    //Other POS
    'particle': 'pos', 'proverb': 'pos', 'han_character': 'pos',
    'kanji': 'pos', 'hanzi': 'pos', 'hanja': 'pos',
    'pinyin': 'pos', 'pinyin_syllable': 'pos', 'syllable': 'pos',
    'katakana_character': 'pos', 'hiragana_letter': 'pos',
    'hiragana_character': 'pos',
    'counter': 'pos', 'classifier': 'pos', 'adnominal': 'pos', 'determiner': 'pos',
    'expression': 'pos', 'postposition': 'pos', 'root': 'pos', 'participle': 'pos',
    //More interesting stuff
    'synonyms': 'thesaurus', 'antonyms': 'thesaurus',
    'translations': 'translations',
    'translations_to_be_checked': 'trivia', //this puts them by PoS not Homonym
    'etymology': 'etymology',
    'conjugation': 'trivia',
    'inflection': 'trivia', 'declension': 'trivia',
    'participles': 'trivia', 'infinitives': 'trivia',
    'alternative_forms': 'trivia',
    'alternative_spellings': 'trivia',
    'pronunciation': 'trivia',
    'derived_terms': 'trivia',
    'related_terms': 'trivia',
    'descendants': 'trivia',
    'mutation': 'trivia',
    'compounds': 'trivia',
    'abbreviations': 'trivia', 'forms': 'trivia',
    'hypernyms': 'trivia', 'hyponyms': 'trivia', 'meronyms': 'trivia',
    'homonyms': 'trivia', 'holonyms': 'trivia', 'troponyms': 'trivia',
    'homophones': 'trivia', 'hyphenation': 'trivia',

    'devanagari_spelling': 'trivia', 'urdu_spelling': 'trivia',
    'cyrillic_spelling': 'trivia', 'roman_spelling': 'trivia',

    'kanji_reading': 'trivia',
    'scientic_names': 'trivia',   // historical misspelling, kept for compatibility
    'scientific_names': 'trivia',
    'proverbs': 'trivia', 'expressions': 'trivia', 'coordinate_terms': 'trivia',
    'see_also': 'trivia',
    'external_links': 'trivia', 'references': 'trivia',
    'names_in_other_languages': 'trivia', 'variants_and_pet_forms': 'trivia',
    'anagrams': 'trivia', 'trivia': 'trivia', 'shorthand': 'trivia',
    'usage_notes': 'trivia', 'dictionary_notes': 'trivia', 'quotations': 'trivia'
    //There are a few missing, I got bored ;)
};

/**
 * Handler for part of speech sections: registers the section as a set and
 * every <li> of its ordered lists as a homonym (tagged with the current
 * etymology). Elements before the first list are assumed to be the
 * inflection line; anything else containing text is marked "Unparsable".
 *
 * @param {Element} el the section's content node.
 * @param {string} title the section's layout key.
 */
wtp.handler['pos'] = function (el, title) {
    var start = true;
    wtp_registerSet(el, title);
    for (var i = 0; i < el.childNodes.length; i++) {
        var p = el.childNodes[i];
        if (p.nodeName.toUpperCase() === 'OL') {
            start = false;
            for (var j = 0; j < p.childNodes.length; j++) {
                var li = p.childNodes[j];
                if (li.nodeName.toUpperCase() === 'LI') {
                    var hnym = wtp_registerHomonym(li, extractText(li));
                    hnym.etyTitle = wtp.curEty;
                } else if (li.nodeName.indexOf('#') !== 0 && containsText(li)) {
                    li.className = "Unparsable";
                }
            }
        } else if (p.nodeName.indexOf('#') !== 0) {
            if (p.className === 'infl-table' ||
                p.className === 'infl-inline' ||
                (p.nodeName.toUpperCase() === 'P' && p.firstChild &&
                 p.firstChild.className === 'infl-inline')) {
                start = false; // Skip element
            } else if (start) {
                p.className = "infl-inline"; // Guess first section is inflection
            } else if (containsText(p)) {
                p.className = "Unparsable";
            }
        }
    }
};

// Layout key of the etymology section being parsed; this is to let the paper
// view differentiate between homonyms.
wtp.curEty = '';

/**
 * Handler for etymology sections: a new etymology closes the previous group
 * of homonyms, then the section itself becomes a set bit.
 */
wtp.handler['etymology'] = function (el, title) {
    wtp_matchCurrent();
    wtp.curEty = title;
    wtp_registerSetBit(el, title);
};

/**
 * Handler for sections that are simply attached to their set as a tab.
 */
wtp.handler['trivia'] = function (el, title) {
    wtp_registerSetBit(el, title);
};

/**
 * Handler for translation sections: every NavFrame is handed to the navframe
 * handler. If everything was understood the (now empty) section is removed,
 * otherwise the whole section becomes a set bit.
 */
wtp.handler['translations'] = function (el, title) {
    var success = 1;
    for (var i = 0; i < el.childNodes.length; i++) {
        var div = el.childNodes[i];
        if (div.nodeName.indexOf('#') !== 0) {
            if (div.nodeName.toUpperCase() === "DIV" && div.className === 'NavFrame') {
                success *= wtp.handler['navframe'](div, title);
                // Step back only if the frame really left the section; the
                // original always did, which never ends on a rejected frame.
                if (div.parentNode !== el) i--;
            } else {
                success = 0;
            }
        } else if (containsText(div)) {
            success = 0;
        }
    }
    if (success) {
        if (el.parentNode) el.parentNode.removeChild(el);
    } else {
        wtp_registerSetBit(el, title);
    }
};

/**
 * Handler for a NavFrame (collapsible box): the text of its NavHead is the
 * gloss, its NavContent is registered as a homonym section and the frame is
 * removed from the page.
 *
 * NOTE(review): the first half of the loop was lost when the page was
 * extracted. The original also rejected a frame through some
 * "...indexOf(...) > -1" test whose operands are unknown; only the "content
 * without a head" case is rejected here.
 *
 * @param {Element} el the NavFrame.
 * @param {string} title layout key of the section it belongs to.
 * @returns {number} 1 when the frame was parsed, 0 otherwise.
 */
wtp.handler['navframe'] = function (el, title) {
    var head = false; // To collect NavHead and NavContent
    var body = false;
    var success = 1;  // If parse was successful [1 on success, 0 on failure]

    for (var i = 0; i < el.childNodes.length; i++) {
        var div = el.childNodes[i];
        if (div.nodeName.indexOf('#') !== 0) {
            if (div.className === 'NavHead') {
                head = div;
            } else if (div.className === 'NavContent') {
                if (!head) {
                    success = 0;
                } else {
                    body = div;
                    body.style.cssText = "";
                    body.id = "=" + body.id;
                    if (el.parentNode) el.parentNode.removeChild(el);
                    wtp_registerHomonymSection(body, extractText(head), title);
                }
            } else if (containsText(div)) {
                success = 0;
                div.className = "Unparsable";
            }
        } else if (containsText(div)) {
            success = 0;
        }
    }
    return (head && body) ? success : 0;
};

/**
 * Handler for synonym/antonym sections: each list item tagged with a sense
 * ("ib-content" or "qualifier-content") is stripped of that tag and
 * registered as a homonym section under the sense text. If everything was
 * understood the section is removed, otherwise it becomes a set bit.
 */
wtp.handler['thesaurus'] = function (el, title) {
    var success = true;
    for (var i = 0; i < el.childNodes.length; i++) {
        var ol = el.childNodes[i];
        var name = ol.nodeName.toUpperCase();
        if (name === 'UL') {
            for (var j = 0; j < ol.childNodes.length; j++) {
                var li = ol.childNodes[j];
                if (li.nodeName.toUpperCase() !== 'LI') {
                    // Whitespace between the items is fine, anything else is not.
                    if (containsText(li)) {
                        success = false;
                        el.className = "Unparsable";
                    }
                    continue;
                }
                var senseTag = getChildByClass(li, 'ib-content');
                if (!senseTag) senseTag = getChildByClass(li, 'qualifier-content');
                if (!senseTag) {
                    success = false;
                    el.className = "Unparsable";
                    continue;
                }
                // Strip the sense tag: everything up to and including the
                // colon, or up to the tag itself when there is no colon.
                var last = -1;
                for (var k = 0; k < li.childNodes.length; k++) {
                    var fc = li.childNodes[k];
                    if (fc.className === 'ib-colon' ||
                        fc.className === 'sense-qualifier-colon') {
                        last = k;
                        break;
                    }
                    if (fc === senseTag) last = k;
                }
                for (; last >= 0; last--) {
                    li.removeChild(li.childNodes[0]);
                }
                if (containsText(li)) {
                    wtp_registerHomonymSection(li, extractText(senseTag), title);
                }
            }
        } else if (name === 'DIV' && ol.className === 'NavFrame') {
            // The original passed an undefined variable here.
            if (!wtp.handler['navframe'](ol, title)) success = false;
            if (ol.parentNode !== el) i--; // the frame has been detached
        } else if (containsText(ol)) {
            // Something we don't understand
            success = false;
            el.className = "Unparsable";
        }
    }
    if (success) {
        if (el.parentNode) el.parentNode.removeChild(el);
    } else {
        wtp_registerSetBit(el, title);
    }
};

/**************** General Parser Functions **************/
var parsers = {};

/**
 * This section will be split to a different file if necessary.
 * Parsers should use getContentNode to get the page content; this ensures
 * that the content node remains available for others to use.
 *
 * @returns {Node|boolean} a copy of #bodyContent made with cloneNode, or
 *          false when the page has no such element.
 */
function parsers_getContentNode() {
    var bc = document.getElementById('bodyContent');
    if (bc) return cloneNode(bc);
    return false;
}

/*
 * View registration: registerView allows parsers to add a view tab at the
 * top of the page; it should be called instead of manually appending an
 * output to the DOM. An optional function may be specified, which will be
 * run as funct(node,title) before the output is made visible.
 */
/**
 * Switch to a registered view. Private helper shared by
 * parsers_registerView and parsers_tabParser.
 *
 * @param {string} vw key in parsers.view.
 * @param {boolean} persist whether to remember the choice in the
 *        'wtParserView' cookie.
 */
function parsers_activateView(vw, persist) {
    var next = parsers.view[vw];
    if (!next) return;

    if (parsers.currentView) {
        parsers.currentView[1].className = "ParserTabHidden";
        parsers.currentView[0].className = "ParserViewHidden";
    }
    parsers.currentView = next;
    if (persist) setCookie('wtParserView', next[1].getAttribute('id'));
    try {
        if (typeof next[2] === 'function') next[2](next[0], next[1]);
    } finally {
        // Even if the callback throws, never leave the page without a view.
        next[1].className = "ParserTabShown";
        next[0].className = "ParserViewShown";
    }
}

/**
 * registerView allows parsers to add a view tab at the top of the page;
 * it should be called instead of manually appending an output to the DOM.
 * An optional function may be specified, which will be run as
 * funct(node,tab) before the output is made visible.
 *
 * The first call wraps #bodyContent as the "Unchanged" view. That view is
 * shown as long as no stored preference exists; the original left every view
 * hidden in that case, so a first-time visitor saw nothing but the tabs.
 *
 * @param {Element} node the view's output node.
 * @param {string} title tab caption; with whitespace replaced by "_" it is
 *        also the view's key and id suffix.
 * @param {Function|boolean} [funct] callback run each time the view is shown.
 * @param {boolean} [selected] show this view when no preference is stored.
 * @param {boolean} [first] internal: the view is the wrapped original content.
 */
function parsers_registerView(node, title, funct, selected, first) {
    if (!parsers.view) {
        var bc = document.getElementById('bodyContent');
        // Create tabs at top of page
        parsers.tabs = document.createElement("div");
        parsers.tabs.className = "ParserTabs";
        bc.parentNode.insertBefore(parsers.tabs, bc);
        var obc = document.createElement("div");
        bc.parentNode.insertBefore(obc, bc);
        bc.parentNode.removeChild(bc);
        obc.appendChild(bc);
        parsers.view = {};
        parsers.insertPoint = obc;
        parsers_registerView(obc, "Unchanged", false, false, true);
        // Default view; not stored, so a later `selected` view still wins.
        if (!getCookie('wtParserView')) parsers_activateView("Unchanged", false);
    }

    // Create tab for new view.
    var tab = document.createElement("span");
    tab.className = "ParserTabHidden";
    tab.appendChild(document.createTextNode(title));
    // `g`: titles of three or more words must not keep spaces in their id.
    title = title.replace(/[ \s\r\n]+/g, '_');
    var tabId = "ParserTab-" + title;
    tab.setAttribute("id", tabId);
    node.className = "ParserViewHidden";
    node.setAttribute("id", "ParserView-" + title);

    var onClick = function () { parsers_tabParser(tab); };
    if (tab.addEventListener) {
        tab.addEventListener('click', onClick, false);
    } else if (tab.attachEvent) {
        tab.attachEvent('onclick', onClick); // old IE
    }

    if (!first) {
        parsers.tabs.appendChild(document.createTextNode(' • '));
        parsers.insertPoint.parentNode.insertBefore(node, parsers.insertPoint);
    }
    parsers.tabs.appendChild(tab);
    parsers.view[title] = [node, tab, funct];

    var stored = getCookie('wtParserView');
    if ((selected && !stored) || stored === tabId) {
        parsers_tabParser(tab);
    }
}

/**
 * Essentially the onclick handler for parser view tabs; this switches between
 * the different displays (using hidden/block display on the divs),
 * pre-executes any provided function and remembers the choice in the
 * 'wtParserView' cookie.
 *
 * @param {Element} tab a tab created by parsers_registerView.
 */
function parsers_tabParser(tab) {
    var vw = tab.getAttribute("id").replace(/^ParserTab-/, "");
    parsers_activateView(vw, true);
}

// Get this show on the road: run the parser once the page has loaded, but
// only when viewing an existing entry in the main namespace.
(function () {
    // Read a MediaWiki configuration value: mw.config when it exists,
    // otherwise the legacy global variable of the same name.
    function config(name) {
        if (typeof mw !== 'undefined' && mw && mw.config && typeof mw.config.get === 'function') {
            return mw.config.get(name);
        }
        return typeof window !== 'undefined' ? window[name] : undefined;
    }

    if (config('wgAction') == 'view' && config('wgNamespaceNumber') == 0 &&
        config('wgArticleId') != 0) {
        if (typeof addOnloadHook === 'function') {
            addOnloadHook(wtp_init);
        } else if (typeof document !== 'undefined' && document.readyState === 'complete') {
            wtp_init();
        } else if (typeof window !== 'undefined' && window.addEventListener) {
            window.addEventListener('load', wtp_init, false);
        }
    }
})();