// User:Hippietrail/vector.js

// to prevent interpretation of template syntax // Hide the sidebar if I'm running on a screen smaller than my new netbook if (screen.width < 1366) { mw.loader.load('//en.wikipedia.org/w/index.php?title=User:PleaseStand/hide-vector-sidebar.js&action=raw&ctype=text/javascript'); }

// Once the DOM is ready:
//  * start the section-sorting setup as soon as the WikiEditor toolbar
//    announces that its initial sections exist, and
//  * highlight links in the "what links here" list whose text contains
//    characters from the script currently of interest.
$(document).ready(function () {
  "use strict";

  // collate requested entries when editing
  $('#wpTextbox1').on('wikiEditor-toolbar-doneInitialSections', function () {
    console.log('edit toolbar appeared');

    sectionSorting();
  });

  // highlight my current language of interest's entries in what links here list
  $('#mw-whatlinkshere-list > li > a').each(function () {
    /* Alternative highlights kept for reference (currently disabled):
    if (this.text.match(/[\u10D0-\u10FB]/))
      $(this).css('background-color', 'yellow');
    else if (this.text.match(/[\uAC00-\uD7AF]/))
      $(this).css('background-color', 'palegreen');
    else if (this.text.match(/[ăâîşșţțĂÂÎŞȘŢȚ]/))
      $(this).css('background-color', 'magenta');
    else if (this.text.match(/[Ӏ]/))
      $(this).css('background-color', 'cyan');
    */
    // active highlight: any character in U+1780..U+17FF
    if (this.text.match(/[\u1780-\u17FF]/)) {
      $(this).css('background-color', 'palegreen');
    }
  });
});

/**
 * Console entry point: sorts the section in the edit box for a given language.
 *
 * Builds a language object from the code, initialises it with initLang and,
 * if that succeeds, sorts the contents of the #wpTextbox1 textarea. Feedback
 * from the sorter is only logged to the console.
 *
 * @param {string} langCode - language code to collate with
 */
function sectSortCons(langCode) {
  var lang = { code: langCode, how: 'console' };

  // no usable collator for this code: nothing to do
  if (!initLang(lang)) {
    return;
  }

  sortSectionInTextarea(document.getElementById('wpTextbox1'), lang, function (fb) {
    console.log('sectSortConsole feedback:', fb);
  });
}

/**
 * Called, with no arguments, when the edit toolbar is loaded.
 *
 * Runs the language guesser and, once a language with a code is known and
 * initLang accepts it, hands off to installSortButton.
 */
function sectionSorting() {
  "use strict";

  // guessSectLang can be synchronous (plain object) or asynchronous (promise),
  // so wrap its *result* in $.when to treat both the same way
  $.when(guessSectLang()).done(function (lang) {
    if (lang && lang.code) {
      if (initLang(lang)) {
        installSortButton(lang);
      }
    } else {
      console.log('passed no lang from guesser');
    }
  }).fail(function () {
    // the asynchronous lookup was rejected; say so instead of failing silently
    console.log('language guesser failed');
  });
}

/**
 * Called when the edit toolbar is loaded, the language is guessed, and the
 * collator is created. Adds a "Collate" button to the toolbar; clicking it
 * sorts the textarea contents and outlines the button in the feedback colour.
 *
 * @param {Object} lang - initialised language object (passed on to
 *   sortSectionInTextarea)
 */
function installSortButton(lang) {
  "use strict";

  // add sort button to toolbar
  // based on a mediawiki wiki example
  $('#wpTextbox1').wikiEditor('addToToolbar', {
    'section': 'main',
    'group': 'format',
    'tools': {
      'collate': {
        label: 'Collate',
        type: 'button',
        icon: '//upload.wikimedia.org/wikipedia/commons/2/2b/Button_arrow_down.PNG',
        action: {
          type: 'callback',
          execute: function (context) {
            sortSectionInTextarea(context.$textarea[0], lang, function (colour) {
              console.log('got some feedback:', colour);
              // .first().css() is a no-op on an empty set, so a missing
              // button no longer throws
              $('[rel="collate"]').first().css('outline', '2px solid ' + colour);
            });
          }
        }
      }
    }
  });
}

/**
 * Tries to sort the text in an HTML DOM textarea.
 *
 * Splits the value into lines, passes them to sortSectionAsArray and, if a
 * sorted array comes back, writes it back joined with newlines. Status strings
 * reported by the sorter are translated into colours for the caller.
 *
 * @param {{value: string}} ta - the textarea (only its `value` is used)
 * @param {Object} lang - initialised language object
 * @param {function(string)} feedback - receives 'blue' (was already sorted),
 *   'lightgreen' (put in order) or 'red' (exception)
 */
function sortSectionInTextarea(ta, lang, feedback) {
  "use strict";

  var colourFor = {
    'was already sorted': 'blue',
    'put in order': 'lightgreen',
    'exception': 'red'
  };
  var lines = ta.value.split('\n');

  var sorted = sortSectionAsArray(lines, lang, function (stuff) {
    console.log('five calling back to four with:', stuff);
    if (Object.prototype.hasOwnProperty.call(colourFor, stuff)) {
      feedback(colourFor[stuff]);
    } else {
      console.log('unexpected stuff:', stuff);
    }
  });

  // nothing is returned when the text was already sorted or sorting failed;
  // leave the textarea untouched in those cases
  if (sorted) {
    ta.value = sorted.join('\n');
  }
}

/**
 * Strips header, footer, and dividers from the array, sorts what is left,
 * then puts header, footer, and dividers back.
 *
 * @param {string[]} lines - section lines; modified in place by stripHFD
 * @param {Object} lang - language object; `lang.patterns` supplies the
 *   regexes whose first capture group is the collation key, and the object
 *   is passed on to compareItems
 * @param {function(string)} callback - receives 'was already sorted',
 *   'put in order' or 'exception'
 * @returns {string[]|undefined} the reconstructed lines, or undefined when
 *   the input was already sorted or an exception occurred
 */
function sortSectionAsArray(lines, lang, callback) {
  "use strict";

  var hfd = stripHFD(lines), // modifies "lines"
    grouped = [],            // lines with continuation lines folded in
    indexed,                 // { k: collation key, l: line } per entry
    sorted,                  // lines stripped + sorted
    reconstructed;           // sorted lines with HFD restored

  // group lines starting with "*:" or "**" as part of the previous line
  lines.forEach(function (line, i) {
    if (i && /^\*[:\*]/.test(line)) {
      grouped[grouped.length - 1] += '\n' + line;
    } else {
      grouped.push(line);
    }
  });

  try {
    // build an index - will throw an exception if it can't
    indexed = grouped.map(function (entry) {
      var key;

      lang.patterns.some(function (re) {
        var m = entry.match(re);
        // a pattern without a usable capture group cannot provide a key,
        // so keep looking rather than accepting an undefined key
        if (m && typeof m[1] === 'string') {
          key = m[1];
          return true; // terminate array.some
        }
        return false;
      });

      if (typeof key !== 'string') {
        // didn't match any of the re's
        throw new Error('can\'t find a collation key for line "' + entry + '"');
      }
      return { k: key, l: entry };
    });
    // TODO string compare should treat foobar foo-bar foo bar as equivalent

    // is it already sorted? (comparators are only required to return a
    // negative/zero/positive number, and equal neighbours are still in order)
    if (indexed.every(function (e, i, ar) {
      return i === 0 || compareItems(lang, ar[i - 1].k, e.k) <= 0;
    })) {
      callback('was already sorted');
    } else {
      // sort indexed data using the collator
      // and keep only the sorted lines
      sorted = indexed.sort(function (a, b) {
        return compareItems(lang, a.k, b.k);
      }).map(function (e) {
        return e.l;
      });
      callback('put in order');
      reconstructed = restoreHFD(sorted, hfd);
      // TODO how to check whether dividers moved
    }
  } catch (ex) {
    console.log('caught exception:', ex);
    callback('exception');
  }

  return reconstructed;
}

/**
 * Strips out header, footer, and dividers.
 *
 * Each helper removes its part from `lines` in place, in the order
 * header, footer, dividers.
 *
 * @param {string[]} lines - section lines; modified in place
 * @returns {{header: *, footer: Array, dividers: Array}} the removed parts
 */
function stripHFD(lines) {
  "use strict";

  var hfd = {};

  // header is the == a == part
  // TODO a blank line after == a == seems to be acceptable
  hfd.header = stripSortHeader(lines);

  // footer is all the categories and interlanguage links with optional
  // blank lines before, after, and between them
  hfd.footer = stripSortFooter(lines);

  // dividers are the "top", "mid", "bottom" templates for splitting lists into columns
  hfd.dividers = stripSortDividers(lines);

  return hfd;
}

/**
 * Removes and returns the first line, which is treated as the section header.
 * Also modifies the input "lines".
 *
 * @param {string[]} lines - section lines; the first element is removed
 * @returns {string|undefined} the removed line (undefined for an empty array)
 */
function stripSortHeader(lines) {
  return lines.shift();
}

/**
 * Removes the footer from the end of "lines" and returns it in original order.
 * Also modifies the input "lines".
 *
 * Works from the bottom up, taking at most one line per step except for the
 * runs of [[...:...]] links.
 *
 * NOTE(review): the two /^\s*$/ tests below look truncated (their comments
 * mention a separator between language sections and an attention line);
 * they are kept exactly as found — confirm against the original script.
 *
 * TODO allow for categories inside language sections before
 *
 * @param {string[]} lines - section lines; footer lines are popped off
 * @returns {string[]} the removed footer lines, top to bottom
 */
function stripSortFooter(lines) {
  var footer = [],
    pass;

  // moves the last line onto the front of the footer when it passes `test`
  function takeLastIf(test) {
    if (lines.length && test(lines[lines.length - 1])) {
      footer.unshift(lines.pop());
      return true;
    }
    return false;
  }

  function isEmpty(line) {
    return line === '';
  }

  function isNamespacedLink(line) {
    return /^\[\[.*:.*]]\s*$/.test(line);
  }

  // don't double escape this literal regex!
  function isBlank(line) {
    return /^\s*$/.test(line);
  }

  // from the bottom up!

  // categories and interlanguage links with optional blank line after each
  for (pass = 0; pass < 2; ++pass) {
    takeLastIf(isEmpty);
    while (takeLastIf(isNamespacedLink)) {
      // keep taking link lines until a non-link line is reached
    }
  }

  // rather than cats and interlangs there could be a between lang sections
  takeLastIf(isEmpty);
  takeLastIf(isBlank);

  // attention line after content and before any cats and interlangs
  takeLastIf(isEmpty);
  takeLastIf(isBlank);

  // optional blank line after content and before any footer stuff
  takeLastIf(isEmpty);

  return footer;
}

/**
 * Removes every divider line from "lines" and returns the removed lines in
 * their original order. Also modifies the input "lines".
 *
 * NOTE(review): both divider patterns are currently '^\\s*$' and look
 * truncated (the comment next to them talks about der-top / der-mid);
 * they are kept exactly as found — confirm against the original script.
 *
 * @param {string[]} lines - section lines; divider lines are spliced out
 * @returns {string[]} the removed divider lines
 */
function stripSortDividers(lines) {
  // regexes to use for multi-column dividers
  var divRegexes = [
      // der-top can be followed by "|details" or a number "3", der-mid can only have a number
      '^\\s*$',
      '^\\s*$'
    ].map(function (source) {
      // compile once instead of once per line
      return new RegExp(source);
    }),
    divs = [],
    // we want this function to modify the original array so we need a copy
    // since we're returning the dividers it's a hassle to also return a new
    // version of the array
    // TODO we could just do a for loop in reverse
    dupeLines = lines.slice(),
    prevPos;

  dupeLines.forEach(function (line, i) {
    var isDivider = divRegexes.some(function (re) {
      return re.test(line);
    });

    if (!isDivider) {
      return;
    }

    console.log('++ divider #' + divs.length + ' @ pos ' + i + (typeof prevPos !== 'undefined' ? ' run ' + (i - prevPos) : ''));
    prevPos = i;
    // 'lines' gets shorter each time 'divs' gets bigger, so the index into
    // 'lines' is the index into the copy minus the dividers removed so far;
    // splice returns an array so use concat rather than push
    divs = divs.concat(lines.splice(i - divs.length, 1));
  });

  return divs;
}

/**
 * Compares two collation keys with the language's collator.
 *
 * Hyphens and spaces are ignored in the comparison unless that makes the keys
 * equal. Apostrophes are currently ignored too, but that can be made
 * per-language if need be.
 *
 * @param {Object} lang - language object with an Intl.Collator in `collator`
 * @param {string} a - first key
 * @param {string} b - second key
 * @returns {number} negative, zero or positive, as returned by the collator
 */
function compareItems(lang, a, b) {
  // don't ignore spaces for Vietnamese
  var ignorable = lang.collator.resolvedOptions().locale === 'vi' ? /[-']/g : /[-' ]/g;

  // compare the keys with the ignorable characters removed ...
  return lang.collator.compare(a.replace(ignorable, ''), b.replace(ignorable, '')) ||
    // ... and fall back to the unmodified keys when that leaves them equal
    lang.collator.compare(a, b);
}

/**
 * Restores header, footer, and dividers around the sorted lines.
 *
 * With N dividers the sorted lines are spread over N - 1 columns of nearly
 * equal size: the first divider opens the list and one divider follows each
 * column. Dividers are never dropped: any that are left over (fewer lines
 * than columns, or no lines at all) are appended after the last column.
 *
 * @param {string[]} sorted - sorted content lines (not modified)
 * @param {{header: *, footer: (Array|undefined), dividers: (Array|undefined)}} hfd
 *   parts previously stripped from the section
 * @returns {Array} header, content with dividers, then footer
 */
function restoreHFD(sorted, hfd) {
  "use strict";

  var dividers = hfd.dividers || [],
    header = hfd.header,
    footer = hfd.footer,
    // work on a copy so the caller's array is left alone
    result = dividers.length ? intersperseDividers(sorted, dividers) : sorted.slice();

  // prepend header
  result.unshift(header);

  // append footer
  if (typeof footer !== 'undefined') {
    result = result.concat(footer);
  }

  return result;
}

// Splits `items` into columns separated by `dividers` (expects at least one divider).
function intersperseDividers(items, dividers) {
  "use strict";

  // a single divider still means one column; never divide by zero
  var columnsLeft = Math.max(dividers.length - 1, 1),
    out = [dividers[0]],
    nextDivider = 1,
    pos = 0,
    size;

  while (pos < items.length) {
    size = Math.ceil((items.length - pos) / columnsLeft);
    out = out.concat(items.slice(pos, pos + size));
    pos += size;
    if (nextDivider < dividers.length) {
      out.push(dividers[nextDivider++]);
    }
    columnsLeft = Math.max(columnsLeft - 1, 1);
  }

  // keep any dividers that were not needed to separate columns
  while (nextDivider < dividers.length) {
    out.push(dividers[nextDivider++]);
  }

  return out;
}

/**
 * Builds the list of regex sources used to extract the collation term from a
 * list line. The term is expected in capture group 1.
 *
 * NOTE(review): several entries ('^\\*\\s*.*', '^\\*\\s*') have no capture
 * group and look truncated; they are kept exactly as found — confirm against
 * the original script.
 *
 * @param {string} langCode - language code embedded in the patterns
 * @returns {string[]} regex source strings, tried in order
 */
function getIndexingPatterns(langCode) {
  // the code becomes part of a regex, so neutralise any regex metacharacters
  // (a no-op for ordinary codes made of letters and hyphens)
  var code = String(langCode).replace(/[\\^$.*+?()[\]{}|\/]/g, '\\$&');

  // regexes to use for all languages to extract terms
  var termRegexes = [
    '^\\*\\s*{{[lmt]\\|' + code + '\\|(.*?)[\\|}].*',
    '^\\*\\s*\\[\\[(?:w:)?(.*?)[\\|\\]].*',
    '^\\*\\s*{{' + code + '-[lr]\\|(.*?)[\\|}].*',
    '^\\*\\s*{{term\\|(.*?)\\|lang=' + code + '[\\|}].*',
    '^\\*\\s*{{link\\|' + code + '\\|(.*?)[\\|}].*',
    '^\\*\\s*{{l/' + code + '(?:/[A-Z][a-z][a-z][a-z])?\\|(.*?)[\\|}].*',
    '^\\*\\s*.*',
    '^\\*\\s*',
    '^\\*\\s*',
    '^\\*\\s*\'\'\\[\\[(.*?)]]\'',
    '^\\*\\s*\\[\\[w:.*?\\|(.*?)]]',
    '^\\*\\s*{{' + code + '-link\\|(.*?)[\\|}].*'
  ];

  // regexes to use for particular languages to extract terms
  if (langCode === 'en') {
    termRegexes.push('^\\*\\s*{{REEHelp\\|(.*?)[\\|}].*');
  } else if (langCode === 'ar') {
    termRegexes.push('^\\*\\s*.*');
  }

  return termRegexes;
}

/**
 * Guesses the language of the section being edited.
 *
 * Can return a language object or a promise for one:
 *  - title "Requested entries (<name>)": the code comes from a small built-in
 *    table, then from the wgULSLanguages map, and otherwise from an
 *    asynchronous API request (promise; how: 'ajax+lua');
 *  - any other title: the code is looked for in the wikitext of the
 *    #wpTextbox1 textarea (how: 'textarea').
 * When nothing is found the result has how: 'failed' and no code.
 *
 * @returns {Object} `{ name, code, how }` (name is omitted for 'textarea'),
 *   or a promise resolving to such an object
 */
function guessSectLang() {
  "use strict";

  var m,        // regex match scratch
    ulsLangs,
    langName,   // language name
    langCode,   // language code
    title,
    langcodeRegexes,
    ta,
    lines,
    header,
    goodHeader;

  //{ ## pseudocode ##
  // if (name = from page title) {
  //  code = name to code via table
  //  if (no code)
  //    code = name to code via ULS
  //  if (code)
  //    RETURN name & code
  //
  //  if (no code) {
  //    code = name to code via AJAX API expandtemplates Lua getByCanonicalName
  //    RETURN name & code
  //  }
  // }
  //
  // else (no name) {
  //  code = from template arg in wikitext in textarea
  //  if (code)
  //    RETURN code
  //  else (no code) {
  //    name = from AJAX API parse sections
  //    TODO name to code as above!!
  //  }
  // }
  //}

  // for requests page the language name is in the page title
  title = mw.config.get('wgTitle') || '';
  m = title.match(/^Requested entries \((.*)\)$/);

  if (m && m[1]) {
    langName = m[1];

    switch (langName) {
      case 'English':
        langCode = 'en';
        break;
      case 'Vietnamese':
        langCode = 'vi';
        break;
    }

    // look for language code via MediaWiki JavaScript ULS
    if (!langCode) {
      console.log('no built-in lang code, trying to get lang code by ULS.');

      ulsLangs = mw.config.get('wgULSLanguages');

      if (ulsLangs) {
        // reverse lookup the language code -> language name map
        // TODO use array.some instead?
        langCode = Object.keys(ulsLangs).filter(function (key) {
          return ulsLangs[key] === langName;
        })[0];
      } else {
        console.log('this wiki doesn\'t have the ULS interface we need.');
      }
    }

    if (langCode) {
      return { name: langName, code: langCode, how: 'local' };
    }

    console.log('still no lang code, trying to use Lua via the MediaWiki API via AJAX ...');

    return new mw.Api().get({
      action: 'expandtemplates',
      format: 'json',
      prop: 'wikitext',
      // NOTE(review): an empty text looks truncated (the pseudocode above
      // mentions getByCanonicalName); kept as found — confirm the intended
      // wikitext against the original script
      text: ''
    }).then(function (result) {
      // returns an empty string as 'wikitext' if language name is unknown
      // see: https://en.wiktionary.org/wiki/Module:languages/templates
      return { name: langName, code: result.expandtemplates.wikitext, how: 'ajax+lua' };
    });
  }

  console.log('can\'t extract language name from page title');

  // regexes to use for all languages to extract language codes
  // for guessing language based on section content
  // NOTE(review): '^\\*\\s*.*' has no capture group and looks truncated;
  // kept as found, and matches without a captured code are skipped below
  langcodeRegexes = [
    // list items
    '^\\*\\s*{{[lmt]\\|(.*?)\\|.*?[\\|}].*',
    '^\\*\\s*{{(.*?)-[lr]\\|.*?[\\|}].*',
    '^\\*\\s*{{term\\|.*?\\|lang=(.*?)[\\|}].*',
    '^\\*\\s*{{link\\|(.*?)\\|.*?[\\|}].*',
    '^\\*\\s*{{l/(.*?)(?:/[A-Z][a-z][a-z][a-z])?\\|.*?[\\|}].*',
    '^\\*\\s*.*',
    '^\\*\\s*{{(.*?)-link\\|.*?[\\|}].*',

    // attention template
    '^{{attention\\|(.*?)[\\|}].*',
    // category link
    '^\\[\\[.*?:([a-z][a-z][a-z]?):.*?]].*'
  ];

  // look in the textarea
  ta = document.getElementById('wpTextbox1');
  lines = ta ? ta.value.split('\n') : [];

  // header is the === See also === part, etc
  // TODO a blank line after == a == seems to be acceptable
  header = lines.shift() || '';

  m = header.match(/^(=*)\s?(.*?)\s?(=*)\s*$/);
  if (m && m[1] === m[3] && m[1].length > 1) {
    // if not the main namespace don't bother checking the heading (variations of ...)
    if (mw.config.get('wgNamespaceNumber') !== 0) {
      goodHeader = true;
    } else if ([
      'Antonyms',
      'Compounds',
      'Derived terms',
      'Hyponyms',
      'Related terms',
      'See also',
      'Synonyms'
    ].indexOf(m[2]) !== -1) {
      goodHeader = true;
    }

    if (goodHeader) {
      // the first line that yields a code wins
      lines.some(function (line) {
        return langcodeRegexes.some(function (re) {
          var found = line.match(re);

          // only a match that actually captured a code counts
          if (found && found[1]) {
            langCode = found[1];
            return true;
          }
          return false;
        });
      });

      if (langCode) {
        return {
          //langName: 'do we need the lang name if we have the name code?',
          code: langCode,
          how: 'textarea'
        };
      }
    }
  }

  return { name: langName, code: langCode, how: 'failed' };
}

/**
 * Gets the collator and indexing patterns for a language.
 *
 * Stores an Intl.Collator for `lang.code` in `lang.collator`. Only when the
 * collator resolves to exactly that code are the indexing patterns stored in
 * `lang.patterns` and success reported.
 *
 * @param {{code: string}} lang - language object; extended in place
 * @returns {boolean} true when both collator and patterns are ready
 */
function initLang(lang) {
  var resolvedLocale;

  try {
    lang.collator = new Intl.Collator(lang.code);
  } catch (ex) {
    // a structurally invalid language tag makes the constructor throw
    if (!(ex instanceof RangeError)) {
      throw ex;
    }
    console.log('can\'t create a collator for code "' + lang.code + '":', ex.message);
    return false;
  }

  // resolvedOptions() is the standard way to ask which locale was picked
  resolvedLocale = lang.collator.resolvedOptions().locale;

  if (resolvedLocale === lang.code) {
    // regexes to use to extract terms
    lang.patterns = getIndexingPatterns(lang.code);
    return true;
  }

  console.log('we tried to get a collator for code "' + lang.code + '", but got one for code "' + resolvedLocale + '"');
  // TODO reflect this failure in the UI somehow?
  return false;
}
//