// User:Matthias Buchmeier/creation.js

/* Version: User:Conrad.Irwin/creation.js 17:06, 16 May 2013. Automatically creates form-of entries based on meta-data within entries. */

/**
 * Escape the values we are passing to AutoEdit.
 *
 * AutoEdit irritatingly doesn't undo %XX encodings, so we can't send them.
 * FIXME: Not a lot we can do about &s; maybe patch AutoEdit.
 */

/**
 * Replace every "&" in the string with "%26".
 *
 * @param {string} get - text that is about to be placed in a query string.
 * @returns {string} the text with all ampersands encoded.
 */
function get_escape (get) {
    return get.replace(/&/g, "%26"); //This shouldn't do anything to variables.
}

/**
 * Backslash-escape the characters \ * + [ ] { } ( ) . and ~ so that the
 * needle is taken literally inside the substitution pattern.
 *
 * @param {string} re - literal text to embed in a pattern.
 * @returns {string} the escaped text.
 */
function clean_regexp (re) {
    return re.replace(/([\\\*\+\[\]\{\}\(\)\.~])/g, "\\$1");
}

/**
 * Prepare a replacement value: backslash-escape "\" and "~" (the delimiter
 * used by variable()), and swap "&" / "#" for subst: templates.
 *
 * NOTE: every "&" is replaced, but only the first "#" is (the "#" pattern has
 * no g flag); that is kept exactly as the original code behaves.
 *
 * @param {string} va - replacement text.
 * @returns {string} the cleaned replacement text.
 */
function clean_variable (va) {
    return va
        .replace(/([\\~])/g, "\\$1")
        .replace(/&/g, '{'+'{subst:⅋}}')
        .replace(/#/, '{'+'{subst:♯}}'); //Yucky HACK
}

/**
 * Build one substitution instruction of the form
 * "s~{ { {needle}}}~replacement~g;" (without the spaces between the braces).
 *
 * @param {string} needle - variable name as it appears between triple braces.
 * @param {string} replacement - the value to substitute.
 * @returns {string} the instruction, passed through get_escape().
 */
function variable (needle, replacement) {
    var pattern = '{'+'{'+'{' + clean_regexp(needle) + '}}}';
    return get_escape('s~' + pattern + '~' + clean_variable(replacement) + '~g;');
}

/**
 * Variables in the templates.
 */

/**
 * The page's language.
 *
 * @param {string} lang - language code.
 * @returns {string} substitution instruction for the "lang" variable.
 */
function set_lang (lang) {
    return variable('lang', lang);
}

/**
 * The optional language parameter to give to a template
 * (either '' for English, or '|lang=xx').
 *
 * @param {string} lang - language code.
 * @param {string} form - form name taken from the "xxx-form-of" class.
 * @returns {string} substitution instruction for the "template-lang" variable.
 */
function set_template_lang (lang, form) {
    if (lang == 'tr' || lang == 'az') {
        //This is HORRIBLE! support for { {inflection of}} which puts its parameters
        // After the linked word (instead of before)
        var inflection_args = {
            'definite-plural': '||definite|p',
            'definite-accusative': '||definite|acc|s',
            'plural-definite-accusative': '||definite|acc|p',
            'dative': '||dat|s',
            'plural-dative': '||dat|p',
            'locative': '||loc|s',
            'plural-locative': '||loc|p',
            'ablative': '||abl|s',
            'plural-ablative': '||abl|p',
            'genitive': '||definite|gen|s',
            'plural-genitive': '||definite|gen|p'
        };
        // Own-property check so that an odd form name cannot hit Object.prototype.
        if (Object.prototype.hasOwnProperty.call(inflection_args, form)) {
            return variable('template-lang', inflection_args[form] + '|lang=' + lang);
        }
        // Any other tr/az form falls through to the generic "|lang=xx" below.
    }
    // Did I say the last one was horrible?...
    else if (lang == 'gd' && form == 'genitive-and-plural') {
        // Closes the first template and opens a second { {plural of}} line;
        // %0A and %23 are a pre-encoded newline and "#".
        return variable('template-lang', '|lang=gd}}%0A%23 {'+'{plural of|' + wgTitle + '|lang=gd');
    }
    return variable('template-lang', ((lang == '' || lang == 'en') ? '' : '|lang=' + lang));
}

/**
 * NOTE(review): the body of this function is empty in this revision, so it
 * always returns undefined. Nothing visible in this file calls it.
 */
function get_gender_template (gender, lang) {
}

/**
 * The gender template (or an empty string if no gender).
 *
 * Hebrew and Bulgarian ignore the gender and emit a fixed, space-prefixed
 * maintenance template/category instead. For other languages "pl" is
 * shortened to "p" and a hyphen is inserted between a gender letter and a
 * following "p" (e.g. "mpl" gives "m-p"). As written, the generic branch
 * emits no leading space.
 *
 * @param {string} gender - gender code such as '', 'm', 'f', 'mpl'.
 * @param {string} lang - language code.
 * @returns {string} substitution instruction for the "gender-template" variable.
 */
function set_gender_template (gender, lang) {
    if (lang == 'he') {
        return variable('gender-template', ' {'+'{romanization of Hebrew}}');
    }
    if (lang == 'bg') {
        return variable('gender-template', ' ['+'[Category:Bulgarian terms lacking transliteration]]');
    }

    gender = gender.replace('pl','p').replace(/([mfnc])p/, '$1-p');

    if (gender.length == 0) {
        return variable('gender-template', '');
    }
    return variable('gender-template', '{'+'{'+ gender + '}}');
}

//The optional |g= argument to { {head}}
/**
 * @param {string} gender - gender code such as '', 'm', 'fpl'.
 * @param {string} lang - language code.
 * @param {string} form - form name.
 * @returns {string} substitution instruction for the "template-gender" variable.
 */
function set_template_gender (gender, lang, form) {
    if (form == 'diminutive-plural') {
        return variable('template-gender', '|plural=1');
    }
    if (lang == 'ca') {
        return variable('template-gender', '|' + gender);
    }

    // A plural form whose gender does not already mention "p" gets one appended.
    if (form == 'plural' && gender.indexOf('p') < 0) {
        gender += 'p';
    }

    gender = gender.replace('pl','p').replace(/([mfnc])p/, '$1-p');

    if (gender.length == 0) {
        return variable('template-gender', '');
    }
    return variable('template-gender', '|g=' + gender);
}

//Form of templates for genders. These are the ones that exist, if we need others,
//either create them or use { {form-of}} manually.
/**
 * @param {string} gender - one of 'mpl', 'f', 'fpl', 'n', 'mfpl'.
 * @returns {string} the words that precede " of" in the template name.
 * @throws {string} "Not simple gender?!" for any other gender code.
 */
function gender_form (gender) {
    switch (gender) {
        case 'mpl':  return 'masculine plural';
        case 'f':    return 'feminine';
        case 'fpl':  return 'feminine plural';
        case 'n':    return 'neuter';
        case 'mfpl': return 'plural';
        default:     throw("Not simple gender?!");
    }
}

// This pages name.
// Returns in PIPED format. Which, while kind of bad, works as either embedded in %s or { {template|%s}}
// and most templates take an optional display parameter as the first after the link.
/**
 * @param {string|boolean} given - explicit display form of the origin, or a falsy value.
 * @param {string} lang - language code (not used by the body).
 * @returns {string} substitution instruction for the "origin" variable.
 */
function set_origin (given, lang) {
    //Remove links from given parameters, and unencode underscores to spaces
    if (given) {
        // The previous pattern made the link target a repeated one-character
        // group, so an unpiped [[foo]] backtracked down to its last letter
        // ("o"). The optional "target|" prefix is now a single group, which
        // keeps the whole text of unpiped links and still keeps only the
        // label of piped ones.
        given = given.replace(/\[\[(?:[^\|\]]*\|)?([^\]]+)\]\]/g, "$1").replace(/_/g, ' ');
        return variable('origin', wgTitle + '|' + given);
    }

    return variable('origin', wgTitle);
}

// The page we are about to create with links on individual words.
/**
 * @param {Object} link - the red link element; its text is the new page name.
 * @param {string} lang - language code (not used by the body).
 * @param {string} form - form name (not used by the body).
 * @returns {string} instructions for "pagename-linked" and "template-head".
 */
function set_pagename_linked_and_template_head (link, lang, form) {
    var plain = (link.innerText || link.textContent);

    // Turn every space or hyphen into a link boundary.
    var linked = plain.replace(/([ -])/g, "]]$1[[");

    if (linked != plain) {
        // Multi-word title: close the outer brackets and pass it as head=.
        linked = '['+'[' + linked + ']]';
        return variable('pagename-linked', linked) + variable('template-head', '|head=' + linked);
    }

    return variable('pagename-linked', linked) + variable('template-head', '');
}

/**
 * The optional |sc= (script) argument; only Hebrew gets one here.
 *
 * @param {string} lang - language code.
 * @returns {string} substitution instruction for the "template-sc" variable.
 */
function set_template_sc (lang) {
    var sc;
    switch (lang) {
        case 'he':
            sc = 'Hebr';
            break;
        default:
            return variable('template-sc', '');
    }
    return variable('template-sc', '|sc=' + sc);
}

//The form-of template we are to use - used for the basic verbs.
/**
 * @param {string} form - hyphenated form name, e.g. 'third-person-singular'.
 * @param {string} lang - language code.
 * @param {string} gender - gender code.
 * @param {Object} link - the red link (used to look up the part of speech for sv).
 * @returns {string} substitution instruction for the "form-template" variable.
 * @throws {string} for an unknown Swedish combination, or (via gender_form)
 *         for a 'positive' form with an unsupported gender.
 */
function set_form_template (form, lang, gender, link) {
    var formof = form.replace(/-/g, ' ');

    if (lang == 'eo') {
        return variable('form-template', 'eo-form of');
    }

    if (lang == 'sv' && (form.match(/^superlative/) || form.match(/^comparative/) || form.match(/^positive/))) {
        // Was an undeclared (implicit global) variable; now local.
        var tmp = false;
        // Keys are "<form> <gender>"; a trailing space means "no gender".
        switch (form + ' ' + gender) {
            case 'positive n':
                tmp = 'sv-adj-form-abs-indef-n';
                break;
            case 'positive m':
                tmp = 'sv-adj-form-abs-def-m';
                break;
            case 'positive-definite ':
                tmp = 'sv-adj-form-abs-def';
                break;
            case 'positive-plural ':
                tmp = 'sv-adj-form-abs-pl';
                break;
            case 'comparative ':
                if (get_part_of_speech(link) == 'Adjective') {
                    tmp = 'sv-adj-form-comp';
                } else {
                    tmp = 'sv-adv-form-comp';
                }
                break;
            case 'superlative-attributive m':
                tmp = 'sv-adj-form-sup-attr-m';
                break;
            case 'superlative-attributive-definite ':
                tmp = 'sv-adj-form-sup-attr';
                break;
            case 'superlative-attributive-plural ':
                tmp = 'sv-adj-form-sup-attr-pl';
                break;
            case 'superlative-predicative ':
                tmp = 'sv-adj-form-sup-pred';
                break;
            case 'superlative ':
                tmp = 'sv-adv-form-sup';
                break;
            default:
                throw("Unknown sv template.");
        }
        return variable('form-template', tmp);
    }

    if (formof == 'plural' && lang == 'es') {
        return variable('form-template', 'plural of|nocat=1');
    }

    // formof contains no hyphens at this point (all were replaced above), so
    // the extra replace("-"," ") the original applied here was a no-op.
    if (formof == 'construct') {
        return variable('form-template', "form of|" + formof + " form");
    }
    if (lang == 'da') {
        return variable('form-template', "form of|" + formof);
    } else if (lang == 'tr' || lang == 'az') {
        return variable('form-template', 'inflection of');
    }

    if (formof == 'positive') {
        formof = gender_form(gender);
        if (formof == 'plural') {
            return variable('form-template', "form of|plural");
        }
    } else {
        formof = formof.replace('third person', 'third-person');
        formof = formof.replace('simple past and participle', 'past');
    }

    if (formof == "diminutive plural") {
        return variable("form-template", "diminutive of|plural=1");
    }

    if (lang == 'gd' && formof == 'genitive and plural') {
        formof = 'genitive';
    }
    formof = formof + ' of';

    return variable('form-template', formof);
}

//The part of speech, normally determined by other means.
/**
 * Walk backwards through previous siblings, then up through parents, until
 * an h3-h6 element is found; its first child's innerHTML (minus a leading
 * section number such as "1.2 ") is the part of speech.
 *
 * @param {Object} link - DOM node to start from.
 * @returns {string} the heading text.
 * @throws {string} when no such heading precedes the node.
 */
function get_part_of_speech (link) {
    var node = link;
    while (node) {
        while (node.previousSibling) {
            node = node.previousSibling;
            if (node.nodeType == 1 && node.nodeName.match(/^[hH][3456]$/)) {
                return node.firstChild.innerHTML.replace(/^[1-9.]* /, '');
            }
        }
        node = node.parentNode;
    }
    throw("This entry seems to be formatted incorrectly.");
}

/**
 * @param {Object} link - the red link.
 * @param {string} lang - language code; Danish is always reported as 'Verb'.
 * @param {string} form - form name (not used by the body).
 * @returns {string} substitution instruction for the "part-of-speech" variable.
 */
function set_part_of_speech (link, lang, form) {
    if (lang == 'da') {
        return variable('part-of-speech', 'Verb');
    }
    return variable('part-of-speech', get_part_of_speech(link));
}

/**
 * Check if we know of a template that will do the job.
 * Use an explicit list to make it harder to subvert.
 *
 * @param {string} form - hyphenated form name.
 * @param {string} lang - language code.
 * @param {Object} link - the red link (used to look up the part of speech).
 * @returns {string|boolean} the preload page title, or false when unsupported.
 */
function get_preload_template (form, lang, link) {
    var prefix = 'User:Conrad.Irwin/creation.js/';

    if (lang == 'es') {
        return prefix + 'inflForm';
    } else if (lang == 'da' && form.match(/genitive/)) {
        return prefix + 'inflNoun';
    } else if (lang == 'tr' || lang == 'az' || (lang == 'ca' && form == 'plural')) {
        return prefix + 'caNoun';
    }

    if (lang == 'sv' && (form.match(/^superlative/) || form.match(/^comparative/) || form.match(/^positive/))) {
        return prefix + 'swAd';
    }

    if (lang == 'nl' && form == 'diminutive') {
        return prefix + 'nlDiminutive';
    }

    if (lang == 'nl' && (form == 'comparative' || form == 'superlative')) {
        return prefix + 'complexAdjective';
    }

    if (lang == 'he') {
        switch (form) {
            case 'plural':
            case 'construct':
                return prefix + 'inflNoun';
            default:
                return false;
        }
    }

    switch (form) {
        case 'plural':
        case 'diminutive':
        case 'genitive':
        case 'diminutive-plural':
        case 'genitive-and-plural':
            return prefix + 'basicNoun';
        case 'plural-definite':
        case 'plural-indefinite':
        case 'singular-definite':
        case 'vocative':
        case 'singular-vocative':
        case 'plural-vocative':
            return prefix + 'inflNoun';
        case 'third-person-singular':
        case 'present-participle':
        case 'simple-past':
        case 'past-participle':
            if (lang == 'da') {
                return prefix + 'inflForm';
            }
            // Deliberate fall-through: non-Danish verbs share basicVerb.
        case 'simple-past-and-participle':
            return prefix + 'basicVerb';
        case 'present':
        case 'past':
        case 'infinitive':
        case 'imperative':
            return prefix + 'inflForm';
        case 'positive':
            return prefix + 'positiveAdjective';
        case 'comparative':
        case 'superlative':
        case 'inflected-form':
        case 'exaggerated':
            if ((lang == 'hu' || lang == 'cs' || lang == 'sl') && get_part_of_speech(link) == 'Adverb') {
                return prefix + 'basicAdverb';
            }
            return prefix + 'basicAdjective';
        default:
            return false;
    }
}

/**
 * Undo anchor-style encoding: every "." becomes "%" and the result is run
 * through decodeURI.
 *
 * @param {string} x - encoded text.
 * @returns {string} the decoded text.
 */
function unAnchorEncode (x) {
    return decodeURI(x.replace(/\./g, '%'));
}

/**
 * Convert a raw new link into a snazzy one.
 *
 * @param {string[]} details - class-name tokens of the surrounding form-of
 *        element ("lang-xx", "plural-form-of", "gender-f", "origin-...", ...).
 * @param {Object} link - the red link; its href and colour are updated in
 *        place when a preload is available.
 */
function add_create_button (details, link) {
    // NOTE: get_part_of_speech() here and unAnchorEncode() in the loop below
    // run outside the try block, so an exception from either propagates to
    // the caller (unchanged from the original).
    var d = {
        lang: 'en',
        lang2: null,
        gender: '',
        form: '',
        origin: wgTitle,
        given_origin: false,
        pos: get_part_of_speech(link),
        target: (link.innerText || link.textContent)
    };

    // Match results are used directly instead of the deprecated RegExp.$2.
    for (var i = 0; i < details.length; i++) {
        var token = details[i];
        var m;
        if ((m = token.match(/(^| +)([^ ]+)-form-of( +|$)/))) {
            d.form = m[2];
        } else if ((m = token.match(/(^| +)lang-([^ ]+)( +|$)/))) {
            d.lang = m[2];
        } else if ((m = token.match(/(^| +)lang2-([^ ]+)( +|$)/))) {
            d.lang2 = m[2];
        } else if ((m = token.match(/(^| +)gender-(([mfn]+|c)(pl)?)( +|$)/))) {
            d.gender = m[2];
        } else if ((m = token.match(/(^| +)origin-(.+)( +|$)/))) {
            d.given_origin = unAnchorEncode(m[2]);
        } else if ((m = token.match(/(^| +)transliteration-(.+)( +|$)/))) {
            d.transliteration = unAnchorEncode(m[2]);
        }
    }

    var workerHref = '';
    // Was an undeclared (implicit global) variable; now local.
    var preload;

    try {
        if (d.pos && (preload = get_preload_text(d))) {
            workerHref = '&preloadtext=' + encodeURIComponent(preload);
        } else if (d.pos && (preload = get_preload_template(d.form, d.lang, link))) {
            if (d.lang == 'io' && d.origin.indexOf('-') > 0) {
                return; /* request of User:Razorflame */
            }
            workerHref = '&preload=' + encodeURIComponent(preload) + '&autoedit='
                + ((d.lang == 'sv' || d.lang == 'es' || d.lang == 'da') ? set_part_of_speech(link, d.lang, d.form) : '')
                + set_lang(d.lang)
                + set_template_lang(d.lang, d.form)
                + set_gender_template(d.gender, d.lang)
                + set_template_gender(d.gender, d.lang, d.form)
                + set_origin(d.given_origin, d.lang2 ? d.lang2 : d.lang)
                + set_pagename_linked_and_template_head(link, d.lang, d.form)
                + set_form_template(d.form, d.lang, d.gender, link)
                + set_template_sc(d.lang);
        }

        if (workerHref) {
            link.href += '&editintro=User:Conrad.Irwin/creation.js/intro'
                + workerHref
                + '&preloadsummary=' + encodeURIComponent(
                    'Creating ' + d.form + ' form of ' + d.origin + ' (Accelerated)')
                + '&preloadminor=true';

            link.style.color = '#22CC00';
        }
    } catch (e) {
        // Deliberately best-effort: any failure leaves the link untouched.
    }
}

/**
 * For many languages the above is far too limiting - so let's define some more powerful functions.
 *
 * The functions are structured top-down, this allows innermost functions to just return text that is
 * wrapped by outer functions, seems to result in shorter code, but should probably be refactored out :p.
 *
 * So each "declaration function" accepts a function (f) and returns an "edit function".
 * and each "edit function" accepts some details (d) and returns the text to be wrapped (by calling f(d) where necessary)
 *
 * get_preload_text itself dispatches on d.lang to a handler stored as a
 * property of this function (get_preload_text.en, get_preload_text.fr, ...).
 *
 * @param {Object} d - entry details; d.lang selects the handler.
 * @returns {string|boolean} the handler's result, or false when there is no
 *          handler for the language or the handler throws.
 */
function get_preload_text(d) {
    try {
        // Only handlers assigned directly on this function count. Without the
        // own-property check a language code such as "toString" or "bind"
        // reached Function.prototype and returned a truthy non-preload value.
        if (!Object.prototype.hasOwnProperty.call(get_preload_text, d.lang)) {
            return false;
        }
        return get_preload_text[d.lang](d);
    } catch (e) {
        return false;
    }
}

/**
 * These templates are for constructing preloadtext manually.
 *
 * language_header wraps an edit function so that its output is preceded by a
 * level-2 heading holding a subst: call on the language code.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing heading + f(d).
 */
function language_header(f) {
    return function (d) {
        var heading = "=={"+"{subst:" + d.lang + "}}==";
        return heading + "\n\n" + f(d);
    };
}

// Find the part of speech by looking at the current part of speech heading
/**
 * Wraps an edit function so that its output is preceded by a level-3
 * heading containing d.pos.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing heading + f(d).
 */
function default_pos(f) {
    return function (d) {
        return "===" + d.pos + "===\n" + f(d);
    };
}

// '''PAGENAME'''
/**
 * Wraps an edit function so that its output is preceded by the bolded
 * headword line, followed by a gender template when d.gender is set.
 *
 * The string literals in this block were mangled by wiki-markup stripping
 * (bold quotes, link brackets and empty-string literals were removed, leaving
 * syntax errors); they are restored here.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing headword + f(d).
 */
function bolded_headword(f) {
    return function (d) {
        var pagename = d.target;

        // Link the individual words of a multi-word title, unless the title
        // starts or ends with a separator (which would leave an empty link).
        var np = pagename.replace(/([ -])/g, "]]$1[[");
        if (np != pagename && !/(^\]\])|(\[\[$)/.test(np)) {
            pagename = '['+'[' + np + ']]';
        }

        // Gender letters are passed as separate template arguments,
        // e.g. "mpl" gives { {m|p}}.
        var gender_template = d.gender
            ? ' {'+'{' + d.gender.replace('pl','p').split('').join('|') + '}}'
            : '';

        return "'''" + pagename + "'''" + gender_template + "\n\n" + f(d);
    };
}

// { {head|fr|...}}
/**
 * Wraps an edit function so that its output is preceded by a { {head}} call.
 * The second argument is 'plural' when the gender contains "pl", otherwise
 * the lower-cased part of speech; g= carries the gender without "pl" and
 * g2=p is added for plurals.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing head line + f(d).
 */
function head_template(f) {
    return function (d) {
        var is_plural = d.gender.indexOf('pl') > -1;
        // toLowerCase was missing its call parentheses, which concatenated
        // the function itself into the template.
        var category = is_plural ? 'plural' : d.pos.toLowerCase();
        var genders = (d.gender ? "|g=" + d.gender.replace('pl','') : "")
            + (is_plural ? '|g2=p' : '');
        return "{"+"{head|" + d.lang + "|" + category + genders + "}}\n\n" + f(d);
    };
}

// { {head|nl}}
/**
 * Wraps an edit function with a { {head}} call that only names the language.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing head line + f(d).
 */
function posless_head(f) {
    return function (d) {
        return '{' + '{head|' + d.lang + '}}\n\n' + f(d);
    };
}

// { {head|nl}} or { {nl-noun-dim}}
/**
 * Dutch head line: diminutives use { {nl-noun-dim}}, everything else { {head|nl}}.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing head line + f(d).
 */
function nl_head(f) {
    return function (d) {
        if (d.form == 'diminutive') {
            return '{' + '{nl-noun-dim}}\n\n' + f(d);
        }
        return '{' + '{head|nl}}\n\n' + f(d);
    };
}

// { {fa-word}}
/**
 * Wraps an edit function with a { {fa-word}} head line; tr= is added when
 * d.transliteration is set.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing head line + f(d).
 */
function persian_head(f) {
    return function (d) {
        var head = '{'+'{fa-word' + (d.transliteration ? ('|tr=' + d.transliteration) : '') + '}}';
        return head + "\n\n" + f(d);
    };
}

// { {tg-word}}
/**
 * Wraps an edit function with a { {tg-word}} head line; tr= is added when
 * d.transliteration is set.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing head line + f(d).
 */
function tajik_head(f) {
    return function (d) {
        var head = '{'+'{tg-word' + (d.transliteration ? ('|tr=' + d.transliteration) : '') + '}}';
        return head + "\n\n" + f(d);
    };
}

// All definition lines start with #
/**
 * Wraps an edit function so that its output becomes one "# ..." line
 * terminated by a newline.
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} edit function producing the definition line.
 */
function definition_line(f) {
    return function (d) {
        return "# " + f(d) + "\n";
    };
}

// Common part of eo
/**
 * Wraps an edit function so that its output becomes the argument list of an
 * { {eo-form of}} call.
 *
 * @param {function(Object): string} f - inner edit function returning the arguments.
 * @returns {function(Object): string} edit function producing the template call.
 */
function eo_form_of(f) {
    return function (d) {
        return '{'+'{eo-form of|' + f(d) + '}}';
    };
}

// These ones always seem to happen together
/**
 * Shorthand for language_header(default_pos(f)).
 *
 * @param {function(Object): string} f - inner edit function.
 * @returns {function(Object): string} the wrapped edit function.
 */
function basic_entry(f) {
    return language_header(default_pos(f));
}

// Per-language preload text handlers. Each one is built by nesting the
// wrapper functions; the innermost function returns the definition text.
// Wherever the extracted source read "return preload_text_error;" the call
// parentheses had been stripped; they are restored so the error is actually
// thrown (and turned into false by get_preload_text).

// English
get_preload_text.en = basic_entry(bolded_headword(definition_line(
    function (d) {
        var formof = d.form.replace(/-/g, ' ');
        formof = formof.replace('third person', 'third-person');
        formof = formof.replace('simple past and participle', 'en-past');
        return '{' + '{' + formof + ' of|' + d.origin + '}}';
    }
)));

// Esperanto
get_preload_text.eo = basic_entry(bolded_headword(definition_line(eo_form_of(
    function (d) {
        var origin = d.origin;
        var ending = origin.substr(origin.length - 1);
        var base;

        //Nouns
        if (ending == 'o') {
            base = origin.substr(0, origin.length - 1) + '|o';
            var proper = d.pos == 'Proper noun';
            // The replace() also inflects the first word of a two-word origin
            // when that word ends in a/o.
            switch (d.form) {
                case 'uncountable-accusative':
                    return base.replace(/([ao]) /, '|$1n|') + 'n|unc=yes';
                case 'plural':
                    return base.replace(/([ao]) /, '|$1j|') + 'j' + (proper ? '-proper' : '');
                case 'accusative':
                    return base.replace(/([ao]) /, '|$1n|') + 'n' + (proper ? '-properpl' : '');
                case 'accusative-plural':
                    return base.replace(/([ao]) /, '|$1jn|') + 'jn' + (proper ? '-properpl' : '');
                default:
                    return preload_text_error();
            }
        //Adjectives
        } else if (ending == 'a') {
            base = origin.substr(0, origin.length - 1) + '|a';
            switch (d.form) {
                // These names mirror those used for other languages
                case 'definite-plural':
                    return base.replace(/([ao]) /, '|$1j|') + 'j';
                case 'definite-accusative':
                    return base.replace(/([ao]) /, '|$1n|') + 'n';
                case 'plural-definite-accusative':
                    return base.replace(/([ao]) /, '|$1jn|') + 'jn';
                default:
                    return preload_text_error();
            }
        }
        return preload_text_error();
    }
))));

// Persian
get_preload_text.fa = basic_entry(persian_head(definition_line(
    function (d) {
        if (d.form == 'comparative') {
            return '{'+'{fa-adj-form|c|' + d.origin + '}}';
        } else if (d.form == 'superlative') {
            return '{'+'{fa-adj-form|s|' + d.origin + '}}';
        }
        return preload_text_error();
    }
)));

// Tajik
get_preload_text.tg = basic_entry(tajik_head(definition_line(
    function (d) {
        if (d.form == 'comparative') {
            return '{'+'{tg-adj-form|c|' + d.origin + '}}';
        } else if (d.form == 'superlative') {
            return '{'+'{tg-adj-form|s|' + d.origin + '}}';
        }
        return preload_text_error();
    }
)));

// French
get_preload_text.fr = basic_entry(head_template(definition_line(
    function (d) {
        var template = {
            'f-singular': 'feminine of',
            'fpl-other-plural': 'feminine plural of',
            'fpl-plural': 'plural of',
            'mpl-plural': 'plural of',
            'mpl-other-plural': 'masculine plural of'
        };
        var name = template[d.gender + '-' + d.form];

        if (name) {
            // NOTE(review): the extracted source returned '' here because the
            // template call was stripped from the string. It is reconstructed
            // from the otherwise-unused lookup above, following the shape the
            // Polish handler uses - confirm against the upstream script.
            return '{' + '{' + name + '|' + d.origin + '|lang=' + d.lang + '}}';
        }
        return preload_text_error();
    }
)));

// Polish
get_preload_text.pl = basic_entry(bolded_headword(definition_line(
    function (d) {
        var formof = d.form.replace(/-/g, ' ');
        formof = formof.replace('third person', 'third-person');
        formof = formof.replace('simple past and participle', 'past');
        return '{' + '{' + formof + ' of|' + d.origin + '|lang=' + d.lang + (d.pos == 'Adverb' ? '|POS=adverb' : '') + '}}';
    }
)));

// Dutch
get_preload_text.nl = basic_entry(nl_head(definition_line(
    function (d) {
        var arg;
        switch (d.form) {
            case 'plural':
                arg = 'pl=1';
                break;
            case 'diminutive':
                arg = 'dim=1';
                break;
            default:
                // The call parentheses were missing, so the function object
                // itself was returned instead of the error being thrown.
                return preload_text_error();
        }
        // NOTE(review): this revision returns an empty definition and never
        // uses `arg`. The template call that consumed it appears to have been
        // stripped from the string literal and cannot be recovered from this
        // file - restore it from the upstream script.
        return '';
    }
)));

// Yiddish: builds the whole entry by hand (language heading, part of speech
// heading, { {head}} line with optional tr=, and the definition line).
get_preload_text.yi = function (d) {
    var a = '==Yiddish==\n\n===' + d.pos + '===\n';
    var tr = d.transliteration ? '|tr=' + d.transliteration : '';
    switch (d.form) {
        case 'plural':
            return a + '\{\{head|yi' + tr + '}}\n\n# \{\{plural of|' + d.origin + '|lang=yi}}';
        case 'past-participle':
            return a + '\{\{head|yi|past participle' + tr + '}}\n\n# \{\{past participle of|' + d.origin + '|lang=yi}}';
        default:
            // Call parentheses restored; unsupported forms throw, which
            // get_preload_text turns into false.
            return preload_text_error();
    }
};

/**
 * A sanely named wrapper around throw.
 *
 * Always throws the value true; never returns.
 *
 * @throws {boolean} true
 */
function preload_text_error() {
    throw true;
}

/**
 * Recursively find first red link in "form-of" spans.
 * FIXME: would be better to return an array as multiple params often occur
 *
 * A red link is an A element whose class name contains "new". Children are
 * searched depth-first in document order.
 *
 * @param {Object} span - DOM node whose descendants are searched.
 * @returns {Object|null} the first matching link, or null when there is none.
 */
function find_red_link (span) {
    var poss = span.firstChild;

    while (poss) {
        if (poss.nodeType == 1) {
            // toUpperCase was missing its call parentheses, so this
            // comparison was always false and no link was ever returned.
            if (poss.nodeName.toUpperCase() == 'A' && poss.className.indexOf('new') >= 0) {
                return poss;
            }
            // Was an undeclared (implicit global) variable; now local.
            var recurse = find_red_link(poss);
            if (recurse) {
                return recurse;
            }
        }
        poss = poss.nextSibling;
    }

    return null;
}

/**
 * Recursively find anything tagged with "form-of"
 */

function find_form_of_spans() {
    // Returns every element carrying the "form-of" class: the result of
    // document.getElementsByClassName when that method exists, otherwise an
    // array built by scanning all span elements.
    // (The parameter list and the Array constructor call had lost their
    // parentheses, which made the declaration a syntax error.)
    if (typeof(document.getElementsByClassName) == 'function') {
        return document.getElementsByClassName('form-of');
    }

    var spans = document.getElementsByTagName('span');
    var form_ofs = [];

    for (var i = 0; i < spans.length; i++) {
        // Match "form-of" only as a whole class token.
        if (spans[i].className.match(/(^| +)form-of( +|$)/)) {
            form_ofs.push(spans[i]);
        }
    }

    return form_ofs;
}

/**
 * Get the show on the road
 */
$(function () {
    // Mainspace only (namespace number 0), plus Wiktionary:Sandbox.
    if (wgNamespaceNumber && wgPageName != 'Wiktionary:Sandbox') {
        return;
    }

    // Was an undeclared (implicit global) variable, and the function was
    // referenced without being called; both fixed here.
    var poss = find_form_of_spans();

    for (var i = 0; i < poss.length; i++) {
        var link = find_red_link(poss[i]);
        if (link) {
            // Pass the remaining class tokens (without "form-of" itself).
            add_create_button(poss[i].className.replace(/(^| +)form-of( +|$)/, '').split(' '), link);
        }
    }
});