// MediaWiki:Gadget-TranslationAdder-Data.js

// implicit dependencies : ext.gadget.LanguageUtils
/* jshint maxerr:1048576, strict:true, undef:true, latedef:true, es5:true */
/* global $, ScriptUtils, mw */

window.LangMetadata = function { //Singleton if (arguments.callee.instance) return arguments.callee.instance; else arguments.callee.instance = this;

//

// FIXME: merge into above
var c = 'Chinese';
var a = 'Arabic';

// Maps a language code to the heading it is nested under, written either as
// 'Parent' or as 'Parent/Sub-language'.
var nesting = {
	aae: 'Albanian/Arbëresh',
	aat: 'Albanian/Arvanitika',
	als: 'Albanian/Tosk',
	aln: 'Albanian/Gheg',
	apj: 'Apache',
	apm: 'Apache',
	apw: 'Apache',
	//Bai
	bca: 'Bai/Central Bai',
	bfc: 'Bai/Northern Bai',
	bfs: 'Bai/Southern Bai',
	lay: 'Bai/Lama Bai',
	//Eastern Cham (cjm) and Western Cham (cja) to be nested under Cham (atitarev)
	cjm: 'Cham/Eastern Cham',
	cja: 'Cham/Western Cham',
	syr: 'Aramaic',
	syc: 'Aramaic',
	xcl: 'Armenian',
	axm: 'Armenian',
	// ang:'English',enm:'English', don't nest English (Encyclopetey)
	//Persian nesting
	prs: 'Persian/Dari',
	"fa-cls": 'Persian/Classical Persian',
	"fa-ira": 'Persian/Iranian Persian',
	fro: 'French',
	frm: 'French',
	oge: 'Georgian',
	gsw: 'German',
	ksh: 'German',
	pfl: 'German',
	//gmh:'German',goh:'German', don't nest OHG/MHG (-sche)
	grc: 'Greek/Ancient Greek',
	gmy: 'Greek/Mycenaean Greek',
	//el:'Greek/Modern', don't nest Modern Greek (Atelaes)
	sga: 'Irish',
	mga: 'Irish',
	kmr: 'Kurdish/Northern Kurdish',
	ckb: 'Kurdish/Central Kurdish',
	sdh: 'Kurdish/Southern Kurdish',
	lki: 'Kurdish/Laki Kurdish',
	//Lawa
	lwl: 'Lawa/Eastern Lawa',
	lcp: 'Lawa/Western Lawa',
	"nds-de": 'Low German',
	"nds-nl": 'Low German',
	nb: 'Norwegian/Bokmål',
	nn: 'Norwegian/Nynorsk',
	mhr: 'Mari/Eastern Mari',
	mrj: 'Mari/Western Mari',
	cmg: 'Mongolian',
	cst: 'Ohlone/Northern Ohlone',
	css: 'Ohlone/Southern Ohlone',
	//Roglai
	rog: 'Roglai/Northern Roglai',
	rgs: 'Roglai/Southern Roglai',
	roc: 'Roglai/Cacgia Roglai',
	rmn: 'Romani',
	rml: 'Romani',
	rmc: 'Romani',
	rmf: 'Romani',
	rmo: 'Romani',
	rmy: 'Romani',
	rmw: 'Romani',
	dsb: 'Sorbian',
	hsb: 'Sorbian',
	osp: 'Spanish',
	//Sama
	ssb: 'Sama/Southern Sama',
	sml: 'Sama/Central Sama',
	sse: 'Sama/Balangingi Sama',
	slm: 'Sama/Pangutaran Sama',
	//Sami
	sma: 'Sami/Southern Sami',
	sju: 'Sami/Ume Sami',
	sje: 'Sami/Pite Sami',
	smj: 'Sami/Lule Sami',
	sme: 'Sami/Northern Sami',
	sjk: 'Sami/Kemi Sami',
	smn: 'Sami/Inari Sami',
	sms: 'Sami/Skolt Sami',
	sia: 'Sami/Akkala Sami',
	sjd: 'Sami/Kildin Sami',
	sjt: 'Sami/Ter Sami',
	tji: 'Tujia/Northern Tujia',
	tjs: 'Tujia/Southern Tujia',
	owl: 'Welsh',
	wlm: 'Welsh',
	"yok-bvy": 'Yokuts',
	"yok-dly": 'Yokuts',
	"yok-gsy": 'Yokuts',
	"yok-kry": 'Yokuts',
	"yok-nvy": 'Yokuts',
	"yok-ply": 'Yokuts',
	"yok-svy": 'Yokuts',
	"yok-tky": 'Yokuts',
	// Everything below nests directly under 'Chinese'.
	zh: c,
	yue: c,
	dng: c,
	gan: c,
	hak: c,
	czh: c,
	cjy: c,
	cmn: c,
	mnp: c,
	cdo: c,
	nan: c,
	czo: c,
	cpx: c,
	wuu: c,
	hsn: c,
	lzh: c,
	cnp: c,
	csp: c,
	ltc: c,
	och: c,
	wxa: c,
	"nan-hbl": c,
	"nan-hnm": c,
	"nan-luh": c,
	"nan-tws": c,
	"zhx-sht": c,
	"zhx-sic": c,
	"zhx-tai": c,
	// Everything below nests directly under 'Arabic'.
	arq: a,
	aao: a,
	bbz: a,
	abv: a,
	shu: a,
	acy: a,
	adf: a,
	avl: a,
	arz: a,
	afb: a,
	ayh: a,
	acw: a,
	ayl: a,
	acm: a,
	ary: a,
	ars: a,
	apc: a,
	ayp: a,
	acx: a,
	aec: a,
	ayn: a,
	ssh: a,
	ajp: a,
	arb: a,
	apd: a,
	pga: a,
	acq: a,
	abh: a,
	aeb: a,
	auz: a
};

// Shared stripper definitions. A stripper may carry `from`/`to` (parallel
// strings: the character at index i of `from` becomes the character at index
// i of `to`) and `strip` (characters that are removed).
var stripArabicDiacritics = {
	strip: "\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652"
};
var stripGraveAndAcute = {
	strip: "\u0300\u0301"
};
var fullToHalfWidthNumbers = {
	from: "０１２３４５６７８９",
	to: "0123456789"
};

// These should reflect the replacements made in Module:languages, but should not necessarily be equal.
var diacriticStrippers = {
	ang: {
		from: "ĀāǢǣĊċĒēĠġĪīŌōŪūȲȳ",
		to: "AaÆæCcEeGgIiOoUuYy",
		strip: "\u0304\u0307",
	}, //macron and above dot
	ar: stripArabicDiacritics,
	aao: stripArabicDiacritics,
	acm: stripArabicDiacritics,
	acx: stripArabicDiacritics,
	adf: stripArabicDiacritics,
	aeb: stripArabicDiacritics,
	afb: stripArabicDiacritics,
	ajp: stripArabicDiacritics,
	apc: stripArabicDiacritics,
	apd: stripArabicDiacritics,
	arq: stripArabicDiacritics,
	ary: stripArabicDiacritics,
	arz: stripArabicDiacritics,
	fa: stripArabicDiacritics,
	ps: stripArabicDiacritics,
	sd: stripArabicDiacritics,
	ur: stripArabicDiacritics,
	chl: {
		from: "ÁáÉéÍíÓóÚú",
		to: "AaEeIiOoUu",
		// The precomposed letters above carry an acute accent, so the combining
		// acute (U+0301) is stripped too. U+0304 (macron) was the only entry
		// before and is kept. NOTE(review): confirm against Module:languages.
		strip: "\u0301\u0304",
	}, //acute accent
	he: {
		strip: "\u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BA\u05BB\u05BC\u05BD\u05BF\u05C1\u05C2",
		from: "-'\"",
		to: "־׳״",
	},
	la: {
		from: "ĀāĒēĪīŌōŪūȲȳ",
		to: "AaEeIiOoUuYy",
		strip: "\u0304",
	}, //macron
	lt: {
		from: "áãàéẽèìýỹñóõòúù",
		to: "aaaeeeiyynooouu",
		// U+0300 (combining grave) added: the list had U+0340 (combining grave
		// tone mark) where the grave-accented letters above call for U+0300.
		// U+0340 is kept so nothing that was stripped before stops being stripped.
		strip: "\u0300\u0340\u0301\u0303",
	},
	nci: {
		from: "ĀāĒēĪīŌōŪūȲȳ",
		// "Yy" added: `to` was two characters shorter than `from`, leaving
		// Ȳ and ȳ without a replacement.
		to: "AaEeIiOoUuYy",
		strip: "\u0304",
	}, //macron
	// Strip combining acute and grave on Cyrillic Slavic languages;
	// Serbo-Croatian has a longer list.
	ru: stripGraveAndAcute,
	uk: stripGraveAndAcute,
	be: stripGraveAndAcute,
	bg: stripGraveAndAcute,
	orv: stripGraveAndAcute,
	cu: stripGraveAndAcute,
	rue: stripGraveAndAcute,
	mk: stripGraveAndAcute,
	sh: {
		from: "ȀȁÀàȂȃÁáĀāȄȅÈèȆȇÉéĒēȈȉÌìȊȋÍíĪīȌȍÒòȎȏÓóŌōȐȑȒȓŔŕȔȕÙùȖȗÚúŪūѝӣ",
		to: "AaAaAaAaAaEeEeEeEeEeIiIiIiIiIiOoOoOoOoOoRrRrRrUuUuUuUuUuии",
		strip: "\u030F\u0300\u0311\u0301\u0304",
	},
	sl: {
		from: "áÁàÀâÂȃȂȁȀéÉèÈêÊȇȆȅȄíÍìÌîÎȋȊȉȈóÓòÒôÔȏȎȍȌŕŔȓȒȑȐúÚùÙûÛȗȖȕȔệỆộỘẹẸọỌəł",
		to: "aAaAaAaAaAeEeEeEeEeEiIiIiIiIiIoOoOoOoOoOrRrRrRuUuUuUuUuUeEoOeEoOel",
		strip: "\u0301\u0300\u0302\u0311\u030f\u0323",
	},
	kk: stripGraveAndAcute,
	ky: stripGraveAndAcute,
	tg: stripGraveAndAcute,
	sa: {
		strip: "ः",
	},
	/** visarga **/
	bo: {
		strip: "།",
	},
	/** shad **/
	tr: {
		from: "ÂâÛû",
		to: "AaUu",
		strip: "\u0302",
	},
	ja: fullToHalfWidthNumbers,
	cmn: fullToHalfWidthNumbers,
	yue: fullToHalfWidthNumbers,
	nan: fullToHalfWidthNumbers,
	ko: fullToHalfWidthNumbers,
	zu: {
		strip_init_hyphen: true,
	}
};

// Returns val when it is a string, otherwise null.
function stringOrNull(val) {
	return typeof val === "string" ? val : null;
}

// Builds a character -> replacement lookup from a definition object.
//   obj.from / obj.to : parallel strings, paired index by index
//   obj.strip         : characters mapped to the empty string
// Non-string properties are ignored. `strip` entries are written last, so
// they win over a `from`/`to` pair for the same character.
function makeMap(obj) {
	var from = stringOrNull(obj.from);
	var to = stringOrNull(obj.to);
	var strip = stringOrNull(obj.strip);
	var map = {};
	if (from && to) {
		// Pair only positions that exist in both strings, so a short `to`
		// never turns a trailing `from` character into an empty replacement.
		var pairCount = Math.min(from.length, to.length);
		for (var i = 0; i < pairCount; i++) {
			map[from.charAt(i)] = to.charAt(i);
		}
	}
	if (strip) {
		for (var ii = 0; ii < strip.length; ii++) {
			map[strip.charAt(ii)] = "";
		}
	}
	return map;
}

/** letters and number 1 are used instead of palochka **/
var palochkaCorrections = {
	from: "Il1",
	to: "ӏӏӏ"
};

var arabicYa = "ي"; // Arabic letter yāʾ (U+064A ARABIC LETTER YEH)
var arabicKaf = "ك"; // Arabic letter kāf (U+0643 ARABIC LETTER KAF)
var farsiYe = "ی"; // Persian letter ye (U+06CC ARABIC LETTER FARSI YEH)
var farsiKaf = "ک"; // Persian letter kâf (U+06A9 ARABIC LETTER KEHEH)
var alifMaqsura = "ى"; // Arabic letter ʾalif maqṣūra (U+0649 ARABIC LETTER ALEF MAKSURA)

/** commonly misspelled Persian letters (Arabic instead of Persian) (to be expanded) **/
var persianCorrections = {
	from: arabicKaf + arabicYa + alifMaqsura,
	to: farsiKaf + farsiYe + farsiYe,
};

// These replacements will be applied when a word is submitted.
// Per-language character corrections. An entry is either { map: {...} }
// (a ready-made character -> replacement table) or a from/to/strip
// definition that makeMap() turns into such a table.
var orthographicalCorrections = {
	// Replace grave accents with acute accents for translations
	// into Bulgarian per Wiktionary policy
	bg: {
		map: {
			"\u0300": "\u0301",
			"Ѐ": "Е́",
			"Ѝ": "И́",
			"ѐ": "е́",
			"ѝ": "и́"
		},
	},
	abq: palochkaCorrections,
	ady: palochkaCorrections,
	av: palochkaCorrections,
	ce: palochkaCorrections,
	dar: palochkaCorrections,
	inh: palochkaCorrections,
	kbd: palochkaCorrections,
	lbe: palochkaCorrections,
	lez: palochkaCorrections,
	tab: palochkaCorrections,
	/** Roman to Cyrillic**/
	cv: {
		from: "ĂăĔĕÇçŸÿ",
		to: "ӐӑӖӗҪҫӲӳ",
	},
	kv: {
		from: "ÖöIi",
		to: "ӦӧІі",
	},
	koi: {
		from: "ÖöIi",
		to: "ӦӧІі",
	},
	kpv: {
		from: "ÖöIi",
		to: "ӦӧІі",
	},
	os: {
		from: "Ææ",
		to: "Ӕӕ",
	},
	/* Obsolete or incorrectly used letters */
	mn: {
		from: "ЄєѲѳЇїVv",
		to: "ӨөӨөҮүҮү",
		strip: "\u0300\u0301",
	},
	/* cedilla to comma below */
	ro: {
		from: "ŞŢşţ",
		to: "ȘȚșț",
	},
	/** ʻ is a standard symbol in Uzbek but is often replaced with ' or ` **/
	uz: {
		from: "'`",
		to: "ʻʻ",
	},
	fa: persianCorrections,
	"fa-ira": persianCorrections,
	"fa-cls": persianCorrections,
	prs: persianCorrections,
	ur: persianCorrections,
	/** commonly misspelled Arabic letters (Persian instead of Arabic) (to be expanded) **/
	ar: {
		from: farsiKaf + farsiYe,
		to: arabicKaf + arabicYa,
	},
	ota: {
		from: farsiKaf + arabicYa + alifMaqsura,
		to: arabicKaf + farsiYe + farsiYe,
	},
	ps: {
		from: arabicKaf,
		to: farsiKaf,
	},
	/** some letters are considered more standard (to be expanded) **/
	cu: {
		from: "ыѹ",
		to: "ꙑу",
	},
	/** some letters are considered more standard (to be expanded) **/
	orv: {
		from: "ыѹ",
		to: "ꙑу",
	},
	/** obsolete letters **/
	ab: {
		from: "ҔҕҦҧ",
		to: "ӶӷԤԥ",
	},
};

//Returns true if a Wiktionary exists for the specified language, false otherwise
this.hasWiktionary = function(lang) {
	var entry = metadata[lang];
	return !!(entry && (entry.haswikt || entry.wiktprefix));
};

//Returns the domain-name prefix of the Wiktionary for the specified language.
//The result is undefined when there is no metadata for lang, and falsy when
//the metadata has neither wiktprefix nor haswikt.
this.getWiktionaryPrefix = function(lang) {
	if (metadata[lang])
		return metadata[lang].wiktprefix || (metadata[lang].haswikt && lang);
};

// Keep this in sync with ignore_caps in Module:translations.
this.ignoreCaps = { "ko": true };

// Calls the callback with a boolean indicating whether the specified language
// has a Wiktionary with the specified entry. The callback might be called
// synchronously, or it might be called asynchronously.
this.hasWiktionaryWithEntry = function(lang, title, callback) {
	if (!this.hasWiktionary(lang)) {
		callback(false);
		return;
	}

	// For languages in ignoreCaps, drop a leading "^" from the title.
	if (this.ignoreCaps[lang])
		title = title.replace(/^\^/, "");

	// Use this when we want to give up support for Internet Explorer 11.
	// URL constructor and URLSearchParams are not supported in IE11.
	// var url = new URL("https://" + this.getWiktionaryPrefix(lang) + ".wiktionary.org/w/api.php");
	// url.search = new URLSearchParams({
	// 	"format": "json",
	// 	"formatversion": "2",
	// 	"action": "query",
	// 	"titles": title,
	// 	"converttitles": 1,
	// 	"origin": "*",
	// });
	var url = new mw.Uri("https://" + this.getWiktionaryPrefix(lang) + ".wiktionary.org/w/api.php");
	url.extend({
		"format": "json",
		"formatversion": "2",
		"action": "query",
		"titles": title,
		"converttitles": 1,
		"origin": "*",
	});

	$.getJSON(
		// toString must be called: $.getJSON needs the URL string, not the method.
		url.toString(),
		function(data) {
			var page = data && data.query && data.query.pages && data.query.pages[0];
			callback(page instanceof Object ? !page.missing : false);
		}
	).fail(function() {
		// A failed request is reported as "no entry" so the caller always
		// receives an answer.
		callback(false);
	});
};

//Given a language code return a default script code: the first script that
//ScriptUtils lists for the language, or false when it lists none.
this.guessScript = function(lang) {
	var scripts = (new ScriptUtils()).GetScriptsByLangCode(lang);
	if (scripts && scripts.length > 0)
		return scripts[0];

	// A metadata-based fallback (wsc / sc) used to follow the if/else above,
	// but both branches returned, so it could never run. It is removed here
	// and the result is unchanged.
	return false;
};

// In a given language, would we expect a translation of the title to have the
// capitalisation of word?
// Returns true when the language's metadata sets allowCaps, when the title
// itself starts with a character that changes under toLowerCase(), or when
// word starts with a character that does not.
this.expectedCase = function(lang, title, word) {
	if (metadata[lang] && metadata[lang].allowCaps)
		return true;

	// toLowerCase must be called; comparing the method itself to a string
	// is always unequal, which made this function return true unconditionally.
	var titleInitial = title.charAt(0);
	if (titleInitial.toLowerCase() !== titleInitial)
		return true;

	var wordInitial = word.charAt(0);
	return wordInitial.toLowerCase() === wordInitial;
};

//Returns a string of standard gender letters (mfnc) or an empty string
//(also when there is no metadata for lang or it has no `g` value).
this.getGenders = function(lang) {
	return (metadata[lang] && metadata[lang].g) || "";
};

//Returns a string of standard noun class numbers or an empty string
//(also when there is no metadata for lang or it has no `nclass` value).
this.hasNounClasses = function(lang) {
	return (metadata[lang] && metadata[lang].nclass) || "";
};

//Returns true if the specified lang uses optional vowels or diacritics.
//The value is truthy only when the metadata has `alt` set and the language
//has no entry in diacriticStrippers; it is undefined without metadata.
this.needsRawPageName = function(lang) {
	if (metadata[lang])
		return metadata[lang].alt && !diacriticStrippers[lang];
};

// Applies a character -> replacement table to str, one UTF-16 code unit at a
// time. Characters without an entry are copied unchanged; an entry whose
// replacement is the empty string removes the character.
function applyMap(map, str) {
	var output = "";

	for (var i = 0; i < str.length; i++) {
		var ch = str.charAt(i);
		// Test for the key rather than the truthiness of the value: "" is a
		// valid replacement (it is how `strip` characters are deleted).
		output += Object.prototype.hasOwnProperty.call(map, ch) ? map[ch] : ch;
	}
	return output;
}

// Removes soft hyphens from word and then applies the language's entry in
// orthographicalCorrections, if there is one.
this.applyOrthographicalCorrections = function(lang, word) {
	word = word.replace(/\xAD/g, ''); // remove every soft hyphen, not just the first
	var corrections = orthographicalCorrections[lang];
	if (corrections) {
		var map = corrections.map ? corrections.map : makeMap(corrections);
		return applyMap(map, word);
	}
	return word;
};

// Computes the raw page name by removing diacritics (for Latin, etc.)
// using the language's entry in diacriticStrippers. Returns undefined when
// the language has no such entry.
this.computeRawPageName = function(lang, word) {
	var stripper = diacriticStrippers[lang];
	if (!stripper)
		return;

	var output = applyMap(makeMap(stripper), word);

	// Some languages (strip_init_hyphen) also lose one leading hyphen.
	if (stripper.strip_init_hyphen && output.charAt(0) === '-')
		output = output.slice(1);

	return output;
};

// Calls Module:links's 'remove_diacritics' method on word and rawPageName,
// and invokes callback with two arguments: (1) the *real* raw page name
// (superseding even rawPageName), and (2) whether this real raw page name
// needs to be explicitly specified in the wikitext (i.e., whether the raw page
// name computed from rawPageName is different from the one that would have been
// computed from just word).
// When the API request fails or yields nothing usable, callback receives
// (rawPageName || word) and whether rawPageName is set and differs from word.
this.retrieveRawPageName = function(lang, word, rawPageName, callback) {
	var ents = { '<': 'lt', '&': 'amp', '>': 'gt' };
	var temps = {
		'|': '!',
		'=': '=',
		'{': '(',
		'}': ')'
	};

	// Escapes s for embedding in wikitext: |, =, { and } become the template
	// call named in `temps`, <, & and > become HTML entities, tabs become
	// spaces (the tab is the separator in the request text below).
	// NOTE(review): the replacement used to return '' and left `temps`
	// unused; '{{' + temps[c] + '}}' is the presumed intent -- confirm.
	function encode(s) {
		s = s || '';
		s = s.replace(/[|={}]/g, function(c) {
			return '{{' + temps[c] + '}}';
		});
		s = s.replace(/[<&>]/g, function(c) {
			return '&' + ents[c] + ';';
		});
		s = s.replace(/\t/g, ' ');
		//s = encodeURIComponent(s);
		return s;
	}

	// NOTE(review): this returns an empty string and uses neither s, lang nor
	// encode(); the wikitext that invokes remove_diacritics appears to be
	// missing. It is left untouched because its exact form cannot be
	// determined from this file. Until it is restored, the request text is
	// just the tab separator.
	function removeDiacritics(s) {
		return '';
	}

	// Reports the locally known values when the API gives nothing usable.
	function useFallback() {
		callback(rawPageName || word, !!rawPageName && rawPageName !== word);
	}

	var text = removeDiacritics(word) + '\t' + removeDiacritics(rawPageName || word);
	$.ajax({
		dataType: "json",
		url: '/w/api.php',
		data: {
			format: 'json',
			action: 'expandtemplates',
			prop: 'wikitext',
			text: text
		},
		success: function(data) {
			var wikitext = data && data.expandtemplates && data.expandtemplates.wikitext;
			// An HTML comment in the expansion is treated as a failed expansion.
			if (typeof wikitext !== 'string' || !wikitext || /<!--/.test(wikitext)) {
				useFallback();
				return;
			}
			var parts = wikitext.split('\t');
			var wordMinus = parts[0];
			var rawPageNameMinus = parts[1];
			// An empty page name is never a usable answer.
			if (!wordMinus && !rawPageNameMinus) {
				useFallback();
				return;
			}
			callback(rawPageNameMinus || wordMinus, rawPageNameMinus !== wordMinus);
		},
		error: function() {
			// Without this the callback would never run on a failed request.
			useFallback();
		}
	});
};

//Given user input, return a language code. Normalises ISO 639-1 codes and names to 639-3.
//The input is lower-cased and its first space removed before it is looked up
//in `clean`; input without an entry is returned unchanged.
this.cleanLangCode = function(lang) {
	// toLowerCase must be called: `lang.toLowerCase.replace` is a TypeError.
	// NOTE(review): only the first space is removed, as before; whether the
	// keys of `clean` expect all spaces removed cannot be seen from here.
	var key = lang.toLowerCase().replace(' ', '');
	// Own-property check so input such as "constructor" does not pick up
	// members inherited from Object.prototype.
	if (Object.prototype.hasOwnProperty.call(clean, key) && clean[key])
		return clean[key];
	return lang;
};

// Get the nesting for a given sub-language: its entry in `nesting`, or an
// empty string when the code has none.
this.getNested = function(lang) {
	// Own-property check so a code such as "constructor" does not return a
	// member inherited from Object.prototype.
	if (Object.prototype.hasOwnProperty.call(nesting, lang) && nesting[lang])
		return nesting[lang];
	return "";
};

// Builds the string used to order nested language names. A name that starts
// with a period modifier ("Old English") has the modifier and the following
// space replaced by a one-digit rank ("1English"); any other name is returned
// unchanged.
function temporalSortKey(langname) {
	// "Classical" is matched by the pattern below but had no rank, which
	// produced keys such as "undefinedPersian". It now shares the rank of
	// "Ancient"; the other ranks are unchanged.
	// NOTE(review): the exact position of "Classical" is a judgment call.
	var ranks = {
		Ancient: 0,
		Classical: 0,
		Old: 1,
		Middle: 2,
		"Early Modern": 3,
		Modern: 4
	};
	return langname.replace(/^(Ancient|Classical|Old|Middle|Early Modern|Modern) (.*)/, function(m, modifier, name) {
		return ranks[modifier] + name;
	});
}

// For enforcing an ordering on nested languages: true when the sort key of a
// (see temporalSortKey) is strictly less than the sort key of b.
this.nestsBefore = function(a, b) {
	return temporalSortKey(a) < temporalSortKey(b);
};

this.getScripts = function(lang) { var scripts = (new window.ScriptUtils).GetScriptsByLangCode(lang) || []; return scripts; }; }; //