// User:Erutuon/scripts/OrangeLinks.js

/**
 * General idea: for each bluelink with an anchor, the script fetches the categories for its target page
 * and checks whether they contain a part-of-speech category. If a suitable category is found, the script
 * assumes the anchor is valid. If not, the link is coloured orange.
 *
 * Previous version by User:Yair rand, based in turn on an idea by User:Hippietrail.
 * This script is a complete rewrite.
 */

//
/* jshint esversion: 5, loopfunc: true, latedef: true, scripturl: true, undef: true, unused: true */
/* globals mw, jQuery */

(function { 'use strict';

// Disable orange links on a page with a HTML tag containing id="no-orange-links". if (document.querySelector("#no-orange-links")) return;

var api = new mw.Api();
// The two maps are keyed by page title, and a title can coincide with an
// Object.prototype member such as "constructor"; prototype-less objects make
// sure a lookup only ever finds entries this script has stored.
var fresh = [], // list of titles to get categories of
	queue = Object.create(null), // map from title to list of links with that title
	catcache = Object.create(null); // map from page name to list of categories

// Matches a URL path that has the shape of wgArticlePath and captures, in
// group 1, whatever stands in the position of the "$1" placeholder.
var articlePathRx = new RegExp('^'
	+ mw.util.escapeRegExp(mw.config.get('wgArticlePath')).replace('\\$1', '(.+)')
	+ '$'); // hax

// Debug output is on only when mw.util.getParamValue('debug') yields a truthy
// value and window.console exists.
var isDebug = mw.util.getParamValue('debug') && window.console;
var debugLogger = {};
// add methods that call the console method if "isDebug" is truthy, and do
// nothing otherwise
[ "error", "info" ].forEach(function (key) {
	debugLogger[key] = isDebug
		? window.console[key].bind(console)
		: function () {};
});

// URL paths that are treated as index.php, where the page name comes from the
// "title" query parameter.
// see mw:Manual:index.php
// alternative: /^\/(?:w(?:\/(?:index\.php)?)?)?$/
var indexPhp = [ '/', '/w', '/w/', '/w/index.php' ];

/**
 * Get the page name that a URL refers to.
 *
 * @param {mw.Uri} url
 * @return {string|undefined} Group 1 of articlePathRx when the path matches
 *  it; otherwise the "title" query parameter when the path is one of the
 *  index.php paths; otherwise undefined.
 * @throws {TypeError} If url is not an mw.Uri.
 */
function getPageName(url) {
	if (!(url instanceof mw.Uri))
		throw new TypeError("Expected mw.Uri");
	var match = articlePathRx.exec(url.path);
	return match
		? match[1]
		// if path is index.php, page name is value of "title" parameter
		: indexPhp.indexOf(url.path) !== -1
			? url.query.title
			: undefined;
}

// These lists are not as normative. Common headers are included even when they
// are not officially recommended.
var POSHeaders = [
	'Abbreviation', 'Acronym', 'Adjective', 'Adjectival noun', 'Adnominal',
	'Adverb', 'Ambiposition', 'Article', 'Brivla', 'Circumfix',
	'Circumposition', 'Classifier', 'Combining form', 'Conjunction',
	'Contraction', 'Counter', 'Determiner', 'Diacritical mark', 'Gerund',
	'Gismu', 'Han character', 'Hanja', 'Hanzi', 'Ideophone', 'Idiom', 'Infix',
	'Initialism', 'Interfix', 'Interjection', 'Kanji', 'Letter', 'Ligature',
	'Logogram', 'Noun', 'Number', 'Numeral', 'Ordinal number', 'Participle',
	'Particle', 'Phrase', 'Postposition', 'Predicative', 'Prefix',
	'Preposition', 'Prepositional phrase', 'Pronoun', 'Proper noun', 'Proverb',
	'Punctuation mark', 'Rafsi', 'Relative', 'Romanization', 'Root', 'Stem',
	'Suffix', 'Syllable', 'Symbol', 'Transliteration', 'Verb', 'Verbal noun',
];

var otherHeaders = [
	'Abbreviations', 'Alternative forms', 'Alternative scripts', 'Anagrams',
	'Antonyms', 'Citations', 'Compounds', 'Conjugation', 'Coordinate terms',
	'Declension', 'Definitions', 'Derived characters', 'Derived terms',
	'Descendants', 'Etymology', 'External links', 'Forms', 'Further reading',
	'Gallery', 'Glyph origin', 'Holonyms', 'Homophones', 'Hypernyms',
	'Hyponyms', 'Idioms', 'Inflection', 'Meronyms', 'Nouns and adjectives',
	'Paronyms', 'Production', 'Pronunciation', 'Proverbs', 'Quotations',
	'Readings', 'References', 'Related characters', 'Related terms',
	'See also', 'Statistics', 'Stem set', 'Synonyms', 'Translations',
	'Troponyms', 'Usage notes', 'Verbal stems', 'Verbs',
	'Verbs and verb derivatives', 'Verbs and verbal derivatives',
];

// Matches, ignoring case, any string that *begins* with one of the headers
// above; there is no end anchor, so "Etymology 2" matches as well.
var knownNonLanguageHeader = new RegExp('^(' + otherHeaders.join('|') + '|' + POSHeaders.join('|') + ')', 'i');

// Names that contain a hyphen followed by a lowercase letter. They are listed
// first in the alternation below so that their own hyphenated tail is not
// taken for a suffix.
var hyphenLowercaseLanguageNames = "Wong-gie|Kamkata-viri|Yan-nhangu|Kinaray-a|Nyanga-li|Koresh-e Rostam|Pai-lang|Muak Sa-aak|Kua-nsi";

// Group 1 is either one of the names above or the shortest leading text that
// is followed by an optional "-" + Basic Latin lowercase letter + anything.
var senseidSuffixRegex = new RegExp("^(" + hyphenLowercaseLanguageNames + "|.+?)(?:-[a-z].*)?$");

/**
 * Decide whether a header could be a language header.
 *
 * header is not language header if it does not begin with a character that
 * occurs at the beginning of a language name (refer to
 * Special:PermaLink/51354043) and is not a recognized non-language header
 *
 * @param {string} [header]
 * @return {boolean|*} The falsy header itself when header is falsy;
 *  otherwise true or false.
 */
function maybeLanguageHeader(header) {
	return header
		&& /^[A-Z'ÀÁÖǀǁǂǃ]/.test(header)
		&& !knownNonLanguageHeader.test(header);
}

/**
 * get page title if link points to another page on this wiki
 *
 * @param {HTMLAnchorElement} link
 * @return {mw.Title|undefined} Title built from the page name plus "#" plus
 *  the URL fragment, or undefined when the link is skipped.
 * @throws {TypeError} If link is not an HTMLAnchorElement.
 */
function getTitle(link) {
	if (!(link instanceof HTMLAnchorElement))
		throw new TypeError("Expected HTMLAnchorElement");
	// XXX: is there no other way to skip TabbedLanguages tabs?
	var parentClasses = link.parentNode.classList;
	if (parentClasses.contains("unselectedTab")
	|| parentClasses.contains("selectedTab"))
		return;
	// skip empty hrefs, script URLs, local links
	var rawHref = link.getAttribute('href');
	if (!(rawHref && rawHref.indexOf("javascript:") !== 0 && rawHref[0] !== "#"))
		return;
	var url = new mw.Uri(link.href);
	// check that the domain is correct
	if (url.getAuthority() !== location.hostname)
		return;
	var pageName = getPageName(url);
	// ".XX" hex pairs in the fragment are rewritten to "%XX" so that
	// decodeURIComponent decodes them together with the page name.
	// NOTE(review): url.fragment is presumably undefined for a link without a
	// fragment, in which case .replace throws and the caller's try/catch
	// handles it -- confirm against mw.Uri.
	return pageName
		? new mw.Title(decodeURIComponent(pageName + '#'
			+ url.fragment.replace(/\.([0-9A-Fa-f][0-9A-Fa-f])/g, '%$1')))
		: undefined;
}

/**
 * Examine one link. If the categories of its target page are already cached,
 * add the class "partlynew" when no category matches the language named in
 * the link's fragment; otherwise record the link in the queue so that it is
 * handled once the categories have been fetched.
 *
 * Any exception is caught and reported through debugLogger.error.
 *
 * XXX: save resources by caching something or other between calls to
 * processLink on same link?
 *
 * @param {HTMLAnchorElement} link
 */
function processLink(link) {
	try {
		// Already evaluated for this exact href; nothing more to do.
		if (link._ORANGED === link.href) // XXX: eliminate this hack
			return;

		var title = getTitle(link);
		// check that title is in main namespace and that the fragment could be
		// a language name
		if (!(title && title.getNamespaceId() === 0 && maybeLanguageHeader(title.fragment)))
			return;
		var prefixedText = title.getPrefixedText();
		var categories = catcache[prefixedText];
		if (categories) {
			link._ORANGED = link.href; // XXX: hack
			// assumes sense ids start with Basic Latin lowercase letter and
			// that language names do not contain hyphen followed by Basic Latin
			// lowercase letter, which is not always true:
			// search for ': hastemplate:"senseid" insource:/senseid\|[^\|]+\|[^a-z]/'
			// with single quotes removed and see Special:PermaLink/51356029
			var langName = title.fragment.replace(senseidSuffixRegex, '$1');
			// XXX: discount "German Low German", etc. but allow
			// "Vietnamese Han tu" and "Vietnamese Nom" to count as an
			// existing vi entry
			// XXX: use /^ (lemmas|non-lemma forms)$/ instead?
			if (!categories.some(function (category) {
				return category.indexOf(langName) === 0
					&& /^ ([a-z]|Han tu|Nom)/.test(category.substring(langName.length));
			})) {
				link.classList.add('partlynew');
				debugLogger.info('partlynew ', link, '; cats=', categories,
					' frag=', title.fragment, ' langName=', langName);
			}
		} else {
			// Not cached yet: remember the link under its title, and list the
			// title as fresh the first time it is seen.
			var entry = queue[prefixedText];
			if (!entry) {
				entry = queue[prefixedText] = [];
				fresh.push(prefixedText);
			}
			entry.push(link);
		}
	} catch (e) {
		debugLogger.error(e, 'while processing', link, ' href=', link.href);
	}
}

/**
 * Fetch the categories of every title currently listed in "fresh", in
 * batches of at most 25 titles, store them in "catcache", and then run
 * processLink on every queued link of each fetched title.
 *
 * @param {Object} queue Map from title to the list of links with that title.
 */
function processQueue(queue) {
	/**
	 * Request the categories of the given titles and store them in catcache.
	 *
	 * @param {string[]} titles
	 * @return {jQuery.Deferred} Resolved with titles once every continuation
	 *  of the request has been stored; rejected with the arguments of the
	 *  failed API call.
	 */
	function collect(titles) {
		return jQuery.Deferred(function (d) {
			var query = {
				'action': 'query',
				'titles': titles.join('|'),
				'redirects': 1,
				'prop': 'categories',
				'cllimit': 100,
			};
			function fail(code, details, xhr) {
				d.reject(code, details, xhr);
			}
			function pluckResults(result) {
				debugLogger.info('result', result);
				var pages = result.query.pages, redirects = result.query.redirects;
				for (var pageid in pages) {
					var title = new mw.Title(pages[pageid].title).getPrefixedText();
					var categories = pages[pageid].categories || [];
					// A page can show up in several continuation responses,
					// so append to an existing list instead of replacing it.
					var cache = catcache[title];
					if (!cache)
						cache = catcache[title] = [];
					// strip the namespace prefix: 'Category:'.length === 9
					for (var i = 0; i < categories.length; ++i)
						cache.push(categories[i].title.substring(9));

					debugLogger.info('cache for', title, 'is', cache);
				}
				if (redirects) {
					// Make the redirecting title share its target's list.
					redirects.forEach(function (redirect) {
						var origin = new mw.Title(redirect.from).getPrefixedText();
						var target = new mw.Title(redirect.to).getPrefixedText();
						catcache[origin] = catcache[target];
					});
				}
				if (result['continue']) {
					// Repeat the same query with the continuation parameters
					// merged in.
					api.get(jQuery.extend(query, result['continue'])).then(pluckResults, fail);
				} else
					d.resolve(titles);
			}

			api.get(query).then(pluckResults, fail);
		});
	}
	while (fresh.length > 0) {
		collect(fresh.splice(0, 25)).then(function (titles) {
			titles.forEach(function (title) {
				queue[title].forEach(processLink);
			});
		}, function (code, details, xhr) {
			console.error(code, details, xhr);
		});
	}
	fresh = [];
}

Array.prototype.forEach.call(document.getElementsByTagName('a'), processLink); processQueue(queue);

if (mw.util.getParamValue('@orangelinks.no_live')	return;
 * typeof window.MutationObserver !== "function")

var mo = new MutationObserver(function (events) {	for (var i = 0; i < events.length; ++i) {		var event = events[i];		if (event.type === 'childList') {			event.addedNodes.forEach(function (addedNode) { if (addedNode instanceof Element) { if (addedNode.tagName === 'A') processLink(addedNode); else Array.prototype.forEach.call(							addedNode.getElementsByTagName('a'), processLink); }			});		} else if (event.type === 'attributes' && event.target.tagName === 'A'		&& (event.attributeName === 'href' || event.attributeName === 'class'))			processLink(event.target);	}	processQueue(queue); });

mo.observe(document, {	attributes: true,	childList: true,	subtree: true });

});

//