// User:Erutuon/scripts/cleanup.js

/*
 * My set of wikitext cleanup buttons that perform tedious editing tasks.
 * The buttons are inserted above the textbox when certain conditions are
 * fulfilled, using User:Erutuon/scripts/CleanupButtons.js.
 */

/* jshint boss: true, esversion: 6, eqeqeq: true, varstmt: true, unused: true, undef: true */ /* globals $, CleanupButtons, mw */ //

if ( [ "edit", "submit" ].includes(mw.config.get("wgAction"))		&& mw.config.get("wgPageContentModel") === "wikitext"		// Not in edit conflict view.		&& !document.querySelector(".mw-twocolconflict-changes-col")) { $.when(	$.getScript("//en.wiktionary.org/w/index.php?title=User:Erutuon/scripts/CleanupButtons.js&action=raw&ctype=text/javascript"),	$.ready ).done(function { "use strict";

// Facts about the current page, read once from the MediaWiki configuration.
const namespaceNumber = mw.config.get("wgNamespaceNumber");
// True in namespace 0 or 118 (presumably the namespaces that hold
// dictionary entries on this wiki — confirm).
const entryspace = [ 0, 118 ].includes(namespaceNumber);
// True in namespace 10, which the rest of this script treats as templates.
const isTemplate = namespaceNumber === 10;
const pageName = mw.config.get("wgPageName");
const categories = mw.config.get("wgCategories");

// Regular expression for Han (Chinese) characters, based on Module:Unicode data/scripts.
// JSHint still doesn't understand Unicode-flagged regular expressions or
// Unicode codepoint escapes, so astral characters are matched as explicit
// surrogate pairs.
// The declaration must stay on its own line: when it shared a line with the
// comment above, the constant was commented out and never defined.
const HaniRegex = /(?:[\u2E80-\u303F\u337B-\u337F\u3400-\u9FFF]|[\uD840-\uD879][\uDC00-\uDFFF]|\uD87A[\uDC00-\uDFE0])/;

// Part-of-speech header names, one per line. Several names contain a space
// ("Proper noun", "Verbal noun", ...), so the separator has to be a newline:
// it is turned into "|" when the regular expression below is built.
const POSHeaders = [
	"Abbreviation",
	"Acronym",
	"Adjectival noun",
	"Adjective",
	"Adnominal",
	"Adverb",
	"Affix",
	"Article",
	"Circumfix",
	"Classifier",
	"Combining form",
	"Conjugation",
	"Conjunction",
	"Contraction",
	"Counter",
	"Declension",
	"Definitions",
	"Determiner",
	"Diacritical mark",
	"Gerund",
	"Hanja",
	"Hanzi",
	"Idiom",
	"Infix",
	"Initialism",
	"Interfix",
	"Interjection",
	"Kanji",
	"Letter",
	"Ligature",
	"Logogram",
	"Noun",
	"Number",
	"Numeral",
	"Ordinal number",
	"Participle",
	"Particle",
	"Phrase",
	"Postposition",
	"Predicative",
	"Prefix",
	"Preposition",
	"Prepositional phrase",
	"Pronoun",
	"Pronunciation",
	"Proper noun",
	"Proverb",
	"Punctuation mark",
	"Relative",
	"Romanization",
	"Root",
	"Stem",
	"Suffix",
	"Syllable",
	"Symbol",
	"Verb",
	"Verbal noun",
].join("\n");

// Matches the tail of a part-of-speech header, the single line after it
// (the headword template) and the blank lines before the first "#" line.
// Group 1: header name plus closing equals signs; group 2: headword line.
// Backslashes are doubled because this is a string, not a regex literal
// (a bare "\s" in a string is just "s"). The "g" flag makes every
// part-of-speech section get fixed, not only the first one.
const POSHeaderAndTemplate = new RegExp(
	"((?:"
	+ POSHeaders.replace(/\n/g, "|")
	+ ")\\s*=+)[ \\t]*\\n+([^\\n]+)\\n+(?=#)",
	"g"
);

/**
 * Normalizes the whitespace and header formatting of an entry's wikitext.
 *
 * @param {string} entry - Wikitext of the page.
 * @returns {string} The normalized wikitext.
 */
function normalize(entry) {
	return entry
		// Tabs become single spaces.
		.replace(/\t/g, " ")
		// A space after list markers (";", ":", "#", "*") at line start.
		.replace(/^([;:#*]+)(?=[^;:#*\s])/gm, "$1 ")
		// Exactly one blank line before every header...
		.replace(/(?:\n *)*\n==/g, "\n\n==")
		// ...except the first header when text precedes it.
		.replace(/^((?!=)[^\n]*(?:\n(?!=)[^\n]*?)*?)\n+==/, "$1\n==")
		// No blank line between a header and the content below it.
		.replace(/==\n\n+(?!=)/g, "==\n")
		// Single newline between PoS header and headword template;
		// two newlines after headword template.
		.replace(POSHeaderAndTemplate, "$1\n$2\n\n")
		// NOTE(review): the two patterns below were reconstructed. The source
		// had "\n*+" and "\n++", which are syntax errors in JavaScript; the
		// wikitext horizontal rule "----" appears to have been stripped out.
		// Remove a rule at the very start or very end of the page.
		.replace(/^\n*----+\n*|\n+----+\s*$/g, "")
		// One blank line on each side of the remaining rules.
		.replace(/\n+----+\s+/g, "\n\n----\n\n")
		// No spaces at the start or end of a line.
		.replace(/\n +| +\n/g, "\n")
		// No spaces between the equals signs and the header text.
		.replace(/(^|\n)(=+) *([^\n]+?) *\2(\n|$)/g, "$1$2$3$2$4");
}

// Expose normalize as a global so that it can be reached from outside this
// closure.
window.normalize = normalize;

/*
 * Descriptions of the cleanup buttons. Each object is handed to
 * CleanupButtons: `condition` and `textBoxIncludes` are passed to
 * CleanupButtons.evaluateConditions, and the whole object to addButton.
 *   condition        boolean, or function of the textbox content
 *   textBoxIncludes  string or regular expression
 *   button.text      label of the button
 *   minorEdit        boolean
 *   func             function (content) returning the new content
 *
 * NOTE(review): this file passed through a wikitext renderer that removed
 * empty "()" pairs, wikilinks, templates and HTML tags from the code.
 * Call parentheses and strings that the surrounding code determines have
 * been restored; places whose original text cannot be determined are left
 * unchanged and marked NOTE(review).
 */
const cleanupFunctions = [
	// Template for adding new rules
	{
		condition: false,
		textBoxIncludes: "",
		button: {
			text: ""
		},
		minorEdit: false,
		func: function (content) {
			const oldContent = content;
			let count = 0;
			// Placeholder: the next line is a comment; replace it with the
			// regular expression argument (for instance /foo/g,).
			content = content.replace(
				//g,
				function (wholeMatch) {
					++count;
					return wholeMatch;
				}
			);
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "");
			CleanupButtons.addSummary(content !== oldContent, "");
			return content;
		}
	},
	{
		condition: entryspace,
		textBoxIncludes: /[\u0080-\u009F]/,
		button: {
			text: "C1 controls"
		},
		minorEdit: true,
		func: function (content) {
			const oldContent = content;
			// A C1 control next to the dash it stands for: keep only the dash.
			// "$1$2" is needed because either group can be the one that matched;
			// "$1" alone deleted the dash when it preceded the control.
			// NOTE(review): "&ndash;" restored by analogy with "&mdash;" below;
			// the source showed the en dash twice.
			content = content.replace(/\u0096(&ndash;|–)|(&ndash;|–)\u0096/g, "$1$2");
			content = content.replace(/\u0097(&mdash;|—)|(&mdash;|—)\u0097/g, "$1$2");
			content = content.replace(/\u0092/g, "’");
			content = content.replace(/\u0096/g, "–");
			content = content.replace(/\u0097/g, "—");
			content = content.replace(/[\u0080-\u009F]/g, "");
			CleanupButtons.addSummary(
				content !== oldContent,
				"removed or replaced meaningless characters (C1 controls, U+0080-U+009F)"
			);
			return content;
		}
	},
	{
		condition: function (entry) {
			return entryspace && normalize(entry) !== entry;
		},
		button: {
			text: "normalize"
		},
		minorEdit: true,
		func: function (content) {
			const oldContent = content;
			content = normalize(content);
			CleanupButtons.addSummary(content !== oldContent, "normalize");
			return content;
		}
	},
	{
		textBoxIncludes: /\[https?:\/\/en\.wikipedia\.org\/wiki\//,
		button: {
			text: "fix Wikipedia links"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ ]+) ([^\]]+)\]/g,
				function (wholeMatch, fullPageName, linkText) {
					++count;
					fullPageName = fullPageName.replace(/_/g, ' ');
					linkText = linkText.replace(/_/g, ' ');
					// NOTE(review): the "[[w:...]]" wrappers were stripped from
					// the source (it returned an empty string / the bare text);
					// restored as interwiki links.
					if (fullPageName === linkText)
						return `[[w:${fullPageName}]]`;
					else
						return `[[w:${fullPageName}|${linkText}]]`;
				}
			);
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "fixed Wikipedia links");
			return content;
		}
	},
	{
		condition: pageName === "Appendix:French_doublets",
		button: {
			text: "templatize doublet tables"
		},
		minorEdit: false,
		func: function (content) {
			const oldContent = content;
			// Escape refs, which contain links that shouldn't be modified.
			const escaped = [];
			let i = 0;
			// Replaces every match of regexString with a "%%n%%" marker and
			// remembers the matched text in escaped[n].
			const escape = function (text, regexString) {
				const regex = new RegExp(regexString, "g");
				text = text.replace(
					regex,
					function (match) {
						escaped[i] = match;
						let replacement = "%%" + i + "%%";
						i += 1;
						return replacement;
					}
				);
				return text;
			};
			// NOTE(review): the <ref> tags were stripped from this pattern
			// (the source had "]*>[^<]+"); restored from the comment above.
			content = escape(content, "<ref[^>]*>[^<]+</ref>");
			// Rows and cells
			content = content.replace(/\n\|-/g, "\n");
			content = content.replace(/\n! |\n\| | ?\|\| –?/g, "|");
			// Link templates
			content = content.replace(/(\{\{(?:l|m)[^}]+\}\}), ([\w, ]+)/g, "$1 ($2)");
			content = content.replace(
				/\{\{(?:l|m)\|[a-z-]+\|([^}]+)}}/g,
				function (wholeMatch, link) {
					// NOTE(review): the strings around `link` look stripped
					// (possibly "[[" and "]]"); left as found.
					if ( link.includes("|") )
						return "" + link + "";
					else
						return link;
				});
			// Table to template
			content = content.replace(
				/\n\{\|[^|\n]+(.+)\n(.+)/g,
				function (wholeMatch, headers, firstLine) {
					if ( firstLine === "" )
						mw.notify("Malformed table: " + wholeMatch);
					// Number of "|" characters in the first line.
					const cols = firstLine.replace(/[^|]+/g, "").length;
					return "\n{{/table|cols=" + cols + "\n" + headers + "\n" + firstLine;
				});
			content = content.replace(/\n\|\}/g, "\n}}");
			// NOTE(review): as written this replacement changes nothing;
			// markup (possibly '' italics) may have been stripped from the
			// pattern. Left as found.
			content = content.replace(/\(([^']+)\)/g, "($1)");
			// Unescape
			content = content.replace(
				/%%(\d+)%%/g,
				function (wholematch, number) {
					return escaped[Number(number)];
				});
			CleanupButtons.addSummary(content !== oldContent, "templatized tables");
			return content;
		}
	},
	{
		textBoxIncludes: /{{grc-[^}]+head=/,
		button: {
			text: "update grc headword"
		},
		minorEdit: true,
		func: function (content) {
			const oldContent = content;
			// Move the value of |head= to the first positional parameter.
			content = content.replace(
				/\{\{(grc-[^|}]+)([^}]*)\|head=([^|}]+)/g,
				"{{$1|$3$2"
			);
			CleanupButtons.addSummary(content !== oldContent, "updated parameters of an Ancient Greek headword template");
			return content;
		}
	},
	{
		textBoxIncludes: /'[ἀ-῾]|[ἀ-῾]'/,
		button: {
			text: "curly apostrophe in Ancient Greek"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			const letter = "[Α-Ωα-ω]";
			// macron, breve, rough breathing, smooth breathing,
			// diaeresis, acute, grave, circumflex
			const diacritic = "[\u0304\u0306\u0314\u0313\u0308\u0301\u0300\u0342]";
			const apostrophe = "['᾿ʼ᾽]";
			// Actually letter plus any sequence of diacritics plus apostrophe
			// followed by spacing character or end of string.
			const letterPlusApostrophe = new RegExp ("(" + letter + diacritic + "*)" + apostrophe + "(?=\\s|$)", "g");
			// Actually spacing character or pipe plus apostrophe
			// followed by letter.
			const apostrophePlusLetter = new RegExp ("(\\s|\\|)" + apostrophe + "(?=" + letter + ")", "g");
			const curlyApostrophe = "’";
			content = content.replace(
				/\{\{Q[^}]+\}\}/g,
				function (wholeMatch) {
					return wholeMatch.replace(
						/quote=[^|}]+/g,
						function (wholeMatch) {
							// Decompose so that diacritics are separate code
							// points, replace, then recompose.
							return wholeMatch
								.normalize("NFD")
								.replace(
									letterPlusApostrophe,
									function (wholeMatch, letter) {
										++count;
										return letter + curlyApostrophe;
									})
								.replace(
									apostrophePlusLetter,
									function (wholeMatch, before) {
										++count;
										return before + curlyApostrophe;
									})
								.normalize("NFC");
						});
				});
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "enforced curly apostrophe");
			return content;
		}
	},
	{
		condition: function (content) {
			return content.includes("==Ancient Greek")
				&& (content.includes('==Noun') || content.includes('==Proper noun'));
		},
		button: {
			text: "update declension"
		},
		minorEdit: false,
		func: function (content) {
			const oldContent = content;
			content = content.replace(
				/Inflection(?===)/g,
				"Declension"
			);
			content = content.replace(
				/References(?===)/g,
				"Further reading"
			);
			// First parameter of the headword template, used as the genitive.
			let genitive = content.match(/\{\{grc-(?:noun|proper noun)\|([^|]+)/);
			if (genitive !== null)
				genitive = genitive[1];
			content = content.replace(
				/\{\{grc-decl[^|]+(\|[^}]+)\}\}/g,
				function (wholeMatch, templateContent) {
					const out = [];
					// Only the |form= parameter is carried over.
					templateContent.replace(
						/\|(?:([^=|]+)=)?([^|]+)/g,
						function (wholeMatch, key, value) {
							if (key === "form")
								out.push(`|${key}=${value}`);
						});
					out.push(`|${pageName}${genitive ? "|" + genitive : ""}`);
					// NOTE(review): the source returned an empty string here,
					// which would delete the table and leave `out` unused; the
					// template call was presumably stripped. The template name
					// "grc-decl" is an assumption — confirm.
					return `{{grc-decl${out.join("")}}}`;
				});
			CleanupButtons.addSummary(content !== oldContent, "updated declension table and headers");
			return content;
		}
	},
	{
		condition: false,
		textBoxIncludes: /0x[0-9a-f]+/,
		button: {
			text: "uppercase hexadecimal"
		},
		minorEdit: false,
		func: function (content) {
			return content.replace(
				/0x([0-9a-f]+)/g,
				function (wholeMatch, digits) {
					// toUpperCase must be called; interpolating the bare
					// method inserted the function's source text.
					return `0x${digits.toUpperCase()}`;
				});
		}
	},
	{
		condition: function (content) {
			if (entryspace && categories.includes("Japanese lemmas")) {
				// NOTE(review): the first alternative was garbled in the
				// source ("\[\^\+\]\]"); restored to match the wikilink
				// pattern used in func below.
				const links = content.match(/(?:\[\[[^\]]+\]\]|\{\{(?:ja-l|l\|ja)\|[^}]+\}\})/g);
				if ( links !== null && HaniRegex.test(links.join()) )
					return true;
			}
			return false;
		},
		button: {
			text: "auto-add ruby"
		},
		minorEdit: false,
		func: function (content) {
			let count1 = 0, count2 = 0;
			content = content.replace(
				/(?:\[\[([^\]]+)\]\]|\{\{(?:ja-l|l\|ja)\|([^}]+)\}\})/g,
				function (wholeMatch, entry1, entry2) {
					let entry = entry1 || entry2;
					if (HaniRegex.test(entry)) {
						if (entry.includes("|")) {
							let firstTwoParams = entry.match(/^([^|]+)\|([^|]+)/);
							if (firstTwoParams) {
								++count2;
								// NOTE(review): replacement text appears to
								// have been stripped from the source (probably
								// a {{ja-r}} call built from firstTwoParams);
								// left as found.
								return "";
							} else
								return wholeMatch;
						}
						++count1;
						// NOTE(review): replacement text appears truncated in
						// the source; left as found.
						return "r";
					}
					else
						return wholeMatch;
				}
			);
			CleanupButtons.notifyReplacements(count1 + count2);
			CleanupButtons.addSummary(count1, "auto-added ruby using Module:User:Suzukaze-c/02");
			CleanupButtons.addSummary(count2, "replaced T:ja-l with T:ja-r");
			return content;
		}
	},
	{
		// null when there is no ja-readings template, otherwise whether the
		// template contains a wikilink.
		condition: function (content) {
			const jaReadingsTemplate = content.match(/\{\{ja-readings[^}]+\}\}/);
			if (jaReadingsTemplate === null)
				return null;
			if (jaReadingsTemplate[0].match(/\[\[/))
				return true;
			else
				return false;
		},
		button: {
			// NOTE(review): a template name seems to have been stripped from
			// the end of this label; left as found.
			text: "update "
		},
		minorEdit: false,
		func: function (content) {
			const oldContent = content;
			content = content.replace(
				/(?:\*\s+)?\{\{ja-readings/g,
				"{{subst:#invoke:User:Suzukaze-c/02|test_13"
			);
			CleanupButtons.addSummary(content !== oldContent, "updated T:ja-readings using Module:User:Suzukaze-c/02");
			return content;
		}
	},
	{
		textBoxIncludes: /Ancient Greek (?:(?:proper )?noun|adjective|verb) forms/,
		button: {
			text: "cleanup"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/(=== ?((?:Proper )?[Nn]oun|Adjective|Verb) ?===\n)(.+)/g,
				function (wholeMatch, header, POS, headword) {
					++count;
					let head = "";
					// NOTE(review): `head`, `POS` and the string appended to
					// `header` look like the remains of a stripped headword
					// template; the strings are left as found, so the headword
					// line is currently dropped.
					if ( headword.normalize("NFD").match(/[αιυ]/) )
						head = "|head=grc";
					POS = POS.toLowerCase();
					return header + "";
				});
			content = content.replace(
				/\s*\[\[Category:Ancient Greek (?:(?:proper )?noun|adjective|verb) forms(?:\|[^\]]+)?\]\]\s*/g,
				function () {
					++count;
					return "\n\n";
				});
			content = content.replace(
				/(?:# .+)+/g,
				function (wholeMatch) {
					++count;
					return wholeMatch.replace(
						/(inflection of(?:\|lang=grc)?\|)([^|]+)/g,
						function (wholeMatch, before, word) {
							if ( wholeMatch.normalize("NFD").match(/[αιυ]/) ) {
								// NOTE(review): the assigned string looks
								// stripped; left as found.
								word = "grc";
								return before + word;
							} else
								return wholeMatch;
						});
				});
			content = content.replace(
				/===Alternative forms===(?:\n.+\n*)+?(?====)/g,
				function (wholeMatch) {
					wholeMatch = wholeMatch.replace(
						/\{\{l\|/g,
						function () {
							++count;
							return "{{alter|";
						})
					.replace(
						/(\{\{alter\|grc)((?:\|[^|}]+)+)(?=\|\||\})/g,
						function (wholeMatch, before, terms) {
							if ( terms.normalize("NFD").match(/[αιυ]/) ) {
								terms = terms.replace(/\|/g, "!");
								// NOTE(review): the assigned string looks
								// stripped; left as found.
								terms = "grc";
								return before + terms;
							} else
								return wholeMatch;
						});
					return wholeMatch;
				});
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "cleaned up entry linked from User:DTLHS/cleanup/lemma categorization");
			return content;
		}
	},
	{
		// "(?:" was written "(:?", which made the group capturing and
		// allowed a stray colon.
		textBoxIncludes: /Category:(?:Vietnamese|Chinese|vi|zh)/,
		button: {
			text: "templatize categories"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			const getLangCode = function (langName) {
				return { Chinese: "zh", Vietnamese: "vi" }[langName];
			};
			// NOTE(review): the three replacement strings below were empty
			// in the source, which would delete the categories. They are
			// restored as {{cln}} / {{C}} calls, the template shapes that the
			// merging loop further down expects — confirm.
			content = content.replace(
				/\[\[Category:(Vietnamese|Chinese) ([^\]|]+)(?:\|[^\]]+)?\]\]/g,
				function (wholeMatch, langName, cat) {
					++count;
					const langCode = getLangCode(langName);
					return `{{cln|${langCode}|${cat}}}`;
				})
			.replace(
				/\[\[Category:(vi|zh):([^|\]]+)(?:\|[^\]]+)?\]\]/g,
				function (wholeMatch, langCode, cat) {
					++count;
					return `{{C|${langCode}|${cat}}}`;
				});
			// Merges two adjacent calls of the same template and language;
			// cat1 and cat2 each start with "|".
			const combineCats = function (wholeMatch, template, langCode, cat1, cat2) {
				++count;
				return `{{${template}|${langCode}${cat1}${cat2}}}`;
			};
			// Repeat until no adjacent pair is left; the equality check
			// guards against looping forever if a pass changes nothing.
			while ( content.match(/\{\{(C|cln)\|(vi|zh)((?:\|[^}\n]+)+)\}\}\s+\{\{\1\|\2((?:\|[^|}\n]+)+)\}\}/) ) {
				const oldContent = content;
				content = content.replace(
					/\{\{(C|cln)\|(vi|zh)((?:\|[^}\n]+)+)\}\}\s+\{\{\1\|\2((?:\|[^}\n]+)+)\}\}/g,
					combineCats
				);
				if ( oldContent === content )
					break;
			}
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "templatized categories to generate proper sortkey");
			return content;
		}
	},
	{
		// NOTE(review): the bracket alternative was garbled in the source
		// ("\^\+\]"); restored as "[...]" on the pattern of the wikilink
		// regex used by the ruby button. The template name in the label and
		// in the replacement is taken from the edit summary.
		textBoxIncludes: /(\/[^\/]+\/|\[[^\]]+\])(?=[\s\.\,\:\;\)])/,
		button: {
			text: "add {{IPAchar}}"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/(?:\/[^\/]+\/|\[[^\]]+\])(?=[\s\.\,\:\;\)])/g,
				function (wholematch) {
					++count;
					return "{{IPAchar|" + wholematch + "}}";
				});
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "added T:IPAchar");
			return content;
		}
	},
	{
		// NOTE(review): an opening tag (probably <nowiki>) seems to have
		// been stripped before "{{" here and in the pattern below; left as
		// found.
		textBoxIncludes: " {{",
		button: {
			text: "add {{temp}}"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/ \{\{((?:[^{}\n]+|\{\{[^}\n]+\}\})+\}\})<\/nowiki>/g,
				function (wholeMatch, match1) {
					++count;
					// match1 already ends with the closing "}}".
					return "{{temp|" + match1;
				}
			);
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "added T:temp");
			return content;
		}
	},
	{
		textBoxIncludes: /<\/?tt>/,
		button: {
			text: "replace &lt;tt&gt; with &lt;code&gt;"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/<(\/?)tt>/g,
				function (wholematch, closing) {
					++count;
					return `<${closing}code>`;
				}
			);
			CleanupButtons.notifyReplacements(count);
			// NOTE(review): the tag names were stripped from this summary and
			// the end of this entry had run into the comment that follows;
			// both restored.
			CleanupButtons.addSummary(count, "replaced deprecated <tt> tag with <code>");
			return content;
		}
	},
	/*
		Replace <code>|1=</code> with {{para|1}},
		and <tt> with <code>.
	*/
	{
		textBoxIncludes: /<(code|tt)\>\|?[^=\n<]+=[^<]*<\/(code|tt)\>/,
		button: {
			text: "add {{para}}"
		},
		minorEdit: true,
		func: function (content) {
			let count1 = 0, count2 = 0, count3 = 0, count4 = 0;
			content = content.replace(
				/<(?:code|tt)\>\|?([^=\n<]+)=<\/(?:code|tt)\>/g,
				function (wholematch, match1) {
					++count1;
					return `{{para|${match1}}}`;
				})
			.replace(
				/<(?:code|tt)\>\|?([^=\n<]+)=([^<|]+)<\/(?:code|tt)\>/g,
				function (wholematch, match1, match2) {
					++count1;
					return `{{para|${match1}|${match2}}}`;
				})
			// "* {{para|...}} - text" becomes a definition list item.
			.replace(
				/\* *(\{\{para[^}]+\}\}) *- *(.+)/g,
				function (wholematch, match1, match2) {
					++count2;
					return `; ${match1}\n: ${match2}`;
				})
			.replace(
				/<(\/?)tt\>/g,
				function (wholematch, match1) {
					++count3;
					match1 = match1 || "";
					return `<${match1}code>`;
				})
			.replace(
				/\{\{para[^}]+\}\}/g,
				function (wholematch) {
					// NOTE(review): markup seems to have been stripped from
					// the pattern and replacement below; left as found.
					if ( !wholematch.includes("'''") )
						return wholematch.replace(
							/([^']+)/g,
							function (wholematch, match1) {
								++count4;
								return ` ${match1} `;
							});
					else
						return wholematch;
				});
			const totalcount = count1 + count2 + count3 + count4;
			CleanupButtons.notifyReplacements(totalcount);
			CleanupButtons.addSummary(count1, "added T:para");
			CleanupButtons.addSummary(count2, "neater format for parameter definitions");
			// NOTE(review): "<tt>" restored; the tag name was stripped.
			CleanupButtons.addSummary(count3, "updated deprecated <tt> tag");
			return content;
		}
	},
	{
		textBoxIncludes: /[A-Z]\w+ (\{\{(l|m)\||'*'\[\[)/,
		button: {
			text: "add {{cog}}"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			// Only the Etymology sections are touched.
			content = content.replace(
				/(===\s*Etymology(?: \d)?\s*===)((?:.*\n)+?)(?=\n*===|$)/g,
				function (wholematch, header, content) {
					content = content.replace(
						/((?:[A-Z][\w-]* ?)+) (?:\{\{(?:l|m)\|([^|]+)\|([^}]+)\}\}|'*\[\[([^\]]+)\]\]'*)/g,
						function (wholematch, langName, langCode, parameters, term) {
							let preceding = "";
							const testFunc = (word) => langName.startsWith(word);
							// Capitalized sentence openers are not part of the
							// language name: move them into `preceding`.
							while ( [ "From", "See", "Consider", "Compare", "Confer", "Note" ].some(testFunc) ) {
								const match = langName.match(/^(\w+\s+)(.+)$/);
								if (match === null) {
									console.log("Failed match in " + langName);
									return wholematch;
								}
								preceding += match[1], langName = match[2];
							}
							++count;
							if ( langCode )
								return `${preceding}{{cog|${langCode}|${parameters}}}`;
							else
								return `${preceding}{{cog|{{subst:#invoke:languages/templates|getByCanonicalName|${langName}}}|${term}}}`;
						});
					return header + content;
				});
			CleanupButtons.notifyReplacements(count);
			CleanupButtons.addSummary(count, "added T:cog");
			return content;
		}
	},
	{
		textBoxIncludes: /\|\s*\d+px/,
		button: {
			text: "convert pixel to scaling size"
		},
		minorEdit: true,
		func: function (content) {
			let count = 0;
			content = content.replace(
				/\[\[(?:[Ff]ile|[Ii]mage)(?:[^\]]+|\[+[^\]]+\]+)+\]\](?=\n)/g,
				function (wholematch) {
					return wholematch.replace(
						/\|\s*(\d+)px\s*(?=[|\]])/,
						function (match, number) {
							// Convert string to number.
							number = Number(number);
							// Convert to upright value.
							number = number / 220;
							// Round down to two decimal places.
							number = Math.floor( number * 100 ) / 100;
							++count;
							return "|upright=" + number;
						});
				});
			CleanupButtons.addSummary(count, "converted image sizes in pixels to scaling values, which are more compatible with different user preferences");
			return content;
		}
	},
	{
		condition: function (entry) {
			return isTemplate
				&& !pageName.endsWith("/documentation")
				&& !entry.includes("{{documentation}}");
		},
		button: {
			text: "add {{documentation}}"
		},
		minorEdit: true,
		func: function (content) {
			// trim must be called; concatenating the bare method put the
			// function's source text on the page.
			// NOTE(review): tags (perhaps <noinclude>) may have been stripped
			// from around the template; the string is left as found.
			content = content.trim() + " {{documentation}} ";
			CleanupButtons.addSummary(true, "add T:documentation");
			return content;
		},
	},
];

const buttons = new CleanupButtons;

for ( const buttonInfo of cleanupFunctions ) if (CleanupButtons.evaluateConditions(buttonInfo.condition, buttonInfo.textBoxIncludes)) buttons.addButton(buttonInfo);

}); // $.when.done } // if

//