// User:Erutuon/scripts/auto-glossary.js

// The script works with the Template:auto-glossary template to automatically create a glossary for a particular category via scraping.

glossaryStyles = document.createElement("style"); glossaryStyles.textContent = ` .mw-parser-output .auto-glossary a[rel~="mw:ExtLink"]:empty::after { content: '[' counter(mw-numbered-ext-link) ']'; counter-increment: mw-numbered-ext-link; }	.mw-parser-output .auto-glossary td:first-child { counter-reset: mw-numbered-ext-link; text-align: center; }	.mw-parser-output .auto-glossary ol { margin: 0 0 0 2em; padding: 0; }`; document.head.appendChild(glossaryStyles);

/**
 * Entry point. For every `.auto-glossary` element on the page:
 *   1. read its `data-category`, `data-language` and `data-inline-only` attributes,
 *   2. list the members of that category through the action API,
 *   3. fetch each member's rendered HTML and pick the definitions (`<li>`) of the
 *      requested language section that carry the category,
 *   4. render the result as a two-column table ("Terms" / "Definitions").
 * Once all glossaries are built, the defaultVisibilityToggles gadget is loaded so
 * that quotations and nyms inside the copied definitions are collapsed.
 */
(async () => {
	const WIKI_BASE = "https://en.wiktionary.org/wiki/";
	const CATEGORY_LINK_SELECTOR = "link[rel=\"mw:PageProp/Category\"]";
	const INLINE_CATEGORY_SELECTOR = `:scope > ${CATEGORY_LINK_SELECTOR}, :scope > span > span > ${CATEGORY_LINK_SELECTOR}`;
	// Attributes that survive the HTML clean-up of copied definitions.
	const KEPT_ATTRIBUTES = new Set(["class", "href", "style", "title", "rel"]);
	const LEADING_LATIN_LETTER = /^[A-Za-z]/;

	for (const autoGlossary of document.querySelectorAll(".auto-glossary")) {
		const category = autoGlossary.getAttribute("data-category");
		// The data-label attribute is currently unused.
		const lang = autoGlossary.getAttribute("data-language");
		const inlineOnly = autoGlossary.getAttribute("data-inline-only");

		// Both attributes are dereferenced below; without them nothing useful can be built.
		if (!category || !lang) {
			console.warn("auto-glossary.js: missing data-category or data-language", autoGlossary);
			continue;
		}

		const langID = lang.replaceAll(" ", "_");
		const categoryHrefPrefix = "./Category:" + category.replaceAll(" ", "_");

		// True if any of the given <link> elements points at the glossary's category.
		// The raw attribute is read on purpose: the `href` *property* resolves to an
		// absolute URL and would therefore never start with "./Category:".
		const linksToCategory = (links) => Array.from(links).some(
			(linkElement) => (linkElement.getAttribute("href") ?? "").startsWith(categoryHrefPrefix)
		);

		// Get a list of words in the given category.
		const actionAPI = new mw.Api({ ajax: { headers: { "Api-User-Agent": "Userscript developed by User:Ioaxxere" } } });
		const wordlist = [];
		let continueParam; // Stays undefined until the API hands back a continuation token.
		while (true) {
			const queryParams = {
				action: "query",
				list: "categorymembers",
				cmprop: "title",
				cmlimit: "max",
				cmtitle: "Category:" + category
			};
			if (continueParam !== undefined) {
				queryParams.cmcontinue = continueParam;
			}
			const response = await actionAPI.get(queryParams);

			for (const entry of response.query.categorymembers) {
				// Only these namespaces are kept (presumably main, Appendix and
				// Reconstruction on Wiktionary — TODO confirm).
				if (entry.ns === 0 || entry.ns === 100 || entry.ns === 118) {
					wordlist.push(entry.title);
				}
			}

			if (response.continue) {
				continueParam = response.continue.cmcontinue;
			} else {
				break;
			}
		}

		// Titles starting with an ASCII letter come first, compared ignoring case and
		// accents; all other titles keep their relative order after them.
		wordlist.sort((a, b) => {
			const aIsLatin = LEADING_LATIN_LETTER.test(a);
			const bIsLatin = LEADING_LATIN_LETTER.test(b);
			if (aIsLatin && bIsLatin) {
				return a.localeCompare(b, undefined, { sensitivity: "base" });
			}
			if (aIsLatin) {
				return -1;
			}
			return bIsLatin ? 1 : 0;
		});

		const loadingIndicator = document.createElement("div");
		let entriesLoaded = 0;
		autoGlossary.appendChild(loadingIndicator);

		const glossaryTable = document.createElement("table");
		glossaryTable.id = "no-orange-links";
		glossaryTable.className = "wikitable";
		glossaryTable.style.cssText = "width: 100%; table-layout: fixed; display: table;";

		// Header row inside an explicit <tbody>, so data rows always have a real
		// element to be appended to.
		// NOTE(review): the original header markup was lost; any column widths it
		// set are not reproduced here.
		const glossaryBody = document.createElement("tbody");
		const headerRow = document.createElement("tr");
		for (const heading of ["Terms", "Definitions"]) {
			const headerCell = document.createElement("th");
			headerCell.textContent = heading;
			headerRow.appendChild(headerCell);
		}
		glossaryBody.appendChild(headerRow);
		glossaryTable.appendChild(glossaryBody);

		// The first 500 entries are loaded and displayed as one batch; everything
		// else follows in a second batch.
		let batchSize = 500;
		for (let batchStart = 0; batchStart < wordlist.length; batchStart += batchSize) {
			if (batchStart === batchSize) {
				batchSize = Infinity; // Finish the rest of the list.
			}
			let ongoingRequests = 0;
			const glossaryQueries = await Promise.all(wordlist.slice(batchStart, batchStart + batchSize).map(async (word) => {
				// Limit concurrent requests, otherwise the browser might run out of memory.
				while (ongoingRequests >= 200) {
					await new Promise((resolve) => setTimeout(resolve, 10));
				}

				let row = null;

				try {
					let responseText;
					ongoingRequests++;
					try {
						const response = await fetch("https://en.wiktionary.org/api/rest_v1/page/html/" + encodeURIComponent(word), {
							headers: { "User-Agent": "Userscript developed by User:Ioaxxere." }
						});
						responseText = await response.text();
					} finally {
						// Always free the slot, even when the request fails.
						ongoingRequests--;
					}

					const responseDocument = new DOMParser().parseFromString(responseText, "text/html");

					// Throws (and is reported below) when the page has no section for this language.
					const L2_section = responseDocument.getElementById(langID).parentNode;
					let glossaryDefs = Array.from(L2_section.getElementsByTagName("li")).filter((li) => {
						// Check if the li element contains an inline category.
						if (linksToCategory(li.querySelectorAll(INLINE_CATEGORY_SELECTOR))) {
							return true;
						}
						// Check if the headword element contains the category.
						const headwordLine = li.parentElement.previousElementSibling;
						return Boolean(
							!inlineOnly &&
							headwordLine &&
							headwordLine.tagName === "P" &&
							linksToCategory(headwordLine.querySelectorAll(CATEGORY_LINK_SELECTOR))
						);
					});

					// If no definitions found: grab all top-level definitions.
					if (!inlineOnly && !glossaryDefs.length) {
						glossaryDefs = L2_section.querySelectorAll(".mw-parser-output section > ol > li");
					}

					// Insert a row if definitions were found.
					if (glossaryDefs.length) {
						const entryBase = WIKI_BASE + encodeURIComponent(word);

						row = document.createElement("tr");
						const entryCell = row.insertCell(0);
						const entryLink = document.createElement("a");
						entryLink.href = entryBase + "#" + langID;
						entryLink.textContent = word;
						entryCell.appendChild(entryLink);

						const defList = row.insertCell(1).appendChild(document.createElement("ol"));
						glossaryDefs.forEach((def) => defList.appendChild(def));

						// Convert URLs into absolute URLs, resolving the raw attribute
						// against the entry's own address rather than the current page.
						defList.querySelectorAll("a[href]").forEach((link) => {
							link.href = new URL(link.getAttribute("href"), entryBase).href;
						});

						// Clean HTML.
						defList.querySelectorAll("link, .previewonly, .maintenance-line").forEach((elem) => {
							elem.remove();
						});
						defList.querySelectorAll("*").forEach((elem) => {
							Array.from(elem.attributes).forEach((attr) => {
								if (!KEPT_ATTRIBUTES.has(attr.name)) {
									elem.removeAttribute(attr.name);
								}
							});
						});
					}
				} catch {
					// Best effort: a page that cannot be fetched or parsed is skipped.
					row = null;
					console.log("auto-glossary.js could not parse: " + word);
				}

				entriesLoaded++;
				loadingIndicator.textContent = `Loaded entry: ${entriesLoaded}/${wordlist.length}`;
				return row;
			}));

			if (batchStart === 0) {
				autoGlossary.appendChild(glossaryTable);
			}

			glossaryQueries.forEach((row) => {
				if (row) {
					glossaryBody.appendChild(row); // Append each row into the <tbody>.
				}
			});
		}

		// Clear out loading display.
		Array.from(autoGlossary.childNodes).forEach((elem) => {
			if (elem !== glossaryTable) {
				elem.remove();
			}
		});
	}

	// Make sure this block executes after all glossaries have been loaded.
	// Collapse quotes and nyms. TODO: Optimize this so the defaultVisibilityToggles gadget doesn't have to run twice.
	if (document.querySelector(".auto-glossary")) {
		mw.config.set({ "wgNamespaceNumber": 0 }); // To trick the gadget into thinking we're in mainspace.
		mw.loader.load("/w/index.php?title=MediaWiki:Gadget-defaultVisibilityToggles.js&action=raw&ctype=text/javascript");
	}
})().catch((error) => {
	// Surface failures (e.g. a rejected API request) instead of leaving an unhandled rejection.
	console.error("auto-glossary.js failed:", error);
});