// User:Ioaxxere/auto-glossary.js

// This gadget works with the Template:auto-glossary template to automatically create a glossary for a particular category via scraping.

// Inject the glossary stylesheet, but only on pages that actually contain an auto-glossary.
if (document.querySelector(".auto-glossary")) {
	// Declared with const so the script no longer creates an implicit global
	// (which would throw a ReferenceError under strict mode / in a module).
	const glossaryStyles = document.createElement("style");
	const glossaryRules = [
		// Give empty external links a numbered "[n]" label via a CSS counter.
		` .mw-parser-output .auto-glossary a[rel~="mw:ExtLink"]:empty::after { content: '[' counter(mw-numbered-ext-link) ']'; counter-increment: mw-numbered-ext-link; }`,
		// Restart that counter at the first cell of each row and centre the cell's text.
		`.mw-parser-output .auto-glossary td:first-child { counter-reset: mw-numbered-ext-link; text-align: center; }`,
		// Tighten the spacing of the definition lists.
		`.mw-parser-output .auto-glossary ol { margin: 0 0 0 2em; padding: 0; }`,
	];
	// Joined with the same separator the rules had before, so the emitted stylesheet text is unchanged.
	glossaryStyles.textContent = glossaryRules.join("\t\t");
	document.head.appendChild(glossaryStyles);
}

/**
 * Fills every `.auto-glossary` element on the page with a table of terms and definitions.
 *
 * For each element: the members of the category named by `data-category` are listed through
 * the action API, the HTML of every member page is fetched from the REST API, and the
 * definitions found in the `data-language` section are copied into a two-column table.
 * When `data-inline-only` is set (non-empty), only definitions carrying an inline category
 * link are used. The `data-label` attribute is currently unused.
 */
(async () => {
	// Limit concurrent requests, otherwise the browser might run out of memory.
	const MAX_CONCURRENT_REQUESTS = 200;
	// Size of the first batch; the table is shown once it is done, then the rest is loaded in one go.
	const FIRST_BATCH_SIZE = 500;
	// Attributes that survive the HTML clean-up of copied definitions.
	const KEPT_ATTRIBUTES = new Set(["class", "href", "style", "title", "rel"]);

	const startsWithLatinLetter = (title) => /^[A-Za-z]/.test(title);

	/**
	 * Builds the table row for one entry from the HTML of its page.
	 * Returns null when no matching definitions are found; throws when the page
	 * cannot be interpreted (for example when the language section is missing).
	 */
	const buildGlossaryRow = (word, responseText, lang, category, inlineOnly) => {
		const responseDocument = new DOMParser().parseFromString(responseText, "text/html");
		const langID = lang.replaceAll(" ", "_");
		const categoryHrefPrefix = "./Category:" + category.replaceAll(" ", "_");
		const linksToCategory = (links) => Array.from(links)
			.some(linkElement => linkElement.getAttribute("href").startsWith(categoryHrefPrefix));

		// getElementById avoids building a selector, which breaks on names that are not valid CSS identifiers.
		const L2_section = responseDocument.getElementById(langID).parentNode;

		let glossaryDefs = Array.from(L2_section.querySelectorAll("li")).filter(li => {
			// Check if the li element contains an inline category.
			if (linksToCategory(li.querySelectorAll(":scope > link[rel=\"mw:PageProp/Category\"], :scope > span > span > link[rel=\"mw:PageProp/Category\"]"))) {
				return true;
			}
			if (inlineOnly) {
				return false;
			}
			// Check if the headword element contains the category.
			const headwordLine = li.parentElement.previousElementSibling;
			return Boolean(headwordLine) && headwordLine.matches("p") &&
				linksToCategory(headwordLine.querySelectorAll("link[rel=\"mw:PageProp/Category\"]"));
		});

		// If no definitions found: grab all top-level definitions.
		if (!inlineOnly && !glossaryDefs.length) {
			glossaryDefs = Array.from(L2_section.querySelectorAll(".mw-parser-output section > ol > li"));
		}
		if (!glossaryDefs.length) {
			return null;
		}

		const row = document.createElement("tr");
		const entryLink = document.createElement("a");
		entryLink.href = "https://en.wiktionary.org/wiki/" + encodeURIComponent(word) + "#" + langID;
		entryLink.textContent = word;
		row.insertCell(0).appendChild(entryLink);

		const defList = row.insertCell(1).appendChild(document.createElement("ol"));
		glossaryDefs.forEach(def => defList.appendChild(def));

		// Convert URLs into absolute URLs.
		defList.querySelectorAll("a[href]").forEach(link => {
			link.href = new URL(link.getAttribute("href"), "https://en.wiktionary.org/wiki/" + encodeURIComponent(word)).href;
		});

		// Clean HTML: drop metadata/maintenance elements, then strip every attribute that is not kept.
		defList.querySelectorAll("link, .previewonly, .maintenance-line").forEach(elem => elem.remove());
		defList.querySelectorAll("*").forEach(elem => {
			Array.from(elem.attributes).forEach(attr => {
				if (!KEPT_ATTRIBUTES.has(attr.name)) {
					elem.removeAttribute(attr.name);
				}
			});
		});
		return row;
	};

	for (const autoGlossary of document.querySelectorAll(".auto-glossary")) {
		const category = autoGlossary.getAttribute("data-category");
		const lang = autoGlossary.getAttribute("data-language");
		const inlineOnly = autoGlossary.getAttribute("data-inline-only");

		// Get a list of words in the given category.
		const actionAPI = new mw.Api({ ajax: { headers: { "Api-User-Agent": "Userscript developed by User:Ioaxxere" } } });
		const wordlist = [];
		let continueParam = "";
		while (true) {
			const response = await actionAPI.get({
				action: "query",
				list: "categorymembers",
				cmprop: "title",
				cmlimit: "max",
				cmtitle: "Category:" + category,
				cmcontinue: continueParam
			});

			// Only pages in namespaces 0, 100 and 118 are kept
			// (presumably main, Appendix and Reconstruction on en.wiktionary; TODO confirm).
			for (const entry of response.query.categorymembers) {
				if (entry.ns === 0 || entry.ns === 100 || entry.ns === 118) {
					wordlist.push(entry.title);
				}
			}

			if (!response.continue) {
				break;
			}
			continueParam = response.continue.cmcontinue;
		}

		// Titles starting with an ASCII letter come first, ordered case-insensitively;
		// all other titles follow and compare as equal to each other.
		wordlist.sort((a, b) => {
			const aIsLatin = startsWithLatinLetter(a);
			const bIsLatin = startsWithLatinLetter(b);
			if (aIsLatin && bIsLatin) {
				return a.localeCompare(b, undefined, { sensitivity: "base" });
			}
			if (aIsLatin) {
				return -1;
			}
			return bIsLatin ? 1 : 0;
		});

		const loadingIndicator = document.createElement("div");
		let entriesLoaded = 0;
		autoGlossary.appendChild(loadingIndicator);

		const glossaryTable = document.createElement("table");
		glossaryTable.id = "no-orange-links";
		glossaryTable.className = "wikitable";
		glossaryTable.style = "width: 100%; table-layout: fixed; display: table;";
		// Header row plus an empty <tbody> that receives the entry rows.
		// NOTE(review): the original header markup was lost; any column widths it set need to be restored by hand.
		glossaryTable.innerHTML = "<thead><tr><th>Terms</th><th>Definitions</th></tr></thead><tbody></tbody>";
		const glossaryBody = glossaryTable.querySelector("tbody");

		let batchSize = FIRST_BATCH_SIZE;
		for (let batchStart = 0; batchStart < wordlist.length; batchStart += batchSize) {
			if (batchStart === FIRST_BATCH_SIZE) {
				batchSize = Infinity; // Finish the rest of the list.
			}
			let ongoingRequests = 0;
			const glossaryQueries = await Promise.all(wordlist.slice(batchStart, batchStart + batchSize).map(async (word) => {
				// Wait for a free slot before starting another request.
				while (ongoingRequests >= MAX_CONCURRENT_REQUESTS) {
					await new Promise(resolve => setTimeout(resolve, 10));
				}
				ongoingRequests++;

				let responseText = null;
				try {
					const response = await fetch("https://en.wiktionary.org/api/rest_v1/page/html/" + encodeURIComponent(word), {
						headers: { "User-Agent": "Userscript developed by User:Ioaxxere." }
					});
					responseText = await response.text();
				} catch (error) {
					// A single failed request must not abort the whole glossary.
					console.log("auto-glossary.js could not fetch: " + word, error);
				} finally {
					// Always release the slot, even when the request failed.
					ongoingRequests--;
				}

				let row = null;
				if (responseText !== null) {
					try {
						row = buildGlossaryRow(word, responseText, lang, category, inlineOnly);
					} catch {
						console.log("auto-glossary.js could not parse: " + word);
					}
				}

				entriesLoaded++;
				loadingIndicator.textContent = `Loaded entry: ${entriesLoaded}/${wordlist.length}`;
				return row;
			}));

			// Show the table as soon as the first batch is ready.
			if (batchStart === 0) {
				autoGlossary.appendChild(glossaryTable);
			}

			for (const row of glossaryQueries) {
				if (!row) {
					continue;
				}
				glossaryBody.appendChild(row); // Append each row into <tbody>.
				// Only the new row's list items need setting up; rescanning the whole document
				// for every row repeats work and misses the row while it is still detached.
				for (const quote of row.querySelectorAll("ol > li")) {
					window.setupHiddenQuotes(quote);
				}
			}
		}

		// Clear out loading display.
		for (const elem of Array.from(autoGlossary.childNodes)) {
			if (elem !== glossaryTable) {
				elem.remove();
			}
		}
	}
})().catch(error => console.error("auto-glossary.js failed:", error));