-- Module:User:Victar/headword

-- Table handed back at the end of the file; public functions are attached to it.
local export = {}

-- Shared headword data module.
local m_data = mw.loadData("Module:headword/data")

-- Lookup tables from the data module:
--   isLemma / isNonLemma  are indexed by part-of-speech category name,
--   notranslit            is indexed by language code,
--   toBeTagged            is indexed by script code.
local isLemma, isNonLemma = m_data.lemmas, m_data.nonlemmas
local notranslit, toBeTagged = m_data.notranslit, m_data.toBeTagged

-- Type name declared for each field of the `data` table.
-- Not referenced anywhere else in the visible part of this file.
local parameters = {
	lang = { type = "object" },
	script = { type = "object" },
	heads = { type = "table" },
	translits = { type = "table" },
	transcripts = { type = "table" },
	inflections = { type = "table" },
	genders = { type = "table" },
	categories = { type = "table" },
	pos_category = { type = "string" },
	sort_key = { type = "string" },
	id = { type = "string" },
}

-- Report whether `text` contains at least one character of the script
-- identified by `script_code`.
--
-- Parameters:
--   text         string to examine
--   script_code  script code understood by Module:scripts' getByCode
-- Returns:
--   true   if a character of the script occurs in the text,
--   false  if none occurs, or the script / its character set is unavailable,
--   nil    (after logging) if either argument is not a string.
local function test_script(text, script_code)
	if type(text) ~= "string" or type(script_code) ~= "string" then
		mw.log("Parameters to test_script were incorrect.")
		return nil
	end

	local sc = require("Module:scripts").getByCode(script_code)
	-- getCharacters is a method and has to be *called*; a bare
	-- `sc:getCharacters` is not even valid Lua.
	local characters = sc and sc:getCharacters()
	if not characters then
		return false
	end

	-- Drop everything matched by %W before testing against the script's
	-- character class.
	text = mw.ustring.gsub(text, "%W", "")
	return mw.ustring.find(text, "[" .. characters .. "]") ~= nil
end

-- Normalise the headword data in place before it is formatted.
--
-- Effects on `data`:
--   * data.heads and data.translits are coerced to tables (tracked when a
--     non-table value was supplied); data.heads gets at least one entry.
--   * every head equal to "" is replaced by a default derived from the
--     current page title (auto-linked for multi-word titles, prefixed with
--     "*" for reconstructed terms).
--   * data.sc is detected from the first head when it was not supplied.
--   * data.translits[i] is filled in for scripts other than Latn-like/None/Imag.
--   * cleanup categories may be appended to data.categories.
-- Raises an error when data.id is present but not a string.
local function preprocess(data)
	--[=[
	Tracking pages:
	Special:WhatLinksHere/Wiktionary:Tracking/headword/heads-not-table
	Special:WhatLinksHere/Wiktionary:Tracking/headword/translits-not-table
	]=]
	if type(data.heads) ~= "table" then
		if data.heads then
			require("Module:debug").track("headword/heads-not-table")
		end
		data.heads = { data.heads }
	end

	if type(data.translits) ~= "table" then
		if data.translits then
			require("Module:debug").track("headword/translits-not-table")
		end
		data.translits = { data.translits }
	end

	if not data.heads or #data.heads == 0 then
		data.heads = {""}
	end

	local title = mw.title.getCurrentTitle()

	-- Determine if term is reconstructed
	local is_reconstructed = data.lang:getType() == "reconstructed"
		or title.nsText == "Reconstruction"

	-- Create a default headword.
	local subpagename = title.subpageText
	local pagename = title.text
	local default_head

	if is_reconstructed then
		default_head = require("Module:utilities").plain_gsub(
			pagename,
			data.lang:getCanonicalName() .. "/",
			""
		)
	else
		default_head = subpagename
	end

	-- Add links to multi-word page names when appropriate
	if data.lang:getCode() ~= "zh" then
		local spacingPunctuation = "([%s%p]+)"
		-- Matches anything that is not a punctuation character found inside
		-- of words. Used to exclude those characters from the pattern above.
		local notWordPunc = "([^-־׳״'.·*]+)"
		local contains_words = false

		for possibleWordBreak in mw.ustring.gmatch(default_head, spacingPunctuation) do
			if mw.ustring.find(possibleWordBreak, notWordPunc) then
				contains_words = true
				break
			end
		end

		if (not is_reconstructed) and contains_words then
			-- Within each run of spacing/punctuation, close the link before
			-- the real word break and reopen it afterwards.
			local function workaround_to_exclude_chars(s)
				return (mw.ustring.gsub(s, notWordPunc, "]]%1[["))
			end

			-- The outer "[[" / "]]" balance the "]]…[[" pairs inserted by the
			-- workaround; concatenating empty strings here would leave the
			-- result with unbalanced brackets.
			-- (Once the workaround is no longer needed, the gsub can take the
			-- replacement string "]]%1[[" directly.)
			default_head = "[["
				.. mw.ustring.gsub(
					default_head,
					spacingPunctuation,
					workaround_to_exclude_chars
				)
				.. "]]"

			-- Remove any empty links, which could have been created above
			-- at the beginning or end of the string.
			default_head = mw.ustring.gsub(default_head, "%[%[%]%]", "")
		end
	end

	if is_reconstructed then
		default_head = "*" .. default_head
	end

	-- If a head is the empty string "", then replace it with the default
	for i, head in ipairs(data.heads) do
		if head == "" then
			head = default_head
		elseif head == default_head and data.lang:getCanonicalName() == "English" then
			table.insert(
				data.categories,
				data.lang:getCanonicalName() .. " terms with redundant head parameter"
			)
		end
		data.heads[i] = head
	end

	-- Try to detect the script if it was not provided.
	-- We use the first headword for this, and assume that all of them have
	-- the same script.
	if not data.sc then
		data.sc = require("Module:scripts").findBestScript(data.heads[1], data.lang)
	end

	-- Make transliterations
	for i, head in ipairs(data.heads) do
		local translit = data.translits[i]

		-- Try to generate a transliteration if necessary: only when none was
		-- provided and the script is not Latn or similar.
		if translit == "-" then
			translit = nil
		elseif not translit then
			local sc_code = data.sc:getCode()
			local latin_like = sc_code:find("Latn", nil, true)
				or sc_code == "Latinx"
				or sc_code == "None"

			if not latin_like and sc_code ~= "Imag" then
				translit = data.lang:transliterate(
					require("Module:links").remove_links(head),
					data.sc
				)

				-- There is still no transliteration?
				-- Add the entry to a cleanup category.
				if not translit and not notranslit[data.lang:getCode()] then
					-- NOTE(review): the padding spaces suggest markup may have
					-- been lost from this literal — confirm against upstream.
					translit = " transliteration needed "
					table.insert(
						data.categories,
						data.lang:getCanonicalName() .. " terms needing transliteration"
					)
				end
			end
		end

		-- Link to the transliteration entry for languages that require this
		if translit and data.lang:link_tr() then
			translit = require("Module:links").full_link{
				term = translit,
				lang = data.lang,
				sc = require("Module:scripts").getByCode("Latn"),
				tr = "-"
			}
		end

		data.translits[i] = translit
	end

	if data.id and type(data.id) ~= "string" then
		error("The id in the data table should be a string.")
	end
end

-- NOTE(review): the definition of format_headword below is damaged and must be
-- restored from a known-good copy before this file can run:
--   * the whole definition sits on one physical line that starts with "--",
--     so Lua treats all of it as a comment;
--   * the text between `m_links.language_link({term = head, lang = data.lang},`
--     and `•" .. translits_formatted` is missing, so the visible remainder
--     never declares `translits_formatted` and never closes that call;
--   * `data.lang:getCode` / `data.sc:getCode` have lost their call parentheses,
--     and `head:find("", nil, true)` searches for an empty string — presumably
--     the search text was lost as well; confirm against the original.
-- What the visible part shows: it tracks the number of heads per language,
-- records whether any head has a transliteration, passes heads through
-- Module:links' language_link, and returns the heads joined with " or "
-- followed by `translits_formatted`.
-- Format a headword with transliterations local function format_headword(data) local m_links = require("Module:links") local m_scriptutils = require("Module:script utilities") if data.heads and #data.heads and data.lang then require("Module:debug").track{ "headword/heads/" .. #data.heads, "headword/heads/" .. #data.heads .. "/" .. data.lang:getCode }	end -- Are there non-empty transliterations? -- Need to do it this way because translit[1] might be nil while translit[2] is not local has_translits = false -- Format the headwords for i, head in ipairs(data.heads) do		if data.translits[i] then has_translits = true end -- Apply processing to the headword, for formatting links and such if head:find("", nil, true) and (not data.sc or data.sc:getCode ~= "Imag") then			head = m_links.language_link({term = head, lang = data.lang},•" .. translits_formatted			end		end	end	return table.concat(data.heads, " or ") .. translits_formatted end

-- Render the gender annotations that follow the headword.
-- Returns "" when data.genders is absent or empty; otherwise a separator
-- followed by whatever Module:gender and number's format_list produces for
-- data.genders and data.lang.
local function format_genders(data)
	local genders = data.genders
	if not genders or #genders == 0 then
		return ""
	end

	local m_gender = require("Module:gender and number")
	return " " .. m_gender.format_list(genders, data.lang)
end

-- Format one inflection group: its label followed by its forms.
--
-- Parameters:
--   data   the headword data table (reads lang, sc, inflections,
--          force_cat_output)
--   parts  one inflection: a sequence of forms (strings or tables) plus the
--          fields label, and optionally sc, accel, enable_auto_translit,
--          request
-- Each form in `parts` is replaced in place by its formatted text.
-- Returns the label wrapped in '' '' followed by the forms joined with " or ";
-- when there are no forms and parts.request is set, a "please provide" note
-- plus a request category is returned instead of the forms.
local function format_inflection_parts(data, parts)
	local m_links = require("Module:links")

	for key, part in ipairs(parts) do
		if type(part) ~= "table" then
			part = {term = part}
		end

		local qualifiers = ""
		if part.qualifiers and #part.qualifiers > 0 then
			qualifiers = require("Module:qualifier").format_qualifier(part.qualifiers) .. " "
			-- Special:WhatLinksHere/Wiktionary:Tracking/headword/qualifier
			require("Module:debug").track("headword/qualifier")
		end

		local partaccel = part.accel
		local face = part.hypothetical and "hypothetical" or "bold"
		local nolink = part.hypothetical or part.nolink

		-- Convert the term into a full link.
		-- Don't show a transliteration here, the consensus seems to be not to
		-- show them in headword lines to avoid clutter.
		part = m_links.full_link(
			{
				term = not nolink and part.term or nil,
				alt = part.alt or (nolink and part.term or nil),
				lang = part.lang or data.lang,
				sc = part.sc or parts.sc or (not part.lang and data.sc),
				id = part.id,
				genders = part.genders,
				tr = part.translit
					or (not (parts.enable_auto_translit or data.inflections.enable_auto_translit) and "-" or nil),
				accel = parts.accel or partaccel,
			},
			face,
			false
		)

		parts[key] = qualifiers .. part
	end

	local parts_output = ""

	if #parts > 0 then
		parts_output = " " .. table.concat(parts, " or ")
	elseif parts.request then
		-- Pass data.lang here: the bare name `lang` is not defined in this
		-- function, so it would resolve to an unset global.
		parts_output = " [please provide] "
			.. require("Module:utilities").format_categories(
				{data.lang:getCanonicalName() .. " entries needing inflection"},
				data.lang,
				nil,
				nil,
				data.force_cat_output,
				data.sc
			)
	end

	return "''" .. parts.label .. "''" .. parts_output
end

-- Format the inflections following the headword.
-- Returns "" when data.inflections is absent or empty. Otherwise every entry
-- of data.inflections is replaced in place by its formatted text and the
-- entries are returned joined with ", " inside parentheses, preceded by a
-- space.
local function format_inflections(data)
	local inflections = data.inflections
	if not inflections or #inflections == 0 then
		return ""
	end

	-- Format each inflection individually
	for key, infl in ipairs(inflections) do
		inflections[key] = format_inflection_parts(data, infl)
	end

	return " (" .. table.concat(inflections, ", ") .. ")"
end

-- Build the visible headword line plus its tracking categories.
--
-- Steps, in order:
--   1. reject reconstructed / appendix-constructed languages in the main
--      namespace;
--   2. prepend the part-of-speech category and a lemma / non-lemma category
--      to data.categories (or record a tracking category when the part of
--      speech is not recognised);
--   3. run preprocess(data);
--   4. track heads that do not match the page name;
--   5. return headword + genders + inflections + formatted tracking
--      categories.
-- Reads data.lang, data.pos_category, data.categories, data.heads,
-- data.sort_key, data.force_cat_output and data.sc.
local function show_headword_line(data)
	-- Check the namespace against the language type
	if mw.title.getCurrentTitle().nsText == "" then
		local lang_type = data.lang:getType()
		if lang_type == "reconstructed" then
			error("Entries for this language must be placed in the Reconstruction: namespace.")
		elseif lang_type == "appendix-constructed" then
			error("Entries for this language must be placed in the Appendix: namespace.")
		end
	end

	local tracking_categories = {}
	local lang_name = data.lang:getCanonicalName()

	local pos_category = lang_name .. " " .. data.pos_category
	if pos_category ~= "Translingual Han characters" then
		table.insert(data.categories, 1, pos_category)
	end

	-- Variants of the part of speech with a leading "reconstructed " or
	-- "mutated " removed. The parentheses discard gsub's second result.
	local pos = data.pos_category
	local pos_unreconstructed = (pos:gsub("^reconstructed ", ""))
	local pos_unmutated = (pos:gsub("^mutated ", ""))

	if isLemma[pos] or isLemma[pos_unreconstructed] then
		-- It is a lemma category.
		table.insert(data.categories, 1, lang_name .. " lemmas")
	elseif isNonLemma[pos]
		or isNonLemma[pos_unreconstructed]
		or isLemma[pos_unmutated]
		or isNonLemma[pos_unmutated] then
		-- It is a non-lemma category.
		table.insert(data.categories, 1, lang_name .. " non-lemma forms")
	else
		-- It's neither; we don't know what this category is, so tag it with
		-- a tracking category.
		-- Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos
		table.insert(tracking_categories, "head tracking/unrecognized pos")
		require("Module:debug").track{
			"headword/unrecognized pos",
			"headword/unrecognized pos/lang/" .. data.lang:getCode(),
			"headword/unrecognized pos/pos/" .. data.pos_category
		}
	end

	-- Preprocess
	preprocess(data)

	local m_links = require("Module:links")

	if data.lang:getType() ~= "reconstructed" then
		for _, head in ipairs(data.heads) do
			if mw.title.getCurrentTitle().prefixedText
				~= m_links.getLinkPage(m_links.remove_links(head), data.lang) then
				-- Special:WhatLinksHere/Wiktionary:Tracking/headword/pagename spelling mismatch
				require("Module:debug").track{
					"headword/pagename spelling mismatch",
					"headword/pagename spelling mismatch/" .. data.lang:getCode()
				}
				-- One mismatch is enough; stop after tracking it once.
				break
			end
		end
	end

	-- Format and return all the gathered information
	return format_headword(data)
		.. format_genders(data)
		.. format_inflections(data)
		.. require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
		)
end

-- Public entry point: produce the complete headword line for an entry,
-- including its categories.
--
-- `data` is a table; the fields read here are lang (language object,
-- required), sc, heads, categories, pos_category, sort_key and
-- force_cat_output. When pos_category is absent it is taken from the first
-- category, which must then start with the language's canonical name.
--
-- Side effects: may set the page's DISPLAYTITLE, fills in data.sc,
-- data.categories and data.pos_category, and records tracking entries.
-- Raises an error when data.lang is not a language object, when `data` itself
-- is a language object, or when no part-of-speech category can be determined.
-- Returns the headword line followed by the formatted categories.
function export.full_headword(data)
	local tracking_categories = {}

	-- Script-tags the topmost header.
	local title = mw.title.getCurrentTitle()
	local pagename = title.text
	local fullPagename = title.fullText
	local namespace = title.nsText

	if not data.lang or type(data.lang) ~= "table" or not data.lang.getCode then
		error("In data, the first argument to full_headword, data.lang should be a language object.")
	end

	local m_scripts = require("Module:scripts")
	local m_debug = require("Module:debug")

	if not data.sc then
		data.sc = m_scripts.findBestScript(
			data.heads and data.heads[1] ~= "" and data.heads[1] or pagename,
			data.lang
		)
	else
		-- Track uses of sc parameter
		local best = m_scripts.findBestScript(pagename, data.lang)
		m_debug.track("headword/sc")

		if data.sc:getCode() == best:getCode() then
			m_debug.track("headword/sc/redundant")
			m_debug.track("headword/sc/redundant/" .. data.sc:getCode())
		else
			m_debug.track("headword/sc/needed")
			m_debug.track("headword/sc/needed/" .. data.sc:getCode())
		end
	end

	local displayTitle

	-- Assumes that the scripts in "toBeTagged" will never occur in the
	-- Reconstruction namespace.
	if namespace == "" and data.sc and toBeTagged[data.sc:getCode()]
		or data.sc:getCode() == "Jpan"
			and (test_script(pagename, "Hira") or test_script(pagename, "Kana")) then
		-- NOTE(review): the empty prefix and lone-space suffix look like the
		-- remains of wrapping markup that was lost — confirm against upstream.
		displayTitle = '' .. pagename .. ' '
	elseif namespace == "Reconstruction" then
		-- `matched` is declared local so that it does not leak into the
		-- global environment.
		local matched
		displayTitle, matched = mw.ustring.gsub(
			fullPagename,
			"^(Reconstruction:[^/]+/)(.+)$",
			function(before, term)
				return before
					.. require("Module:script utilities").tag_text(term, data.lang, data.sc)
			end
		)

		if matched == 0 then
			displayTitle = nil
		end
	end

	if displayTitle then
		local frame = mw.getCurrentFrame()
		frame:callParserFunction("DISPLAYTITLE", displayTitle)
	end

	if data.force_cat_output then
		-- Special:WhatLinksHere/Wiktionary:Tracking/headword/force cat output
		m_debug.track("headword/force cat output")
	end

	if data.getCanonicalName then
		error('The "data" variable supplied to "full_headword" should not be a language object.')
	end

	-- Later steps insert into data.categories unconditionally, so make sure
	-- the table exists even when the caller supplied none.
	data.categories = data.categories or {}

	local canonical_name = data.lang:getCanonicalName()

	-- Were any categories specified?
	if #data.categories > 0 then
		-- The language name is used inside patterns below; escape it so that
		-- magic characters such as "-" are matched literally.
		local lang_name = require("Module:string").pattern_escape(canonical_name)

		for _, cat in ipairs(data.categories) do
			-- Does the category begin with the language name? If not, tag it
			-- with a tracking category.
			if not mw.ustring.find(cat, "^" .. lang_name) then
				mw.log(cat, canonical_name)
				table.insert(tracking_categories, "head tracking/no lang category")
				-- Special:WhatLinksHere/Wiktionary:Tracking/head tracking/no lang category
				m_debug.track{
					"headword/no lang category",
					"headword/no lang category/lang/" .. data.lang:getCode()
				}
			end
		end

		-- Derive the part of speech from the first category when it was not
		-- given explicitly.
		if not data.pos_category and mw.ustring.find(data.categories[1], "^" .. lang_name) then
			data.pos_category = mw.ustring.gsub(data.categories[1], "^" .. lang_name .. " ", "")
			table.remove(data.categories, 1)
		end
	end

	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
		)
	end

	-- Categorise for unusual characters
	local standard = data.lang:getStandardCharacters()

	if standard then
		local subpage = title.subpageText
		if mw.ustring.len(subpage) ~= 1
			and not mw.ustring.match(title.text, "^Unsupported titles/") then
			for character in mw.ustring.gmatch(subpage, "([^" .. standard .. "])") do
				-- Use the upper-case form in the category name unless the
				-- upper-case form is itself a standard character.
				local upper = mw.ustring.upper(character)
				if not mw.ustring.find(upper, "[" .. standard .. "]") then
					character = upper
				end
				table.insert(
					data.categories,
					canonical_name .. " terms spelled with " .. character
				)
			end
		end
	end

	-- Categorise for palindromes
	if title.nsText ~= "Reconstruction"
		and require('Module:palindromes').is_palindrome(title.subpageText, data.lang, data.sc) then
		table.insert(data.categories, canonical_name .. " palindromes")
	end

	local m_utilities = require("Module:utilities")

	-- show_headword_line runs first and adds to data.categories before they
	-- are formatted.
	return show_headword_line(data)
		.. m_utilities.format_categories(
			data.categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
		)
		.. m_utilities.format_categories(
			tracking_categories, data.lang, data.sort_key, nil, data.force_cat_output, data.sc
		)
end

-- Hand the export table (which carries full_headword) to whoever loads this module.
return export