-- Module:etymology/templates/descendant

local concat = table.concat local insert = table.insert local listToSet = require("Module:table/listToSet") local rsplit = mw.text.split

-- Names of modules that this file loads with require() at the point of use rather than at load time.
local put_module = "Module:parse utilities"
local labels_module = "Module:labels"
local languages_module = "Module:languages"
local scripts_module = "Module:scripts"

-- Table of entry points exported by this module; it is returned at the end of the file.
local export = {}

-- When true, desc_or_desc_tree raises an error if a descendant tree was requested but no descendants were
-- found for any of the terms.
local error_on_no_descendants = false

-- Wrap `content` in spaced parentheses. Returns no value at all when `content` is nil or false.
local function qualifier(content)
	if not content then
		return
	end
	local wrapped = ' ( ' .. content .. ' ) '
	return wrapped
end

-- Record a tracking page named "descendant/<page>" through Module:debug/track and return whatever that
-- module returns.
local function track(page)
	local tracker = require("Module:debug/track")
	return tracker("descendant/" .. page)
end

-- "If not empty": map the empty string to nil and pass every other value through unchanged.
local function ine(arg)
	if arg ~= "" then
		return arg
	end
	return nil
end

-- Return `text` padded with one space on each side.
-- NOTE(review): `tooltip` is accepted but not used anywhere in the body as written; presumably it was meant
-- to be attached to the text as a tooltip -- confirm the intended markup before relying on it.
local function add_tooltip(text, tooltip)
	local padded = ' ' .. text .. ' '
	return padded
end

-- Split `term` on commas. When no comma is followed by whitespace, a plain split on "," is enough;
-- otherwise the work is delegated to split_on_comma in Module:parse utilities.
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(put_module).split_on_comma(term)
end

-- Params that modify a descendant term. Doesn't include gloss=, which we handle specially.
local param_term_mods = {"alt", "g", "id", "lit", "pos", "t", "tr", "ts"}
local param_term_mod_set = listToSet(param_term_mods)

-- Boolean params indicating whether a descendant term (or all terms) are particular sorts of borrowings.
local bortypes = {
	"inh", "bor", "lbor", "slb", "obor", "translit", "der", "clq", "pclq", "sml", "unc",
}
local bortype_set = listToSet(bortypes)

-- Aliases of clq=.
local calque_aliases = {"cal", "calq", "calque"}
local calque_alias_set = listToSet(calque_aliases)

-- Aliases of pclq=.
local partial_calque_aliases = {"pcal", "pcalq", "pcalque"}
local partial_calque_alias_set = listToSet(partial_calque_aliases)

-- Miscellaneous list params.
local misc_list_params = {"q", "qq", "lb"}
local misc_list_param_set = listToSet(misc_list_params)

-- Add a "regular" list param such as g=, gloss=, lit=, etc. to `params`. "Regular" here means that `param`
-- and `param1` are the same thing. `type` if given is the param type (e.g. "boolean") and `alias_of` is used
-- for params that are aliases of other params. The spec is always a list that allows holes.
local function add_regular_list_param(params, param, type, alias_of)
	params[param] = {
		type = type,
		alias_of = alias_of,
		list = true,
		allow_holes = true,
	}
end

-- Add an index-separated list param such as bor=, calq=, qq=, etc. to `params`. "Index-separated" means that
-- `param` and `param1` are different. Non-numbered `param` is accessible as `args.param` while numbered
-- `param1`, `param2`, etc. are accessible as `args.partparam[1]`, `args.partparam[2]`, etc. `type` if given
-- is the param type (e.g. "boolean") and `alias_of` is used for params that are aliases of other params; the
-- numbered form is then made an alias of "part" .. `alias_of`.
local function add_index_separated_list_param(params, param, type, alias_of)
	-- Non-numbered form.
	params[param] = {alias_of = alias_of, type = type}
	-- Numbered form, stored under the "part"-prefixed key; an index is required.
	params["part" .. param] = {
		alias_of = alias_of and "part" .. alias_of or nil,
		type = type,
		list = param,
		allow_holes = true,
		require_index = true,
	}
end

-- Convert a raw lb= param (or nil) to a list of label info objects (of the format described in
-- get_label_info in Module:labels). Unrecognized labels will end up with an unchanged display form.
-- Return nil if nil passed in. Categorization is suppressed by passing nocat = true.
local function split_and_process_raw_labels(raw_lb, lang)
	if not raw_lb then
		return nil
	end
	return require(labels_module).split_and_process_raw_labels {
		labels = raw_lb,
		lang = lang,
		nocat = true,
	}
end

-- Return a function of one argument `arg` (a param name), which fetches args[`arg`] if index == 0, else
-- args["part" .. `arg`][index].
local function get_val(args, index)
	return function(arg)
		if index == 0 then
			return args[arg]
		end
		return args["part" .. arg][index]
	end
end

-- Return the arrow text for the `index`th term, or the overall arrow text if index == 0.
--
-- The first matching borrowing-type flag (bor, lbor, slb, obor, translit, clq, pclq, sml, then inh) selects
-- the base arrow; der= and unc= then append their own markers. A non-empty result gets a trailing space.
local function get_arrow(args, index)
	local val = get_val(args, index)
	local arrow

	if val("bor") then
		arrow = add_tooltip("→", "borrowed")
	elseif val("lbor") then
		arrow = add_tooltip("→", "learned borrowing")
	elseif val("slb") then
		arrow = add_tooltip("→", "semi-learned borrowing")
	elseif val("obor") then
		arrow = add_tooltip("→", "orthographic borrowing")
	elseif val("translit") then
		-- Looked up through val() like every other flag, so that the per-term translitN= is honored and
		-- the overall translit= does not leak onto each individual term.
		arrow = add_tooltip("→", "transliteration")
	elseif val("clq") then
		arrow = add_tooltip("→", "calque")
	elseif val("pclq") then
		arrow = add_tooltip("→", "partial calque")
	elseif val("sml") then
		arrow = add_tooltip("→", "semantic loan")
	elseif val("inh") or (val("unc") and not val("der")) then
		arrow = add_tooltip(">", "inherited")
	else
		arrow = ""
	end

	-- allow der=1 in conjunction with bor=1 to indicate e.g. English "pars recta"
	-- derived and borrowed from Latin "pars".
	if val("der") then
		arrow = arrow .. add_tooltip("⇒", "reshaped by analogy or addition of morphemes")
	end

	if val("unc") then
		arrow = arrow .. add_tooltip("?", "uncertain")
	end

	if arrow ~= "" then
		arrow = arrow .. " "
	end

	return arrow
end

-- Return the pre-qualifier text for the `index`th term, or the overall pre-qualifier text if index == 0.
--
-- For an individual term (index > 0) the lb= labels are formatted and placed first; the overall lb= is not
-- handled here. The q= value, if any, follows. The result is either "" or the formatted qualifier followed
-- by a single space.
local function get_pre_qualifiers(args, index, lang)
	local val = get_val(args, index)
	local quals

	if index > 0 then
		local labels = split_and_process_raw_labels(val("lb"), lang)
		if labels then
			labels = require(labels_module).format_processed_labels {
				labels = labels,
				lang = lang,
				no_ib_content = true,
			}
			if labels ~= "" then -- not sure labels can be an empty string but it seems possible in some circumstances
				quals = {labels}
			end
		end
	end

	if val("q") then
		quals = quals or {}
		insert(quals, val("q"))
	end

	if quals then
		return require("Module:qualifier").format_qualifier(quals) .. " "
	else
		return ""
	end
end

-- Return the post-qualifier text for the `index`th term, or the overall post-qualifier text if index == 0.
--
-- Qualifiers are emitted in a fixed order (inh, lbor, slb, translit, clq, pclq, sml, then qq=). For the
-- overall case (index == 0) the formatted lb= labels are appended after an em dash. The result is either ""
-- or the pieces joined by single spaces and preceded by one space.
local function get_post_qualifiers(args, index, lang)
	local val = get_val(args, index)
	local postqs = {}

	if val("inh") then
		insert(postqs, qualifier("inherited"))
	end
	if val("lbor") then
		insert(postqs, qualifier("learned"))
	end
	if val("slb") then
		insert(postqs, qualifier("semi-learned"))
	end
	if val("translit") then
		insert(postqs, qualifier("transliteration"))
	end
	if val("clq") then
		insert(postqs, qualifier("calque"))
	end
	if val("pclq") then
		insert(postqs, qualifier("partial calque"))
	end
	if val("sml") then
		insert(postqs, qualifier("semantic loan"))
	end
	if val("qq") then
		insert(postqs, require("Module:qualifier").format_qualifier(val("qq")))
	end

	if index == 0 then
		local labels = split_and_process_raw_labels(val("lb"), lang)
		if labels then
			labels = require(labels_module).format_processed_labels {
				labels = labels,
				lang = lang,
			}
			if labels ~= "" then
				insert(postqs, "&mdash; " .. labels)
			end
		end
	end

	if #postqs > 0 then
		return " " .. concat(postqs, " ")
	else
		return ""
	end
end

-- Shared implementation of export.descendant and export.descendants_tree.
--
-- `frame` is the invocation frame; arguments are taken from frame.args if frame.args[1] is set, otherwise
-- from the parent frame. `desc_tree` selects descendant-tree mode: a different parameter set is accepted
-- (notext=, noalts=, noparent= instead of alts=) and the descendants of each term are fetched through
-- Module:descendants tree.
--
-- Returns the formatted text: optional arrow, language (or script) tag, the linked terms with their
-- qualifiers, and, in descendant-tree mode, the fetched descendants.
--
-- Raises an error for old-style tag= parameters, for likely misuse of 3=/4=/g2=, for appendix-only
-- constructed languages in the main or Reconstruction namespace, for malformed inline modifiers, and for
-- descendant-tree or alternative-form fetching on a family code.
local function desc_or_desc_tree(frame, desc_tree)
	local current_title = mw.title.getCurrentTitle()

	local params
	local boolean = {type = "boolean"}
	if desc_tree then
		params = {
			[1] = {required = true, type = "language", family = true, default = "gem-pro"},
			[2] = {required = true, list = true, allow_holes = true, default = "*fuhsaz"},
			["notext"] = boolean,
			["noalts"] = boolean,
			["noparent"] = boolean,
		}
	else
		params = {
			[1] = {required = true, type = "language", family = true, default = "en"},
			[2] = {list = true, allow_holes = true},
			["alts"] = boolean,
		}
		-- If template namespace.
		if current_title.namespace == 10 then
			params[2].default = "word"
		end
	end

	for _, term_mod in ipairs(param_term_mods) do
		add_regular_list_param(params, term_mod)
	end
	-- Handle gloss= specially because it's an alias.
	add_regular_list_param(params, "gloss", nil, "t")
	-- Handle sc= specially because the type is "script".
	add_regular_list_param(params, "sc", "script")
	for _, bortype in ipairs(bortypes) do
		add_index_separated_list_param(params, bortype, "boolean")
	end
	for _, calque_alias in ipairs(calque_aliases) do
		add_index_separated_list_param(params, calque_alias, "boolean", "clq")
	end
	for _, partial_calque_alias in ipairs(partial_calque_aliases) do
		add_index_separated_list_param(params, partial_calque_alias, "boolean", "pclq")
	end
	for _, misc_list_param in ipairs(misc_list_params) do
		add_index_separated_list_param(params, misc_list_param)
	end

	-- Add other single params.
	params.sclang = boolean
	params.sclb = {type = "boolean", alias_of = "sclang"}
	params.nolang = boolean
	params.nolb = {type = "boolean", alias_of = "nolang"}

	local namespace = current_title.nsText

	local parent_args
	if frame.args[1] then
		parent_args = frame.args
	else
		parent_args = frame:getParent().args
	end

	-- FIXME: Temporary error message.
	for arg, _ in pairs(parent_args) do
		if type(arg) == "string" and arg:find("^tag[0-9]*$") then
			local lbarg = arg:gsub("^tag", "lb")
			error(("Use %s= instead of %s="):format(lbarg, arg))
		end
	end

	-- Error to catch most uses of old-style parameters.
	if ine(parent_args[4]) and not ine(parent_args[3]) and not ine(parent_args.tr2) and not ine(parent_args.ts2)
		and not ine(parent_args.t2) and not ine(parent_args.gloss2) and not ine(parent_args.g2)
		and not ine(parent_args.alt2) then
		error("You specified a term in 4= and not one in 3=. You probably meant to use t= to specify a gloss instead. "
			.. "If you intended to specify two terms, put the second term in 3=.")
	end
	if not ine(parent_args[3]) and not ine(parent_args.alt2) and not ine(parent_args.tr2)
		and not ine(parent_args.ts2) and ine(parent_args.g2) then
		error("You specified a gender in g2= but no term in 3=. You were probably trying to specify two genders for "
			.. "a single term. To do that, put both genders in g=, comma-separated.")
	end

	local args = require("Module:parameters").process(parent_args, params)

	local lang = args[1]
	local terms = args[2]
	if (namespace == "" or namespace == "Reconstruction") and
		(lang:hasType("appendix-constructed") and not lang:hasType("regular")) then
		error("Terms in appendix-only constructed languages may not be given as descendants.")
	end

	-- Module:descendants tree is only needed to fetch descendants (descendant-tree mode) or alternative
	-- forms (descendant-tree mode without noalts=, or alts= otherwise). Testing args.alts rather than the
	-- args.alt list (which is always a table) avoids loading it unconditionally.
	local m_desctree
	if desc_tree or args.alts then
		m_desctree = require("Module:descendants tree")
	end

	if lang:getCode() ~= lang:getFullCode() then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/descendant/etymological]]
		track("etymological")
		track("etymological/" .. lang:getCode())
	end

	local is_family = lang:hasType("family")
	local proxy_lang
	if is_family then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/descendant/family]]
		track("family")
		track("family/" .. lang:getCode())
		proxy_lang = require(languages_module).getByCode("und")
	else
		proxy_lang = lang
	end

	local languageName
	if is_family then
		-- The display form for families includes the word "languages", which we probably don't want to
		-- display.
		languageName = lang:getCanonicalName()
	else
		languageName = lang:getDisplayForm()
	end

	local langtag
	if args.sclang then
		local sc = args.sc[1]
		if sc then
			langtag = sc:getDisplayForm()
		else
			local term, alt = terms[1], args.alt[1]
			local best_sc
			if is_family then
				best_sc = require(scripts_module).findBestScriptWithoutLang(term or alt, "none is last resort")
			else
				best_sc = lang:findBestScript(term or alt)
			end
			langtag = best_sc:getDisplayForm()
		end
	else
		langtag = languageName
	end

	-- Find the maximum index among any of the list parameters.
	local maxmaxindex = terms.maxindex
	for _, v in pairs(args) do
		if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then
			maxmaxindex = v.maxindex
		end
	end

	local parts = {}
	local descendants = {}
	local saw_descendants = false
	local seen_terms = {}
	local put
	local use_semicolon = false

	-- `i` indexes the raw term list (which may contain literal ";" separator entries); `ind` counts only the
	-- real terms and indexes the per-term modifier lists.
	local ind = 0
	for i = 1, maxmaxindex do
		local term = terms[i]
		if term ~= ";" then
			ind = ind + 1
			local alt = args.alt[ind]
			local id = args.id[ind]
			local sc = args.sc[ind]
			local tr = args.tr[ind]
			local ts = args.ts[ind]
			local gloss = args.t[ind]
			local pos = args.pos[ind]
			local lit = args.lit[ind]
			local g = args.g[ind] and rsplit(args.g[ind], "%s*,%s*") or {}
			local link

			local termobj = {
				lang = proxy_lang,
			}

			-- Initialize `termobj` with indexed modifier params such as t1, t2, etc. and alt1, alt2, etc. Inline
			-- modifiers specified using the <...> notation override these.
			local function reinit_termobj(new_term)
				termobj.term = new_term
				termobj.sc = sc
				termobj.track_sc = true
				termobj.alt = alt
				termobj.id = id
				termobj.tr = tr
				termobj.ts = ts
				termobj.genders = g
				termobj.gloss = gloss
				termobj.pos = pos
				termobj.lit = lit
			end

			-- Construct a link out of `termobj`.
			local function get_link()
				local formatted = ""
				-- If an individual term has a literal comma in it, use semicolons for all joiners. Otherwise we use
				-- semicolon only if the user specified a literal semicolon as a term.
				if termobj.term and termobj.term:find(",") then
					use_semicolon = true
				end
				if termobj.term ~= "-" then -- including term == nil
					formatted = require("Module:links").full_link(termobj, nil, true)
				elseif termobj.ts or termobj.gloss or #termobj.genders > 0 then
					-- [[Special:WhatLinksHere/Wiktionary:Tracking/descendant/no term]]
					track("no term")
					termobj.term = nil
					formatted = require("Module:links").full_link(termobj, nil, true)
					-- NOTE(review): the first two patterns are identical as written; presumably they once
					-- targeted two variants of the term-request placeholder -- confirm.
					formatted = formatted
						:gsub(" %[Term%?%] ", "")
						:gsub(" %[Term%?%] ", "")
						:gsub("%[%[Category:[^%[%]]+ term requests%]%]", "")
				else -- display no link at all
					-- [[Special:WhatLinksHere/Wiktionary:Tracking/descendant/no term or annotations]]
					track("no term or annotations")
				end
				return formatted
			end

			-- Check for a new-style argument carrying inline modifiers in angle brackets. All tags of the sort
			-- we parse here consist of less-than + lowercase letters + greater-than, or less-than + lowercase
			-- letters + colon + arbitrary text with balanced angle brackets + greater-than. The first negated
			-- pattern rejects a term whose first "<" is followed by lowercase letters and then something other
			-- than a lowercase letter, ":" or ">" (i.e. something that looks like HTML rather than a modifier).
			--
			-- FIXME! The second negated pattern covers the case where one balanced <...> expression precedes
			-- such non-modifier-looking HTML, which the first pattern alone would let through. This is a hack
			-- and won't catch all top-level uses of HTML. As-is, we need (and should write) a function in
			-- [[Module:parse utilities]] to check this properly.
			if term and term:find("<") and not term:find("^[^<]*<%l*[^%l:>]") and
				not term:find("^[^<>]*%b<>[^<>]*<%l*[^%l:>]") then
				if not put then
					put = require(put_module)
				end
				local run = put.parse_balanced_segment_run(term, "<", ">")
				-- Split the non-modifier parts of an alternating run on comma, but not on comma+whitespace.
				local comma_separated_runs = put.split_alternating_runs_on_comma(run)
				local sub_links = {}

				-- Raise an error that quotes the offending parameter; term `i` is template parameter i + 1.
				local function parse_err(msg)
					local err_parts = {}
					for _, err_run in ipairs(comma_separated_runs) do
						insert(err_parts, concat(err_run))
					end
					error(msg .. ": " .. (i + 1) .. "=" .. concat(err_parts, ","))
				end

				for j, sub_run in ipairs(comma_separated_runs) do
					reinit_termobj(sub_run[1])
					local seen_mods = {}
					-- Even positions hold the <...> modifiers; the odd positions after them must be empty.
					for k = 2, #sub_run - 1, 2 do
						if sub_run[k + 1] ~= "" then
							parse_err("Extraneous text '" .. sub_run[k + 1] .. "' after modifier")
						end
						local modtext = sub_run[k]:match("^<(.*)>$")
						if not modtext then
							-- `modtext` is nil here, so quote the raw segment instead of concatenating nil.
							parse_err("Internal error: Modifier '" .. sub_run[k] ..
								"' isn't surrounded by angle brackets")
						end
						local prefix, arg = modtext:match("^(%l+):(.*)$")
						if prefix then
							if seen_mods[prefix] then
								parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. sub_run[k])
							end
							seen_mods[prefix] = true
							if prefix == "t" or prefix == "gloss" then
								termobj.gloss = arg
							elseif prefix == "g" then
								termobj.genders = rsplit(arg, "%s*,%s*")
							elseif prefix == "sc" then
								-- NOTE(review): this stores the raw string, whereas the indexed sc= param is
								-- declared with type "script" -- confirm that the link code accepts both.
								termobj.sc = arg
							elseif param_term_mod_set[prefix] then
								termobj[prefix] = arg
							elseif misc_list_param_set[prefix] then
								if j < #comma_separated_runs then
									parse_err("Modifier " .. sub_run[k] .. " should come after the last term")
								end
								args["part" .. prefix][ind] = arg
							elseif prefix == "tag" then
								-- FIXME: Remove support for <tag:...> in favor of <lb:...>
								error("Use <lb:...> instead of <tag:...>")
							else
								parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. sub_run[k])
							end
						elseif j < #comma_separated_runs then
							parse_err("Modifier " .. sub_run[k] .. " should come after the last term")
						else
							if seen_mods[modtext] then
								parse_err("Modifier '" .. modtext .. "' occurs twice")
							end
							seen_mods[modtext] = true
							if bortype_set[modtext] then
								args["part" .. modtext][ind] = true
							elseif calque_alias_set[modtext] then
								args.partclq[ind] = true
							elseif partial_calque_alias_set[modtext] then
								args.partpclq[ind] = true
							else
								parse_err("Unrecognized modifier '" .. modtext .. "'")
							end
						end
					end
					local sub_link = get_link()
					if sub_link ~= "" then
						insert(sub_links, sub_link)
					end
				end
				link = concat(sub_links, "/")
			elseif term and term:find(",") then
				local sub_terms = split_on_comma(term)
				local sub_links = {}
				for _, sub_term in ipairs(sub_terms) do
					reinit_termobj(sub_term)
					local sub_link = get_link()
					if sub_link ~= "" then
						insert(sub_links, sub_link)
					end
				end
				link = concat(sub_links, "/")
			else
				reinit_termobj(term)
				link = get_link()
			end

			local arrow = get_arrow(args, ind)
			local preqs = get_pre_qualifiers(args, ind, proxy_lang)
			local postqs = get_post_qualifiers(args, ind, proxy_lang)
			local alt_forms

			if desc_tree and term and term ~= "-" then
				if is_family then
					error("No support currently (and probably ever) for fetching a descendant tree when a family code instead of language code is given")
				end
				insert(seen_terms, term)
				-- This is what I ([[User:Benwing2]]) had in Nov 2020 when I first implemented this.
				-- Since then, [[User:Fytcha]] added `true` as the fourth param.
				-- descendants[ind] = m_desctree.getDescendants(entryLang, term, id, maxmaxindex > 1)
				descendants[ind] = m_desctree.getDescendants(lang, sc, term, id, true)
				if descendants[ind] then
					saw_descendants = true
				end
			end

			-- Keep `descendants` a gap-free sequence so that it can be concatenated below.
			descendants[ind] = descendants[ind] or ""

			if term and (desc_tree and not args.noalts or not desc_tree and args.alts) then
				if is_family then
					error("No support currently (and probably ever) for fetching alternative forms when a family code instead of language code is given")
				end
				-- [[Special:WhatLinksHere/Wiktionary:Tracking/descendant/alts]]
				track("alts")
				alt_forms = m_desctree.getAlternativeForms(lang, sc, term, id)
			else
				alt_forms = ""
			end

			local linktext = concat{preqs, link, alt_forms, postqs}
			if not args.notext then
				linktext = arrow .. linktext
			end
			if linktext ~= "" then
				-- Only emit a joiner once something precedes it, so that `parts` strictly alternates
				-- text, joiner, text, ... even when an earlier term rendered as nothing.
				if #parts > 0 then
					insert(parts, terms[i - 1] == ";" and "; " or ", ")
				end
				insert(parts, linktext)
			end
		end
	end

	if error_on_no_descendants and desc_tree and not saw_descendants then
		if #seen_terms == 0 then
			error("Template:desctree invoked but no terms to retrieve descendants from")
		elseif #seen_terms == 1 then
			error("No Descendants section was found in the entry " .. seen_terms[1] ..
				" under the header for " .. lang:getFullName() .. ".")
		else
			-- NOTE(review): concatenating empty strings is a no-op as written; presumably some markup was
			-- meant to surround each term -- confirm.
			for i, term in ipairs(seen_terms) do
				seen_terms[i] = "" .. term .. ""
			end
			error("No Descendants section was found in any of the entries " ..
				concat(seen_terms, ", ") .. " under the header for " .. lang:getFullName() .. ".")
		end
	end

	descendants = concat(descendants)
	if args.noparent then
		return descendants
	end

	local initial_arrow = get_arrow(args, 0)
	local initial_preqs = get_pre_qualifiers(args, 0, proxy_lang)
	local final_postqs = get_post_qualifiers(args, 0, proxy_lang)

	if use_semicolon then
		-- Even positions of `parts` hold the joiners. Use the same "; " form (with trailing space) as the
		-- joiner emitted for an explicit ";" term.
		for i = 2, #parts - 1, 2 do
			parts[i] = "; "
		end
	end

	local all_linktext = initial_preqs .. concat(parts) .. final_postqs .. descendants

	if args.notext then
		return all_linktext
	elseif args.nolang then
		return initial_arrow .. all_linktext
	else
		return concat{initial_arrow, langtag, ":", all_linktext ~= "" and " " or "", all_linktext}
	end
end

-- Entry point for the plain descendant template: format the descendant line (not in descendant-tree mode)
-- and append the output of Module:TemplateStyles for Module:etymology/style.css.
function export.descendant(frame)
	local text = desc_or_desc_tree(frame, false)
	local styles = require("Module:TemplateStyles")("Module:etymology/style.css")
	return text .. styles
end

-- Entry point for the descendant-tree template: same formatting as export.descendant but in
-- descendant-tree mode, and without appending any stylesheet output.
function export.descendants_tree(frame)
	local in_tree_mode = true
	return desc_or_desc_tree(frame, in_tree_mode)
end

return export