-- Module:category tree/poscatboiler/data/affixes and compounds

-- Module-level registries, filled in below and exported at the end of the file:
--   labels          label name -> label spec
--   raw_categories  raw category name -> category spec
--   handlers        list of functions that recognise labels by pattern
local labels, raw_categories, handlers = {}, {}, {}

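-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------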

-- Compounds whose stems alliterate; filed under both compound terms and
-- alliterative phrases.
labels["alliterative compounds"] = {
	parents = {"compound terms", "alliterative phrases"},
	description = " noun phrases composed of two or more stems that alliterate.",
}

-- Compounds pairing a term with its antonym; a subtype of dvandva compounds.
labels["antonymous compounds"] = {
	description = " compounds in which one part is an antonym of the other.",
	-- The sort key must live inside the parent spec (same {name=..., sort=...}
	-- shape used by the other labels in this file). Written as
	-- {"dvandva compounds", sort = "antonym"} it is a stray hash key on the
	-- list itself and is never seen when the list items are read.
	parents = {{name = "dvandva compounds", sort = "antonym"}},
}

-- Bahuvrihi compounds: filed under both compound terms and exocentric
-- compounds.
labels["bahuvrihi compounds"] = {
	parents = {"compound terms", "exocentric compounds"},
	description = " compounds in which the first part (A) modifies the second (B), and whose meaning follows a metonymic pattern: “ having a B that is A.”",
}

-- Generate one "compound <POS>" label per part of speech listed here. Each
-- generated label is parented to "compound terms" (sorted under a single
-- space) and to the part of speech itself.
local compound_poses = {
	"adjectives",
	"adverbs",
	"conjunctions",
	"determiners",
	"interjections",
	"nouns",
	"numerals",
	"particles",
	"postpositions",
	"prefixes",
	"prepositions",
	"pronouns",
	"proper nouns",
	"suffixes",
	"verbs",
}

for i = 1, #compound_poses do
	local pos = compound_poses[i]
	labels["compound " .. pos] = {
		parents = {{name = "compound terms", sort = " "}, pos},
		description = " " .. pos .. " composed of two or more stems.",
	}
end

-- "compound determinatives" is spelled out by hand rather than generated by
-- the part-of-speech loop, and is parented to "determiners".
labels["compound determinatives"] = {
	parents = {"compound terms", "determiners"},
	description = " determinatives composed of two or more stems.",
}

-- Root label for this family. It sets its own umbrella parent explicitly, so
-- it does not receive the default assigned to labels lacking one.
labels["compound terms"] = {
	parents = {"terms by etymology"},
	umbrella_parents = "Terms by etymology subcategories by language",
	description = " terms composed of two or more stems.",
}

-- Coordinative compounds ("A and B").
labels["dvandva compounds"] = {
	parents = {"compound terms"},
	description = " terms composed of two or more stems whose stems could be connected by an 'and'.",
}

-- Numeral-modified subtype of tatpurusa compounds.
labels["dvigu compounds"] = {
	parents = {"tatpurusa compounds"},
	description = " tatpuruṣa compounds where the modifying member is a number",
}

-- Compounds that contain their own head.
labels["endocentric compounds"] = {
	parents = {"compound terms"},
	description = " terms composed of two or more stems, one of which is the head of that compound.",
}

-- Same description text as "endocentric compounds"; shown as "noun-noun" in
-- breadcrumbs.
labels["endocentric noun-noun compounds"] = {
	breadcrumb = "noun-noun",
	parents = {"endocentric compounds", "compound terms"},
	description = " terms composed of two or more stems, one of which is the head of that compound.",
}

labels["endocentric verb-noun compounds"] = {
	breadcrumb = "verb-noun",
	parents = {"endocentric compounds", "verb-noun compounds"},
	description = " compounds in which the first element is a verbal stem, the second a nominal stem and the head of the compound.",
}

-- Compounds whose head is none of their stems.
labels["exocentric compounds"] = {
	parents = {"compound terms"},
	description = " terms composed of two or more stems, none of which is the head of that compound.",
}

labels["exocentric verb-noun compounds"] = {
	breadcrumb = "verb-noun",
	parents = {"exocentric compounds", "verb-noun compounds"},
	description = " compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action.",
}

-- Subtype of tatpurusa compounds.
labels["karmadharaya compounds"] = {
	parents = {"tatpurusa compounds"},
	description = " terms composed of two or more stems in which the main stem determines the case endings.",
}

-- Subtype of dvandva compounds; reuses the dvandva description text and is
-- shown as "itaretara" in breadcrumbs.
labels["itaretara dvandva compounds"] = {
	breadcrumb = "itaretara",
	parents = {"dvandva compounds"},
	description = " terms composed of two or more stems whose stems could be connected by an 'and'.",
}

-- Compounds whose stems rhyme; filed under both compound terms and rhyming
-- phrases.
labels["rhyming compounds"] = {
	parents = {"compound terms", "rhyming phrases"},
	description = " noun phrases composed of two or more stems that rhyme.",
}

-- Subtype of dvandva compounds; reuses the dvandva description text and is
-- shown as "samahara" in breadcrumbs.
labels["samahara dvandva compounds"] = {
	breadcrumb = "samahara",
	parents = {"dvandva compounds"},
	description = " terms composed of two or more stems whose stems could be connected by an 'and'.",
}

labels["shitgibbons"] = {
	parents = {"endocentric compounds"},
	description = " terms that consist of a single-syllable expletive followed by a two-syllable trochee that serves as a nominalizer or intensifier.",
}

-- Compounds pairing a term with its synonym; a subtype of dvandva compounds.
labels["synonymous compounds"] = {
	description = " compounds in which one part is a synonym of the other.",
	-- The sort key must live inside the parent spec (same {name=..., sort=...}
	-- shape used by the other labels in this file). Written as
	-- {"dvandva compounds", sort = "synonym"} it is a stray hash key on the
	-- list itself and is never seen when the list items are read.
	parents = {{name = "dvandva compounds", sort = "synonym"}},
}

-- Parent of the dvigu, karmadharaya and vyadhikarana labels.
labels["tatpurusa compounds"] = {
	parents = {"compound terms"},
	description = " terms composed of two or more stems",
}

-- Specialisation of verb-object compounds in which the object is a noun.
labels["verb-noun compounds"] = {
	parents = {"verb-object compounds"},
	description = " compounds in which the first element is a transitive verb, the second a noun functioning as its direct object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
}

-- NOTE(review): the `additional` text reads "Examples in English are and ." --
-- the example terms appear to be missing from the string; confirm against the
-- intended wording.
labels["verb-object compounds"] = {
	parents = {"compound terms"},
	description = " compounds in which the first element is a transitive verb, the second a term (usually but not always a noun) functioning as its (normally direct) object, and whose referent is the person or thing doing the action, or an adjective describing such a person or thing.",
	additional = "Examples in English are and .",
}

labels["verb-verb compounds"] = {
	parents = {"compound terms"},
	description = " compounds composed of two or more verbs in apposition, often either synonyms or antonyms, and whose referent refers to the result of performing those actions.",
}

-- Vrddhi derivatives hang directly off "terms by etymology" rather than off
-- "compound terms".
labels["vrddhi derivatives"] = {
	parents = {"terms by etymology"},
	description = " terms derived from a Proto-Indo-European root by the process of vṛddhi derivation.",
}

labels["vrddhi gerundives"] = {
	parents = {"vrddhi derivatives"},
	description = " gerundives derived from a Proto-Indo-European root by the process of vṛddhi derivation.",
}

-- Subtype of tatpurusa compounds.
labels["vyadhikarana compounds"] = {
	parents = {"tatpurusa compounds"},
	description = " terms composed of two or more stems in which the non-main stem determines the case endings.",
}

-- Generate a "terms by <affix type>" label for each supported affix type.
-- Each one is parented to "terms by etymology" (sorted by the affix type) and
-- to the plural affix category (e.g. "prefixes").
do
	local fixtypes = {"circumfix", "infix", "interfix", "prefix", "suffix"}
	for i = 1, #fixtypes do
		local fixtype = fixtypes[i]
		local plural = fixtype .. "es"
		labels["terms by " .. fixtype] = {
			parents = {{name = "terms by etymology", sort = fixtype}, plural},
			umbrella_parents = "Terms by etymology subcategories by language",
			description = " terms categorized by their " .. plural .. ".",
		}
	end
end

-- Add 'umbrella_parents' key if not already present.
-- (The loop must be on its own lines: when it shares a line with the comment
-- above, Lua treats the whole loop as comment text and no default is applied.)
for _, data in pairs(labels) do
	-- NOTE: umbrella.parents overrides umbrella_parents if both are given.
	if not data.umbrella_parents then
		-- Only a field of the visited value is assigned; no key is added to
		-- `labels` itself, so mutating during the pairs() traversal is safe.
		data.umbrella_parents = "Types of compound terms by language"
	end
end

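-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------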

-- Umbrella category that the labels above default to via `umbrella_parents`.
-- Two of its parents are sorted under a single space.
raw_categories["Types of compound terms by language"] = {
	parents = {
		"Umbrella metacategories",
		{name = "compound terms", is_label = true, sort = " "},
		{name = "Terms by etymology subcategories by language", sort = " "},
	},
	description = "Umbrella categories covering topics related to types of compound terms.",
	additional = "",
}

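-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------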

------------------------------- Affix handlers --------------------------------

-- Handler for labels of the form "POS AFFIXTYPEed with TERM" or
-- "POS AFFIXTYPEed with TERM (ID)", where AFFIXTYPE ends in "fix"
-- (e.g. a label beginning "terms prefixed with ").
--
-- @param data  table; this handler reads `data.label`, `data.args`,
--              `data.lang` and `data.sc`.
-- @return      nothing if the label does not match the pattern;
--              nil if the affix type is unsupported or POS is "words";
--              otherwise a category spec table followed by `true`
--              (signalling that `data.args` has been handled here).
table.insert(handlers, function(data)
	local labelpref, pos, affixtype, term_and_id =
		data.label:match("^(([a-z -]+) ([a-z]+fix)ed with )(.+)$")
	if not affixtype then
		return
	end

	-- Split a trailing " (ID)" off the term. The ID itself may not contain
	-- parentheses, so the set must be "[^()]"; a bare "[^]" is a malformed
	-- pattern (the "]" is taken as a set member and the set never closes).
	local term, id = term_and_id:match("^(.+) %(([^()]+)%)$")
	term = term or term_and_id

	local desc = {
		["prefix"] = "beginning with the prefix",
		["suffix"] = "ending with the suffix",
		["circumfix"] = "bookended with the circumfix",
		["infix"] = "spliced with the infix",
		["interfix"] = "joined with the interfix",
		-- Transfixes not supported currently.
		-- ["transfix"] = "patterned with the transfix",
	}
	if not desc[affixtype] then
		return nil
	end

	-- Message templates, keyed by affix type, explaining how terms end up in
	-- the category. The substitutions applied further below are:
	--   {LANG}            -> the language code
	--   {TERM_AND_ID}     -> the term
	--   {POS}             -> "|pos=POS" when POS is not "terms", else ""
	--   {BASE}, {BASE2}   -> " base ", " base2 "
	--   {BASE_EXPL}, {BASE_BASE2_EXPL} -> an explanation of the base lemma(s)
	-- NOTE(review): as written, these templates only contain {LANG} and the
	-- two *_EXPL placeholders; the template-invocation text that would carry
	-- the others looks to be missing -- confirm the intended wording.
	local what_categorizes = {
		["prefix"] = "{LANG} or {LANG} (or the more specific and less-preferred equivalents pre or prefix), where {BASE_EXPL}",
		["suffix"] = "{LANG} or {LANG} (or the more specific and less-preferred equivalents suf or suffix), where {BASE_EXPL}",
		["circumfix"] = "{LANG} or {LANG}, where {BASE_EXPL}",
		["infix"] = "{LANG}, where {BASE_EXPL}",
		["interfix"] = "{LANG} or {LANG}, where {BASE_BASE2_EXPL}",
	}

	local params = {
		["alt"] = {},
		["sc"] = {},
		["sort"] = {},
		["tr"] = {},
		["ts"] = {},
	}
	local args = require("Module:parameters").process(data.args, params, nil,
		"category tree/poscatboiler/data/terms by etymology")
	local m_scripts = require("Module:scripts")
	local sc = data.sc or args.sc and m_scripts.getByCode(args.sc, "sc") or nil
	local m_affix = require("Module:affix")
	-- Script object passed when processing the transliteration and
	-- transcription arguments; looked up once instead of once per call.
	local latn = m_scripts.getByCode("Latn")

	-- Convert term/alt into affixes if needed: call make_affix to add display
	-- hyphens if they're not already present.
	local _, display_term, lookup_term = m_affix.make_affix(term, data.lang, sc, affixtype, nil, true)
	local _, display_alt = m_affix.make_affix(args.alt, data.lang, sc, affixtype)
	local _, display_tr = m_affix.make_affix(args.tr, data.lang, latn, affixtype)
	local _, display_ts = m_affix.make_affix(args.ts, data.lang, latn, affixtype)
	local m_script_utilities = require("Module:script utilities")
	local id_text = id and " (" .. id .. ")" or ""

	-- Don't allow formerly-named categories with "words" (with or without ID).
	if pos == "words" then
		return nil
	end

	-- Compute parents.
	local parents = {}
	if id then
		if pos == "terms" then
			table.insert(parents, {name = labelpref .. term, sort = id, args = args})
		else
			table.insert(parents, {name = "terms " .. affixtype .. "ed with " .. term_and_id, sort = id .. ", " .. pos, args = args})
			table.insert(parents, {name = labelpref .. term, sort = id, args = args})
		end
	elseif pos ~= "terms" then
		table.insert(parents, {name = "terms " .. affixtype .. "ed with " .. term, sort = pos, args = args})
	end
	table.insert(parents, {
		name = "terms by " .. affixtype,
		-- Extra parentheses truncate each call to its first return value.
		sort = (data.lang:makeSortKey((data.lang:makeEntryName(args.sort or term)))),
	})

	local m_links = require("Module:links")
	local additional

	if data.lang then
		local langcode = data.lang:getCode()

		-- If other affixes are mapped to this one, show them.
		if m_affix.langs_with_lang_specific_data[langcode] then
			local langdata = mw.loadData(m_affix.affix_lang_data_module_prefix .. langcode)
			local variants = {}
			if langdata.affix_mappings then
				for variant, canonical in pairs(langdata.affix_mappings) do
					-- Above, we converted the stripped link term as we received it to the
					-- lookup form, so we can look up the variants that are mapped to this
					-- term. Once we find them, map them to display form.
					local is_variant = false
					if type(canonical) == "table" then
						for _, canonical_v in pairs(canonical) do
							if canonical_v == lookup_term then
								is_variant = true
								break
							end
						end
					else
						is_variant = canonical == lookup_term
					end
					if is_variant then
						local _, display_variant = m_affix.make_affix(variant, data.lang, sc, affixtype)
						-- Collect the display form itself; inserting an empty string here
						-- would produce a sentence with blank entries.
						table.insert(variants, display_variant)
					end
				end
				if #variants > 0 then
					-- Sort by the visible term, then turn each entry into a link.
					-- NOTE(review): the link markup is produced the same way as the
					-- main term in `description` below -- confirm this is the desired
					-- presentation for variants.
					table.sort(variants)
					for i, display_variant in ipairs(variants) do
						variants[i] = m_links.full_link({lang = data.lang, term = display_variant, sc = sc}, "term")
					end
					additional = ("This category also includes terms %sed with %s."):format(affixtype,
						require("Module:table").serialCommaJoin(variants))
				end
			end
		end

		-- Explain how terms get placed in this category.
		local what_categorizes_msg = what_categorizes[affixtype]
		if not what_categorizes_msg then
			error(("Internal error: No what_categorizes value for affixtype '%s' for label '%s', lang '%s'"):
				format(affixtype, data.label, langcode))
		end
		local replacement_escape = require("Module:string utilities").replacement_escape
		-- NOTE(review): `id` is passed to format() below but the format string has
		-- no placeholder for it, so only `term` is emitted -- confirm the intended
		-- TERM/ID markup.
		-- The outer parentheses drop gsub's second return value (the match count).
		what_categorizes_msg = "Terms are placed in this category using " .. (what_categorizes_msg
			:gsub("{LANG}", langcode)
			:gsub("{TERM_AND_ID}", replacement_escape(id and ("%s"):format(term, id) or term))
			:gsub("{POS}", replacement_escape(pos == "terms" and "" or ("|pos=%s"):format(pos)))
			:gsub("{BASE}", " base ")
			:gsub("{BASE2}", " base2 ")
			:gsub("{BASE_EXPL}", " is the base lemma from which this term is derived")
			:gsub("{BASE_BASE2_EXPL}", "  and   are the " .. "base lemmas from which this term is derived")
		) .. "."
		if additional then
			additional = additional .. "\n\n" .. what_categorizes_msg
		else
			additional = what_categorizes_msg
		end
	end

	return {
		description = " " .. pos .. " " .. desc[affixtype] .. " " .. m_links.full_link({
			lang = data.lang, term = display_term, alt = display_alt, sc = sc, id = id,
			tr = display_tr, ts = display_ts}, "term") .. ".",
		additional = additional,
		-- For the generic "terms" POS the breadcrumb is the tagged term (plus ID);
		-- otherwise it is the POS itself.
		breadcrumb = pos == "terms" and m_script_utilities.tag_text(display_alt or display_term, data.lang, sc, "term") .. id_text or pos,
		displaytitle = " " .. labelpref .. m_script_utilities.tag_text(term, data.lang, sc, "term") .. id_text,
		parents = parents,
		umbrella = false,
	}, true -- true = args handled
end)

-- Export the three registries built above under the keys LABELS,
-- RAW_CATEGORIES and HANDLERS.
return {
	LABELS = labels,
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
}