-- Module:User:Erutuon/grc-headword/sandbox

-- Table of functions returned at the end of this module.
local export = {}

local m_grc_utils = require("Module:grc-utilities")
local tokenize = m_grc_utils.tokenize
local find_ambig = m_grc_utils.findAmbig

local full_headword = require("Module:headword").full_headword
local get_accent_term = require("Module:grc-accent").get_accent_term
local m_table = require("Module:table")
local serial_comma_join = m_table.serialCommaJoin
local list_to_set = m_table.listToSet

local lang = require("Module:languages").getByCode("grc")
-- canonical_name is concatenated into category names and error messages
-- throughout this module, so it has to be the value returned by the method
-- call, not the method itself.
local canonical_name = lang:getCanonicalName()
local sc = require("Module:scripts").getByCode("polytonic")

-- Title object of the page being rendered. getCurrentTitle is a function and
-- must be called; it is called once and the result is reused for both fields.
local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text
-- The main namespace has an empty namespace text.
local MAINSPACE = NAMESPACE == ""

-- Prepended to part-of-speech category names on Reconstruction pages.
local reconstructed_prefix = NAMESPACE == "Reconstruction" and "reconstructed " or ""

-- only for sandbox module
if MAINSPACE then
	require("Module:debug").track("grc-headword/sandbox/mainspace")
end

local ufind = mw.ustring.find
local umatch = mw.ustring.match

-- Headword handlers, keyed by part-of-speech name; filled in further down.
local pos_functions = {}

-- Declension class names accepted for nouns.
local legal_declension = {
	first = true,
	second = true,
	Attic = true,
	third = true,
	irregular = true,
}

-- gender_names and number_codes are used by validate_genders to check gender
-- specifications: a gender code, optionally followed by a hyphen and a
-- number code (for example "m" or "f-p").
local gender_names = {
	m = "masculine",
	f = "feminine",
	n = "neuter",
	c = "common",
	["?"] = "unknown gender",
}

local number_codes = list_to_set({ "s", "d", "p" })

-- Wraps text in curly double quotation marks (used when building messages).
local function quote(text)
	local opening, closing = "“", "”"
	return opening .. text .. closing
end

-- Builds a wikilink to an entry in the glossary appendix.
--   anchor: section anchor within the glossary (required).
--   text:   display text; defaults to the anchor.
-- Raises an error when no anchor is given.
-- The function used to demand an anchor and then return the bare display
-- text without using it; the link markup is restored so the anchor has an
-- effect.
-- NOTE(review): "Appendix:Glossary" is assumed to be the intended target
-- page; confirm against the live module.
local function glossary_link(anchor, text)
	if not anchor then
		error("Anchor required.")
	end
	text = text or anchor
	return "[[Appendix:Glossary#" .. anchor .. "|" .. text .. "]]"
end

-- Renders the list of appendix notes that follows the headword line.
-- Returns "" for an empty list; otherwise the items joined with
-- `concatenater`, italicized and preceded by "; ".
local function format(array, concatenater)
	if not array[1] then
		return ""
	end
	return "; ''" .. table.concat(array, concatenater) .. "''"
end

-- Process arg the way Module:parameters would: trim surrounding whitespace,
-- then treat an empty result as absent. Trimming has to come first;
-- otherwise a whitespace-only value is reduced to "" and still counts as a
-- present parameter in the truthiness checks made by the callers.
local function process_arg(val)
	if val then
		val = mw.text.trim(val)
		if val == "" then
			val = nil
		end
	end
	return val
end

-- Byte pattern for the Greek and Coptic block minus its first line
-- (ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ): a lead byte 0xCE or 0xCF followed by any continuation
-- byte.
local basic_Greek = "[\206-\207][\128-\191]"

-- Byte pattern for exactly the Greek Extended block: 0xE1, then 0xBC-0xBF,
-- then any continuation byte.
local Greek_extended = "\225[\188-\191][\128-\191]"

-- Reports whether text holds at least one character from either of the two
-- ranges above. Always returns a boolean.
local function contains_Greek(text)
	if string.find(text, basic_Greek) then
		return true
	end
	return string.find(text, Greek_extended) ~= nil
end

-- A cheaper version of makeEntryName. Doesn't remove underties, which should
-- not appear in headwords, or convert curly apostrophes, spacing smooth
-- breathings, and spacing coronides to straight apostrophes.
--
-- Decomposes text to NFD and deletes the byte sequences \204\132 and
-- \204\134 (UTF-8 for U+0304 and U+0306, the macron and breve defined
-- elsewhere in this file). Returns exactly one value: gsub's substitution
-- count is dropped so it cannot leak into a caller's argument list.
local function remove_macron_breve(text)
	local stripped = mw.ustring.toNFD(text):gsub("\204[\132\134]", "")
	return stripped
end

-- Strips wikilink markup from text, keeping only what would be displayed:
-- "[[target|label]]" becomes "label" and "[[target]]" becomes "target".
-- Text without "[[" is returned untouched.
-- The first pattern used to be malformed (unterminated character set and no
-- capture for "%1"), so any linked text raised a pattern error.
local function remove_links(text)
	if text:find("%[%[") then
		text = text
			-- Piped links first, so the target part is discarded.
			:gsub("%[%[[^|%]]+|([^%]]+)%]%]", "%1")
			-- Then plain links.
			:gsub("%[%[([^%]]+)%]%]", "%1")
	end
	return text
end

-- Combining diacritics used in headwords, created from their code points.
local U = mw.ustring.char
local macron, breve = U(0x304), U(0x306)
local rough, smooth = U(0x314), U(0x313)
local diaeresis = U(0x308)
local acute, grave, circumflex = U(0x301), U(0x300), U(0x342)
local subscript = U(0x345)

-- Character class matching any single one of the diacritics above.
local diacritic_patt = "[" .. macron .. breve .. rough .. smooth .. diaeresis
	.. acute .. grave .. circumflex .. subscript .. "]"

-- Character class of the diacritics that settle a vowel's length.
local length_diacritic = table.concat { "[", macron, breve, circumflex, subscript, "]" }
-- Matches a token that is a single α, ι or υ plus any run of diacritics;
-- captures the vowel and the diacritics separately.
local aiu_diacritic = "^([αιυ])(" .. diacritic_patt .. "*)$"

-- Controls whether or not the headword can be provided in the first numbered
-- parameter. Returns true when text (links removed) contains a straight
-- apostrophe, or contains an α, ι or υ token carrying none of the
-- length-marking diacritics; returns false otherwise.
local function needs_headword(text)
	local unlinked = remove_links(text)
	-- If page name has straight apostrophe, a headword with curly apostrophe
	-- should be provided.
	if unlinked:find("'") then
		return true
	end
	-- Inspect the word unit by unit.
	for _, unit in ipairs(tokenize(unlinked)) do
		local vowel, marks = umatch(unit, aiu_diacritic)
		if vowel then
			local length_is_marked = marks ~= "" and ufind(marks, length_diacritic)
			if not length_is_marked then
				return true
			end
		end
	end
	return false
end

-- Process numbered parameters before using Module:parameters, as
-- Module:parameters converts several named parameters into arrays, which
-- makes them more difficult to manipulate.
--
--   args:             raw template arguments; cloned, never modified.
--   Greek_params:     names for the leading numbered parameters that hold
--                     Greek text or "-"; the first is the headword. NOTE:
--                     this table is modified (its first entry is removed)
--                     when no headword is found in parameter 1, so callers
--                     must pass a fresh table on every call.
--   non_Greek_params: names for the numbered parameters after the Greek
--                     ones; a missing or false value means none are allowed.
-- Returns the clone, with each numbered parameter moved to its name.
-- Raises an error for a superfluous headword, too many Greek parameters, a
-- numbered parameter that duplicates a named one, or a numbered parameter
-- with no corresponding name.
local function process_numbered_params(args, Greek_params, non_Greek_params)
	if not non_Greek_params then
		non_Greek_params = { false }
	end
	local max_Greek_param_index = #Greek_params
	-- Clone args table so that its values can be modified.
	args = m_table.shallowClone(args)
	if args.head then
		-- Special:WhatLinksHere/Wiktionary:Tracking/grc-headword/head param
		require("Module:debug").track("grc-headword/head param")
	end

	-- Count the leading run of numbered parameters that are "-" or contain
	-- Greek characters.
	local last_Greek_param_index = 0
	for i, arg in ipairs(args) do
		if arg == "-" or contains_Greek(arg) then
			last_Greek_param_index = i
		else
			break
		end
	end

	if last_Greek_param_index == max_Greek_param_index then
		-- Every Greek slot is filled, so parameter 1 is the headword. In
		-- mainspace this is only legitimate if the page name needs one.
		if MAINSPACE and not needs_headword(PAGENAME) then
			error(("The pagename does not have ambiguous vowels, so there cannot be "
					.. max_Greek_param_index .. " numbered parameter%s. See template documentation for more details.")
					:format(max_Greek_param_index == 1 and "" or "s"))
		end
	elseif last_Greek_param_index > max_Greek_param_index then
		error("Too many numbered parameters containing Greek text or hyphens. There can be at most "
				.. max_Greek_param_index .. ".")
	-- For indeclinable nouns:
	-- First parameter is headword if equal to pagename when macrons and breves are removed.
	elseif args[1] and remove_macron_breve(args[1]):gsub("’", "'") == mw.ustring.toNFD(PAGENAME) then
		if args.head then
			error("Parameter 1 appears to be the headword, so the head parameter " .. quote(args.head) .. " is not needed.")
		end
		args.head, args[1] = args[1], nil
	else
		table.remove(Greek_params, 1) -- Remove "head" parameter.
	end

	-- Moves numbered parameters start_i..end_i to the names listed, in
	-- order, in param_names.
	local function process_params(start_i, end_i, param_names)
		local i = 1 -- Index in the table of parameter names.
		for numbered = start_i, end_i do
			local named = param_names[i]
			i = i + 1

			if named then
				-- Process parameters, as they have not been processed by Module:parameters.
				args[numbered], args[named] = process_arg(args[numbered]), process_arg(args[named])
				-- The names may themselves be numbers (the preposition
				-- handler passes 1, 2, 3), in which case a value can already
				-- sit under its final key and must not be moved.
				if args[numbered] and numbered ~= named then
					if args[named] then
						error("Parameter " .. numbered .. " is not needed when parameter " .. named .. " is present.")
					end
					args[named], args[numbered] = args[numbered], nil
				end
			-- This should not happen, because the number of Greek parameters
			-- has already been checked.
			elseif args[numbered] then
				error("No purpose for parameter " .. numbered .. ".")
			end
		end
	end

	process_params(1, last_Greek_param_index, Greek_params)
	process_params(last_Greek_param_index + 1, #Greek_params + #non_Greek_params, non_Greek_params)

	if args.head == "-" then
		error("The headword cannot be absent.")
	end

	return args
end

-- Validates the headwords in data.heads and adds accent-related categories.
--   data:   headword data table; data.heads must contain at least one head,
--           and data.categories / data.pos_category are updated in place.
--   poscat: part-of-speech word used in the suffix category name.
-- Raises an error when the first head is a suffix but a later one is not,
-- or (in mainspace) when a single-word head contains a grave accent.
local function process_heads(data, poscat)
	-- A suffix head starts with a hyphen, optionally preceded by "*".
	local suffix_patt = "^%*?%-"
	local suffix = data.heads[1]:find(suffix_patt) and true or false

	-- The index is needed for the error messages below; it used to be
	-- discarded, so both messages failed on a nil concatenation.
	for i, head in ipairs(data.heads) do
		-- Use the same test as for the first head; comparing only the first
		-- character rejected heads that begin with "*-", the first included.
		if suffix and not head:find(suffix_patt) then
			error("The first headword has a hyphen, so headword #" .. i ..
					", " .. quote(head) .. ", should as well.")
		end

		local accent = get_accent_term(head)
		if accent then
			table.insert(data.categories,
				("%s %s terms"):format(canonical_name, accent))
		else
			table.insert(data.categories,
				("%s terms with irregular accent"):format(canonical_name))
		end

		if MAINSPACE then
			local _, vowel_set = find_ambig(head, false)
			for vowel in pairs(vowel_set) do
				require("Module:debug").track { "grc-headword/ambig", "grc-headword/ambig/" .. vowel }
			end

			if not head:find(" ") and mw.ustring.toNFD(head):find(grave) then
				error("Head #" .. i .. ", " .. quote(head) ..
					", contained a grave accent, but no space. Grave accent can only be used in multi-word terms.")
			end
		end
	end

	if suffix then
		data.pos_category = "suffixes"
		if not poscat:find "forms$" then
			table.insert(data.categories, canonical_name .. " " .. poscat .. "-forming suffixes")
		end
	end
end

-- Builds an inflection entry that shows a dash, not linked, under the given
-- label.
local function unlinked_form(label)
	local dash = { nolink = true, term = "—" }
	return { label = label, dash }
end

-- Appends the forms in gender_arg to inflections under the label
-- gender_name. Does nothing when gender_arg is empty. When
-- allow_blank_forms is set and the only form is "-", an unlinked dash is
-- appended instead.
local function add_gender_form(inflections, gender_arg, gender_name, allow_blank_forms)
	local first_form = gender_arg[1]
	if not first_form then
		return
	end

	local only_a_dash = first_form == "-" and not gender_arg[2]
	if allow_blank_forms and only_a_dash then
		table.insert(inflections, unlinked_form(gender_name))
		return
	end

	gender_arg.label = gender_name
	table.insert(inflections, gender_arg)
end

-- Adds the neuter forms, preceded by the feminine forms when total_forms is
-- 2, from args.f / args.n to inflections.
local function adj_and_part_forms(total_forms, args, inflections, allow_blank_forms)
	local with_feminine = total_forms == 2
	if with_feminine then
		add_gender_form(inflections, args.f, "feminine", allow_blank_forms)
	end
	add_gender_form(inflections, args.n, "neuter", allow_blank_forms)
end

-- Overrides data.pos_category when a degree of comparison is given.
--   args.deg:         nil (nothing happens), 'comp' or 'super'; any other
--                     value raises an error.
--   is_declined_form: when true, the category names "adjective forms"
--                     rather than "adjectives".
local function handle_degree_of_comparison(args, data, is_declined_form)
	local degree = args.deg
	if degree == nil then
		return
	end

	local degree_word = ({ comp = "comparative", super = "superlative" })[degree]
	if not degree_word then
		error('Adjective degree ' .. quote(degree) .. ' not recognized.')
	end

	local category = reconstructed_prefix .. degree_word .. " adjectives"
	if is_declined_form then
		-- Single-target assignment keeps only gsub's first result.
		category = category:gsub("adjectives", "adjective forms")
	end
	data.pos_category = category
end

-- Entry point for templates.
--   frame.args[1]: part of speech (required); selects the handler in
--                  pos_functions.
--   frame.args[2]: optional subclass passed through to the handler.
-- Template arguments are read from the parent frame. Returns the headword
-- line produced by full_headword, followed by the formatted appendix notes.
function export.show(frame)
	-- getParent is a method and has to be called to obtain the parent frame.
	local args = frame:getParent().args
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local subclass = frame.args[2]

	local data = {
		lang = lang,
		sc = sc,
		pos_category = reconstructed_prefix .. poscat,
		categories = {}, heads = {}, genders = {}, inflections = {}
	}
	local appendix = {}

	-- Look the handler up once; indexing an unknown part of speech raises an
	-- error through the metatable installed on pos_functions.
	local pos_function = pos_functions[poscat]
	if pos_function then
		pos_function(args, data, appendix, poscat, subclass)
	end

	return full_headword(data) .. format(appendix, ", ")
end

-- Variant of show for testing: takes the invocation arguments, the template
-- arguments and the page name directly, and returns the data table instead
-- of rendering it. Note that it overwrites the module-level PAGENAME.
function export.test(frame_args, parent_args, pagename)
	PAGENAME = pagename

	local poscat = frame_args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local subclass = frame_args[2]

	local data = {
		pos_category = reconstructed_prefix .. poscat,
		categories = {}, heads = {}, genders = {}, inflections = {}
	}
	local appendix = {}

	local pos_function = pos_functions[poscat]
	if pos_function then
		pos_function(parent_args, data, appendix, poscat, subclass)
	end

	return data
end

-- Checks every entry of gender_and_number_codes against gender_names and
-- number_codes. An entry is a gender code, optionally followed by a hyphen
-- and a number code. When both poscat and categories are given, a
-- "<language> <gender> <poscat>" category is added for each entry.
-- Raises an error on the first entry that is not recognized.
-- Declared local: it used to be created as a global by accident.
local function validate_genders(gender_and_number_codes, poscat, categories)
	for _, gender_and_number_code in ipairs(gender_and_number_codes) do
		local gender_code, number_code = gender_and_number_code:match("^([%a?]+)%-?(.*)$")
		local gender_name = gender_names[gender_code]
		if gender_name and (number_code == "" or number_codes[number_code]) then
			if poscat and categories then
				table.insert(categories,
					("%s %s %s"):format(canonical_name, gender_name, poscat))
			end
		else
			error("Gender " .. quote(gender_and_number_code) .. " is not a valid "
				.. canonical_name .. " gender.")
		end
	end
end

-- Headword handler for nouns (also registered for proper nouns below).
-- Numbered parameters: optional head, genitive, gender, declension class.
-- Adds the genitive (or "indeclinable"), diminutive and declension
-- information to data and appendix, together with the matching categories.
pos_functions["nouns"] = function(args, data, appendix, poscat)
	args = process_numbered_params(args, { "head", "gen" }, { "g", "decl" })
	local params = {
		-- Numbered parameters 1, 2, 3, 4 handled above.
		head = { list = true, default = PAGENAME },
		gen = { list = true },
		g = { list = true, default = '?' },
		dim = { list = true },
		decl = { list = true },
	}
	args = require("Module:parameters").process(args, params)
	data.heads = args.head
	process_heads(data, "noun")
	validate_genders(args.g, poscat, data.categories)
	data.genders = args.g

	-- A gender specification may carry a number suffix ("m-p"), which
	-- validate_genders accepts. Look the name up by the gender code alone;
	-- indexing gender_names with the whole specification gave nil and made
	-- the category formatting below fail.
	local function gender_name_of(gender_spec)
		return gender_names[gender_spec:match("^[%a?]+")]
	end

	if not args.gen[1] then
		table.insert(data.inflections, { label = glossary_link("indeclinable") })
		table.insert(data.categories,
			("%s indeclinable %s")
				:format(canonical_name, poscat))
		for _, g in ipairs(data.genders) do
			table.insert(data.categories,
				("%s %s indeclinable %s")
					:format(canonical_name, gender_name_of(g), poscat))
		end
		if args.decl[1] then
			error("Declension class " .. quote(args.decl[1])
					.. " has been given, but no genitive form has been given, so the word cannot belong to a declension class.")
		end
	else
		if not args.gen[2] and args.gen[1] == "-" then
			table.insert(data.inflections, unlinked_form("genitive"))
		else
			args.gen.label = "genitive"
			table.insert(data.inflections, args.gen)
		end

		if args.decl[2] then
			table.insert(data.inflections, { label = 'variously declined' })
			table.insert(data.categories,
				("%s %s with multiple declensions")
					:format(canonical_name, poscat))
		elseif not args.decl[1] then
			table.insert(appendix, "? declension")
		end

		for _, decl_class in ipairs(args.decl) do
			if legal_declension[decl_class] then
				local not_irregular = decl_class ~= "irregular"

				if not_irregular then
					-- Three arguments are supplied, so the template needs
					-- three placeholders; with a single "%s" the note read
					-- "<language> declension" and never named the class.
					-- NOTE(review): the appendix page name follows from the
					-- argument order (language, class, class) — confirm.
					table.insert(appendix,
						("[[Appendix:%s %s declension|%s declension]]")
							:format(canonical_name, decl_class, decl_class))
					table.insert(data.categories,
						("%s %s-declension %s")
							:format(canonical_name, decl_class, poscat))
				else
					table.insert(appendix,
						("%s declension"):format(decl_class))
				end

				for _, g in ipairs(data.genders) do
					table.insert(data.categories,
						not_irregular
						and ("%s %s %s in the %s declension")
							:format(canonical_name, gender_name_of(g), poscat, decl_class)
						or ("%s irregular %s %s")
							:format(canonical_name, gender_name_of(g), poscat))
				end
			else
				-- The list of choices mirrors the keys of legal_declension.
				error("Declension " .. quote(decl_class) .. " is not a legal " ..
					canonical_name .. " declension. Choose “first”, “second”, “Attic”, “third”, or “irregular”.")
			end
		end
	end

	-- Check first-declension endings and gender.
	if args.decl[1] == "first" then
		local alpha = "α[" .. macron .. breve .. "]?[" .. acute .. circumflex .. "]?"
		local eta = "η[" .. acute .. circumflex .. "]?"
		local gender = args.g[1]
		local alpha_ending, eta_ending
		if gender == "f" then
			alpha_ending = alpha .. "$"
			eta_ending = eta .. "$"
		elseif gender == "m" then
			alpha_ending = alpha .. "ς$"
			eta_ending = eta .. "ς$"
		else
			gender = nil
			require("Module:debug").track("grc-noun/1st/incorrect or no gender")
		end

		if gender then
			for _, head in ipairs(data.heads) do
				head = mw.ustring.toNFD(remove_links(head))
				if not (mw.ustring.find(head, eta_ending) or mw.ustring.find(head, alpha_ending)) then
					require("Module:debug").track("grc-noun/1st/" .. gender .. " with incorrect ending")
				end
			end
		end
	end

	if args.dim[1] then
		args.dim.label = "diminutive"
		table.insert(data.inflections, args.dim)
	end
end

pos_functions["proper nouns"] = pos_functions["nouns"]

-- Headword handler for verbs. Numbered parameters: optional head, then an
-- optional type; "deponent" is the only type accepted.
pos_functions["verbs"] = function(args, data)
	local numbered = process_numbered_params(args, { "head" }, { "type" })
	local processed = require("Module:parameters").process(numbered, {
		head = { list = true, default = PAGENAME },
		type = {},
	})
	data.heads = processed.head

	local verb_type = processed.type
	if verb_type then
		if verb_type ~= "deponent" then
			error("Invalid 'type': " .. tostring(verb_type) .. ".")
		end
		table.insert(data.inflections, { label = glossary_link("deponent") })
	end

	process_heads(data, "verb")
end

-- Headword handler for adverbs. Numbered parameters: optional head,
-- comparative, superlative, then adverb type(s).
pos_functions["adverbs"] = function(args, data)
	local numbered = process_numbered_params(args, { "head", "comp", "super" }, { "type" })
	local processed = require("Module:parameters").process(numbered, {
		head = { list = true, default = PAGENAME },
		comp = { list = true },
		super = { list = true },
		type = { list = true },
	})
	data.heads = processed.head
	process_heads(data, "adverb")

	-- Show comparative and superlative. If one of the two is absent while
	-- the other is present, show "no comparative" or "no superlative".
	local comparatives, superlatives = processed.comp, processed.super
	local has_comparative, has_superlative = comparatives[1], superlatives[1]

	if has_comparative then
		comparatives.label = 'comparative'
		table.insert(data.inflections, comparatives)
	elseif has_superlative then
		table.insert(data.inflections, { label = 'no comparative' })
	end

	if has_superlative then
		superlatives.label = 'superlative'
		table.insert(data.inflections, superlatives)
	elseif has_comparative then
		table.insert(data.inflections, { label = 'no superlative' })
	end

	local requested_types = processed.type
	if not requested_types[1] then
		return
	end

	local adverb_types = list_to_set {
		"demonstrative", "indefinite", "interrogative", "relative",
	}
	for _, adverb_type in ipairs(requested_types) do
		if not adverb_types[adverb_type] then
			error(quote(adverb_type) .. " is not a valid subcategory of adverb.")
		end
		table.insert(data.categories, canonical_name .. " " .. adverb_type .. " adverbs")
	end
end

-- Headword handler for numerals. Numbered parameters: optional head,
-- feminine, neuter. The named parameters car, ord, adv and coll add the
-- correspondingly labelled forms.
pos_functions["numerals"] = function(args, data)
	local numbered = process_numbered_params(args, { "head", "f", "n" })
	local processed = require("Module:parameters").process(numbered, {
		head = { list = true, default = PAGENAME },
		f = { list = true },
		n = { list = true },
		car = { list = true },
		ord = { list = true },
		adv = { list = true },
		coll = { list = true },
	})
	data.heads = processed.head
	process_heads(data, "numeral")
	adj_and_part_forms(2, processed, data.inflections, false)

	-- Parameter name and display label, in the order the forms are shown.
	local numeral_types = {
		{ "car", "cardinal" },
		{ "ord", "ordinal" },
		{ "adv", "adverbial" },
		{ "coll", "collective" },
	}
	for _, entry in ipairs(numeral_types) do
		local forms = processed[entry[1]]
		if forms[1] then
			forms.label = entry[2]
			table.insert(data.inflections, forms)
		end
	end
end

-- Shared implementation of the two participle handlers, which differed only
-- in the declension note. Numbered parameters: optional head, feminine,
-- neuter (the latter two required).
local function participle_headword(args, data, appendix, declension_note)
	-- The name list is created on every call because
	-- process_numbered_params may remove its first entry.
	args = process_numbered_params(args, { "head", "f", "n" })
	local params = {
		-- Parameters 1, 2, and 3 handled above.
		head = { list = true, default = PAGENAME },
		f = { list = true, required = true },
		n = { list = true, required = true },
	}
	args = require("Module:parameters").process(args, params)
	data.heads = args.head
	process_heads(data, "participle")
	table.insert(data.genders, "m")
	table.insert(appendix, declension_note)
	adj_and_part_forms(2, args, data.inflections, false)
end

-- Dispatches to the participle handler selected by subclass ("1&2" or
-- "1&3"); any other value raises an error.
pos_functions["participles"] = function(args, data, appendix, _, subclass)
	if subclass == "1&2" or subclass == "1&3" then
		pos_functions["part-" .. subclass](args, data, appendix)
	else
		-- tostring keeps the intended message when subclass is missing;
		-- quote cannot concatenate nil.
		error('Participle subclass ' .. quote(tostring(subclass)) .. ' not recognized.')
	end
end

pos_functions["part-1&2"] = function(args, data, appendix)
	participle_headword(args, data, appendix, "first/second declension")
end

pos_functions["part-1&3"] = function(args, data, appendix)
	participle_headword(args, data, appendix, "first/third declension")
end

-- Shared implementation of the four adjective handlers, which differed only
-- in the values passed here.
--   total_forms:     2 when there is a separate feminine (numbered
--                    parameters head, f, n; headword gender m); 1 when
--                    masculine and feminine coincide (head, n; genders m, f).
--   declension_note: text added to the appendix notes.
--   has_degree:      whether the "deg" parameter is accepted and applied.
local function adjective_headword(args, data, appendix, total_forms, declension_note, has_degree)
	local separate_feminine = total_forms == 2

	-- The name list is created on every call because
	-- process_numbered_params may remove its first entry.
	local numbered_names = separate_feminine and { "head", "f", "n" } or { "head", "n" }
	args = process_numbered_params(args, numbered_names)

	local params = {
		-- Numbered parameters handled above.
		head = { list = true, default = PAGENAME },
		n = { list = true, required = true },
	}
	if separate_feminine then
		params.f = { list = true, required = true }
	end
	if has_degree then
		params.deg = {}
	end
	args = require("Module:parameters").process(args, params)

	data.heads = args.head
	process_heads(data, "adjective")

	table.insert(data.genders, "m")
	if not separate_feminine then
		table.insert(data.genders, "f")
	end

	table.insert(appendix, declension_note)
	if has_degree then
		handle_degree_of_comparison(args, data, false)
	end
	adj_and_part_forms(total_forms, args, data.inflections, true)
end

-- Dispatches to the adjective handler selected by subclass ("1&2", "1&3",
-- "2nd" or "3rd"); any other value raises an error.
pos_functions["adjectives"] = function(args, data, appendix, _, subclass)
	local subclasses = { ["1&2"] = true, ["1&3"] = true, ["2nd"] = true, ["3rd"] = true }
	if subclasses[subclass] then
		pos_functions["adj-" .. subclass](args, data, appendix)
	else
		-- tostring keeps the intended message when subclass is missing;
		-- quote cannot concatenate nil.
		error('Adjective subclass ' .. quote(tostring(subclass)) .. ' not recognized.')
	end
end

pos_functions["adj-1&2"] = function(args, data, appendix)
	adjective_headword(args, data, appendix, 2, "first/second declension", true)
end

pos_functions["adj-1&3"] = function(args, data, appendix)
	adjective_headword(args, data, appendix, 2, "first/third declension", false)
end

pos_functions["adj-2nd"] = function(args, data, appendix)
	adjective_headword(args, data, appendix, 1, "second declension", false)
end

pos_functions["adj-3rd"] = function(args, data, appendix)
	adjective_headword(args, data, appendix, 1, "third declension", true)
end

-- Case abbreviations recognized in preposition parameters, with the full
-- names used in labels and categories.
local case_abbreviations = {
	nom = 'nominative',
	gen = 'genitive',
	dat = 'dative',
	acc = 'accusative',
	voc = 'vocative',
}

-- Headword handler for prepositions. Numbered parameters: an optional head
-- followed by up to three case abbreviations, which end up in the list
-- parameter 1.
pos_functions["prepositions"] = function(args, data, appendix)
	-- This allows up to 4 numbered parameters: the head plus three cases.
	local numbered = process_numbered_params(args, { "head" }, { 1, 2, 3 })
	local processed = require("Module:parameters").process(numbered, {
		[1] = { list = true },
		head = { list = true, default = PAGENAME },
	})
	data.heads = processed.head
	process_heads(data, "preposition")

	local governed = processed[1]
	if not governed[1] then
		return
	end

	local cases = {}
	for _, abbreviation in ipairs(governed) do
		local case_name = case_abbreviations[abbreviation]
		if not case_name then
			local suggestions = serial_comma_join(
				require("Module:fun").map(quote, { "gen", "dat", "acc" }),
				{ dontTag = true })
			error('Case abbreviation ' .. quote(abbreviation) ..
				' not recognized. Please choose from ' .. suggestions .. '.')
		end
		table.insert(data.categories, canonical_name .. " " .. case_name .. " prepositions")
		table.insert(cases, glossary_link(case_name))
	end

	table.insert(data.inflections, { label = 'governs the ' .. serial_comma_join(cases) })
end

-- Headword handler for particles. Takes no numbered parameters; each of the
-- boolean parameters disc, mod, inter and neg adds a descriptive label and
-- the matching category.
pos_functions["particles"] = function(args, data)
	local params = {
		head = { list = true, default = PAGENAME },
		disc = { type = 'boolean' },
		mod = { type = 'boolean' },
		inter = { type = 'boolean' },
		neg = { type = 'boolean' },
	}
	args = require("Module:parameters").process(args, params)
	data.heads = args.head
	-- Singular, like every other handler: process_heads builds the category
	-- "<poscat>-forming suffixes" from this word, and the plural produced
	-- "particles-forming suffixes".
	process_heads(data, "particle")

	-- Parameter name and descriptor, in display order.
	local descriptors = {
		{ "disc", "discourse" },
		{ "mod", "modal" },
		{ "inter", "interrogative" },
		{ "neg", "negative" },
	}
	for _, item in ipairs(descriptors) do
		if args[item[1]] then
			local descriptor = item[2]
			table.insert(data.categories, canonical_name .. " " .. descriptor .. " particles")
			table.insert(data.inflections, { label = descriptor .. ' particle' })
		end
	end
end

-- Set of parts of speech that have a "... forms" handler; built on first use.
local valid_pos

-- Fallback for keys that have no explicit handler in pos_functions: keys of
-- the shape "<pos> forms" with a supported <pos> get a handler generated on
-- the fly; every other key raises an error.
setmetatable(pos_functions, {
	__index = function(self, key)
		local pos
		if key:find(" forms$") then
			valid_pos = valid_pos or list_to_set {
				"noun", "proper noun", "verb", "adjective", "determiner", "pronoun",
				"participle"
			}
			pos = key:match("^(.+) forms$")
		end

		if not (pos and valid_pos[pos]) then
			error ("No function for the POS " .. quote(key) .. ".")
		end

		local takes_gender = pos == "noun" or pos == "proper noun"

		-- POS function for "noun forms", "verb forms", etc.
		return function(args, data)
			-- Only noun forms accept a second numbered parameter (gender).
			local numbered = process_numbered_params(args, { "head" }, takes_gender and { "g" })

			local params = {
				head = { list = true, default = PAGENAME },
			}
			if takes_gender then
				params.g = { list = true }
			elseif pos == "adjective" then
				params.deg = {}
			end

			local processed = require("Module:parameters").process(numbered, params)
			data.heads = processed.head
			process_heads(data, key)

			local genders = processed.g
			if genders then
				validate_genders(genders)
				data.genders = genders
			end

			handle_degree_of_comparison(processed, data, true)
		end
	end
})

return export