-- Module:User:GianWiki/sdc-common

-- Table of exported functions and constants. Such a table is normally called `export`, but there are so many
-- references to exported members in this module that a short name is used instead.
local ex = {}

-- Titles of the modules this one depends on. Only the string utilities are loaded eagerly (below); the other two are
-- `require`d at the point of use.
local put_module = "Module:parse utilities"
local romut_module = "Module:romance utilities"
local strutil_module = "Module:string utilities"

local m_str_utils = require(strutil_module)

-- Short aliases for the string helpers used throughout the module.
local u = m_str_utils.char
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD

-- Preposition strings (each ending in a space, some with a trailing apostrophe form) that are handed to
-- handle_multiword in Module:romance utilities when inflecting multiword terms.
local prepositions = {
	-- others
	"cument' ",
	"cument'e ",
	"cumenti ",
	"cun ",
	"pa ",
	"pai ",
	"par ",
	"tra ",
	"fra ",
}

-- Substitute like rsubn, but return only the resulting string (the substitution count is dropped).
function ex.rsub(term, foo, bar)
	-- The extra parentheses truncate rsubn's results to the first one.
	return (rsubn(term, foo, bar))
end

-- Substitute like rsubn, but return the resulting string followed by a boolean that is true when at least one
-- substitution was made.
function ex.rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Keep applying ex.rsub with the same pattern and replacement until a pass leaves the string unchanged, then return
-- that fixed point.
function ex.rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = ex.rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Pronunciation -

-- Individual combining marks, built from their Unicode code points.
ex.AC = u(0x301) -- U+0301, acute
ex.GR = u(0x300) -- U+0300, grave
ex.CFLEX = u(0x302) -- U+0302, circumflex
ex.DOTOVER = u(0x0307) -- U+0307, dot over: signals an unstressed word
ex.DOTUNDER = u(0x0323) -- U+0323, dot under: unstressed vowel with quality marker
ex.LINEUNDER = u(0x0331) -- U+0331, line under: secondary-stressed vowel with quality marker
ex.DIA = u(0x0308) -- U+0308, diaeresis
ex.TIE = u(0x0361) -- U+0361, tie

-- Groups of marks, each paired with a `_c` variant that wraps the group in a pattern character class.
ex.stress = "ˈˌ"
ex.stress_c = "[" .. ex.stress .. "]"
ex.quality = ex.AC .. ex.GR
ex.quality_c = "[" .. ex.quality .. "]"
ex.accent = ex.stress .. ex.quality .. ex.CFLEX .. ex.DOTOVER .. ex.DOTUNDER .. ex.LINEUNDER
ex.accent_c = "[" .. ex.accent .. "]"

-- Put `text` into canonical decomposed form (NFD), e.g. è → e + combining grave, with two adjustments:
-- * o/O/u/U followed by a diaeresis are recomposed into ö/Ö/ü/Ü so that they can be treated as single vowels;
-- * ex.LINEUNDER/ex.DOTUNDER/ex.DOTOVER are moved after a directly following acute/grave (canonical decomposition
--   puts ex.LINEUNDER and ex.DOTUNDER first).
function ex.decompose(text)
	-- Lookup table for the character + diaeresis sequences that get recomposed.
	local recomposed = {
		["o" .. ex.DIA] = "ö",
		["O" .. ex.DIA] = "Ö",
		["u" .. ex.DIA] = "ü",
		["U" .. ex.DIA] = "Ü",
	}
	local marks_to_move = "([" .. ex.LINEUNDER .. ex.DOTUNDER .. ex.DOTOVER .. "])"
	local quality_mark = "(" .. ex.quality_c .. ")"

	local decomposed = toNFD(text)
	decomposed = ex.rsub(decomposed, "." .. ex.DIA, recomposed)
	-- Swap the two captured marks so that the quality mark comes first.
	decomposed = ex.rsub(decomposed, marks_to_move .. quality_mark, "%2%1")
	return decomposed
end

-- Put `text` into canonical composed form (NFC), e.g. e + combining grave → è.
function ex.compose(text)
	return toNFC(text)
end

-- Break `text` into words and the separators between them. Whitespace always separates words; a hyphen separates
-- words only when it has a non-space character on both sides, so a hyphen that marks an affix (next to a space or
-- at the edge of the text) stays attached to its word. The result alternates words and separators, and
-- table.concat() of it reconstructs the initial text.
function ex.split_but_rejoin_affixes(text)
	local has_separator = rfind(text, "[%s%-]")
	if not has_separator then
		return {text}
	end
	-- Word-separating hyphens are temporarily swapped for a placeholder character so that the split below can tell
	-- them apart from affix hyphens; a separator consisting of just the placeholder is turned back into a hyphen
	-- afterwards.
	local placeholder = u(0xFFF0)
	local marked = ex.rsub_repeatedly(text, "([^%s])%-([^%s])", "%1" .. placeholder .. "%2")
	local pieces = rsplit(marked, "([%s" .. placeholder .. "]+)")
	for idx, piece in ipairs(pieces) do
		if piece == placeholder then
			pieces[idx] = "-"
		end
	end
	return pieces
end

-- Strip secondary-stress and unstressed-quality markings from each word of `text`, leaving separators untouched.
-- Within a word the steps are, in order:
-- (0) Delete every quality mark that is followed by ex.DOTUNDER (unstressed quality marks), together with the
--     ex.DOTUNDER.
-- (1) Delete a quality mark + ex.LINEUNDER (secondary stress) whenever an earlier quality mark exists in the word.
-- (2) Delete any remaining ex.CFLEX/ex.DOTOVER/ex.DOTUNDER/ex.LINEUNDER. A secondary stress that survived step (1)
--     thereby keeps its accent mark, which stays if it is the only stress in the word, as in có̱lle = con le.
-- (3) Delete a quality mark whenever a later quality mark exists in the word.
function ex.remove_secondary_stress(text)
	local quality = ex.quality_c
	local unstressed_quality = quality .. ex.DOTUNDER
	local later_secondary = "(" .. quality .. ".*)" .. quality .. ex.LINEUNDER
	local non_stress_marks = "[" .. ex.CFLEX .. ex.DOTOVER .. ex.DOTUNDER .. ex.LINEUNDER .. "]"
	local earlier_quality = quality .. "(.*" .. quality .. ")"

	local function clean_word(word)
		word = ex.rsub(word, unstressed_quality, "")
		word = ex.rsub_repeatedly(word, later_secondary, "%1")
		word = ex.rsub(word, non_stress_marks, "")
		word = ex.rsub_repeatedly(word, earlier_quality, "%1")
		return word
	end

	local pieces = ex.split_but_rejoin_affixes(text)
	for idx, piece in ipairs(pieces) do
		-- Odd positions hold actual words; even positions hold separators.
		if idx % 2 == 1 then
			pieces[idx] = clean_word(piece)
		end
	end
	return table.concat(pieces)
end

-- Delete every character of the ex.accent set from `text`.
-- NOTE: `text` on entry must be decomposed using decompose.
function ex.remove_accents(text)
	local stripped = ex.rsub(text, ex.accent_c, "")
	return stripped
end

-- In each word of `text`, delete every accent character that still has a character after it, so that only an accent
-- in word-final position is kept. Separators between words are left untouched.
-- NOTE: `text` on entry must be decomposed using decompose.
function ex.remove_non_final_accents(text)
	local accent_before_char = ex.accent_c .. "(.)"
	local pieces = ex.split_but_rejoin_affixes(text)
	for idx, piece in ipairs(pieces) do
		-- Odd positions hold actual words; even positions hold separators.
		if idx % 2 == 1 then
			pieces[idx] = ex.rsub_repeatedly(piece, accent_before_char, "%1")
		end
	end
	return table.concat(pieces)
end

-- References -

-- Expand an abbreviated reference spec into wikitext.
--
-- `spec` has the shape TEMPLATE or TEMPLATE:PROP,PROP,..., optionally followed by a trailing <<...>> modifier
-- section. Each PROP is either a bare value, which becomes the template argument "|VALUE", or PARAM#VALUE, which
-- becomes "|PARAM=VALUE". If the property list contains a comma followed by whitespace, it is split with
-- split_on_comma from Module:parse utilities; otherwise it is split on every comma.
--
-- Returns the preprocessed template call followed by the modifier section (passed through unchanged), or just the
-- modifier section when both the template name and the properties are empty.
function ex.parse_abbreviated_references_spec(spec)
	local spec_before_modifiers, modifiers = spec:match("^(.-)(<<.*>>)$")
	if spec_before_modifiers then
		spec = spec_before_modifiers
	else
		modifiers = ""
	end

	local template_name, props = spec:match("^([^:]+):(.*)$")
	if not template_name then
		-- No colon (or nothing before it): the whole spec is the template name.
		template_name = spec
		props = ""
	else
		if props:find(",%s") then
			props = require(put_module).split_on_comma(props)
		else
			props = rsplit(props, ",")
		end
		for i, prop in ipairs(props) do
			local param, val = prop:match("^(.-)#(.*)$")
			if param then
				props[i] = "|" .. param .. "=" .. val
			else
				props[i] = "|" .. prop
			end
		end
		props = table.concat(props)
	end

	if template_name == "" and props == "" then
		return modifiers
	end
	-- mw.getCurrentFrame is a function and has to be called to obtain the frame object; the previous code indexed
	-- the function itself with `:preprocess`, which fails at runtime.
	-- NOTE(review): the format string was previously empty, which silently discarded `template_name` and `props`.
	-- A bare template invocation is the minimal form consistent with the "|param=value" arguments built above;
	-- if the reference templates are meant to carry a fixed name prefix, add it here -- TODO confirm.
	local wikitext = ("{{%s%s}}"):format(template_name, props)
	return mw.getCurrentFrame():preprocess(wikitext) .. modifiers
end

-- Inflection -

-- Inflect a multiword term (one containing spaces or hyphens) by delegating to handle_multiword in
-- Module:romance utilities. `special` says which parts of the multiword term to inflect, and `inflect` is a
-- one-argument function that inflects an individual part. As an optimization, nothing is done (nil is returned)
-- when the term contains no space or hyphen and `special` is not given. Otherwise returns the single inflected
-- form, or nil if handle_multiword produced nothing; more than one result is treated as an internal error.
local function call_handle_multiword(term, special, inflect)
	if not (special or term:find("[ %-]")) then
		return nil
	end
	local forms = require(romut_module).handle_multiword(term, special, inflect, prepositions)
	if not forms or #forms < 1 then
		return nil
	end
	if #forms ~= 1 then
		error("Internal error: Should have only one return value from inflection function: " .. table.concat(forms, ","))
	end
	return forms[1]
end

-- Generate a default plural form, which is correct for most regular nouns and adjectives.
--
-- Parameters:
-- * `term`: the singular form.
-- * `gender`: gender code. "m" and "f" select gender-specific endings; "mf", "mfbysense" and "?" raise an error
--   whenever the plural would depend on the gender.
-- * `special`: optional. "cap*" and "cap*+" are handled here (the term must begin with capu-; for gender "m" the
--   prefix becomes capi-; with "cap*" the result is returned right away, with "cap*+" the ending is pluralized as
--   well). Any other value is passed on to handle_multiword in Module:romance utilities.
--
-- Returns the plural, or nil if the term's ending matches none of the rules.
function ex.make_plural(term, gender, special)
	local plspec
	if special == "cap*" or special == "cap*+" then
		plspec = special
		special = nil
	end
	local retval = call_handle_multiword(term, special, function(part)
		return ex.make_plural(part, gender, plspec)
	end)
	if retval then
		return retval
	end

	-- Raise an error if the gender is one for which a gender-specific plural cannot be chosen.
	local function check_no_mf()
		if gender == "mf" or gender == "mfbysense" or gender == "?" then
			error("With gender=" .. gender .. ", unable to pluralize term '" .. term .. "'"
				.. (special and " using special=" .. special or "") .. " because its plural is gender-specific")
		end
	end

	if plspec == "cap*" or plspec == "cap*+" then
		check_no_mf()
		if not term:find("^capu") then
			error("With special=" .. plspec .. ", term '" .. term .. "' must begin with capu-")
		end
		if gender == "m" then
			term = term:gsub("^capu", "capi")
		end
		if plspec == "cap*" then
			return term
		end
	end

	-- NOTE: gsub results are always assigned to `term` (never returned directly) so that gsub's second return value,
	-- the substitution count, is dropped.
	if term:find("iu$") then
		term = term:gsub("iu$", "i")
	elseif term:find("ologu$") then
		term = term:gsub("u$", "i")
	elseif term:find("[ia]cu$") then
		term = term:gsub("u$", "i")
	-- NOTE(review): the word lists below are spelled with Italian -co endings and presumably predate the adaptation
	-- of these rules to -cu/-gu -- confirm whether they still apply.
	--
	-- Of adjectives in -co but not in -aco or -ico, there are several in -esco that take -eschi, and various
	-- others that take -chi: adunco, anficerco, azteco, bacucco, barocco, basco,
	-- bergamasco, berlusco, bianco, bieco, bisiacco, bislacco, bisulco, brigasco,
	-- brusco, bustocco, caduco, ceco, cecoslovacco, cerco, chiavennasco, cieco,
	-- ciucco, comasco, cosacco, cremasco, crucco, dificerco, dolco, eterocerco,
	-- etrusco, falisco, farlocco, fiacco, fioco, fosco, franco, fuggiasco, giucco,
	-- glauco, gnocco, gnucco, guatemalteco, ipsiconco, lasco, livignasco, losco,
	-- manco, monco, monegasco, neobarocco, olmeco, parco, pitocco, pluriconco,
	-- poco, polacco, potamotoco, prebarocco, prisco, protobarocco, rauco, ricco,
	-- risecco, rivierasco, roco, roiasco, sbieco, sbilenco, sciocco, secco,
	-- semisecco, slovacco, somasco, sordocieco, sporco, stanco, stracco, staricco,
	-- taggiasco, tocco, tosco, triconco, trisulco, tronco, turco, usbeco, uscocco,
	-- uto-azteco, uzbeco, valacco, vigliacco, zapoteco.
	--
	-- Only the following take -ci: biunivoco, dieco, equivoco, estrinseco, greco, inequivoco,
	-- intrinseco, italigreco, magnogreco, meteco, neogreco, osco (either -ci or -chi),
	-- petulco (either -chi or -ci), plurivoco, porco, pregreco, reciproco, stenoeco,
	-- tagicco, univoco, volsco.
	elseif term:find("[cg]u$") then
		term = term:gsub("u$", "hi")
	elseif term:find("u$") then
		term = term:gsub("u$", "i")
	elseif term:find("[cg]a$") then
		check_no_mf()
		-- NOTE(review): for any gender other than "m" the replacement evaluates to `false`, which string.gsub
		-- rejects with a "bad argument" error. The ending for non-masculine terms appears to be missing here and in
		-- the -[cg]ia and -a branches below -- TODO confirm the intended endings.
		term = term:gsub("a$", (gender == "m" and "hi"))
	elseif term:find("logia$") then
		if gender ~= "f" then
			error("Term '" .. term .. "' ending in -logia should have gender=f if it is using the default plural")
		end
		term = term:gsub("a$", "i")
	elseif term:find("[cg]ia$") then
		check_no_mf()
		term = term:gsub("ia$", (gender == "m" and "i"))
	elseif term:find("a$") then
		check_no_mf()
		term = term:gsub("a$", (gender == "m" and "i"))
	elseif term:find("i$") then
		-- Terms already ending in -i are returned unchanged.
		term = term:gsub("i$", "i")
	else
		return nil
	end
	return term
end

-- Generate a default feminine form: final -u becomes -a, otherwise final -oni becomes -ona, otherwise the term is
-- returned unchanged. Multiword terms (and any term given with `special`) are delegated to call_handle_multiword,
-- whose result is used if it returns one.
function ex.make_feminine(term, special)
	local multiword_form = call_handle_multiword(term, special, ex.make_feminine)
	if multiword_form then
		return multiword_form
	end

	-- The parentheses around each gsub call keep only the resulting string; without them the substitution count
	-- would be returned as a second value.
	if term:find("u$") then
		return (term:gsub("u$", "a"))
	end
	if term:find("oni$") then
		return (term:gsub("oni$", "ona"))
	end
	return term
end

-- Generate a default masculine form: final -a becomes -u, otherwise final -trici becomes -tori, otherwise the term
-- is returned unchanged. Multiword terms (and any term given with `special`) are delegated to
-- call_handle_multiword, whose result is used if it returns one.
function ex.make_masculine(term, special)
	local retval = call_handle_multiword(term, special, ex.make_masculine)
	-- Use the multiword result when there is one, as make_feminine does; previously it was computed and then
	-- ignored, so multiword terms fell through to the single-word rules below.
	if retval then
		return retval
	end

	-- Don't directly return gsub because then there will be multiple return values.
	if term:find("a$") then
		term = term:gsub("a$", "u")
	elseif term:find("trici$") then
		term = term:gsub("trici$", "tori")
	end

	return term
end

-- Hand the table of exported functions and constants to whoever `require`s this module.
return ex