Module:zlw-lch-IPA

--[[

TODO: Decide on whether we want the Northern Borderlands dialect. The general consensus is to including it by doing the consonant subsitution and put the transcription in brackets Also the SBD should be included

--]]

local export = {}

local u = mw.ustring.char local rfind = mw.ustring.find local rsubn = mw.ustring.gsub local ulen = mw.ustring.len local ulower = mw.ustring.lower local uupper = mw.ustring.upper local usub = mw.ustring.sub

-- version of rsubn that discards all but the first return value local function rsub(term, foo, bar) local retval = rsubn(term, foo, bar) return retval end

local OVERTIE = u(0x361) -- COMBINING DOUBLE INVERTED BREVE

--	As can be seen from the last lines of the function, this returns a table of transcriptions,	and if do_hyph, also a string being the hyphenation. These are based on a single spelling given,	so the reason why the transcriptions are multiple is only because of the -yka alternating stress	et sim. This only accepts single-word terms. Multiword terms are handled by multiword. -- local function phonemic(txt, do_hyph, lang, is_prep, period) local ante = 0 local unstressed = is_prep or false local colloquial = true

function tsub(s, r)		txt, c = rsubn(txt, s, r)		return c > 0 end function lg(s) return s[lang] or s[1] end function tfind(s) return rfind(txt, s) end

-- Save indices of uppercase characters before setting everything lowercase. local uppercase_indices if do_hyph then uppercase_indices = {} local capitals = ("[A-Z%s]"):format(lg {			pl = "ĄĆĘŁŃÓŚŹŻ",			mpl = "ĄÁÅĆĘÉŁḾŃÓṔŚẂŹŻ",			szl = "ÃĆŁŃŌŎÔÕŚŹŻ",			csb = "ÔÒÃËÙÉÓĄŚŁŻŹĆŃ",			slv = "ÃÉËÊÓÕÔÚÙŃŻ",			mas = "ÁÄÉŁŃÓÔŚÛŸŻŹ"		}) if tfind(capitals) then local i = 1 local str = rsub(txt, "[.'^*+]", "") while rfind(str, capitals, i) do				local r, _ = rfind(str, capitals, i)				table.insert(uppercase_indices, r)				i = r + 1 end end if #uppercase_indices == 0 then uppercase_indices = nil end end

txt = ulower(txt)

-- Prevent palatisation of the special case kwazi-. tsub("^kwazi", "kwaz-i")

-- falling diphthongs  and , and diacriticised variants tsub(lg { "([ae])u", mpl = "([aáåeé])u", csb = "([ae])ù" }, "%1U")

-- rising diphthongs with  local V = lg { pl = "aąeęioóuy", mpl = "aąáåeęéioóuy", szl = "aãeéioōŏôõuy", csb = "ôòãëùéóąeyuioa", slv = "aãeéëêioóõôuúùyăĭŏŭŭùy̆ā", mas="aáäeéioóôuûÿ" } tsub(("([^%s])" .. (lang == "slv" and "j" or "i") .. "([%s])"):format(V, V), "%1I%2")

if txt:find("^*") then -- The symbol <*> before a word indicates it is unstressed. unstressed = true txt = txt:sub(2) elseif txt:find("^%^+") then -- The symbol <^> before a word indicates it is stressed on the ante-penult, -- <^^> on the ante-ante-penult, etc.		ante = txt:gsub("(%^).*", "%1"):len txt = txt:sub(ante + 1) elseif txt:find("^%+") then -- The symbol <+> indicates the word is stressed regularly on the penult. This is useful -- for avoiding the following checks to come into place. txt = txt:sub(2) else if tfind(".+[łlb][iy].+") then -- Some words endings trigger stress on the ante-penult or ante-ante-penult regularly. if tfind("liśmy$") or tfind("[bł]yśmy$") or tfind("liście$") or tfind("[bł]yście$") then ante = 2 elseif tfind("by[mś]?$") and not tfind("ła?by[mś]?$") then ante = 1 colloquial = false end end if tfind(".+[yi][kc].+") then local endings = lg { { "k[aąęio]", "ce", "kach", "kom" }, szl = { "k[aãio]", "ce", "kacj", "kōm", "kach" }, csb = { "k[aeąãùóoô]", "ce", "kacj", "kóm", "kama", "kach" }, slv = { "k[aãêúóoôõ]", "cê", "kacj", "kji", "kóm", "kóma", "kamy", "kach" } }			for _, v in ipairs(endings) do				if tfind(("[yi]%s$"):format(v)) then ante = 1 end end end if lang == "mpl" then if tfind(".+[yi]j.+") then local endings = lg { mpl = { "[ąåéo]", "[ée]j", "[áo]m", "ach" }, }				for _, v in ipairs(endings) do					if tfind(("[yi]j%s$"):format(v)) then ante = 1 end end end end end

-- TODO: mpl, csb, szl, slv, mas if not txt:find("%.") then -- Don't recognise affixes whenever there's only one vowel (or dipthong). local _, n_vowels = rsubn(txt, ("[%s]"):format(V), "") if n_vowels > 1 then

-- syllabify common prefixes as separate local prefixes = { "do", "wy", "za", "aktyno", "akusto", "akwa", "anarcho", "andro", "anemo", "antropo", "arachno", "archeo", "archi", "arcy", "areo", "arytmo", "audio", "awio", "balneo", "biblio", "brachy", "broncho", "ceno", "centro", "centy", "chalko", "chiro", "chloro", "chole", "chondro", "choreo", "chromato", "chrysto", "cyber", "cyklo", "cztero", "ćwierć", "daktylo", "decy", "deka", "dendro", "dermato", "diafano", "dwu", "dynamo", "egzo", "ekstra", "elektro", "encefalo", "endo", "entero", "entomo", "ergo", "erytro", "etno", "farmako", "femto", "ferro", "fizjo", "flebo", "franko", "ftyzjo", "galakto", "galwano", "germano", "geronto", "giganto", "giga", "gineko", "giro", "gliko", "gloso", "glotto", "grafo", "granulo", "grawi", "haplo", "helio", "hemato", "hepta", "hetero", "hiper", "histo", "hydro", "info", "inter", "jedno", "kardio", "kortyko", "kosmo", "krypto", "kseno", "logo", "magneto", "między", "niby", "nie", "nowo", "około", "oksy", "onto", "ornito", "para", "pierwo", "pięcio", "pneumo", "poli", "ponad", "post", "poza", "proto", "pseudo", "psycho", "radio", "samo", "sfigmo", "sklero", "staro", "stereo", "tele", "tetra", "wice", "zoo", "żyro", "am[bf]i", "ang[il]o", "ant[ey]", "a?steno", "[be]lasto", "chro[mn]o", "cys?to", "de[rs]mo", "h?ekto", "[gn]eo", "hi[ge]ro", "kontra?", "me[gt]a", "mi[nl]i", "a[efg]ro", "[pt]rzy", "przed?", "wielk?o", "mi?elo", "eur[oy]", "ne[ku]ro", "allo", "astro", "atto", "brio", "heksa", "all?o", "at[mt]o", "a[rs]tro", "br?io", "heksa?", "pato", "ba[tr][oy]", "izo", "myzo", "m[ai]kro", "mi[mzk]o", "chemo", "gono", "kilo", "lipo", "nano", "kilk[ou]", "hem[io]", "home?o", "fi[lt]o", "ma[łn]o", "h[ioy]lo", "hip[ns]?o", "[fm]o[nt]o", --  would hit too many false positives }			for _, v in ipairs(prefixes) do				if tfind("^"..v) then local _, other_vowels = rsubn(v, ("[%s]"):format(V), "") if (n_vowels - other_vowels) > 0 then tsub(("^(%s)"):format(v), "%1.") break end end end

if do_hyph then

-- syllabify common suffixes as separate -- TODO: mpl, csb, szl, slv, mas local suffixes = lg { pl = { "nąć", "[sc]tw[aou]", "[sc]twie", "[sc]tw[eo]m", "[sc]twami", "[sc]twach", "dztw[aou]", "dztwie", "dztw[eo]m", "dztwami", "dztwach", "dł[aou]", "dł[eo]m", "dłami", "dłach", "[czs]j[aeięąo]", "[czs]jom", "[czs]jami", "[czs]jach", }, szl = { "nōńć", "dło", }, csb = { "nąc", "dło" }, slv = { "nõc", "dlô" }, mas = { "nóncz", "dło" }				}

for _, v in ipairs(suffixes) do					if tsub(("(%s)$"):format(v), ".%1") then break end end

-- syllabify as /ist.ka/ if txt:find("[iy]st[kc]") then local endings = lg { { "k[aąęio]", "ce", "kach", "kom", "kami" }, szl = { "k[aãio]", "ce", "kami", "kacj", "kacach", "kōma" }, csb = { "k[aãąioôòùó]", "ce", "kami", "kacj", "kacach", }, }					for _, v in ipairs(endings) do						if tsub(("([iy])st(%s)$"):format(v), "%1st.%2") then break end end end end end end

-- syllabification for _ = 0, 1 do		tsub(("([%sU])([^%sU.']*)([%s])"):format(V, V, V), function (a, b, c)			local function find(x) return rfind(b, x) end			local function is_diagraph(thing)				local r = find(thing:format("[crsd]z")) or find(thing:format("ch")) or find(thing:format("d[żź]"))				if lang == "mpl" then return r or find(thing:format("b́")) end				if lang == "slv" then return r or find(thing:format("gh")) end				if lang == "mas" then return r or find(thing:format("rż")) end				return r			end			if ((ulen(b) < 2) or is_diagraph("^%s$")) then				b = "."..b			else				local i = 2				if is_diagraph("^%s") then i = 3 end				if usub(b, i, i):find("^[rlłI-]$") then					b = "."..b				else					b = ("%s.%s"):format(usub(b, 0, i - 1), usub(b, i))				end			end			return ("%s%s%s"):format(a, b, c)		end) end

local hyph if do_hyph then -- Ignore certain symbols and diacritics for the hyphenation. hyph = txt:gsub("'", "."):gsub("-", "") if lang == "slv" then local BREVE = u(0x306) hyph = rsubn(hyph, "[Iăĭŏŭ" .. BREVE .. "ā]", {				["I"] = "j",				["ă"] = "a", ["ĭ"] = "i", ["ŏ"] = "o",				["ŭ"] = "u", [BREVE] = "", ["ā"] = "a",			}) end hyph = hyph:lower -- Restore uppercase characters. if uppercase_indices then -- str_i loops through all the characters of the string -- list_i loops as above but doesn't count dots -- array_i loops through the indices at which the capital letters are local str_i, list_i, array_i = 1, 1, 1 function h_sub(x, y) return usub(hyph, x, y) end while array_i <= #uppercase_indices do if h_sub(str_i, str_i) ~= "." then if list_i == uppercase_indices[array_i] then hyph = ("%s%s%s"):format(h_sub(1,str_i-1), uupper(h_sub(str_i,str_i)), h_sub(str_i+1)) array_i = array_i + 1 end list_i = list_i + 1 end str_i = str_i + 1 end end end

tsub("'", "ˈ")

-- handle digraphs tsub("ch", "x") tsub("[crs]z", { ["cz"]="t_ʂ", ["sz"]="ʂ" }) if lang ~= "mas" then tsub("rz", "R") end tsub("d([zżź])", "d_%1") if lang == "mpl" then tsub("b́", "bʲ") end if lang == "slv" then tsub("gh", "ɣ") end if lang == "mas" then tsub("rż", "R") end -- basic orthographical rules -- not using lg here for speed if lang == "pl" then tsub(".", {			-- vowels			["e"]="ɛ", ["o"]="ɔ",			["ą"]="ɔN", ["ę"]="ɛN",			["ó"]="u", ["y"]="ɨ",			-- consonants			["c"]="t_s", ["ć"]="t_ɕ",			["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",			["ł"]="w", ["w"]="v", ["ż"]="ʐ",			["g"]="ɡ", ["h"]="x", ["y"]="ɘ"		}) elseif lang == "mpl" then tsub(".", {			-- vowels			["á"]="ɒ", ["å"]="ɒ",			["ę"]="ɛ̃", ["ą"]="ɔ̃",			["e"]="ɛ", ["o"]="ɔ",			["é"]="e", ["ó"]="o",			["y"]="ɨ",			-- consonants			["ṕ"]="pʲ", --  has no unicode character and is hence handled above			["ḿ"]="mʲ", ["ẃ"]="vʲ",			["c"]="t_s", ["ć"]="t_ɕ",			["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",			["ł"]="ɫ", ["w"]="v", ["ż"]="ʐ",			["g"]="ɡ", ["h"]="x",		}) elseif lang == "szl" then tsub(".", {			-- vowels			["e"]="ɛ", ["o"]="ɔ",			["ō"]="o", ["ŏ"]="O",			["ô"]="wɔ", ["y"]="ɨ",			["õ"] = "ɔ̃", ["ã"] = "ã",			["y"] = "ɪ",			-- consonants			["c"]="t_s", ["ć"]="t_ɕ",			["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",			["ł"]="w", ["w"]="v", ["ż"]="ʐ",			["g"]="ɡ", ["h"]="x",		}) elseif lang == "csb" then tsub(".", {			-- vowels			["e"]="ɛ", ["é"]="e", ["o"]="ɔ",			["ó"]="o", ["ô"]="ɞ", ["ë"]="ɜ",			["ò"]="wɛ", ["y"]="Y", ["ù"]="wu",			["ą"] = "ɔ̃", ["ã"] = "ã",			-- consonants			["c"]="t_s", ["ć"]="t_ɕ",			["ń"]="ɲ", ["ś"]="ɕ", ["ź"]="ʑ",			["ł"]="w", ["w"]="v", ["ż"]="ʒ",			["g"]="ɡ", ["h"]="x",		}) elseif lang == "slv" then tsub(".", {			-- vowels			["e"]="ɛ", ["é"]="e", ["o"]="ɔ",			["ó"]="o", ["ô"]="ɵ", ["ë"]="ə", ["ê"]="E",			["y"]="ɪ", ["ú"]="ʉ", ["ù"]="y",			["õ"] = "ɔ̃", ["ã"] = "ã",			["ă"]="ă",	["ĭ"]="ĭ", ["ŏ"]="ŏ", ["ŭ"]="ŭ", ["ù̆"]="y̆", ["ā"]="aː",			-- consonants			["c"]="t_s",			["ń"]="n",			["w"]="v", ["ż"]="ʒ",			["g"]="ɡ", ["h"]="x",		}) elseif lang == "mas" then tsub(".", {			-- vowels			["á"]="ɒ", ["ä"]="æ", ["e"]="ɛ", ["é"]="e",			["o"]="ɔ", ["ó"]="o", ["ô"]="wɔ",			["ÿ"]="Y", ["û"]="wu",			-- consonants			["c"]="t_s",			["ń"]="ɲ", ["ś"]="ʃ",			["ł"]="w", ["w"]="v", ["ż"]="ʒ",			["g"]="ɡ", ["h"]="x", ["ź"]="ʑ",		}) end

if lang == "csb" then tsub("ʂ", "ʃ") tsub("ʐ", "ʒ") end

if lang == "slv" then tsub("ʂ", "ʃ") tsub("ʐ", "ʒ") end

if lang == "mas" then tsub("ʂ", "ʃ") tsub("ʐ", "ʒ") end

-- palatalisation local palatise_into = { ["n"] = "ɲ", ["s"] = "ɕ", ["z"] = "ʑ" } tsub("([nsz])I", function (c) return palatise_into[c] end) tsub("([nsz])i", function (c) return palatise_into[c] .. "i" end) -- voicing and devoicing

local T = "pftsʂɕkxʃx" local D = "bdzʐʑɡʒɣ"

tsub(("([%s][.ˈ]?)v"):format(T), "%1f") tsub(("([%s][.ˈ]?)R"):format(T), "%1S")

if lang == "slv" then tsub(("([%s][.ˈ]?)ɣ"):format(T), "%1x") tsub(("([%s][.ˈ]?)x"):format(D), "%1ɣ") end

local function arr_list(x) local r = "" for i in pairs(x) do r = r .. i		end return r	end

local devoice = { ["b"] = "p", ["d"] = "t", ["ɡ"] = "k", ["z"] = "s", ["v"] = "f", ["ʑ"] = "ɕ", ["ʐ"] = "ʂ", ["R"] = "S", ["ʒ"] = "ʃ" }	if lang == "slv" then devoice["ɣ"] = "x" end

local mpl_J = lg { "", mpl = "ʲ?" }

local arr_list_devoice = arr_list(devoice)

if not is_prep then tsub(("([%s])(%s)$"):format(arr_list_devoice, mpl_J), function (a, b)			return devoice[a] .. (type(b) == "string" and b or "")		end) end

if lang == "csb" then tsub("Y", "i") tsub("S", "r̝") tsub("R", "r̝") tsub("po", "pwo") tsub("bo", "bwo") tsub("mo", "mwo") tsub("fo", "fwo") tsub("vo", "vwo") tsub("ko", "kwo") tsub("ɡo", "ɡwo") tsub("xo", "xwo") tsub("vw", "w") end

if lang == "slv" then local V = "aɛeɔ̃oɵəEɪiʉuyã" tsub("S", "r̝") tsub("R", "r̝") tsub("nj$", "n") tsub("nj([^" .. V .. "])", "n%1") tsub("ɲ([" .. V .. "])", "nj%1") tsub("ɛ$", "ə") end

if lang == "mas" then tsub("Y", "i") tsub("S", "r̝") tsub("R", "r̝") end if lang ~= "mpl" then tsub("S", "ʂ") tsub("R", "ʐ") end

local voice = {} for i, v in pairs(devoice) do		voice[v] = i	end

local new_text local devoice_string = ("([%s])(%s[._ˈ]?[%s])"):format(arr_list_devoice, mpl_J, T)	local voice_string = ("([%s])(%s[._ˈ]?[%s])"):format(arr_list(voice), mpl_J, D) local function devoice_func(a, b) return devoice[a] .. b end local function voice_func(a, b) return voice[a] .. b end while txt ~= new_txt do		new_txt = txt tsub(devoice_string, devoice_func) tsub(voice_string, voice_func) end

if lang == "pl" then -- nasal vowels tsub("N([.ˈ]?[pb])", "m%1") tsub("N([.ˈ]?[ɕʑ])", "ɲ%1") tsub("N([.ˈ]?[td]_[ɕʑ])", "ɲ%1") tsub("N([.ˈ]?[tdsz])", "n%1") tsub("N([.ˈ]?[wl])", "%1") tsub("ɛN$", "ɛ") tsub("N", "w̃") end

-- Hyphen separator, e.g. to prevent palatisation of . tsub("-", "")

tsub("_", OVERTIE) tsub("I", "j") tsub("U", "w")

-- stress local function add_stress(a) local s = "" for _ = 0, a do s = s .. "[^.]+%."		end local r = rsub(txt, ("%%.(%s[^.]+)$"):format(s), "ˈ%1") if not rfind(r, "ˈ") then r = "ˈ" .. r		end return (r:gsub("%.", "")) end

local should_stress = not (unstressed or txt:find("ˈ")) local prons = should_stress and add_stress(ante) or txt

if is_prep then prons = prons .. "$"	end

if lang == "pl" then if should_stress and ante > 0 and colloquial then local thing = add_stress(0) if thing ~= prons then prons = { prons, thing } end end elseif lang == "mpl" then if tfind("[RS]") then local mp_early = prons:gsub("[RS]", "r̝") local mp_late = prons:gsub("R", "ʐ"):gsub("S", "ʂ") if period == "early" then prons = mp_early elseif period == "late" then prons = mp_late elseif not period then prons = { mp_early, mp_late, }			else error(("'%s' is not a supported Middle Polish period, try with 'early' or 'late'."):format(period)) end end elseif lang == "szl" then if tfind("O") then prons = { prons:gsub("O", "ɔ"), prons:gsub("O", "ɔw"), prons:gsub("O", "ɛw"), }		end elseif lang == "slv" then if tfind("E") then local V = "aɛeɔ̃oɵəEɪiʉuyã" prons = prons:gsub("ˈ([^" .. V .. "]*)E", "ˈ%1i̯ɛ") prons = prons:gsub("E$", "ə") prons = prons:gsub("E", "ɛ") end end

if do_hyph then return prons, hyph else return prons end

end

-- TODO: This might slow things down if used too much? local function table_insert_if_absent(t, s)	for _, v in ipairs(t) do		if v == s then return end end table.insert(t, s) end

-- Returns rhyme from a transcription. local function do_rhyme(pron, lang) local V = ({ pl = "aɛiɔuɨ", szl = "aãɛiɔɔ̃ouɪ", csb = "aãɛeɜiɔoõɞu", slv = "aãɛəei̯iɔoõɵuʉyɪăĭŏŭŭùy̆ā", mas = "aɒæɛeiɔou"})[lang] local num_syl = select(2, rsubn(pron, ("[%s]"):format(V), "")) return { rhyme = rsub(rsub(rsub(pron, "^.*ˈ", ""), ("^[^%s]-([%s])"):format(V, V), "%1"), "%.", ""), num_syl = num_syl } end

--	Handles a single input, returning a table of transcriptions. Returns also a string of	hyphenation and a table of rhymes if it is a single-word term. -- local function multiword(term, lang, period) if term:find("^%[.+%]$") then return { phonetic = term } elseif term:find(" ") then

-- TODO: repeated function lg(s) return s[lang] or s[1] end

local prepositions = lg { {				"beze?", "na", "dla", "do", "ku", "nade?", "o", "ode?", "po", "pode?", "przede?", "przeze?", "przy", "spode?", "u", "we?", "z[ae]?", "znade?", "zza", }, szl = { "bezy?", "na", "dlŏ", "d[oō]", "ku", "nady?", "ô", "ôdy?", "po", "pody?", "przedy?", "przezy?", "przi", "spody?", "u", "w[ey]?", "z[aey]?", "[śs]", "znady?" }, csb = { "beze?", "na", "dlô", "do", "kù", "nade?", "ò", "òde?", "pò", "pòde?", "przede?", "przeze?", "przë", "spòde?", "ù", "we?", "wew", "z[ae]?", "zez", "zeza", "zó", "znade?" }, slv= { "dlo", "dô", "na", "nade?", "przêde?", "przêze?", "przë", "pô", "pôde?", "sê?", "vô", "we?", "wôde?", "wù", "za" }, mas= { "dlá", "do", "ku", "na", "nade?", "po", "pode?", "ponade?", "poza", "prżede?", "prżeze", "prżi", "we?", "ze?", "za", "ô", "ôde?", "û", "beze?" }		}

local p		local contains_preps = false

for word in term:gmatch("[^ ]+") do			local is_prep = false for _, prep in ipairs(prepositions) do				if (rfind(word, ("^%s$"):format(prep))) then is_prep = true contains_preps = true break end end local v = phonemic(word, false, lang, is_prep, period) local sep = "%s %s" if p == nil then p = v			elseif type(p) == "string" then if type(v) == "string" then p = sep:format(p, v)				else p = { sep:format(p, v[1]), sep:format(p, v[2]) } end else if type(v) == "string" then p = { sep:format(p[1], v), sep:format(p[2], v) } else p = { sep:format(p[1], v[1]), sep:format(p[2], v[2]) } end end end

local function assimilate_preps(str) local function assim(from, to, before) str = rsub(str, ("%s(%%$ ˈ?[%s])"):format(from, before), to.."%1") end local T = "ptsʂɕkx" assim("d", "t", T)			assim("v", "f", T)			assim("z", "s", T)			if lang == "szl" then local D = "bdzʐʑɡ" assim("s", "z", D)				assim("ɕ", "ʑ", D)			end return rsub(str, "%$", "") end

if contains_preps then if type(p) == "string" then p = assimilate_preps(p) else p[1] = assimilate_preps(p[1]) p[2] = assimilate_preps(p[2]) end end

return p

else return phonemic(term, lang ~= "mpl", lang, false, period) end

end

-- This handles all the magic characters <*>, <^>, <+>, <.>, <#>. local function normalise_input(term, title)

local function check_af(str, af, reg, repl, err_msg) reg = reg:format(af) if not rfind(str, reg) then error(("the word does not %s with %s!"):format(err_msg, af)) end return str:gsub(reg, repl) end

local function check_pref(str, pref) return check_af(str, pref, "^(%s)", "%1.", "start") end local function check_suf(str, suf) return check_af(str, suf, "(%s)$", ".%1", "end") end

if term == "#" then -- The diesis stands simply for. return title elseif (term == "+") or term:find("^%^+$") or (term == "*") then -- Inputs that are just '+', '*', '^', '^^', etc. are treated as -- if they contained the title with those symbols preceding it. return term .. title -- Handle syntax like , <.ka> and . This allows to not respell -- the entire word when all is needed is to specify syllabification of a prefix -- and/or a suffix. elseif term:find(".+%.$") then return check_pref(title, term:sub(1, -2)) elseif term:find("^%..+") then return check_suf(title, term:sub(2)) elseif term:find(".+%.%..+") then return check_suf(check_pref(title, term:gsub("%.%..+", "")), term:gsub(".+%.%.", "")) end

return term

end

local function sort_things(lang, title, args_terms, args_quals, args_refs, args_period)

local pron_list, hyph_list, rhyme_list, do_hyph = { {}, {}, {} }, { }, { {}, {}, {} }, false

for index, term in ipairs(args_terms) do		term = normalise_input(term, title) local pron, hyph = multiword(term, lang, args_period) local qualifiers = {} if args_quals[index] then for qual in args_quals[index]:gmatch("[^;]+") do				table.insert(qualifiers, qual) end end local function new_pron(p, additional, dont_refs) local ret = { pron = ("/%s/"):format(p), qualifiers = qualifiers, refs = not dont_refs and {args_refs[index]}, }			if additional then local new_qualifiers = {} for _, v in ipairs(qualifiers) do					table.insert(new_qualifiers, v)				end table.insert(new_qualifiers, additional) ret.qualifiers = new_qualifiers end return ret end local should_rhyme = lang ~= "mpl" if type(pron) == "string" then table.insert(pron_list[1], new_pron(pron)) if should_rhyme then table.insert(rhyme_list[1], do_rhyme(pron, lang)) end elseif pron.phonetic then table.insert(pron_list[1], {				pron = pron.phonetic,				qualifiers = qualifiers,				refs = {args_refs[index]},			}) else local double_trancript = ({				pl = { "prescribed", "casual" },				mpl = { "16th c.", "17th–18th c." },				szl = { nil, "Western", "Głogówek"},			})[lang] table.insert(pron_list[2], new_pron(pron[1], double_trancript[1])) table.insert(pron_list[3], new_pron(pron[2], double_trancript[2], true)) if should_rhyme then table.insert(rhyme_list[2], do_rhyme(pron[1], lang)) table.insert(rhyme_list[3], do_rhyme(pron[2], lang)) end end if hyph then do_hyph = true if hyph:gsub("%.", "") == title then table_insert_if_absent(hyph_list, hyph) end end end

-- TODO: looks rather slow. local function merge_subtables(t) local r = {} if #t[2] + #t[3] == 0 then return t[1] end for _, subtable in ipairs(t) do			for _, value in ipairs(subtable) do				table.insert(r, value) end end return r	end

pron_list = merge_subtables(pron_list) rhyme_list = merge_subtables(rhyme_list)

return pron_list, hyph_list, rhyme_list, do_hyph end

function export.mpl_IPA(frame) local args = require("Module:parameters").process(frame:getParent.args, {

[1] = { list = true }, ["qual"] = { list = true, allow_holes = true }, ["q"] = { list = true, allow_holes = true, alias_of = "qual" }, ["period"] = {}, ["ref"] = { list = true, allow_holes = true },

["title"] = {}, -- for debugging or demonstration only

})

local terms = args[1]

if #terms == 0 then terms = { "#" } end

local lang = require("Module:languages").getByCode("pl") return ("* %s %s"):format(require("Module:accent qualifier").format_qualifiers(lang, {"Middle Polish"}),		require("Module:IPA").format_IPA_full {			lang = lang,			items = (sort_things(				"mpl",				args.title or mw.title.getCurrentTitle.text,				terms,				args.qual,				args.ref,				args.period			)),		}	)

end

function export.IPA(frame)

local arg_lang = frame.args.lang

local process_args = {

[1] = { list = true },

["qual"] = { list = true, allow_holes = true }, ["q"] = { list = true, allow_holes = true, alias_of = "qual" }, ["hyphs"] = {}, ["h"] = { alias_of = "hyphs" }, ["rhymes"] = {}, ["r"] = { alias_of = "rhymes" }, ["audios"] = {}, ["a"] = { alias_of = "audios" }, ["homophones"] = {}, ["hh"] = { alias_of = "homophones" }, ["ref"] = { list = true, allow_holes = true },

["title"] = {}, -- for debugging or demonstration only

}

if arg_lang == "pl" then process_args["mp"] = { list = true } process_args["mp_qual"] = { list = true, allow_holes = true } process_args["mp_q"] = { list = true, allow_holes = true, alias_of = "mp_qual" } process_args["mp_period"] = {} process_args["mp_ref"] = { list = true, allow_holes = true } end

local args = require("Module:parameters").process(frame:getParent.args, process_args)

local lang = require("Module:languages").getByCode((arg_lang == "slv" or arg_lang == "mas") and ("zlw-" .. arg_lang) or arg_lang)

local terms = args[1] local title = args.title or mw.title.getCurrentTitle.text

if #terms == 0 then terms = { "#" } end

local pron_list, hyph_list, rhyme_list, do_hyph = sort_things(arg_lang, title, terms, args.qual, args.ref)

local mp_prons

if arg_lang == "pl" then if #args.mp > 0 then mp_prons = (sort_things("mpl", title, args.mp, args.mp_qual, args.mp_ref, args.mp_period)) end end

if args.hyphs then if args.hyphs == "-" then do_hyph = false else hyph_list = {} for v in args.hyphs:gmatch("[^;]+") do				table.insert(hyph_list, v)			end do_hyph = true end end

if args.rhymes then if args.rhymes == "-" then rhyme_list = {} elseif args.rhymes ~= "+" then rhyme_list = {} for v in args.rhymes:gmatch("[^;]+") do				if rfind(v, ".+/.+") then table.insert(rhyme_list, {						rhyme = rsub(v, "/.+", ""),						num_syl = tonumber(rsub(v, ".+/", "")),					}) else error(("The manual rhyme %s did not specify syllable number as RHYME/NUM_SYL."):format(v)) end end end end

for ooi, oov in ipairs(rhyme_list) do		oov.num_syl = { oov.num_syl } for coi = ooi + 1, #rhyme_list do			local cov = rhyme_list[coi] if oov.rhyme == cov.rhyme then local add_ns = true for _, onv in ipairs(oov.num_syl) do					if cov.num_syl == onv then add_ns = false break end end if add_ns then table.insert(oov.num_syl, cov.num_syl) end table.remove(rhyme_list, coi) end end end

local m_IPA_format = require("Module:IPA").format_IPA_full local ret = "*" .. m_IPA_format { lang = lang, items = pron_list }

if mp_prons then ret = ("%s\n*%s %s"):format(ret,			require("Module:accent qualifier").format_qualifiers(lang, { "Middle Polish" }),			m_IPA_format { lang = lang, items = mp_prons }		) end

if args.audios then for v in args.audios:gmatch("[^;]+") do -- TODO: can I expand a template or is it a bad thing to do? ret = ("%s\n*%s"):format(ret, require("Module:audio").format_audio {				lang = lang,				file = v:gsub("#", title),			}) end end

if #rhyme_list > 0 then ret = ("%s\n*%s"):format(ret, require("Module:rhymes").format_rhymes({ lang = lang, rhymes = rhyme_list })) end

if do_hyph then ret = ret .. "\n*" if #hyph_list > 0 then local hyphs = {} for hyph_i, hyph_v in ipairs(hyph_list) do				hyphs[hyph_i] = { hyph = {} } for syl_v in hyph_v:gmatch("[^.]+") do					table.insert(hyphs[hyph_i].hyph, syl_v) end end ret = ret..require("Module:hyphenation").format_hyphenations { lang = lang, hyphs = hyphs, caption = "Syllabification" }		else ret = ret.."Syllabification: [please specify syllabification manually] " if mw.title.getCurrentTitle.nsText == "" then ret = ("%s"):format(ret, arg_lang) end end end

if args.homophones then local homophone_list = {} for v in args.homophones:gmatch("[^;]+") do			if v:find("<.->$") then table.insert(homophone_list, {					term = v:gsub("<.->$", ""),					qualifiers = { (v:gsub(".+<(.-)>$", "%1")) },				}) else table.insert(homophone_list, { term = v }) end end ret = ("%s\n*%s"):format(ret, require("Module:homophones").format_homophones {			lang = lang,			homophones = homophone_list,		}) end

return ret end

return export