-- Module:fi-pronunciation

-- Public API table returned at the end of the module.
local export = {}

local m_IPA = require("Module:IPA")
local m_fi_IPA = require("Module:fi-IPA") -- <= the module you want to edit if the IPA transcription is wrong
local m_hyph = require("Module:fi-hyphenation") -- <= the module you want to edit if the automatic hyphenation is wrong

local bit32 = require("bit32")

local langcode = "fi"
local lang = require("Module:languages").getByCode(langcode)

-- Character inventories used to build patterns throughout this module.
local vowels = "aeiouyåäö"
local vowel = "[" .. vowels .. "]"
local consonants = "bcdfghjklmnpqrstvwxzšžʔ"
local consonant = "[" .. consonants .. "]"
local apostrophe = "'"
local tertiary = m_fi_IPA.tertiary
-- Symbols stripped from IPA output before rhyme/hyphenation processing.
local ipa_symb = "ˣˈˌ"..tertiary.."̯̝̞̠̪" -- include ˣ because final gemination does not affect rhymes

-- Normalises a title for comparison: en dashes are turned into plain
-- hyphens and the result is lowercased.
local function cleanup_title(x)
	local with_hyphens = mw.ustring.gsub(x, "–", "-")
	return mw.ustring.lower(with_hyphens)
end

-- Builds one combination from `parts`.
-- Each entry of `parts` is { text, filler, separator }. Bit (i - 2) of `n`
-- decides what is placed between part i-1 and part i: the separator
-- (column 3) when the bit is set, the filler (column 2) otherwise.
local function cartesian_make(parts, n)
	local assembled = parts[1][1]
	local mask = 1
	for idx = 2, #parts do
		local previous = parts[idx - 1]
		local column = (bit32.band(n, mask) > 0) and 3 or 2
		assembled = assembled .. previous[column] .. parts[idx][1]
		mask = bit32.lshift(mask, 1)
	end
	return assembled
end

-- Returns every combination of `parts`: one string per bitmask in
-- 0 .. 2^(#parts - 1) - 1, each produced by cartesian_make.
local function cartesian_combine(parts)
	local combos = {}
	local total = bit32.lshift(1, #parts - 1)
	local mask = 0
	while mask < total do
		combos[#combos + 1] = cartesian_make(parts, mask)
		mask = mask + 1
	end
	return combos
end

-- Expands optional breaks written as "(.)" or "(-)" in `word`.
-- Returns a list of strings covering every combination of each optional
-- break being absent or present. A word without optional breaks is
-- returned unchanged as a single-element list.
local function split_by_optional_break(word)
	local break_pattern = "%([.-]%)"
	local segments = {}
	local cursor = 1

	local open_pos, close_pos = mw.ustring.find(word, break_pattern, cursor)
	while open_pos ~= nil do
		segments[#segments + 1] = {
			mw.ustring.sub(word, cursor, open_pos - 1), -- text before the break
			"",                                         -- break omitted
			mw.ustring.sub(word, open_pos + 1, open_pos + 1), -- break character itself
		}
		cursor = close_pos + 1
		open_pos, close_pos = mw.ustring.find(word, break_pattern, cursor)
	end

	if #segments == 0 then
		return { word }
	end

	-- trailing text after the last optional break
	segments[#segments + 1] = { mw.ustring.sub(word, cursor), "", "" }

	return cartesian_combine(segments)
end

-- Turns a pronunciation respelling into the spelling(s) to feed to the
-- automatic hyphenator.
--   word:  pronunciation respelling (may contain IPA symbols, gemination
--          markers, "/" or "+" part separators and optional breaks)
--   title: page title; used to restore the title's letter case
-- Returns a list of strings, one per combination of optional breaks
-- (see split_by_optional_break).
local function get_autohyphenate_forms(word, title)
	word = mw.ustring.gsub(word, "%([*ˣ:ː]%)", "")
	word = mw.ustring.gsub(word, "(.)ː", "%1%1")
	word = mw.ustring.gsub(word, "[" .. ipa_symb .. "ˣ*]", "")
	word = mw.ustring.gsub(word, "[/+]", "-")
	word = mw.ustring.gsub(word, "^-", "")
	word = mw.ustring.gsub(word, "ŋn", "gn")

	if mw.ustring.lower(title) == title then
		word = mw.ustring.lower(word)
	else
		-- find letters in title
		local letters = {}
		for letter in mw.ustring.gmatch(title, "%a") do
			table.insert(letters, letter)
		end

		local respelled = {}
		local letter_index = 1

		for character in mw.ustring.gmatch(word, ".") do
			local out = character
			if mw.ustring.match(character, "%a") then
				local next_letter = letters[letter_index]
				-- next_letter is nil once the title's letters are used up
				-- (e.g. a trailing letter that exists only in the
				-- respelling); keep the character as written instead of
				-- passing nil to mw.ustring.lower.
				if next_letter and mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
					out = next_letter
					letter_index = letter_index + 1
				end
			end
			respelled[#respelled + 1] = out
		end

		word = table.concat(respelled)
	end

	return split_by_optional_break(word)
end

-- applies gemination mid-word for rhymes:
-- a gemination marker (* or ˣ) before a vowel becomes a glottal stop,
-- and before a consonant it lengthens that consonant.
local function apply_gemination(word)
	local before_vowel = "[*ˣ](" .. vowel .. ")"
	local before_consonant = "[*ˣ](" .. consonant .. ")"

	local geminated = mw.ustring.gsub(word, before_vowel, "ʔ%1")
	geminated = mw.ustring.gsub(geminated, before_consonant, "%1ː")
	return geminated
end

-- Turns a pronunciation respelling into the form(s) from which rhymes are
-- generated. Returns a list of strings, one per combination of optional
-- breaks (see split_by_optional_break).
local function get_autorhyme_forms(word)
	local form = mw.ustring.lower(word)
	-- drop optional gemination/length markers, then realise the mandatory ones
	form = mw.ustring.gsub(form, "%([*ˣ:ː]%)", "")
	form = apply_gemination(form)

	local rewrites = {
		{ "(.)ː", "%1%1" },             -- long sound -> doubled letter
		{ "[" .. ipa_symb .. "]", "" }, -- strip IPA symbols
		{ "[/+]", "-" },                -- part separators -> hyphen
	}
	for _, rule in ipairs(rewrites) do
		form = mw.ustring.gsub(form, rule[1], rule[2])
	end

	return split_by_optional_break(form)
end

-- Generates the rhyme for a (respelled) word.
--   word: spelling/respelling; hyphens separate compound parts.
-- Returns two values:
--   1. the rhyme: IPA (symbols stripped) of the last compound part from its
--      last stressed syllable onwards, without that syllable's initial
--      consonants
--   2. the syllable count of the whole word, as given by the hyphenator
function export.generate_rhyme(word)
	-- convert syllable weight to hyphen for next routine
	-- (just in case these are included manually... even if they shouldn't be)
	local fmtword = mw.ustring.gsub(word, "[ˈˌ"..tertiary.."]", "-")
	-- Chain on fmtword, not word: reading `word` again here would throw
	-- away the stress-mark conversion made on the previous line.
	fmtword = mw.ustring.gsub(fmtword, "'", ".")
	local sylcount = #m_hyph.generate_hyphenation(fmtword, ".")

	-- get final part of a compound word
	local last_hyph = mw.ustring.find(fmtword, "%-[^%-]*$") or 0
	local last_part = mw.ustring.sub(fmtword, last_hyph + 1)

	-- split to syllables, keep "." in case we have a syllable break
	local hyph = m_hyph.generate_hyphenation(last_part, ".")
	local last_index = #hyph
	local last_stressed = 1
	local prev_stress = false

	-- find last stressed syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		if index == 1 then
			stressed = true
		elseif not prev_stress and index < last_index then
			-- shift stress if current syllable light and a heavy syllable occurs later
			stressed = index == last_index - 1
				or not m_fi_IPA.is_light_syllable(syllable)
				or not m_fi_IPA.has_later_heavy_syllable(hyph, index + 1)
		end
		if stressed then
			last_stressed = index
		end
		prev_stress = stressed
	end

	local res = {}
	for i = last_stressed, #hyph, 1 do
		table.insert(res, hyph[i])
	end
	res = table.concat(res)

	-- remove initial consonants, convert to IPA, remove IPA symbols
	res = mw.ustring.gsub(res, "^%.", "")
	res = mw.ustring.gsub(res, "^" .. consonant .. "+", "")
	res = m_fi_IPA.IPA_wordparts(res, false)
	res = mw.ustring.gsub(res, "[" .. ipa_symb .. "]", "")
	res = mw.ustring.gsub(res, "^%.", "")
	return res, sylcount
end

-- Reports whether the respelling `pron` is equivalent to `title` once
-- purely phonetic markup is ignored. A missing or empty `pron` counts as
-- equal.
local function pron_equal(title, pron)
	if (pron or "") == "" then
		return true
	end

	local rewrites = {
		-- handle slashes and pluses as hyphens
		{ "[/+]", "-" },
		-- remove optional lengthening/shortening/syllable break/gemination,
		-- should not cause any issues
		{ "%([*ˣ.:ː-]%)", "" },
		-- remove gemination asterisks and syllable separating dots
		{ "*", "" },
		{ "%.", "" },
		-- map existing glottal stops to apostrophes
		{ "%(?ʔ%)?", apostrophe },
		-- /ŋn/ for /gn/ is fine
		{ "ŋn", "gn" },
		-- remove hyphens but also apostrophes right after hyphens
		-- (so that glottal stop is allowed after hyphen separating two same vowels)
		{ "-" .. apostrophe .. "?", "" },
	}

	local normalized = pron
	for _, rule in ipairs(rewrites) do
		normalized = mw.ustring.gsub(normalized, rule[1], rule[2])
	end

	local bare_title = mw.ustring.gsub(cleanup_title(title), "-", "")
	return normalized == mw.ustring.lower(bare_title)
end

-- Rewrites a title so that known spelling/pronunciation mismatches still
-- compare equal in pron_equal. Returns the rewritten title only.
local function pron_equal_special_cases(title)
	-- very common exception - support it
	-- The parentheses truncate gsub's (string, count) result to the string,
	-- so the substitution count can never leak into a caller's argument list.
	return (mw.ustring.gsub(title, "ruoan", "ruuan"))
end

-- Template entry point: renders the pronunciation section as a wikitext
-- bullet list (IPA lines, audio, rhymes, homophones, syllabification)
-- followed by formatted categories.
--
-- Template parameters (read from the parent frame):
--   1=...        respellings; "" or "^" means the page title, "*" / "(*)"
--                means the page title plus that gemination marker
--   ipa=...      manual IPA, replaces the automatic transcription per index
--   h=/hyphen=   manual hyphenation(s); a lone "-" suppresses them
--   r=/rhymes=   manual rhyme(s); a lone "-" suppresses them
--   a=/audio=    audio file(s); ac=/caption= their captions
--   hh=/homophones=  comma-separated homophones
--   q=, hp=, rp=, hhp=  qualifiers for IPA lines, hyphenations, rhymes and
--                homophones respectively
--   nohyphen=, norhymes=, csuffix=  boolean switches
--   title=       overrides the page title (debugging/demonstration only)
--
-- If `frame` is not a table, no arguments are read and the defaults below
-- are used with the current page title.
function export.show(frame)
	local title = mw.title.getCurrentTitle().text
	local pronunciation = { "" }
	local ipa = { nil }
	local rhymes = { nil }
	local hyphenation = { nil }
	local audio = { }
	local qualifiers = { }
	local hyphlabels = { }
	local rhymlabels = { }
	local homophones = { }
	local homophonelabels = { }
	local nohyphen = false
	local norhymes = false
	local csuffix = false
	local categories = { }

	if type(frame) == "table" then
		local params = {
			[1] = { list = true, default = "", allow_holes = true },
			["ipa"] = { list = true, default = nil, allow_holes = true },
			["h"] = { list = true, default = nil, allow_holes = true }, ["hyphen"] = {},
			["r"] = { list = true, default = nil, allow_holes = true }, ["rhymes"] = {},
			["a"] = { list = true, default = nil }, ["audio"] = {},
			["ac"] = { list = true, default = nil }, ["caption"] = {},
			["hh"] = { default = "" }, ["homophones"] = {},
			["q"] = { list = true, default = nil, allow_holes = true },
			["hp"] = { list = true, default = nil, allow_holes = true },
			["rp"] = { list = true, default = nil, allow_holes = true },
			["hhp"] = { list = true, default = nil, allow_holes = true },
			["nohyphen"] = { type = "boolean", default = false },
			["norhymes"] = { type = "boolean", default = false },
			["csuffix"] = { type = "boolean", default = false },
			["title"] = {}, -- for debugging or demonstration only
		}

		local args = require("Module:parameters").process(frame:getParent().args, params, true)
		title = args["title"] or title
		pronunciation = args[1]
		ipa = args["ipa"]
		hyphenation = args["h"]
		rhymes = args["r"]
		qualifiers = args["q"]
		hyphlabels = args["hp"]
		rhymlabels = args["rp"]
		nohyphen = args["nohyphen"]
		norhymes = args["norhymes"]
		csuffix = args["csuffix"]
		homophones = mw.text.split(args["hh"], ",")
		homophonelabels = args["hhp"]

		-- hacks
		-- NOTE(review): args[1] is declared as a list parameter, so args[2]
		-- is presumably never set and this line looks like dead code; the
		-- parallel `ipa` line suggests `pronunciation` was meant. Left
		-- unchanged -- confirm against Module:parameters.
		if args[2] and args[1] == nil then args[1] = "" end
		if ipa[2] and ipa[1] == nil then ipa[1] = "" end
		if #homophones == 1 and homophones[1] == "" then homophones = {} end

		-- singular aliases override the first list entry
		if args["hyphen"] then hyphenation[1] = args["hyphen"] end
		if args["rhymes"] then rhymes[1] = args["rhymes"] end
		if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end

		local audios = args["a"]
		local captions = args["ac"]
		if args["audio"] then audios[1] = args["audio"] end
		-- The declared parameter is "caption"; testing a "captions" key
		-- would never be true and the caption would be silently dropped.
		if args["caption"] then captions[1] = args["caption"] end

		for i, audiofile in ipairs(audios) do
			if audiofile then
				table.insert(audio, {lang = lang, file = audiofile, caption = captions[i]})
			end
		end
	end

	-- resolve shorthand respellings
	for i, p in ipairs(pronunciation) do
		if p == "" or p == "^" then
			pronunciation[i] = cleanup_title(title)
		elseif p == "*" or p == "(*)" then
			pronunciation[i] = cleanup_title(title) .. p
		elseif mw.ustring.find(p, "[!#]") then
			p = mw.ustring.gsub(p, "t!s", "ts")
			p = mw.ustring.gsub(p, "t#s", "ts")
			pronunciation[i] = p
		end
	end

	-- make sure #pronunciation >= #IPA
	for i, _ in ipairs(ipa) do
		if not pronunciation[i] then
			pronunciation[i] = ""
		end
	end

	local manual_hr = false
	local ripa = {}
	local model_pronunciation = pronunciation[1]

	-- preprocessing: a "%" after a sound marks optional length, so such a
	-- respelling is expanded into a short and a long variant
	local pos = 1
	while pos <= #pronunciation do
		if mw.ustring.find(pronunciation[pos], "%", 1, true) then
			local original = pronunciation[pos]
			local short = mw.ustring.gsub(original, "%%", "")
			local long = mw.ustring.gsub(original, "(.)%%", "%1%1")
			pronunciation[pos] = short
			if model_pronunciation == original then
				model_pronunciation = long
			end
			pos = pos + 1
			table.insert(pronunciation, pos, long)
		end
		pos = pos + 1
	end

	local has_spaces = mw.ustring.match(title, " ") or (pronunciation[1] and mw.ustring.match(pronunciation[1], " "))
	local is_suffix = mw.ustring.match(title, "^-")
	local is_prefix_or_suffix = not csuffix and (mw.ustring.match(title, "-$") or is_suffix)

	for i, p in ipairs(pronunciation) do
		local qual = qualifiers[i] or ""
		if #qual > 0 then
			qual = " " .. require("Module:qualifier").format_qualifier(qualifiers[i])
		end

		if ipa[i] and ipa[i] ~= "" then
			-- manual IPA: shown as given; disables automatic hyphenation/rhymes
			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = ipa[i]}},
				no_count = has_spaces,
			} .. qual)
			manual_hr = true
		else
			if mw.ustring.find(p, ":") then
				p = mw.ustring.gsub(p, ":", "ː")
			end
			if mw.ustring.find(p, "%+") then
				p = mw.ustring.gsub(p, "%+", "-")
			end

			local IPA_narrow = m_fi_IPA.IPA_wordparts(p, true)
			local IPA = m_fi_IPA.IPA_wordparts(p, false)

			-- multi-word stress
			if has_spaces then
				IPA_narrow = mw.ustring.gsub(IPA_narrow, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
				IPA = mw.ustring.gsub(IPA, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
			end

			-- remove initial stress if suffix
			if is_suffix then
				if csuffix then
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1ˌ")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1ˌ")
				else
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1")
				end
			end

			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
				no_count = has_spaces,
			} .. qual)
		end
	end

	local results = mw.clone(ripa)

	-- automatic hyphenation/rhymes are only trusted when the respelling
	-- matches the title
	manual_hr = manual_hr or has_spaces or is_prefix_or_suffix or not (pron_equal(title, mw.ustring.lower(model_pronunciation)) or pron_equal(pron_equal_special_cases(title), mw.ustring.lower(model_pronunciation)))

	if not hyphenation[1] and not manual_hr then
		local forms = get_autohyphenate_forms(model_pronunciation, title)
		local seenhyphs = {}
		local slot = 1
		for _, form in ipairs(forms) do
			if hyphenation[slot] then break end
			local genhyph = m_hyph.generate_hyphenation(form, false)
			local genhyphj = table.concat(genhyph, "\n")
			if not seenhyphs[genhyphj] then
				hyphenation[slot] = genhyph
				seenhyphs[genhyphj] = true
				slot = slot + 1
			end
		end
	elseif #hyphenation == 1 and hyphenation[1] == "-" then
		hyphenation = {}
	end

	if not rhymes[1] and not manual_hr then
		local forms = get_autorhyme_forms(model_pronunciation)
		for i, form in ipairs(forms) do
			if rhymes[i] then break end
			-- stored as { rhyme, syllable_count }
			rhymes[i] = { export.generate_rhyme(form) }
		end
	elseif #rhymes == 1 and rhymes[1] == "-" then
		rhymes = {}
	end

	if not has_spaces and not is_prefix_or_suffix and not (hyphenation[1] and rhymes[1]) then
		table.insert(categories, "fi-pronunciation missing hyphenation or rhymes")
	end

	-- manual hyphenations arrive as strings; split them into syllable lists
	for i, h in ipairs(hyphenation) do
		if type(h) == "string" then
			hyphenation[i] = mw.text.split(h, '[' .. m_hyph.sep_symbols .. ']')
		end
	end

	for _, a in ipairs(audio) do
		table.insert(results, "* " .. require("Module:audio").format_audio(a))
	end

	if not norhymes then
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false -- false = no group opened yet
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}
			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
						-- Store the label at the group's own index. Appending
						-- with table.insert is a no-op for a nil label, which
						-- would shift later labels onto the wrong group.
						new_labels[#new_rhymes] = last_label
					end
					current_list = { r }
					last_label = label
				end
			end
			table.insert(new_rhymes, current_list)
			new_labels[#new_rhymes] = last_label
			rhymes = new_rhymes
			rhymlabels = new_labels
		end

		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end

			if #r >= 1 then
				local sylcounts = nil
				local rhymeobjs = {}
				local rhymesseen = {}
				local explicitsylcounts = true

				for _, rhyme in ipairs(r) do
					if type(rhyme) == "table" then
						-- generated rhyme: { rhyme, syllable_count }
						local rhymeis, sylcount = unpack(rhyme)
						local rhymeobj = rhymesseen[rhymeis]
						if not rhymeobj then
							local newrhyme = { rhyme = rhymeis, num_syl = {sylcount} }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhymeis] = { [sylcount] = true, object = newrhyme }
						elseif not rhymeobj[sylcount] then
							table.insert(rhymeobj.object.num_syl, sylcount)
							rhymeobj[sylcount] = true
						end
					else
						-- manual rhyme: plain string without a syllable count
						explicitsylcounts = false
						if not rhymesseen[rhyme] then
							local newrhyme = { rhyme = rhyme }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhyme] = { object = newrhyme }
						end
					end
				end

				if not explicitsylcounts then
					sylcounts = {}
					local sylkeys = {}
					-- get all possible syllable counts from syllabifications
					for _, h in ipairs(hyphenation) do
						local hl = #h
						if hl > 0 and not sylkeys[hl] then
							table.insert(sylcounts, hl)
							sylkeys[hl] = true
						end
					end
				end

				table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if #homophones > 0 then
		local homophonedata = {}
		for i, h in ipairs(homophones) do
			table.insert(homophonedata, { ["term"] = h, ["qualifiers"] = homophonelabels[i] and { homophonelabels[i] } or nil })
		end
		table.insert(results, "* " .. require("Module:homophones").format_homophones(
			{ lang = lang, homophones = homophonedata }))
	end

	if not nohyphen and #hyphenation > 0 then
		local hyphs = {}
		for i, h in ipairs(hyphenation) do
			table.insert(hyphs, { ["hyph"] = h, ["qualifiers"] = hyphlabels[i] and { hyphlabels[i] } or nil })
		end
		-- NOTE(review): the caption literal looks like it may have lost wiki
		-- link markup; kept verbatim -- confirm against the deployed module.
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphs, caption = "Syllabification(key)" }))
	end

	return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end

-- Hand the public API table (export.generate_rhyme, export.show) to the caller.
return export