-- Module:izh-pronunciation

local export = {} local m_izh = require("Module:izh") local m_IPA = require("Module:IPA") local gsub_lookahead = require("Module:gsub lookahead")

-- Language object taken from Module:izh, plus a short alias for building
-- strings from code points.
local lang = m_izh.lang
local U = mw.ustring.char

--- <<< DATA START >>> ---

-- Length and stress marks.
local LONG = "ː"
local SEMILONG = "ˑ"
local STRESS_PRIMARY = "ˈ"
local STRESS_SECONDARY = "ˌ"

-- Combining diacritics and modifier letters.
local FRONTAL = U(0x0308)
local NONSYLLABIC = U(0x032F)
local TIE = U(0x0361)
local VERYSHORT = U(0x0306)
local PALATAL = "ʲ"

local IPA_VOWELS = "ɑeiouyæøɨə"

-- Private-use code points that serve as internal markers while a word is
-- being processed.
local AUTO_STRESS = U(0xEEEE)
local VIRTUAL_BREAK = U(0xEEEC)
local J_PALATALIZE = U(0xEEEA)
local REALLY_JUST_PALATAL = U(0xEEE8)
local VIRTUAL_BREAK_UNGEMINATE = U(0xEEE6)

local IPA_CONSONANTS = m_izh.consonants .. "ɫʃʒ"
local IPA_CONSONANTS_GEMINATABLE = m_izh.consonants_geminatable .. "ɫ" .. "z" .. "ž"

-- Respelling markers.
local PALATALIZE = "'"
local UNGEMINATE = "/"

-- Pattern fragments matching zero-or-more / one-or-more combining diacritics
-- (U+0300..U+036F).
local ANY_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]*"
local SOME_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]+"

--- <<< DATA END >>> ---

--- <<< COMMON START >>> ---

--- Splits a word into syllables.
-- @param word the word to split; may contain respelling markers
-- @param keep_sep_symbols `true` to keep every separator symbol (it then
--   starts the following syllable), or a string listing the separator
--   symbols to keep; any other value drops the separators
-- @return an array of syllable strings
local function split_syllables(word, keep_sep_symbols)
	local consonant = "[" .. IPA_CONSONANTS .. "]"
	local consonant_diacritic = "[" .. U(0x030A) .. U(0x0325) .. "]"
	local vowel = m_izh.vowel
	local sep_symbols = m_izh.sep_symbols .. VIRTUAL_BREAK
	local vowel_sequences = m_izh.vowel_sequences
	local ungeminate_marks = "[" .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"

	-- All positions below are character positions, so the loop bound must be
	-- the character length (not the byte length `#word`), and the lowercased
	-- copy is computed once instead of once per test.
	local lowered = mw.ustring.lower(word)
	local length = mw.ustring.len(word)

	local cv_pattern = "^" .. consonant .. consonant_diacritic .. "*[" .. PALATALIZE .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. J_PALATALIZE .. PALATAL .. "]*" .. vowel
	local affricate_pattern = "^" .. consonant .. TIE .. consonant
	local vowel_pair_pattern = "^(" .. vowel .. ungeminate_marks .. "*" .. vowel .. ")"

	local res = {}
	local syllable = ""
	local pos = 1
	local found_vowel = false

	while pos <= length do
		if mw.ustring.find(lowered, cv_pattern, pos) then
			-- CV: end current syllable if we have found a vowel
			if found_vowel then
				if #syllable > 0 then
					table.insert(res, syllable)
				end
				found_vowel = false
				syllable = ""
			end
			syllable = syllable .. mw.ustring.sub(word, pos, pos)
			pos = pos + 1
		elseif mw.ustring.find(lowered, "^" .. consonant, pos) then
			if mw.ustring.find(lowered, affricate_pattern, pos) then
				-- /t͡s/
				if found_vowel and #syllable > 0 then
					table.insert(res, syllable)
				end
				syllable = mw.ustring.sub(word, pos, pos + 2)
				pos = pos + 3
				found_vowel = false
			else
				-- C: continue
				syllable = syllable .. mw.ustring.sub(word, pos, pos)
				pos = pos + 1
			end
		elseif mw.ustring.find(lowered, "^" .. vowel, pos) then
			if found_vowel then
				-- already found a vowel, end current syllable
				if #syllable > 0 then
					table.insert(res, syllable)
				end
				syllable = ""
			end
			found_vowel = true

			-- check for diphthongs or long vowels
			local seq_ok = false
			local search_from = mw.ustring.gsub(mw.ustring.sub(lowered, pos), ungeminate_marks, "")
			for _, sequence in pairs(vowel_sequences) do
				if mw.ustring.find(search_from, "^" .. sequence) then
					seq_ok = true
					break
				end
			end

			if seq_ok then
				local total = mw.ustring.len(select(3, mw.ustring.find(lowered, vowel_pair_pattern, pos)))
				syllable = syllable .. mw.ustring.sub(word, pos, pos + total - 1)
				pos = pos + total
			else
				syllable = syllable .. mw.ustring.sub(word, pos, pos)
				pos = pos + 1
			end
		elseif mw.ustring.find(lowered, "^[" .. sep_symbols .. "]", pos) then
			-- separates syllables
			if #syllable > 0 then
				table.insert(res, syllable)
			end

			local sepchar = mw.ustring.sub(word, pos, pos)
			-- Plain search: the separator is a literal character, not a pattern
			-- (otherwise "." would match any listed symbol).
			local keep = keep_sep_symbols == true
				or (type(keep_sep_symbols) == "string" and keep_sep_symbols:find(sepchar, 1, true))
			syllable = keep and sepchar or ""
			pos = pos + 1
			found_vowel = false
		else
			-- ?: continue
			syllable = syllable .. mw.ustring.sub(word, pos, pos)
			pos = pos + 1
		end
	end

	if #syllable > 0 then
		table.insert(res, syllable)
	end

	return res
end

--- Earliest respelling pass: ties "ts" into an affricate and turns an
-- UNGEMINATE marker standing between a vowel and "i" into
-- VIRTUAL_BREAK_UNGEMINATE.
-- @param text respelled word
-- @return the rewritten text
local function zeroth_round_of_common_replacements(text)
	local affricated = mw.ustring.gsub(text, "ts", "t͡s")
	local before_break = "([" .. m_izh.vowels .. m_izh.consonants .. "])(" .. m_izh.vowel .. ")"
	local result = mw.ustring.gsub(
		affricated,
		before_break .. UNGEMINATE .. "i",
		"%1%2" .. VIRTUAL_BREAK_UNGEMINATE .. "i"
	)
	return result
end

--- Maps orthographic letters to their basic IPA counterparts and converts
-- "n" before "k"/"g" into a velar nasal.
-- @param text respelled word
-- @return the rewritten text
local function first_round_of_common_replacements(text)
	local velar_nasals = {
		["nk"] = "ŋk",
		["ng"] = "ŋg",
	}
	local letters = {
		["a"] = "ɑ",
		["ä"] = "æ",
		["ö"] = "ø",
		["v"] = "ʋ",
		["ь"] = "ɨ",
		["’"] = ".",
--		["-"] = STRESS_SECONDARY,
	}

	local result = mw.ustring.gsub(text, "n[kg]", velar_nasals)
	result = mw.ustring.gsub(result, "[aäövь’]", letters)
	return result
end

--- Late IPA pass run after length marking: sibilant/affricate letters,
-- "h" before a length mark, optional palatal replacements, and "g" -> "ɡ".
-- @param text partially converted IPA text
-- @param do_palatal_repls when truthy, palatalized n/ʃ/ʒ become ɲ/ɕ/ʑ
-- @return the rewritten text
local function second_round_of_common_replacements(text, do_palatal_repls)
	local sibilants = {
		["c"] = "t͡ʃ",
		["š"] = "ʃ",
		["ž"] = "ʒ"
	}
	local result = mw.ustring.gsub(text, "[cšž]", sibilants)
	result = mw.ustring.gsub(result, "h([" .. LONG .. SEMILONG .. "])", "x%1")

	if do_palatal_repls then
		local palatals = {
			["n"] = "ɲ",
			["ʃ"] = "ɕ",
			["ʒ"] = "ʑ"
		}
		result = mw.ustring.gsub(result, "([nʃʒ])" .. PALATAL, palatals)
	end

	result = mw.ustring.gsub(result, "ɫ" .. PALATAL, "l" .. PALATAL)
	result = mw.ustring.gsub(result, "g", "ɡ")
	return result
end

--- Replaces "j" after a consonant matched by `filter` with a palatalization
-- mark; a length mark is added as well unless the next character is the
-- PALATALIZE marker.
-- @param text word being processed
-- @param filter pattern fragment for the consonants to palatalize
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function automatic_palatalization(text, filter)
	local function palatalize(consonant, following)
		local mark = PALATAL
		if following ~= PALATALIZE then
			mark = mark .. LONG
		end
		return consonant .. mark .. following
	end

	return mw.ustring.gsub(text, "(" .. filter .. ")j(.?)", palatalize)
end

--- Converts explicit PALATALIZE markers after consonants into the IPA
-- palatalization mark, removes leftover markers, and copies palatalization
-- onto a "t" separated from a palatalized "t" by a break.
-- @param text word being processed
-- @return the rewritten text
local function manual_palatalization(text)
	local result = text
	if mw.ustring.find(result, PALATALIZE) then
		result = mw.ustring.gsub(result, "([" .. IPA_CONSONANTS .. "])" .. PALATALIZE, "%1" .. PALATAL)
		result = mw.ustring.gsub(result, PALATALIZE, "")
		result = mw.ustring.gsub(result, PALATAL .. PALATAL, PALATAL)
	end

	local breaks = "([" .. STRESS_SECONDARY .. AUTO_STRESS .. ".])"
	result = mw.ustring.gsub(result, "(t)" .. breaks .. "(t" .. PALATAL .. ")", "%1" .. PALATAL .. "%2%3")
	return result
end

-- Patterns for the diphthongs whose second element is marked non-syllabic.
local IPA_diphthongs = { "[ɑeouyæø]i", "[ɑeio]u", "[æeiø]y" }

--- Marks doubled vowels as long and appends the non-syllabic diacritic to
-- every diphthong listed in IPA_diphthongs (an optional VERYSHORT mark is
-- allowed on the first element).
-- @param text IPA text
-- @return the rewritten text
local function long_vowels_and_diphthongs(text)
	local result = mw.ustring.gsub(text, "([" .. IPA_VOWELS .. "])%1", "%1" .. LONG)

	for index = 1, #IPA_diphthongs do
		local diphthong = IPA_diphthongs[index]
		local mod_diphthong
		if mw.ustring.find(diphthong, "%]$") then
			-- NOTE(review): no entry of IPA_diphthongs ends in "]", so this
			-- branch is currently unreachable. The second call also discards
			-- the result of the first, and both patterns look damaged —
			-- confirm against the upstream module before relying on it.
			mod_diphthong = mw.ustring.gsub(diphthong, "(.)(%^%-%])", "%1" .. VERYSHORT .. "?%2")
			mod_diphthong = mw.ustring.gsub(diphthong, "(%^%-%])(%^%-%])", "%1" .. VERYSHORT .. "?%2")
		else
			local first = mw.ustring.sub(diphthong, 1, -2)
			local second = mw.ustring.sub(diphthong, -1, -1)
			mod_diphthong = first .. VERYSHORT .. "?" .. second
		end
		result = mw.ustring.gsub(result, "(" .. mod_diphthong .. ")", "%1" .. NONSYLLABIC)
	end

	return result
end

--- Replaces a doubled letter with the letter plus a length mark and moves a
-- palatalization mark in front of the length mark.
-- @param text IPA text
-- @return the rewritten text
local function long_consonants(text)
	local lengthened = mw.ustring.gsub(text, "(%a)%1", "%1" .. LONG)
	local reordered = mw.ustring.gsub(lengthened, LONG .. PALATAL, PALATAL .. LONG)
	return reordered
end

--- Assimilates "n" to a following labial (-> m) or velar (-> ŋ), also across
-- whitespace, AUTO_STRESS markers and hyphens.
-- @param text IPA text
-- @return the rewritten text
local function standard_sandhi(text)
	local gap = "[%s" .. AUTO_STRESS .. "-]*"
	local result = mw.ustring.gsub(text, "n(" .. gap .. "[mpb])", "m%1")
	result = mw.ustring.gsub(result, "n(" .. gap .. "[kgɡ̊])", "ŋ%1")
	return result
end

--- Finalizes stress marks: AUTO_STRESS markers and hyphens become secondary
-- stress, and a primary stress mark is put at the start of the text and
-- after every space. The result is NFC-normalized.
-- @param text IPA text
-- @return the stressed text
local function add_primary_stress(text)
	local marked = mw.ustring.gsub(text, AUTO_STRESS, "-")
	marked = mw.ustring.gsub(marked, "-%.", "-")
	marked = mw.ustring.gsub(marked, "-", STRESS_SECONDARY)
	local per_word = mw.ustring.gsub(marked, " ", " " .. STRESS_PRIMARY)
	return mw.ustring.toNFC(STRESS_PRIMARY .. per_word)
end

--- Tells whether a syllable starts with a space, an AUTO_STRESS marker or a
-- hyphen.
-- @param syllable syllable string
-- @return the results of mw.ustring.find (nil when there is no match)
local function is_stressed_syllable(syllable)
	local stress_onset = "^[ " .. AUTO_STRESS .. "-]"
	return mw.ustring.find(syllable, stress_onset)
end

--- Inserts AUTO_STRESS markers in place: every second unstressed syllable
-- after a stressed one is marked, unless it is the last syllable or the
-- syllable after it is already stressed.
-- @param syllables array of syllables (modified in place)
local function add_secondary_stress(syllables)
	local unstressed_run = 0
	-- The last syllable is never examined.
	for index = 1, #syllables - 1 do
		local syllable = syllables[index]
		if index == 1 or is_stressed_syllable(syllable) then
			unstressed_run = 0
		else
			unstressed_run = unstressed_run + 1
			if unstressed_run == 2 then
				unstressed_run = 0
				if not is_stressed_syllable(syllables[index + 1]) then
					syllables[index] = AUTO_STRESS .. syllable
				end
			end
		end
	end
end

--- If the text contains a virtual break marker, removes AUTO_STRESS and
-- VIRTUAL_BREAK, turns VIRTUAL_BREAK_UNGEMINATE back into UNGEMINATE, and
-- recomputes secondary stress on the re-split syllables.
-- @param text word being processed
-- @return the text, unchanged when it has no virtual break markers
local function clean_virtual_break(text)
	local has_break = mw.ustring.find(text, "[" .. VIRTUAL_BREAK .. VIRTUAL_BREAK_UNGEMINATE .. "]")
	if not has_break then
		return text
	end

	local without_marks = mw.ustring.gsub(text, "[" .. AUTO_STRESS .. VIRTUAL_BREAK .. "]", "")
	local cleaned = mw.ustring.gsub(without_marks, VIRTUAL_BREAK_UNGEMINATE, UNGEMINATE)
	local syllables = split_syllables(cleaned, true)
	add_secondary_stress(syllables)
	return table.concat(syllables)
end

--- Strips VIRTUAL_BREAK, UNGEMINATE and VIRTUAL_BREAK_UNGEMINATE markers.
-- @param text word being processed
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function clean_ungeminate(text)
	local markers = "[" .. VIRTUAL_BREAK .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"
	return mw.ustring.gsub(text, markers, "")
end

--- Applies gemination in place. After a stressed syllable that ends in a
-- single vowel, the onset consonant of a following unstressed syllable is
-- moved to the end of the previous syllable and replaced by `diacritic`
-- when it is followed by two vowels.
-- @param syllables array of syllables (modified in place)
-- @param diacritic string that replaces the moved onset consonant
local function do_gemination(syllables, diacritic)
	local onset = "[" .. IPA_CONSONANTS_GEMINATABLE .. "][" .. PALATALIZE .. J_PALATALIZE .. "]*"
	local short_open = "^[ " .. AUTO_STRESS .. "-]?[" .. IPA_CONSONANTS .. PALATALIZE .. J_PALATALIZE .. TIE .. "]*" .. m_izh.vowel .. "$"

	local previous_allows = false
	for index = 1, #syllables do
		local syllable = syllables[index]
		local stressed = index == 1 or is_stressed_syllable(syllable)

		if previous_allows and not stressed then
			-- check if the initial consonant in this syllable is followed by two vowels
			local rest = syllable .. (syllables[index + 1] or "")
			if mw.ustring.find(rest, "^" .. onset .. m_izh.vowel .. m_izh.vowel) then
				-- CVCVV -> CVC:VV
				local cg = select(3, mw.ustring.find(syllable, "^(" .. onset .. ")"))
				syllables[index - 1] = syllables[index - 1] .. cg
				syllables[index] = mw.ustring.gsub(syllable, "^" .. cg, diacritic)
			end
		end

		previous_allows = stressed and mw.ustring.find(syllable, short_open)
	end
end

-- Diacritic sequence (U+0325 followed by U+0306) appended to reduced vowels.
local REDUCED = U(0x0325) .. U(0x0306)

-- Final vowels that are kept in reduced form, mapped to their reduced IPA
-- spelling.
local reduce_final_vowel = {
	["o"] = "o" .. REDUCED,
	["ö"] = "ø" .. REDUCED,
	["u"] = "u" .. REDUCED,
	["y"] = "y" .. REDUCED,
}

--- Reduces a vowel letter: to a schwa when no filter is given or the letter
-- matches the filter set, otherwise to the letter plus a VERYSHORT mark.
-- @param letter vowel letter
-- @param filter optional string of letters (used inside a character class)
-- @return the reduced vowel
local function to_schwa(letter, filter)
	if not filter or mw.ustring.find(letter, "[" .. filter .. "]") then
		return "ə"
	end
	return letter .. VERYSHORT
end

--- Returns an iterator that yields the syllables of one word per call.
-- A new word starts at every syllable that begins with whitespace; the
-- leading whitespace of a word's first syllable is stripped.
-- @param syllables array of syllables for one or more words
-- @return an iterator function returning an array of syllables, or nil
--   once all syllables have been consumed
local function split_syllables_by_words(syllables)
	local i = 1
	-- The closure needs an explicit (empty) parameter list to be valid Lua.
	return function()
		local r = {}
		local e = i
		if e <= #syllables then
			-- Extra parentheses drop gsub's replacement count.
			table.insert(r, (mw.ustring.gsub(syllables[e], "^%s+", "")))
			e = e + 1
			while e <= #syllables and not mw.ustring.find(syllables[e], "^%s") do
				table.insert(r, syllables[e])
				e = e + 1
			end
			i = e
			return r
		end
	end
end

--- Runs `fn` on the syllables of each word separately and rebuilds
-- `out_syllables` in place from the processed words, putting a space in
-- front of the first syllable of every word but the first.
-- @param out_syllables array of syllables (emptied and refilled in place)
-- @param fn function receiving the syllable array of a single word
local function do_by_word_syllables(out_syllables, fn)
	-- Move the current contents aside so the table can be refilled.
	local pending = {}
	for key, value in pairs(out_syllables) do
		pending[key] = value
		out_syllables[key] = nil
	end

	local first_word = true
	for word in split_syllables_by_words(pending) do
		fn(word)
		for position, syllable in ipairs(word) do
			if position == 1 and not first_word then
				table.insert(out_syllables, " " .. syllable)
			else
				table.insert(out_syllables, syllable)
			end
		end
		first_word = false
	end
end

--- Tells whether a syllable starts with a consonant followed by a tie bar.
-- @param syllable syllable string, or nil/false
-- @return `syllable` itself when it is nil/false, otherwise the match
--   position (nil when there is no match)
local function begins_with_affricate(syllable)
	if not syllable then
		return syllable
	end
	-- Parentheses keep only the first result of find.
	return (mw.ustring.find(syllable, "^[" .. IPA_CONSONANTS .. "]" .. TIE))
end

--- Shared driver for vowel reduction. Walks the syllables of one word and,
-- for each unstressed syllable that qualifies, rewrites its vowel nucleus
-- and coda through `replacement`.
-- @param syllables array of syllables of one word (modified in place)
-- @param replacement function(nucleus, coda, index) returning the text that
--   replaces the nucleus and everything after it in the syllable
local function do_reduction_internal(syllables, replacement)
	local vowel = m_izh.vowel
	local any_consonant = "[" .. IPA_CONSONANTS .. "]"

	local prev_was_stressed = false
	local prev_was_long = false
	local since_stress = 0

	for index, syllable in ipairs(syllables) do
		local stressed = index == 1 or is_stressed_syllable(syllable)
		local final = index == #syllables

		if stressed then
			since_stress = 0
		else
			since_stress = since_stress + 1
		end

		prev_was_long = prev_was_long or begins_with_affricate(syllable)

		if mw.ustring.find(syllable, "^j'") and prev_was_long then
			-- hack. /Cj'/ is one consonant.
			local previous_syllable = syllables[index - 1]
			if mw.ustring.find(previous_syllable, vowel .. any_consonant) then
				prev_was_long = mw.ustring.find(previous_syllable, vowel .. vowel)
					or mw.ustring.find(previous_syllable, vowel .. any_consonant .. any_consonant)
			end
		end

		local after_long_stressed = prev_was_stressed and prev_was_long
		local reducible_final = index > 1 and final and (since_stress > 1 or prev_was_long)
		if not stressed and (after_long_stressed or reducible_final) then
			syllables[index] = mw.ustring.gsub(syllable, "(" .. vowel .. "+)(.*)", function (nucleus, coda)
				return replacement(nucleus, coda, index)
			end)
		end

		-- reduce the next syllable only if the current syllable is stressed and not short
		prev_was_stressed = stressed
		prev_was_long = mw.ustring.find(syllable, vowel .. "[" .. IPA_CONSONANTS .. m_izh.vowels .. "]")
	end
end

--- Narrow-transcription vowel reduction for the syllables of one word.
-- @param syllables array of syllables of one word (modified in place)
local function do_reduction_word(syllables)
	local vowel = m_izh.vowel

	local function reduce(nucleus, coda, index)
		local final = index == #syllables
		local never_open = false

		-- A doubled vowel is shortened to a single one.
		if mw.ustring.find(nucleus, "(" .. vowel .. ")%1") then
			return mw.ustring.sub(nucleus, 1, 1) .. coda
		end

		if mw.ustring.find(nucleus, vowel .. vowel) then
			local tail = mw.ustring.sub(nucleus, 2)
			if tail ~= "i" then
				coda = tail .. coda
			else
				never_open = true
			end
			nucleus = mw.ustring.sub(nucleus, 1, 1)
		end

		local open = #coda == 0 and not never_open

		if final then
			if open then
				-- reduced, but simply drop it
				return reduce_final_vowel[nucleus] or ""
			end
			if coda == "" and reduce_final_vowel[nucleus] then
				-- /oi/, /ui/, /yi/, /øi/
				return reduce_final_vowel[nucleus]
			end
			local reduced
			if nucleus == "e" then
				reduced = "e"
			else
				reduced = to_schwa(nucleus, "aä")
			end
			return reduced .. coda
		end

		local next_syllable = syllables[index + 1]
		local next_syllable_starts_with_vowel = mw.ustring.find(next_syllable, "^[ -]?%.?" .. vowel)
		local next_syllable_stressed = is_stressed_syllable(next_syllable)
		local next_syllable_open = not (mw.ustring.find(next_syllable, "[" .. IPA_CONSONANTS .. "]$") or begins_with_affricate(syllables[index + 2]))

		if next_syllable_starts_with_vowel then
			return nucleus .. coda
		elseif next_syllable_stressed then
			return to_schwa(nucleus, "aäe") .. coda
		elseif next_syllable_open then
			return to_schwa(nucleus) .. coda
		end
		return to_schwa(nucleus, "aäe") .. coda
	end

	do_reduction_internal(syllables, reduce)
end

--- When a word ends in a syllable consisting only of "a" or "ä", merges it
-- into the preceding syllable as a doubled vowel chosen from that
-- syllable's last letter. Nothing happens for one-syllable words, when the
-- two syllables joined together start with a stress marker, or when the
-- last letter has no mapping.
-- @param syllables array of syllables of one word (modified in place)
local function do_coalesce_rhyme_word(syllables)
	local last = #syllables
	local vowel = mw.ustring.match(syllables[last], "^[aä]$")
	if not vowel or last <= 1 then
		return
	end
	if is_stressed_syllable(syllables[last - 1] .. syllables[last]) then
		return
	end

	local prefinal = mw.ustring.sub(syllables[last - 1], -1)
	local replacement
	if vowel == "a" then
		replacement = ({ ["i"] = "e", ["u"] = "o", ["o"] = "o" })[prefinal]
	elseif vowel == "ä" then
		replacement = ({ ["i"] = "e", ["y"] = "ö", ["ö"] = "ö" })[prefinal]
	end

	if replacement then
		local merged = mw.ustring.sub(syllables[last - 1], 1, -2) .. replacement .. replacement
		syllables[last - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
		syllables[last] = nil
	end
end

--- Rhyme-level vowel reduction for one word. Only an open final syllable
-- is changed: its vowel is kept when it is listed in reduce_final_vowel
-- and dropped otherwise.
-- @param syllables array of syllables of one word (modified in place)
local function do_reduction_rhyme_word(syllables)
	local vowel = m_izh.vowel

	local function reduce(nucleus, coda, index)
		local final = index == #syllables

		-- Doubled vowels are left alone.
		if mw.ustring.find(nucleus, "(" .. vowel .. ")%1") then
			return nucleus .. coda
		end

		if mw.ustring.find(nucleus, vowel .. vowel) then
			local tail = mw.ustring.sub(nucleus, 2)
			if tail == "i" then
				return nucleus .. coda
			end
			coda = tail .. coda
			nucleus = mw.ustring.sub(nucleus, 1, 1)
		end

		local open = #coda == 0
		if final and open then
			-- reduced, but simply drop it
			return reduce_final_vowel[nucleus] and nucleus or ""
		end
		return nucleus .. coda
	end

	do_reduction_internal(syllables, reduce)
end

--- Drops the final vowel of a word when narrow reduction would delete it.
-- A copy of the syllables is run through do_reduction_word; if the reduced
-- last syllable no longer ends in a vowel, it is merged into the previous
-- syllable (followed by REALLY_JUST_PALATAL when the original last
-- syllable ended in "i").
-- @param syllables array of syllables of one word (modified in place)
local function do_final_vowel_dropping_word(syllables)
	local count = #syllables
	if count == 1 then
		return
	end
	-- Only words ending in a non-vowel followed by a single vowel qualify.
	local whole = table.concat(syllables, "")
	if not mw.ustring.find(whole, "[^" .. m_izh.vowels .. "]" .. m_izh.vowel .. "$") then
		return
	end

	local final = mw.ustring.sub(syllables[count], -1, -1)
	if reduce_final_vowel[final] then
		return
	end

	local reduced = {}
	for position, syllable in ipairs(syllables) do
		reduced[position] = syllable
	end
	do_reduction_word(reduced)

	if mw.ustring.find(reduced[#reduced], m_izh.vowel .. "$") then
		return
	end

	local leftovers = ""
	if mw.ustring.find(syllables[count], "i$") then
		leftovers = REALLY_JUST_PALATAL
	end
	local merged = syllables[count - 1] .. reduced[#reduced] .. leftovers
	syllables[count - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
	syllables[count] = nil
end

--- Applies do_reduction_word to every word of a syllable list.
-- @param syllables array of syllables (modified in place)
local function do_reduction(syllables)
	local per_word = do_reduction_word
	do_by_word_syllables(syllables, per_word)
end

--- Applies do_reduction_rhyme_word to every word of a syllable list.
-- @param syllables array of syllables (modified in place)
local function do_reduction_rhyme(syllables)
	local per_word = do_reduction_rhyme_word
	do_by_word_syllables(syllables, per_word)
end

--- Applies do_coalesce_rhyme_word to every word of a syllable list.
-- @param syllables array of syllables (modified in place)
local function do_coalesce_rhyme(syllables)
	local per_word = do_coalesce_rhyme_word
	do_by_word_syllables(syllables, per_word)
end

--- Applies do_final_vowel_dropping_word to every word of a syllable list.
-- @param syllables array of syllables (modified in place)
local function do_final_vowel_dropping(syllables)
	local per_word = do_final_vowel_dropping_word
	do_by_word_syllables(syllables, per_word)
end

--- Chooses between plain, velarized ("ɫ") and palatalized ("l" followed by
-- the PALATALIZE marker) realizations of every "l", based on the
-- neighbouring vowels, "j" and word boundaries.
-- Text without "l", or text that already has an explicitly palatalized
-- "l", is returned unchanged.
-- @param text respelled word(s)
-- @return the rewritten text
local function do_narrow_l(text)
	-- failsafe
	if not mw.ustring.find(text, "l") then
		return text
	end
	if mw.ustring.find(text, "l" .. PALATALIZE) then
		return text
	end

	local velar_l = "ɫ"
	-- Temporary single-character placeholder for a palatalized l.
	local palatal_l = U(0xEEEF)

	-- An l before a consonant is decided by the preceding vowel alone.
	text = mw.ustring.gsub(text, "([aouäöyь])l(" .. m_izh.consonant .. ")",
		function (before, after)
			if after == "l" or after == "j" then
				return before .. "l" .. after
			elseif mw.ustring.find(before, "[aouь]") then
				return before .. velar_l .. after
			else
				return before .. palatal_l .. after
			end
		end)

	local length = mw.ustring.len(text)
	local l_indexes = {}
	local i = 1
	local env = {}

	-- Character positions of every remaining plain "l".
	while true do
		local index = mw.ustring.find(text, "l", i)
		if index == nil then
			break
		end
		table.insert(l_indexes, index)
		i = index + 1
	end

	local env_tags = {
		["a"] = "a", ["o"] = "a", ["u"] = "a",
		["i"] = "i",
		["j"] = "j",
		["ä"] = "ä", ["ö"] = "ä", ["y"] = "ä",
		["e"] = "e",
		[" "] = "_", ["-"] = "_",
		["ь"] = "a"
	}

	-- Build a two-character environment key (class before, class after) for
	-- each l. Only vowels, j, l, ь, spaces and hyphens are looked at.
	local cleaned = mw.ustring.gsub(text, "[^aeiouäöyjlь -]", "") .. " "
	local env_index = 1
	local current_env = "_"
	local current_env_before = "_"
	local backburner, backburner_count = {}, 0
	for c in mw.ustring.gmatch(cleaned, ".") do
		if c == "l" then
			env[env_index] = current_env_before
			backburner_count = backburner_count + 1
			backburner[backburner_count] = env_index
			env_index = env_index + 1
		else
			current_env = env_tags[c] or "_"
			-- Complete the keys of the l's still waiting for a following class.
			for waiting = 1, backburner_count do
				local back_index = backburner[waiting]
				env[back_index] = env[back_index] .. current_env
			end
			backburner_count = 0
			if current_env ~= "j" then
				current_env_before = current_env
			end
		end
	end

	local l_conv = {
		["i_"] = palatal_l, ["_i"] = palatal_l,
		["äi"] = palatal_l, ["ei"] = palatal_l,
		["aj"] = palatal_l, ["äj"] = palatal_l,
		["ij"] = palatal_l, ["ej"] = palatal_l,
		["ie"] = palatal_l,
		["oj"] = palatal_l, ["uj"] = palatal_l,

		["a_"] = velar_l, ["_a"] = velar_l,
		["aa"] = velar_l, ["ia"] = velar_l,
		["ea"] = velar_l, ["ae"] = velar_l
	}

	-- Reassemble the text. `old_text` is a local here (it used to leak into
	-- the global table) and the pieces are joined once.
	local old_text = text
	local pieces = {}
	i = 1
	for position, l_index in ipairs(l_indexes) do
		pieces[#pieces + 1] = mw.ustring.sub(old_text, i, l_index - 1)
		pieces[#pieces + 1] = l_conv[env[position]] or "l"
		i = l_index + 1
	end
	pieces[#pieces + 1] = mw.ustring.sub(old_text, i, length)
	text = table.concat(pieces)

	text = mw.ustring.gsub(text, palatal_l .. palatal_l, "ll" .. PALATALIZE)
	text = mw.ustring.gsub(text, palatal_l, "l" .. PALATALIZE)
	return text
end

-- Vowel that replaces a syllable's single vowel (doubled) when the next
-- syllable begins with "a" or "ä"; see do_additional_reduction.
local reduce_a_diphthong = {
	["e"] = "e",
	["i"] = "e",
	["o"] = "o",
	["ö"] = "ö",
	["u"] = "o",
	["y"] = "ö",
}

-- Same, for a next syllable beginning with "e".
local reduce_e_diphthong = {
	["u"] = "o",
	["y"] = "ö",
}

--- Merges an unstressed syllable that has a single vowel with a following
-- syllable beginning with a/ä/e, using reduce_a_diphthong and
-- reduce_e_diphthong. Only syllables at most two positions after the last
-- stressed one are considered. Emptied syllables are removed afterwards.
-- @param syllables array of syllables (modified in place)
local function do_additional_reduction(syllables)
	-- /VA/ (V != A) never in the same syllable
	local last_stressed = 1
	for i = 1, #syllables - 1 do
		if i == 1 or is_stressed_syllable(syllables[i]) then
			last_stressed = i
		else
			local nucleus = mw.ustring.match(syllables[i], m_izh.vowel .. "+")
			if i - last_stressed <= 2 and nucleus then
				nucleus = select(3, mw.ustring.find(nucleus, "^" .. UNGEMINATE .. "?(" .. m_izh.vowel .. ")$"))
			else
				nucleus = nil
			end

			if nucleus then
				local onset_start, onset_end, consequent = mw.ustring.find(syllables[i + 1], "^" .. UNGEMINATE .. "?([aeä])")
				if onset_start then
					-- Pick the table that applies to the following vowel.
					local merged_vowel
					if mw.ustring.find(consequent, "[aä]") and reduce_a_diphthong[nucleus] then
						merged_vowel = reduce_a_diphthong[nucleus]
					elseif consequent == "e" and reduce_e_diphthong[nucleus] then
						merged_vowel = reduce_e_diphthong[nucleus]
					end

					if merged_vowel then
						syllables[i] = mw.ustring.gsub(syllables[i], nucleus, merged_vowel .. merged_vowel) .. mw.ustring.sub(syllables[i + 1], onset_end + 1)
						syllables[i + 1] = ""
					end
				end
			end
		end
	end

	-- remove empty syllables
	local total = #syllables
	local write = 1
	for read = 1, total do
		if mw.ustring.len(syllables[read]) > 0 then
			syllables[write] = syllables[read]
			write = write + 1
		end
	end
	for stale = write, total do
		syllables[stale] = nil
	end
end

--- Looks up a consonant in `map` and re-attaches its optional palatalization
-- mark to the mapped value.
-- @param map table from base consonant to replacement
-- @param consonant consonant, optionally followed by PALATAL
-- @return the mapped consonant followed by the original palatal mark (if any)
local function pass_diacritics_through(map, consonant)
	local base, palatal_mark = mw.ustring.match(consonant, "([" .. IPA_CONSONANTS .. "])([" .. PALATAL .. "]?)")
	return map[base] .. palatal_mark
end

-- Sounds treated as voiced for the voicing rules.
local voiced_consonants = "jlɫmnŋrvʋ"
local voiced_sounds = IPA_VOWELS .. m_izh.vowels .. voiced_consonants

--- Applies voicing and semivoicing of k/p/t/s/š: spelled voiced obstruents
-- are first devoiced, then the rules below re-voice them by context.
-- @param text word(s) being processed
-- @return the rewritten text
local function do_voicing(text)
	local devoice = { ["b"] = "p", ["d"] = "t", ["g"] = "k", ["z"] = "s", ["ž"]="š" }
	local voice = { ["k"] = "g", ["p"] = "b", ["t"] = "d", ["s"] = "z", ["š"] = "ž" }
	local semivoice = { ["k"] = "g̊", ["p"] = "b̥", ["t"] = "d̥", ["s"] = "z̥", ["š"] = "ž̥" }

	local consonants_to_voice = "[kptsš][" .. PALATAL .. "]?"
	local vowel = "[" .. IPA_VOWELS .. m_izh.vowels .. "]"
	local voiced = "[" .. voiced_sounds .. "]"
	local stress_mark = "[" .. AUTO_STRESS .. "-]?"

	local result = mw.ustring.gsub(text, "[bdgzž]", devoice)

	-- k/p/t/s/š is semivoiced if it follows a voiced sound and is followed by a short vowel or a voiced consonant
	result = gsub_lookahead(result,
		"(" .. voiced .. ANY_DIACRITICS .. PALATAL .. "?" .. stress_mark .. ")(" .. consonants_to_voice .. ")(" .. voiced .. ANY_DIACRITICS .. ".?)",
		function (before, consonant, after)
			if mw.ustring.find(after, vowel .. ANY_DIACRITICS .. vowel) then
				return before .. consonant, after
			end
			return before .. pass_diacritics_through(semivoice, consonant), after
		end)

	-- k/p/t/s/š is semivoiced if it follows a voiced sound and is not followed by anything
	result = mw.ustring.gsub(result,
		"(" .. voiced .. ANY_DIACRITICS .. PALATAL .. "?" .. stress_mark .. ")(" .. consonants_to_voice .. ")$",
		function (before, consonant)
			return before .. pass_diacritics_through(semivoice, consonant)
		end)

	-- k/p/t/s/š is voiced if it follows a voiced sound and the next sound in the next word is a voiced sound
	-- k/p/t/s/š is semivoiced if it follows a voiced sound and the next sound in the next word is not a voiced sound
	result = gsub_lookahead(result,
		"(" .. voiced .. ANY_DIACRITICS .. stress_mark .. ")(" .. consonants_to_voice .. ")([%s" .. AUTO_STRESS .. "-]+)(.)",
		function (before, consonant, space, after)
			if mw.ustring.find(after, "^" .. voiced) then
				return before .. pass_diacritics_through(voice, consonant) .. space, after
			end
			return before .. consonant .. space, after
		end)

	-- devoice word-initial
	result = mw.ustring.gsub(result, "^([bdgzž])[" .. U(0x030a) .. U(0x0325) .. "]?", devoice)

	return result
end

--- Voices "s" and "t" (to "z" and "d") after a voiced sound, either at the
-- end of the text or before a space/hyphen followed by a voiced sound.
-- @param text word(s) being processed
-- @return the rewritten text
local function do_alalaukaa_voicing(text)
	local before = "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. LONG .. "?" .. PALATAL .. "?)"
	local next_word = "([%s-][ˈˌ]?[" .. voiced_sounds .. "])"

	local result = mw.ustring.gsub(text, before .. "s$", "%1z")
	result = mw.ustring.gsub(result, before .. "t$", "%1d")
	result = mw.ustring.gsub(result, before .. "s" .. next_word, "%1z%2")
	result = mw.ustring.gsub(result, before .. "t" .. next_word, "%1d%2")
	return result
end

--- Replaces each vowel from `vowels_find` with its entry in `vowels_long`
-- when the next character is a length mark, and with its entry in
-- `vowels_short` otherwise.
-- @param text IPA text
-- @param vowels_find string of vowels to look for (used in a character class)
-- @param vowels_short replacement table for vowels not followed by LONG
-- @param vowels_long replacement table for vowels followed by LONG
-- @return whatever gsub_lookahead returns
local function do_vowel_replacements(text, vowels_find, vowels_short, vowels_long)
	local function pick(vowel, post)
		local source = vowels_short
		if post == LONG then
			source = vowels_long
		end
		return source[vowel], post
	end

	return gsub_lookahead(text, "([" .. vowels_find .. "])(.?)", pick)
end

--- Normalizes palatalization marks: resolves REALLY_JUST_PALATAL, moves the
-- mark in front of a length mark, collapses repeats, and copies the mark
-- onto the first half of a consonant doubled across a break.
-- @param text IPA text
-- @return the rewritten text
local function cleanup_palatal(text)
	local result = mw.ustring.gsub(text, REALLY_JUST_PALATAL, PALATAL)
	result = mw.ustring.gsub(result, LONG .. PALATAL, PALATAL .. LONG)
	result = mw.ustring.gsub(result, PALATAL .. "+", PALATAL)

	local split_geminate = "([" .. IPA_CONSONANTS .. "])([." .. STRESS_SECONDARY .. AUTO_STRESS .. "])%1" .. PALATAL
	result = mw.ustring.gsub(result, split_geminate, "%1" .. PALATAL .. "%2%1" .. PALATAL)
	return result
end

--- <<< COMMON END >>> ---

--- <<< DIALECTS START >>> ---

-- narrow_level 0 = broad, 1 = rhyme, 2 = narrow

-- Ala-Laukaa
--- Produces the IPA transcription of a respelled form for this variety.
-- @param text respelled form
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription with stress marks
local function IPA_alalaukaa(text, narrow_level)
	local narrow = narrow_level > 1

	if narrow_level <= 1 then
		text = mw.ustring.gsub(text, "j?" .. PALATALIZE, { [PALATALIZE] = "", ["j" .. PALATALIZE] = PALATALIZE })
	end
	text = mw.ustring.gsub(text, "([nr])h", "%1")
	text = mw.ustring.gsub(zeroth_round_of_common_replacements(text), VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)

	if narrow_level > 0 then
		if narrow then
			text = do_narrow_l(text)
			local all_palatal = mw.ustring.gsub(text, "l", "l" .. PALATALIZE)
			text = mw.ustring.gsub(all_palatal, "l" .. PALATALIZE .. "l" .. PALATALIZE, "ll" .. PALATALIZE)
			text = mw.ustring.gsub(text, PALATALIZE .. PALATALIZE, PALATALIZE)
			text = mw.ustring.gsub(text, "l" .. PALATALIZE .. "j", "lj")
			text = do_alalaukaa_voicing(text)
		end

		text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
		local syllables = split_syllables(text, true)
		add_secondary_stress(syllables)
		if narrow then
			do_final_vowel_dropping(syllables)
			do_gemination(syllables, LONG)
			do_additional_reduction(syllables)
			do_reduction(syllables)
		elseif narrow_level == 1 then
			do_final_vowel_dropping(syllables)
			do_coalesce_rhyme(syllables)
			do_reduction_rhyme(syllables)
		end
		text = table.concat(syllables)
		text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)

		if narrow then
			-- palatalization
			text = automatic_palatalization(text, "[ln]")
			text = mw.ustring.gsub(text, "h([kg])", "x%1")
		end
		text = clean_virtual_break(text)
	end

	text = clean_ungeminate(text)
	text = mw.ustring.gsub(text, "j" .. PALATALIZE, PALATALIZE)
	text = manual_palatalization(text)
	text = first_round_of_common_replacements(text)
	text = long_vowels_and_diphthongs(text)
	text = long_consonants(text)
	text = second_round_of_common_replacements(text, narrow_level > 1)

	if narrow then
		local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
		local vowels_long = { ["e"] = "e", ["o"] = "o", ["ø"] = "ø" }
		text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
		text = mw.ustring.gsub(text, "[sz]", { ["s"] = "s̠", ["z"] = "z̠" })
		text = standard_sandhi(text)

		-- "j" after a consonant becomes "i" before a break and at the end.
		local consonant_before_j = "([" .. IPA_CONSONANTS .. "]" .. ANY_DIACRITICS .. PALATAL .. "?)"
		text = mw.ustring.gsub(text, consonant_before_j .. "j%f[ " .. AUTO_STRESS .. "-]", "%1i")
		text = mw.ustring.gsub(text, consonant_before_j .. "j$", "%1i")
	end

	-- "j" after a vowel becomes non-syllabic "i" at the end of the text,
	-- before a consonant and before a space.
	local vowel_before_j = "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")"
	text = mw.ustring.gsub(text, vowel_before_j .. "j$", "%1i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, vowel_before_j .. "j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
	text = mw.ustring.gsub(text, vowel_before_j .. "j ", "%1i" .. NONSYLLABIC .. " ")

	text = cleanup_palatal(text)
	return add_primary_stress(text)
end

-- Soikkola
--- Produces the IPA transcription of a respelled form for this variety.
-- @param text respelled form
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription with stress marks
local function IPA_soikkola(text, narrow_level)
	local narrow = narrow_level > 1

	text = zeroth_round_of_common_replacements(text)

	if narrow_level > 0 then
		if narrow then
			text = do_narrow_l(text)
			text = mw.ustring.gsub(text, "h([kg])", "x%1")
		end
		text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
		local syllables = split_syllables(text, true)
		add_secondary_stress(syllables)
		if narrow then
			do_gemination(syllables, SEMILONG)
		end
		text = table.concat(syllables)
		text = mw.ustring.gsub(text, VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
		text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
	end

	-- The PALATALIZE marker is kept only after "l"; "j" plus marker collapses
	-- to the marker alone; after anything else the marker is dropped.
	text = mw.ustring.gsub(text, "(.)" .. PALATALIZE,
		function (preceding)
			if preceding == "l" then
				return preceding .. PALATALIZE
			elseif preceding == "j" then
				return PALATALIZE
			end
			return preceding
		end)
	text = manual_palatalization(text)

	if narrow then
		text = do_voicing(text)
	end
	if narrow_level > 0 then
		text = clean_virtual_break(text)
	end

	text = first_round_of_common_replacements(text)
	text = clean_ungeminate(text)
	text = long_vowels_and_diphthongs(text)
	text = long_consonants(text)
	text = second_round_of_common_replacements(text, narrow_level > 1)

	if narrow then
		local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
		local vowels_long = { ["e"] = "e̝", ["o"] = "o̝", ["ø"] = "ø̝" }
		text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)

		-- s/z become ʃ/ʒ except directly after a tie bar.
		local initial_s = mw.ustring.gsub(text, "^s", "ʃ")
		text = mw.ustring.gsub(initial_s, "([^" .. TIE .. "])s", "%1ʃ")
		local initial_z = mw.ustring.gsub(text, "^z", "ʒ")
		text = mw.ustring.gsub(initial_z, "([^" .. TIE .. "])z", "%1ʒ")
		text = standard_sandhi(text)
	end

	-- "j" after a vowel becomes non-syllabic "i" at the end of the text,
	-- before a consonant and before a space.
	local vowel_before_j = "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")"
	text = mw.ustring.gsub(text, vowel_before_j .. "j$", "%1i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, vowel_before_j .. "j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
	text = mw.ustring.gsub(text, vowel_before_j .. "j ", "%1i" .. NONSYLLABIC .. " ")

	return add_primary_stress(text)
end

-- Hevaha
--- Produces the IPA transcription for this variety by post-processing the
-- Soikkola output: half-long marks become long marks and, at the narrow
-- level, the voiceless ring is removed from b/d/ʒ/ɡ before l or r.
-- @param text respelled form
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription
local function IPA_hevaha(text, narrow_level)
	local result = IPA_soikkola(text, narrow_level)
	result = mw.ustring.gsub(result, "ˑ", "ː")
	if narrow_level > 1 then
		result = mw.ustring.gsub(result, "([bdʒ])" .. U(0x0325) .. "([lr])", "%1%2")
		result = mw.ustring.gsub(result, "ɡ" .. U(0x030A) .. "([lr])", "ɡ%1")
	end
	return result
end

-- Ylä-Laukaa
--- Placeholder for this variety; always raises an error.
-- @param text respelled form (unused)
-- @param narrow_level transcription level (unused)
local function IPA_ylalaukaa(text, narrow_level)
	-- TODO
	error("Ylä-Laukaa not implemented")
end

--- <<< DIALECTS END >>> ---

--- <<< INTERFACE START >>> ---

--- Replaces ASCII "g" with the IPA letter "ɡ".
-- @param ipa IPA text
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function cleanup_IPA(ipa)
	local ascii_g, ipa_g = "g", "ɡ"
	return mw.ustring.gsub(ipa, ascii_g, ipa_g)
end

--- Removes PALATALIZE and UNGEMINATE markers before hyphenation.
-- @param text respelled form
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function cleanup_for_hyphenate(text)
	return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "]", "")
end

--- Removes PALATALIZE and UNGEMINATE markers and hyphens.
-- @param text respelled form
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function cleanup_for_hyphenate_int(text)
	return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "-]", "")
end

--- Drops a final "i" that follows o, u, y or ö.
-- @param sp spelling
-- @return the spelling without that final "i" (a single value)
local function cleanup_for_hyphenate_final(sp)
	-- allow final /oi/, /ui/, /yi/, /øi/ for <o>, <u>, <y>, <ö>
	local trimmed = mw.ustring.gsub(sp, "([ouyö])i$", "%1")
	return trimmed
end

--- Adapts a respelling to the page title for display as a hyphenation.
-- A final "i" missing from the title is dropped. If the title is all
-- lowercase the spelling is lowercased; otherwise the letters of the
-- spelling take their case from the corresponding letters of the title.
-- @param sp respelling
-- @param title page title
-- @return the adapted spelling
local function match_spelling_with_title_for_hyphenation(sp, title)
	if mw.ustring.find(sp, "i$") and not mw.ustring.find(title, "i$") then
		sp = mw.ustring.gsub(sp, "i$", "")
	end

	if mw.ustring.lower(title) == title then
		return mw.ustring.lower(sp)
	end

	-- find letters in title
	local letters = {}
	for letter in mw.ustring.gmatch(title, "%a") do
		table.insert(letters, letter)
	end

	local respelled = {}
	local letter_index = 1

	for character in mw.ustring.gmatch(sp, ".") do
		local next_letter = letters[letter_index]
		-- `next_letter` is nil once the title's letters are used up; the
		-- character is then kept as spelled instead of raising an error.
		if next_letter
			and mw.ustring.match(character, "%a")
			and mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
			respelled[#respelled + 1] = next_letter
			letter_index = letter_index + 1
		else
			respelled[#respelled + 1] = character
		end
	end

	return table.concat(respelled)
end

--- Tells whether a respelling, with markers, hyphens and dots removed, is
-- the same as the title, ignoring case and a final "i" after o/u/y/ö.
-- @param sp respelling
-- @param title page title
-- @return true when they match
local function hyphenate_matches(sp, title)
	local without_dots = mw.ustring.gsub(cleanup_for_hyphenate_int(sp), "%.", "")
	local from_spelling = cleanup_for_hyphenate_final(mw.ustring.lower(without_dots))
	local from_title = cleanup_for_hyphenate_final(mw.ustring.lower(title))
	return from_spelling == from_title
end

--- Splits a respelling into syllables for hyphenation, after removing the
-- PALATALIZE and UNGEMINATE markers.
-- @param text respelling
-- @return array of syllables
local function hyphenate(text)
	-- cleanup_for_hyphenate returns gsub's (text, count) pair; the extra
	-- parentheses keep the count from being passed on as the
	-- `keep_sep_symbols` argument of split_syllables.
	return split_syllables((cleanup_for_hyphenate(text)))
end

--- Spells a geminatable consonant followed by a length mark as a doubled
-- letter ("ij" for a long "j"); an optional PALATALIZE marker is kept
-- after the doubled letter.
-- @param text text with length marks
-- @return the results of mw.ustring.gsub (new text and replacement count)
local function spell_long_consonants(text)
	local function double(consonant, marker)
		if consonant == "j" then
			return "ij"
		end
		return consonant .. consonant .. marker
	end

	local pattern = "([" .. m_izh.consonants_geminatable .. "])" .. "(" .. PALATALIZE .. "?)" .. LONG
	return mw.ustring.gsub(text, pattern, double)
end

--- Extracts the rhyme from `tuple.rhyme`: primary stress marks are removed,
-- the text is cut after the last secondary stress mark (if any), and the
-- part starting at the first IPA vowel is returned.
-- @param tuple table with a `rhyme` field holding an IPA string
-- @return the rhyme, or nil when no vowel is left
local function generate_rhyme(tuple)
	local text = mw.ustring.gsub(cleanup_IPA(tuple.rhyme), STRESS_PRIMARY, "")

	local last_secondary = mw.ustring.find(text, STRESS_SECONDARY .. "[^" .. STRESS_SECONDARY .. "]*$")
	if last_secondary ~= nil then
		text = mw.ustring.sub(text, last_secondary + 1)
	end

	local first_vowel = mw.ustring.find(text, "[" .. IPA_VOWELS .. "]")
	if first_vowel == nil then
		return nil
	end
	return mw.ustring.sub(text, first_vowel)
end

--- Transcribes every form at all three levels with the variety function.
-- A leading hyphen marks a suffix and a trailing hyphen a prefix: the
-- hyphen is removed before transcription and put back afterwards, and no
-- rhyme is produced for such forms.
-- @param fn variety function called as fn(form, narrow_level)
-- @param forms array of respelled forms
-- @param variety variety name stored in the result
-- @return table { forms = { {broad, rhyme, narrow}, ... }, varieties = { variety } }
local function make_IPAs(fn, forms, variety)
	local transcriptions = {}

	for _, form in ipairs(forms) do
		form = mw.ustring.lower(form)
		local suffix = mw.ustring.find(form, "^%-")
		local prefix = mw.ustring.find(form, "%-$")

		if suffix then
			form = mw.ustring.gsub(form, "^%-", "")
		end
		if prefix then
			form = mw.ustring.gsub(form, "%-$", "")
		end

		local entry = {
			broad = fn(form, 0),
			rhyme = fn(form, 1),
			narrow = fn(form, 2),
		}

		if prefix then
			entry.broad = entry.broad .. "-"
			entry.rhyme = nil
			entry.narrow = entry.narrow .. "-"
		end

		if suffix then
			entry.broad = "-" .. mw.ustring.gsub(entry.broad, "^" .. STRESS_PRIMARY, "")
			entry.rhyme = nil
			entry.narrow = "-" .. mw.ustring.gsub(entry.narrow, "^" .. STRESS_PRIMARY, "")
		end

		table.insert(transcriptions, entry)
	end

	return {
		forms = transcriptions,
		varieties = { variety }
	}
end

--- Formats the result of make_IPAs as one wikitext list item: the variety
-- qualifiers followed by the broad (/.../) and narrow ([...]) transcription
-- of every form.
-- @param tuple result of make_IPAs
-- @param title page title (not used here)
-- @param has_spaces passed on as `no_count` to format_IPA_full
-- @return wikitext string
local function format_IPAs(tuple, title, has_spaces)
	local dialects = require("Module:accent qualifier").format_qualifiers(lang, tuple.varieties)

	local items = {}
	for _, form in ipairs(tuple.forms) do
		table.insert(items, { pron = "/" .. cleanup_IPA(form.broad) .. "/" })
		table.insert(items, { pron = "[" .. cleanup_IPA(form.narrow) .. "]" })
	end

	local formatted = m_IPA.format_IPA_full { lang = lang, items = items, no_count = has_spaces }
	return "* " .. dialects .. " " .. formatted
end

--- Resolves a list parameter.
-- @param param list given by the caller (may be nil or empty)
-- @param fallback value returned when `param` is nil or empty
-- @param allow_dash when falsy, a list consisting only of "-" yields an
--   empty list
-- @return `fallback`, an empty table, or `param` itself
local function get_arg_list(param, fallback, allow_dash)
	local missing = not param or #param == 0
	if missing then
		return fallback
	end

	local only_dash = #param == 1 and param[1] == "-"
	if only_dash and not allow_dash then
		return {}
	end

	return param
end

-- Supported varieties, in output order. Each entry is
-- { parameter code, name, transcription function, optional }.
local varieties = {
	{"A", "Ala-Laukaa", IPA_alalaukaa, false},
	{"S", "Soikkola", IPA_soikkola, false},
	{"H", "Hevaha", IPA_hevaha, true},
--	{"Y", "Ylä-Laukaa", IPA_ylalaukaa, true},
}

-- rhymes only for these varieties
local varieties_with_rhymes = {
	["Ala-Laukaa"] = true,
	["Soikkola"] = true
}

--- Looks up a variety entry by its parameter code.
-- @param variety_code code such as "A", "S" or "H"
-- @return the matching entry of `varieties`; raises an error for an
--   unknown code
local function get_variety(variety_code)
	for index = 1, #varieties do
		local entry = varieties[index]
		if entry[1] == variety_code then
			return entry
		end
	end
	error("Unrecognized variety code: " .. variety_code)
end

--- Return the display name of the variety with the given code.
-- Raises an error (from get_variety) when the code is not recognised.
function export.get_variety(variety_code)
	local entry = get_variety(variety_code)
	local display_name = entry[2]
	return display_name
end

--- Tell whether at least one of the given variety names has rhymes enabled.
-- @param varieties sequence of variety names (this parameter shadows the
--   module-level `varieties` table inside the function)
-- @return true if any name is listed in `varieties_with_rhymes`, else false
local function allow_rhyme_for_varieties(varieties)
	local allowed = false
	for _, variety_name in ipairs(varieties) do
		if varieties_with_rhymes[variety_name] then
			allowed = true
			break
		end
	end
	return allowed
end

--- Transcribe a single form in the given variety.
-- @param form spelling to transcribe
-- @param variety_code code of the variety (see get_variety)
-- @param transcription optional key ("broad", "rhyme" or "narrow"); when
--   given, only that field of the result is returned
-- @return the {broad=, rhyme=, narrow=} table, or the selected field
function export.generate_one(form, variety_code, transcription)
	local entry = get_variety(variety_code)
	local name, fn = entry[2], entry[3]

	local transcribed = make_IPAs(fn, { form }, name).forms[1]
	if not transcription then
		return transcribed
	end
	return transcribed[transcription]
end

--- Transcribe several forms in the given variety.
-- @param forms sequence of spellings to transcribe
-- @param variety_code code of the variety (see get_variety)
-- @param transcription optional key ("broad", "rhyme" or "narrow"); when
--   given, each result table is replaced by that field (which may be nil)
-- @return list with one entry per form
function export.generate_multiple(forms, variety_code, transcription)
	local entry = get_variety(variety_code)
	local name, fn = entry[2], entry[3]

	local transcribed = make_IPAs(fn, forms, name).forms
	if not transcription then
		return transcribed
	end

	-- The length is taken once, before any slot is overwritten.
	for i = 1, #transcribed do
		transcribed[i] = transcribed[i][transcription]
	end
	return transcribed
end

--- Template entry point: builds the pronunciation section for a page.
-- Reads the template arguments from the parent frame:
--   1=        spellings used for the transcription (default: lowercased title)
--   A=/S=/H=/Y=  spellings specific to one variety
--   title=    overrides the page title (for debugging or demonstration only)
-- The output consists of one IPA line per variety, then (when available) a
-- rhymes line and a hyphenation line, followed by the formatted categories.
-- Rhymes and hyphenation are only produced when the title has no spaces.
-- @param frame the Scribunto frame object of the #invoke call
-- @return wikitext string
function export.show(frame)
	-- getCurrentTitle is a function; it has to be called to get the title object.
	local title = mw.title.getCurrentTitle().text
	local categories = {}

	local params = {
		[1] = { list = true },

		["A"] = { list = true }, -- Ala-Laukaa
		["S"] = { list = true }, -- Soikkola
		["H"] = { list = true }, -- Hevaha
		["Y"] = { list = true }, -- Ylä-Laukaa

		["title"] = {}, -- for debugging or demonstration only
	}

	-- getParent is a method call; its result carries the template arguments.
	local args = require("Module:parameters").process(frame:getParent().args, params)
	title = args["title"] or title

	local spellings = get_arg_list(args[1], { mw.ustring.lower(title) }, true)
	local IPAs = {}

	for _, variety in ipairs(varieties) do
		local param, name, fn, optional = unpack(variety)
		-- optional varieties have no fallback and are skipped unless given explicitly
		local forms = get_arg_list(args[param], not optional and spellings or nil, true)
		if forms then
			table.insert(IPAs, make_IPAs(fn, forms, name))
		end
	end

	local results = {}
	local has_spaces = mw.ustring.find(title, " ")

	local hyphenation = {}
	if not has_spaces then
		local sp = spellings[1]
		if not hyphenate_matches(sp, title) then
			-- try to geminate
			local syllables = split_syllables(sp, true)
			do_gemination(syllables, LONG)
			sp = spell_long_consonants(clean_ungeminate(table.concat(syllables)))
		end
		if hyphenate_matches(sp, title) then
			table.insert(hyphenation, hyphenate(match_spelling_with_title_for_hyphenation(sp, title)))
		end
	end

	local rhymes = {}
	if not has_spaces then
		local found_rhymes = {}
		for _, tuple in ipairs(IPAs) do
			if allow_rhyme_for_varieties(tuple.varieties) then
				for _, form in ipairs(tuple.forms) do
					if form.rhyme then
						local rhyme = generate_rhyme(form)
						-- NOTE(review): guard in case generate_rhyme yields nil;
						-- a nil key would raise "table index is nil" below.
						if rhyme ~= nil and not found_rhymes[rhyme] then
							found_rhymes[rhyme] = true
							table.insert(rhymes, rhyme)
						end
					end
				end
			end
		end
	end

	for _, tuple in ipairs(IPAs) do
		table.insert(results, format_IPAs(tuple, title, has_spaces))
	end

	if #rhymes > 0 then
		local sylkeys = {}
		local sylcounts = {}
		-- get all possible syllable counts from syllabifications
		for _, h in ipairs(hyphenation) do
			local hl = #h
			if hl > 0 and not sylkeys[hl] then
				table.insert(sylcounts, hl)
				sylkeys[hl] = true
			end
		end
		local rhymeobjs = {}
		for _, rhyme in ipairs(rhymes) do
			table.insert(rhymeobjs, { rhyme = rhyme })
		end
		table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
			{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }))
	end

	if #hyphenation > 0 then
		local hyphs = {}
		for _, h in ipairs(hyphenation) do
			table.insert(hyphs, { ["hyph"] = h })
		end
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphs, caption = "Hyphenation" }))
	end

	return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end

--- <<< INTERFACE END >>> ---

return export