-- Module:se-IPA

local export = {}

local lang = require("Module:languages").getByCode("se")

local m_str_utils = require("Module:string utilities") local find = m_str_utils.find local gmatch = m_str_utils.gmatch local gsub = m_str_utils.gsub local len = m_str_utils.len local lower = m_str_utils.lower local sub = m_str_utils.sub

local u = require("Module:string/char") local BREVE = u(0x0306)

-- Lookup from spelling units to IPA strings. get_phoneme matches the longest
-- key at the start of a string; to_IPA looks units up directly.
local letters_phonemes = {
	-- Length mark
	["ˈ"] = "ː",
	
	-- Single letters and digraphs
	["a"] = "a", ["ạ"] = "a", ["á"] = "aː", ["áˈ"] = "aˑ", ["á" .. BREVE ] = "a",
	["b"] = "b", ["c"] = "t͡s", ["č"] = "t͡ʃ", ["d"] = "d", ["đ"] = "ð",
	["ẹ"] = "e", ["ē"] = "eː", ["g"] = "ɡ", ["ī"] = "iː", ["kh"] = "kʰ",
	["ọ"] = "o", ["ō"] = "oː", ["ph"] = "pʰ", ["š"] = "ʃ", ["th"] = "tʰ",
	["ŧ"] = "θ", ["ū"] = "uː", ["z"] = "d͡z", ["ž"] = "d͡ʒ",
	
	-- Diphthongs: plain, followed by ˈ (see shift), and with breve (see shorten)
	["ea"] = "ea̯", ["eaˈ"] = "e̯a", ["ea" .. BREVE] = "ĕă̯",
	["ie"] = "ie̯", ["ieˈ"] = "i̯e", ["ie" .. BREVE] = "ĭĕ̯",
	["oa"] = "oɑ̯", ["oaˈ"] = "o̯ɑ", ["oa" .. BREVE] = "ŏɑ̯̆",
	["uo"] = "uo̯", ["uoˈ"] = "u̯o", ["uo" .. BREVE] = "ŭŏ̯",
	
	-- Consonant + j digraphs
	["dj"] = "ɟ", ["lj"] = "ʎ", ["nj"] = "ɲ",
	
	-- h + sonorant: single, doubled, and doubled with length mark
	["hj"] = "j̥", ["hjj"] = "j̥.j̥", ["hjˈj"] = "j̥ː.j̥",
	["hl"] = "l̥", ["hll"] = "l̥.l̥", ["hlˈl"] = "l̥ː.l̥",
	["hm"] = "m̥", ["hmm"] = "m̥.m̥", ["hmˈm"] = "m̥ː.m̥",
	["hn"] = "n̥", ["hnn"] = "n̥.n̥", ["hnˈn"] = "n̥ː.n̥",
	["hr"] = "r̥", ["hrr"] = "r̥.r̥", ["hrˈr"] = "r̥ː.r̥",
}

--	This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
for letter in gmatch("efhijklmnŋoprstuv", ".") do
	letters_phonemes[letter] = letter
end

-- Preaspirated
-- Must run after the loop above: it reads letters_phonemes["p"], ["t"] and ["k"],
-- which only exist once the identity mappings have been added.
for letter in gmatch("ptcčk", ".") do
	letters_phonemes["h" .. letter] = "h" .. letters_phonemes[letter]
	letters_phonemes["h" .. letter .. letter] = "hː" .. letters_phonemes[letter]
end

--- Split the longest recognised spelling unit off the front of a string.
-- @param remainder  the part of the word that has not been consumed yet
-- @return the matched key of letters_phonemes, or the first character of
--         `remainder` when no key matches
-- @return what is left of `remainder` after removing that unit
local function get_phoneme(remainder)
	-- Find the longest string of letters that matches a recognised sequence in the list
	local longestmatch = ""
	
	-- Iteration order of pairs() is irrelevant here: two different keys of the
	-- same length cannot both be a prefix of the same string.
	for letter, _ in pairs(letters_phonemes) do
		if sub(remainder, 1, len(letter)) == letter and len(letter) > len(longestmatch) then
			longestmatch = letter
		end
	end
	
	if len(longestmatch) > 0 then
		return longestmatch, sub(remainder, len(longestmatch) + 1)
	else
		-- Unknown character: consume exactly one character so callers always make progress
		return sub(remainder, 1, 1), sub(remainder, 2)
	end
end

--- Read one syllable (leading consonants plus at most one vowel unit) from the front of a string.
-- @param remainder  the part of the word that has not been consumed yet
-- @return a table {cons = {...}, vowel = "..."}; `cons` is an array of consonant
--         units that also carries a `quantity` field (1, 2 or 3); `vowel` is ""
--         when the string ends in consonants
-- @return the unconsumed rest of the string, or nil when nothing is left
local function get_syllable(remainder)
	local syll = {cons = {}, vowel = ""}
	local cons
	
	-- Consume consonant units for as long as the string starts with a non-vowel
	while find(remainder, "^([^aạáeẹēiīoọōuū]+)") do
		cons, remainder = get_phoneme(remainder)
		
		if cons == "nˈnj" then
			require("Module:debug").track("se-IPA/nnj")
		end
		
		if cons == "ˈ" then
			-- An explicit length mark forces quantity 3 and is not stored as a consonant
			syll.cons.quantity = 3
		else
			if cons == "dj" or cons == "lj" then
				if syll.cons[#syll.cons] == string.sub(cons, 1, 1) then
					-- "ddj"/"llj": the preceding d/l becomes part of the digraph, quantity 3
					syll.cons[#syll.cons] = cons
					syll.cons.quantity = 3
				else
					table.insert(syll.cons, cons)
				end
			elseif cons == "nj" and syll.cons[#syll.cons] == "n" then
				-- "nnj": the preceding n becomes part of the digraph
				syll.cons[#syll.cons] = "nj"
			end
			
			-- Runs for every consonant, including the dj/lj/nj cases above, so those
			-- digraphs end up stored twice in a row.
			table.insert(syll.cons, cons)
		end
	end
	
	if find(remainder, "^([aạáeẹēiīoọōuū]+)") then
		syll.vowel, remainder = get_phoneme(remainder)
	end
	
	-- Signal the end of the word to the caller's `while remainder do` loop
	if remainder == "" then
		remainder = nil
	end
	
	-- Determine consonant quantity
	if not syll.cons.quantity then
		if not syll.cons[2] then
			syll.cons.quantity = 1
		else
			if find(syll.cons[#syll.cons], "(.)%1$")
				or (syll.cons[#syll.cons] == syll.cons[#syll.cons - 1] and not find(syll.cons[#syll.cons], "^[bdgzž]$"))
				or (syll.cons[#syll.cons - 1] == "p" and syll.cons[#syll.cons] == "m")
				or (syll.cons[#syll.cons - 1] == "t" and syll.cons[#syll.cons] == "n")
				or (syll.cons[#syll.cons - 1] == "t" and syll.cons[#syll.cons] == "nj")
				or (syll.cons[#syll.cons - 1] == "k" and syll.cons[#syll.cons] == "ŋ") then
				syll.cons.quantity = 2
			else
				syll.cons.quantity = 3
			end
		end
	end
	
	return syll, remainder
end

-- Split the word into syllables of C(C)V shape
-- @param remainder  the word to analyse
-- @return an array of syllable tables as produced by get_syllable, plus a
--         `count` field holding the number of syllables that contain a vowel
local function split_syllables(remainder)
	remainder = lower(remainder)
	-- Treat i after another vowel letter as the consonant j
	remainder = gsub(remainder, "([aáeēiīoōuū])i", "%1j")
	
	local syllables = {}
	local syll
	
	-- get_syllable returns nil as the remainder once the word is exhausted
	while remainder do
		syll, remainder = get_syllable(remainder)
		table.insert(syllables, syll)
	end
	
	syllables.count = #syllables
	
	-- A trailing consonant-only chunk is stored as an entry but is not counted as a syllable
	if syllables[#syllables].vowel == "" then
		syllables.count = syllables.count - 1
	end
	
	return syllables
end

--- Produce the shortened spelling of a vowel unit.
-- A lone ē/ī/ō/ū is replaced by e/i/o/u; every occurrence of á, ea, ie, oa
-- or uo gets the combining breve appended.
-- @param vowel  vowel unit to transform
-- @return the transformed vowel unit
local function shorten(vowel)
	local plain_for_long = {["ē"] = "e", ["ī"] = "i", ["ō"] = "o", ["ū"] = "u"}
	local breve_targets = {"á", "ea", "ie", "oa", "uo"}
	
	local result = gsub(vowel, "^[ēīōū]$", plain_for_long)
	
	for idx = 1, #breve_targets do
		local target = breve_targets[idx]
		result = gsub(result, target, target .. BREVE)
	end
	
	return result
end

--- Append the mark "ˈ" after every occurrence of á, ea, ie, oa or uo.
-- @param vowel  vowel unit to transform
-- @return the transformed vowel unit
local function shift(vowel)
	local shiftable = {"á", "ea", "ie", "oa", "uo"}
	local result = vowel
	
	for idx = 1, #shiftable do
		local target = shiftable[idx]
		result = gsub(result, target, target .. "ˈ")
	end
	
	return result
end

--- Produce the lengthened spelling of a vowel unit.
-- A lone e/i/o/u is replaced by ē/ī/ō/ū, and every combining breve is removed.
-- @param vowel  vowel unit to transform
-- @return the transformed vowel unit
local function lengthen(vowel)
	local long_for_plain = {["e"] = "ē", ["i"] = "ī", ["o"] = "ō", ["u"] = "ū"}
	
	local with_macron = gsub(vowel, "^[eiou]$", long_for_plain)
	local without_breve = gsub(with_macron, BREVE, "")
	
	return without_breve
end

-- Determine whether long vowels should be shortened before certain consonants
-- @param syll      the syllable whose vowel is being considered
-- @param nextsyll  the following syllable; its consonants and vowel decide the outcome
-- @return true when one of the conditions below applies, false otherwise
local function should_shorten(syll, nextsyll)
	if nextsyll.cons[1] then
		if find(nextsyll.cons[1], "^h([ptcčk])%1$") then
			-- Long preaspirate
			return true
		elseif find(nextsyll.cons[1], "^([đflmnŋrsšv])ˈ%1$") then
			-- Overlong vowel
			return true
		elseif (syll.vowel == "ie" or syll.vowel == "uo") and find(nextsyll.vowel, "^[áīū]$") then
			if find(nextsyll.cons[1], "^([bcčdgkptzž])%1$") then
				-- Geminate stop
				return true
			elseif nextsyll.cons[1] == "pm" or nextsyll.cons[1] == "tn" or nextsyll.cons[1] == "tnj" or nextsyll.cons[1] == "kŋ" then
				-- Glottalised nasal
				return true
			elseif nextsyll.cons[2] and not find(nextsyll.cons[2], "^h[ptcčk]$") then
				-- Clusters, except when the second element is a strong-grade preaspirate
				return true
			end
		elseif (syll.vowel == "ie" or syll.vowel == "uo") and nextsyll.vowel == "a" then
			if find(nextsyll.cons[1], "^([bdgzž])%1$") then
				-- Geminate voiced stop
				return true
			elseif nextsyll.cons[2]
				and not find(nextsyll.cons[2], "(.)%1$")
				and not find(nextsyll.cons[2], "^h[ptcčk]$")
				and not (nextsyll.cons[2] == "pm" or nextsyll.cons[2] == "tn" or nextsyll.cons[2] == "tnj" or nextsyll.cons[2] == "kŋ") then
				-- Clusters, except when the second element is long, or a preaspirate, or a preglottalised nasal
				return true
			end
		end
	end
	
	return false
end

--- Rewrite the syllable list in place from orthographic units into the
-- normalised units that letters_phonemes understands.
-- @param syllables  array produced by split_syllables (needs its `count` field);
--                   the syllable tables inside it are modified directly
local function convert_spelling(syllables)
	-- Position of the current syllable within its foot (1, 2, or 3)
	local foot = 0
	
	for i, syll in ipairs(syllables) do
		if syll.vowel == "" then
			-- Trailing consonant-only chunk: adjust a final t/d and stop
			if syll.cons[#syll.cons] == "t" then
				syll.cons[#syll.cons] = "ht"
			elseif syll.cons[#syll.cons] == "d" then
				syll.cons[#syll.cons] = "t"
			end
			
			break
		end
		
		local nextsyll = syllables[i + 1] or {cons = {}, vowel = ""}
		
		foot = foot + 1
		
		-- A third syllable starts a new foot unless it is the last syllable of the word
		if foot == 3 and i ~= syllables.count then
			foot = 1
		end
		
		-- Make i and u long in even syllables
		if foot == 2 and (syll.vowel == "i" or syll.vowel == "u") and nextsyll.cons[1] ~= "j" then
			syll.vowel = lengthen(syll.vowel)
		end
		
		if #syll.cons == 1 then
			if foot == 1 then
				-- Postaspiration
				syll.cons[1] = gsub(syll.cons[1], "^([kpt])$", "%1h")
			elseif foot == 3 then
				-- d is đ between two unstressed vowels
				syll.cons[1] = gsub(syll.cons[1], "d", "đ")
			end
		elseif #syll.cons > 1 then
			if syll.cons[#syll.cons] == syll.cons[#syll.cons - 1] and syll.cons[#syll.cons - 2] and find(syll.cons[#syll.cons - 2], "[cčkpsšt]$") then
				-- Ungeminate last consonant after voiceless
				syll.cons[#syll.cons] = nil
			elseif find(syll.cons[#syll.cons], "[cčkpsšt]$") then
				-- Ungeminate last consonant after voiceless
				syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "(.)%1$", "%1")
			else
				-- Preaspirate final voiceless consonant after voiced
				syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "^([cčkpt])$", "h%1")
				syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "^([cčkpt])%1$", "h%1%1")
			end
			
			-- Devoice final geminates
			if syll.cons[#syll.cons] == "bb" then
				syll.cons[#syll.cons] = "pp"
			elseif syll.cons[#syll.cons] == "dd" then
				syll.cons[#syll.cons] = "tt"
			elseif syll.cons[#syll.cons] == "gg" then
				syll.cons[#syll.cons] = "kk"
			elseif syll.cons[#syll.cons] == "zz" then
				syll.cons[#syll.cons] = "cc"
			elseif syll.cons[#syll.cons] == "žž" then
				syll.cons[#syll.cons] = "čč"
			end
		end
		
		-- Devoice remaining single voiced consonants
		-- (skipped when the consonant is the second half of a double, or when it
		-- is first and followed by its own double or matching nasal)
		for j, cons in ipairs(syll.cons) do
			if cons == "b" and syll.cons[j - 1] ~= "b" and (j ~= 1 or syll.cons[2] ~= "b" and syll.cons[2] ~= "m") then
				syll.cons[j] = "p"
			elseif cons == "d" and syll.cons[j - 1] ~= "d" and (j ~= 1 or syll.cons[2] ~= "d" and syll.cons[2] ~= "n" and syll.cons[2] ~= "nj") then
				syll.cons[j] = "t"
			elseif cons == "g" and syll.cons[j - 1] ~= "g" and (j ~= 1 or syll.cons[2] ~= "g" and syll.cons[2] ~= "ŋ") then
				syll.cons[j] = "k"
			elseif cons == "z" and syll.cons[j - 1] ~= "z" and (j ~= 1 or syll.cons[2] ~= "z") then
				syll.cons[j] = "c"
			elseif cons == "ž" and syll.cons[j - 1] ~= "ž" and (j ~= 1 or syll.cons[2] ~= "ž") then
				syll.cons[j] = "č"
			end
		end
		
		-- Regularise divergent spellings in clusters
		--if #syll.cons > 2 then
		--	error("Clusters with more than 2 consonants are not yet supported.")
		--end
		
		if foot == 2 and syll.cons.quantity == 3 then
			-- Lengthen initial sonorant in quantity 3
			table.insert(syll.cons, 2, "ˈ")
		end
		
		-- Secondary stress
		if foot == 1 and i > 1 then
			if #syll.cons == 1 then
				table.insert(syll.cons, 1, "ˌ")
			elseif #syll.cons == 2 then
				table.insert(syll.cons, 2, "ˌ")
			end
		end
	end
	
	-- This needs to be a separate pass because otherwise unstressed ī and ū won't have been lengthened yet
	for i, syll in ipairs(syllables) do
		local nextsyll = syllables[i + 1] or {cons = {}, vowel = ""}
		
		-- Vowel shortening is currently disabled; only the shift before ạ/ẹ/ọ is applied.
	--	if should_shorten(syll, nextsyll) then
	--		syll.vowel = shorten(syll.vowel)
		if find(nextsyll.vowel, "^[ạẹọ]$") then
			syll.vowel = shift(syll.vowel)
		end
	end
end

-- Dialect-specific conversions
-- @param syllables  array of syllable tables; modified in place
local function dialect(syllables)
	for i, syll in ipairs(syllables) do
		-- Western Finnmark dialect
		if syll.cons[1] then
			-- A syllable-final ŋ becomes nj, and the consonant before it is adjusted to match
			if syll.cons[#syll.cons] == "ŋ" then
				syll.cons[#syll.cons] = "nj"
				
				if syll.cons[#syll.cons - 1] == "ˈ" then
					-- Skip over the length mark to reach the preceding consonant
					if syll.cons[#syll.cons - 2] then
						syll.cons[#syll.cons - 2] = gsub(syll.cons[#syll.cons - 2], "^[gkŋ]$", {["g"] = "d", ["k"] = "t", ["ŋ"] = "nj"})
					end
				else
					if syll.cons[#syll.cons - 1] then
						-- The pattern only matches g/k here, so the ŋ entry of the table is never used
						syll.cons[#syll.cons - 1] = gsub(syll.cons[#syll.cons - 1], "^[gk]$", {["g"] = "d", ["k"] = "t", ["ŋ"] = "nj"})
					end
				end
			end
		end
	end
end

-- Convert word to IPA
-- @param syllables  array of syllable tables; each entry is replaced in place
--                   by its IPA string, so the table cannot be reused afterwards
-- @return the concatenated IPA string, prefixed with the primary stress mark
local function to_IPA(syllables)
	for i, syll in ipairs(syllables) do
		for j, cons in ipairs(syll.cons) do
			if syll.vowel == "" and cons == "ht" then
				syll.cons[j] = "h(t)"
			elseif letters_phonemes[cons] then
				-- Drop the final part after the tie bar
				-- (only when the same unit follows, directly or after a length mark)
				if string.find(letters_phonemes[cons], "͡", nil, true) and syll.cons[j] == syll.cons[j + (syll.cons[j + 1] == "ˈ" and 2 or 1)] then
					syll.cons[j] = gsub(letters_phonemes[cons], "͡.*$", "")
				else
					syll.cons[j] = letters_phonemes[cons]
				end
			end
		end
		
		-- Units without a table entry are passed through unchanged
		syll.vowel = letters_phonemes[syll.vowel] or syll.vowel
		
		syllables[i] = table.concat(syll.cons) .. syll.vowel
	end
	
	return "ˈ" .. table.concat(syllables)
end

--- Template entry point: build the formatted pronunciation line for a word.
-- The word is taken from the first argument of the parent frame and defaults
-- to the title of the current page.
-- @param frame  the Scribunto frame object passed by #invoke
-- @return the "Kautokeino" accent qualifier, the formatted IPA, and a
--         "<language name> N-syllable words" category, concatenated
function export.IPA(frame)
	local params = {
		[1] = {default = mw.title.getCurrentTitle().text},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	local syllables = split_syllables(args[1])
	convert_spelling(syllables)
	dialect(syllables)
	
	-- Read the count before to_IPA runs: to_IPA overwrites the array entries
	-- of `syllables` with strings.
	local syllable_count = syllables.count
	
	-- NOTE(review): the value of `items` was missing from the reviewed text. It is
	-- reconstructed as a single transcription built by to_IPA; the enclosing
	-- slashes are an assumption — confirm against Module:IPA.
	local items = {{pron = "/" .. to_IPA(syllables) .. "/"}}
	
	return
		require("Module:accent qualifier").format_qualifiers(lang, {"Kautokeino"}) .. " " ..
		require("Module:IPA").format_IPA_full { lang = lang, items = items } ..
		require("Module:utilities").format_categories({lang:getCanonicalName() .. " " .. tostring(syllable_count) .. "-syllable words"}, lang)
end

return export