-- Module:User:Surjection/urj-fin-pro-IPA

-- This is a toy not meant for serious use.

-- Table returned by this module; public functions are attached to it below.
local export = {}
-- Language code handed to Module:languages when formatting the output.
local langcode = "urj-fin-pro"

-- Length and stress marks used in the generated transcriptions.
local LONG = "ː"
local SEMILONG = "ˑ"
local PRIMARY_STRESS = "ˈ"
local SECONDARY_STRESS = "ˌ"
-- Mark used for automatically placed stress; same character as SECONDARY_STRESS.
local AUTO_STRESS = "ˌ"

-- Combining diacritics, built from their code points.
local NONSYLLABIC = mw.ustring.char(0x032F)
local UNRELEASED = mw.ustring.char(0x031A)
local CENTRAL = mw.ustring.char(0x031E)

-- Pattern for one character followed by any number of combining marks
-- from the range U+0300..U+036F.
local GRAPHEME = string.format(
	".[%s-%s]*",
	mw.ustring.char(0x0300),
	mw.ustring.char(0x036F)
)

-- Orthographic vowel -> IPA symbol.
local vowels = {
	a = "ɑ",
	e = "e",
	o = "o",
	u = "u",
	["ä"] = "æ",
	["ë"] = "ɤ",
	["ö"] = "ø",
	["ü"] = "y",
	i = "i",
}

-- Orthographic consonant -> IPA symbol.
local consonants = {
	k = "k",
	p = "p",
	t = "t",
	g = "ɣ",
	b = "β",
	d = "ð",
	s = "s",
	h = "h",
	v = "ʋ",
	r = "r",
	c = "ʦ",
	j = "j",
	l = "l",
	m = "m",
	n = "n",
}

-- Every IPA symbol from the two tables above, joined into one string each so
-- they can be spliced into "[...]" character classes. The order of the symbols
-- follows pairs() and is therefore unspecified.
local IPA_VOWELS, IPA_CONSONANTS
do
	local function joined_values(map)
		local parts = {}
		for _, symbol in pairs(map) do
			parts[#parts + 1] = symbol
		end
		return table.concat(parts)
	end

	IPA_VOWELS = joined_values(vowels)
	IPA_CONSONANTS = joined_values(consonants)
end

-- Inserts automatic secondary stress marks into an IPA string that already
-- carries its explicit stress marks.
--
-- The string is split into graphemes (GRAPHEME). A grapheme counts as a vowel
-- only when it is exactly one bare symbol from IPA_VOWELS, so a vowel carrying
-- a combining mark (e.g. NONSYLLABIC) is not counted. The vowel immediately
-- preceding an explicit stress mark, and the last vowel of the string, never
-- receive an automatic mark.
--
-- @param marked string: transcription with explicit stress marks
-- @return string: the same transcription with AUTO_STRESS inserted
local function place_auto_stress_IPA(marked)
	local vowel_pattern = "^[" .. IPA_VOWELS .. "]$"
	local stress_pattern = "[" .. PRIMARY_STRESS .. SECONDARY_STRESS .. "]"

	-- Pass 1: split into graphemes, classify each one, and record which
	-- vowels (by ordinal) must not be stressed.
	local phones, kinds = {}, {}
	local vowels_total = 0
	local do_not_stress = {}
	for phone in mw.ustring.gmatch(marked, GRAPHEME) do
		local index = #phones + 1
		phones[index] = phone
		if mw.ustring.match(phone, vowel_pattern) then
			kinds[index] = "vowel"
			vowels_total = vowels_total + 1
		elseif mw.ustring.match(phone, stress_pattern) then
			kinds[index] = "stress"
			-- the vowel just before an explicit stress mark stays unstressed
			do_not_stress[vowels_total] = true
		end
	end
	-- the final vowel stays unstressed
	do_not_stress[vowels_total] = true

	-- Pass 2: rebuild the string, putting AUTO_STRESS in front of an eligible
	-- vowel once two eligible vowels have been passed since the last reset.
	-- NOTE(review): an explicit mark resets the counter *before* its own
	-- vowel is counted, whereas an automatic mark resets it *at* the stressed
	-- vowel, so consecutive automatic marks end up three vowels apart.
	-- Behaviour kept as found; confirm whether that rhythm is intended.
	local out = {}
	local vowels_found = 0
	local distance = 0
	for index = 1, #phones do
		local kind = kinds[index]
		if kind == "vowel" then
			vowels_found = vowels_found + 1
			if not do_not_stress[vowels_found] then
				if distance == 2 then
					out[#out + 1] = AUTO_STRESS
					distance = 0
				else
					distance = distance + 1
				end
			end
		elseif kind == "stress" then
			distance = 0
		end
		out[#out + 1] = phones[index]
	end
	return table.concat(out)
end

-- Converts a form written in this module's orthography into IPA.
--
-- @param text string: the form; a leading "-" marks a suffix, a trailing "-"
--   marks a prefix, an internal "-" becomes a secondary stress mark, and each
--   space starts a new primary-stressed word.
-- @param is_phonetic truthy for the phonetic variant (keeps b/d/g and
--   semi-long stops distinct, adds unreleased final k, automatic secondary
--   stress and the CENTRAL diacritic on mid vowels); falsy for the phonemic one.
-- @return string: the transcription without enclosing slashes or brackets.
function export.convert(text, is_phonetic)
	local prefix = mw.ustring.find(text, "%-$")
	local suffix = mw.ustring.find(text, "^%-")
	if prefix then
		text = mw.ustring.gsub(text, "%-$", "")
	end
	if suffix then
		text = mw.ustring.gsub(text, "^%-", "")
	end

	-- Length. The vowel class now includes "ü": it used to list "y" in its
	-- place, so "üü" was never turned into a long vowel even though every
	-- other rule here spells that vowel "ü". "y" is kept for compatibility.
	text = mw.ustring.gsub(text, "([aeiouäëöüy])%1", "%1" .. LONG)
	text = mw.ustring.gsub(text, "([kptc])'", "%1" .. SEMILONG)
	text = mw.ustring.gsub(text, "([kptcnmlrs])%1", "%1" .. LONG)

	if not is_phonetic then
		-- weak and semi-long consonants were probably allophones
		text = mw.ustring.gsub(text, "[bdg]", { ["b"] = "p", ["d"] = "t", ["g"] = "k" })
		text = mw.ustring.gsub(text, SEMILONG, LONG)
	end

	-- Second elements of diphthongs are marked non-syllabic.
	-- NOTE(review): the first class lists "u" twice; possibly "ö" or "ü" was
	-- meant. Left unchanged pending confirmation.
	text = mw.ustring.gsub(text, "([aeouäëu])i", "%1" .. "i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([aeëio])u", "%1" .. "u" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([äeiö])ü", "%1" .. "ü" .. NONSYLLABIC)

	text = mw.ustring.gsub(text, "n([kg])", "ŋ%1")
	text = mw.ustring.gsub(text, "%-", SECONDARY_STRESS)

	-- Orthography -> IPA symbols.
	text = mw.ustring.gsub(text, "[aeiouäëöü]", vowels)
	text = mw.ustring.gsub(text, "[bcdghjklmnprstv]", consonants)

	if is_phonetic then
		-- weak-grade consonants surface as stops after a homorganic nasal
		text = mw.ustring.gsub(text, "mβ", "mb")
		text = mw.ustring.gsub(text, "nð", "nd")
		text = mw.ustring.gsub(text, "ŋɣ", "ŋg")
		text = mw.ustring.gsub(text, "k$", "k" .. UNRELEASED)
	end

	if suffix then
		text = "-" .. text
	else
		text = PRIMARY_STRESS .. text
	end
	text = mw.ustring.gsub(text, " ", " " .. PRIMARY_STRESS)
	if prefix then
		text = text .. "-"
	end

	if is_phonetic then
		-- automatic secondary stress
		-- (A former branch here tested for a leading AUTO_STRESS and then did
		-- arithmetic on the undefined global `last_vowel`. The text always
		-- starts with PRIMARY_STRESS or "-", so it could never run; removed.)
		text = place_auto_stress_IPA(text)
		-- move a stress mark in front of the single consonant preceding it
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "])" .. AUTO_STRESS, AUTO_STRESS .. "%1")
		text = mw.ustring.gsub(text, "([eoɤø])", "%1" .. CENTRAL)
	end

	-- Final symbol normalisation.
	text = mw.ustring.gsub(text, "[gʦ]", { ["g"] = "ɡ", ["ʦ"] = "t͡s" })
	return text
end

-- When true, convert_UPA moves each stress mark behind the following vowel
-- group and replaces "ˈ" / "ˌ" with "·" / ":".
local use_UPA_stress = true

-- Diacritics used by the UPA output, built from their code points.
local UPA_SHORT = mw.ustring.char(0x0306)
local UPA_LONG = mw.ustring.char(0x0304)
local UPA_DIPHTHONG = mw.ustring.char(0x0361)
local UPA_UNRELEASED = mw.ustring.char(0x02FE)

-- Orthographic vowels whose UPA symbol differs from the orthography.
local UPA_vowels = {
	["ë"] = "e̮",
}

-- Orthographic consonants whose UPA symbol differs from the orthography.
local UPA_consonants = {
	g = "γ",
	b = "β",
	d = "δ",
	c = "ʦ",
}

-- Inserts automatic secondary stress marks into a UPA string that already
-- carries its explicit stress marks.
--
-- The string is split into graphemes (GRAPHEME). A grapheme counts as a vowel
-- when it is a base vowel letter optionally followed by the breve-below used
-- for "ë" and/or the UPA_LONG macron. A vowel carrying UPA_DIPHTHONG is not
-- counted, so a diphthong contributes one vowel (its second element). The
-- vowel immediately preceding an explicit stress mark, and the last vowel of
-- the string, never receive an automatic mark.
--
-- @param marked string: transcription with explicit stress marks
-- @return string: the same transcription with AUTO_STRESS inserted
local function place_auto_stress_UPA(marked)
	-- The macron is accepted here because long vowels are written as
	-- vowel + UPA_LONG; without it they were not counted as vowels at all,
	-- unlike in the IPA conversion where the length mark is a separate phone.
	local vowel_pattern = "^[aeiouäöü]̮?" .. UPA_LONG .. "?$"
	local stress_pattern = "[" .. PRIMARY_STRESS .. SECONDARY_STRESS .. "]"

	-- Pass 1: split into graphemes, classify each one, and record which
	-- vowels (by ordinal) must not be stressed.
	local phones, kinds = {}, {}
	local vowels_total = 0
	local do_not_stress = {}
	for phone in mw.ustring.gmatch(marked, GRAPHEME) do
		local index = #phones + 1
		phones[index] = phone
		if mw.ustring.match(phone, vowel_pattern) then
			kinds[index] = "vowel"
			vowels_total = vowels_total + 1
		elseif mw.ustring.match(phone, stress_pattern) then
			kinds[index] = "stress"
			-- the vowel just before an explicit stress mark stays unstressed
			do_not_stress[vowels_total] = true
		end
	end
	-- the final vowel stays unstressed
	do_not_stress[vowels_total] = true

	-- Pass 2: rebuild the string, putting AUTO_STRESS in front of an eligible
	-- vowel once two eligible vowels have been passed since the last reset.
	-- NOTE(review): consecutive automatic marks end up three vowels apart
	-- because the counter restarts at 0 on the stressed vowel itself.
	-- Behaviour kept as found; confirm whether that rhythm is intended.
	local out = {}
	local vowels_found = 0
	local distance = 0
	for index = 1, #phones do
		local kind = kinds[index]
		if kind == "vowel" then
			vowels_found = vowels_found + 1
			if not do_not_stress[vowels_found] then
				if distance == 2 then
					out[#out + 1] = AUTO_STRESS
					distance = 0
				else
					distance = distance + 1
				end
			end
		elseif kind == "stress" then
			distance = 0
		end
		out[#out + 1] = phones[index]
	end
	return table.concat(out)
end

-- Converts a form written in this module's orthography into a UPA-style
-- transcription.
--
-- @param text string: the form; a leading "-" marks a suffix, a trailing "-"
--   marks a prefix, an internal "-" becomes a secondary stress mark, and each
--   space starts a new primary-stressed word.
-- @param is_phonetic truthy for the phonetic variant (keeps b/d/g distinct,
--   adds unreleased final k and automatic secondary stress); falsy for the
--   phonemic one.
-- @return string: the transcription; when use_UPA_stress is set, stress marks
--   follow the vowel group and are written "·" (primary) and ":" (secondary).
function export.convert_UPA(text, is_phonetic)
	local prefix = mw.ustring.find(text, "%-$")
	local suffix = mw.ustring.find(text, "^%-")
	if prefix then
		text = mw.ustring.gsub(text, "%-$", "")
	end
	if suffix then
		text = mw.ustring.gsub(text, "^%-", "")
	end

	-- Length. The vowel class now includes "ü": it used to list "y" in its
	-- place, so "üü" was never turned into a long vowel even though every
	-- other rule here spells that vowel "ü". "y" is kept for compatibility.
	text = mw.ustring.gsub(text, "([aeiouäëöüy])%1", "%1" .. UPA_LONG)
	-- semi-long stop: short-marked copy followed by the plain consonant
	text = mw.ustring.gsub(text, "([kptc])'", "%1" .. UPA_SHORT .. "%1")

	if not is_phonetic then
		-- weak and semi-long consonants were probably allophones
		text = mw.ustring.gsub(text, "[bdg]", { ["b"] = "p", ["d"] = "t", ["g"] = "k" })
		-- NOTE(review): nothing above inserts SEMILONG in this function, so
		-- this only affects a literal SEMILONG in the input; kept as found.
		text = mw.ustring.gsub(text, SEMILONG, LONG)
	end

	-- Diphthongs: tie mark on the first element.
	-- NOTE(review): the first class lists "u" twice; possibly "ö" or "ü" was
	-- meant. Left unchanged pending confirmation.
	text = mw.ustring.gsub(text, "([aeouäëu])i", "%1" .. UPA_DIPHTHONG .. "i")
	text = mw.ustring.gsub(text, "([aeëio])u", "%1" .. UPA_DIPHTHONG .. "u")
	text = mw.ustring.gsub(text, "([äeiö])ü", "%1" .. UPA_DIPHTHONG .. "ü")

	text = mw.ustring.gsub(text, "n([kg])", "ŋ%1")
	text = mw.ustring.gsub(text, "%-", SECONDARY_STRESS)

	-- Only the symbols that differ from the orthography are replaced.
	text = mw.ustring.gsub(text, "[ë]", UPA_vowels)
	text = mw.ustring.gsub(text, "[bcdg]", UPA_consonants)

	if is_phonetic then
		-- weak-grade consonants surface as stops after a homorganic nasal
		text = mw.ustring.gsub(text, "mβ", "mb")
		text = mw.ustring.gsub(text, "nδ", "nd")
		text = mw.ustring.gsub(text, "ŋγ", "ŋg")
		text = mw.ustring.gsub(text, "k$", "k" .. UPA_UNRELEASED)
	end

	if suffix then
		text = "-" .. text
	else
		text = PRIMARY_STRESS .. text
	end
	text = mw.ustring.gsub(text, " ", " " .. PRIMARY_STRESS)
	if prefix then
		text = text .. "-"
	end

	if is_phonetic then
		-- automatic secondary stress
		-- (A former branch here tested for a leading AUTO_STRESS and then did
		-- arithmetic on the undefined global `last_vowel`. The text always
		-- starts with PRIMARY_STRESS or "-", so it could never run; removed.)
		text = place_auto_stress_UPA(text)
		-- move a stress mark in front of the single consonant preceding it
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "γβδ])" .. AUTO_STRESS, AUTO_STRESS .. "%1")
	end

	-- Final symbol normalisation.
	text = mw.ustring.gsub(text, "[gʦ]", { ["g"] = "ɡ", ["ʦ"] = "t͡s" })

	if use_UPA_stress then
		-- Move each stress mark to just after the next run of vowel letters
		-- and combining marks, then swap in the UPA stress symbols.
		text = mw.ustring.gsub(
			text,
			"([ˈˌ])(.-)([aeiouäöü" .. mw.ustring.char(0x0300) .. "-" .. mw.ustring.char(0x036F) .. "]+)",
			"%2%3%1"
		)
		text = mw.ustring.gsub(text, "ˈ", "·")
		text = mw.ustring.gsub(text, "ˌ", ":")
	end
	return text
end

-- Template entry point: renders the phonemic and phonetic IPA transcriptions
-- of a form through Module:IPA.
--
-- The form is taken from the first positional argument of the parent frame.
-- When that argument is absent, the current page title is used with a leading
-- "Proto-Finnic/" removed. If `frame` is not a table, the page title text is
-- used unmodified.
--
-- @param frame the Scribunto frame object passed by #invoke
-- @return whatever Module:IPA's format_IPA_full returns for the two items
function export.show(frame)
	-- getCurrentTitle and getParent are functions and must be called; the
	-- call parentheses were missing, which made this block fail to parse.
	local title = mw.title.getCurrentTitle().text
	if type(frame) == "table" then
		-- extra parentheses keep only gsub's first result (the string)
		title = frame:getParent().args[1]
			or (mw.ustring.gsub(title, "^Proto-Finnic/", ""))
	end

	local phonemic = export.convert(title, false)
	local phonetic = export.convert(title, true)

	return require("Module:IPA").format_IPA_full {
		lang = require("Module:languages").getByCode(langcode),
		items = {
			{ pron = "*/" .. phonemic .. "/" },
			{ pron = "*[" .. phonetic .. "]" },
		},
	}
end

return export