Module:User:Michael Ly/cdo fq-pronunciation test

-- Table of functions returned by this module.
local export = {}

-- String helpers from Scribunto's mw.ustring library, cached as file-level
-- locals so the rest of the module can call them by their short names.
local gsub, sub, len, match, lower =
	mw.ustring.gsub,
	mw.ustring.sub,
	mw.ustring.len,
	mw.ustring.match,
	mw.ustring.lower

-- Maps a toned vowel letter to its base letter followed by the combining tone
-- mark. Applied character by character (gsub with "." as the pattern) so that
-- the tone mark can afterwards be matched or stripped on its own.
local split_tone = {
	-- breve
	["ă"] = "a" .. "̆",
	["ĕ"] = "e" .. "̆",
	["ĭ"] = "i" .. "̆",
	["ŏ"] = "o" .. "̆",
	["ŭ"] = "u" .. "̆",
	-- macron
	["ā"] = "a" .. "̄",
	["ē"] = "e" .. "̄",
	["ī"] = "i" .. "̄",
	["ō"] = "o" .. "̄",
	["ū"] = "u" .. "̄",
	-- acute
	["á"] = "a" .. "́",
	["é"] = "e" .. "́",
	["í"] = "i" .. "́",
	["ó"] = "o" .. "́",
	["ú"] = "u" .. "́",
	-- grave
	["à"] = "a" .. "̀",
	["è"] = "e" .. "̀",
	["ì"] = "i" .. "̀",
	["ò"] = "o" .. "̀",
	["ù"] = "u" .. "̀",
	-- circumflex
	["â"] = "a" .. "̂",
	["ê"] = "e" .. "̂",
	["î"] = "i" .. "̂",
	["ô"] = "o" .. "̂",
	["û"] = "u" .. "̂",
}

-- Tone label keyed by "<combining tone mark><stop coda>", where either part
-- may be empty. Values are numbers except for the two "4A"/"4B" labels.
local tone_from_mark = {
	-- no mark, or breve without a stop coda
	[""] = 1,
	["̆"] = 1,
	-- breve followed by a stop coda
	["̆k"] = 7,
	["̆h"] = 7,
	-- macron
	["̄"] = 2,
	-- acute, alone and with a stop coda
	["́"] = 3,
	["́k"] = "4A",
	["́h"] = "4B",
	-- grave
	["̀"] = 5,
	-- circumflex
	["̂"] = 6,
}

-- IPA for each romanised initial consonant ("" = no initial), in three forms.
-- export.ipa picks the form per syllable:
--   unchanged - first syllable, after a syllable whose romanisation ends in
--               "k", or when lenition was blocked with "*"
--   nasal     - the preceding syllable's IPA final ends in a nasal
--   lenited   - every other non-initial position
local initial_ipa = {
	b  = { unchanged = "p",    lenited = "(p-)β",    nasal = "(p-)m" },
	p  = { unchanged = "pʰ",   lenited = "(pʰ-)β",   nasal = "(pʰ-)m" },
	m  = { unchanged = "m",    lenited = "m",        nasal = "m" },
	d  = { unchanged = "t",    lenited = "(t-)l",    nasal = "(t-)n" },
	t  = { unchanged = "tʰ",   lenited = "(tʰ-)l",   nasal = "(tʰ-)n" },
	n  = { unchanged = "nˡ",   lenited = "nˡ",       nasal = "nˡ" },
	l  = { unchanged = "l̃",    lenited = "l̃",        nasal = "(l-)nˡ" },
	g  = { unchanged = "k",    lenited = "(k-)",     nasal = "(k-)ŋ" },
	k  = { unchanged = "kʰ",   lenited = "(kʰ-)",    nasal = "(kʰ-)ŋ" },
	ng = { unchanged = "ŋ",    lenited = "ŋ",        nasal = "ŋ" },
	h  = { unchanged = "h",    lenited = "(h-)",     nasal = "(h-)ŋ" },
	c  = { unchanged = "t͡s",   lenited = "(t͡s-)ʒ",   nasal = "(t͡s-)ʒ" },
	ch = { unchanged = "t͡sʰ",  lenited = "(t͡sʰ-)ʒ",  nasal = "(t͡sʰ-)ʒ" },
	s  = { unchanged = "θ",    lenited = "(θ-)l",    nasal = "(θ-)n" },

	-- zero initial
	[""] = { unchanged = "", lenited = "", nasal = "(Ø-)ŋ" },
}

-- IPA for each romanised final (rime) with tone marks removed, in two
-- variants. export.ipa uses "open" for the last syllable of a part when its
-- tone label ends in 3, 4A, 4B or 6, and "closed" everywhere else.
-- Keys ending in "2" are never typed by the user: export.ipa appends the "2"
-- to a final listed in dual_rimes when the syllable's tone contains 3, 4 or 6.
--
-- Every entry sits on its own line so that the trailing "see ..." comments
-- cannot comment out the entries that follow them.
local final_ipa = {
	["a"]     = { closed = "a",   open = "ɑ" },
	["a̤"]     = { closed = "ɛ",   open = "ɑ" },
	["ae̤"]    = { closed = "œ",   open = "ɔ" },
	["ae̤h"]   = { closed = "øyʔ", open = "ɔyʔ" }, -- see entry 茉莉
	["ae̤k"]   = { closed = "øyʔ", open = "ɔyʔ" },
	["ae̤ng"]  = { closed = "øyŋ", open = "ɔyŋ" },
	["ah"]    = { closed = "aʔ",  open = "ɑʔ" },
	["a̤h"]    = { closed = "ɛʔ",  open = "ɑʔ" },
	["ai"]    = { closed = "ai",  open = "ɑi" },
	["aik"]   = { closed = "ɛiʔ", open = "aiʔ" },
	["aing"]  = { closed = "ɛiŋ", open = "aiŋ" },
	["aiu"]   = { closed = "ɛu",  open = "ɑu" },
	["ak"]    = { closed = "aʔ",  open = "ɑʔ" },
	["ang"]   = { closed = "aŋ",  open = "ɑŋ" },
	["au"]    = { closed = "au",  open = "ɑu" },
	["auk"]   = { closed = "ouʔ", open = "ɑuʔ" },
	["aung"]  = { closed = "ouŋ", open = "ɑuŋ" },
	["e"]     = { closed = "i",   open = "ɛi" },
	["e̤"]     = { closed = "œ",   open = "ɔ" },
	["eh"]    = { closed = "ɛiʔ", open = "ɛiʔ" }, -- see entry 茉莉
	["e̤h"]    = { closed = "œʔ",  open = "œʔ" },
	["ek"]    = { closed = "ɛiʔ", open = "aiʔ" },
	["ek2"]   = { closed = "iʔ",  open = "ɛiʔ" },
	["e̤k"]    = { closed = "øyʔ", open = "ɔyʔ" },
	["eng"]   = { closed = "ɛiŋ", open = "aiŋ" },
	["eng2"]  = { closed = "iŋ",  open = "ɛiŋ" },
	["e̤ng"]   = { closed = "øyŋ", open = "ɔyŋ" },
	["eu"]    = { closed = "ɛu",  open = "ɑu" },
	["eu2"]   = { closed = "ieu", open = "iɛu" },
	["e̤ṳ"]    = { closed = "y",   open = "øy" },
	["e̤ṳk"]   = { closed = "yʔ",  open = "øyʔ" },
	["e̤ṳng"]  = { closed = "yŋ",  open = "øyŋ" },
	["i"]     = { closed = "i",   open = "ɛi" },
	["ia"]    = { closed = "ia",  open = "iɑ" },
	["iah"]   = { closed = "iaʔ", open = "iɑʔ" },
	["iak"]   = { closed = "iaʔ", open = "iɑʔ" },
	["iang"]  = { closed = "iaŋ", open = "iɑŋ" },
	["iau"]   = { closed = "iau", open = "iau" },
	["ie"]    = { closed = "ie",  open = "iɛ" },
	["ieh"]   = { closed = "ieʔ", open = "iɛʔ" },
	["iek"]   = { closed = "ieʔ", open = "iɛʔ" },
	["ieng"]  = { closed = "ieŋ", open = "iɛŋ" },
	["ieu"]   = { closed = "ieu", open = "iɛu" },
	["ih"]    = { closed = "iʔ",  open = "ɛiʔ" },
	["ik"]    = { closed = "iʔ",  open = "ɛiʔ" },
	["ing"]   = { closed = "iŋ",  open = "ɛiŋ" },
	["io"]    = { closed = "yo",  open = "yɔ" },
	["ioh"]   = { closed = "yoʔ", open = "yɔʔ" },
	["iok"]   = { closed = "yoʔ", open = "yɔʔ" },
	["iong"]  = { closed = "yoŋ", open = "yɔŋ" },
	["iu"]    = { closed = "ieu", open = "iɛu" },
	["ng"]    = { closed = "ŋ̍",   open = "ŋ̍" },
	["o"]     = { closed = "u",   open = "ou" },
	["o̤"]     = { closed = "o",   open = "ɔ" },
	["o̤h"]    = { closed = "oʔ",  open = "ɔʔ" },
	["oi"]    = { closed = "øy",  open = "ɔy" },
	["oi2"]   = { closed = "uoi", open = "uɔi" },
	["o̤i"]    = { closed = "øy",  open = "ɔy" },
	["ok"]    = { closed = "ouʔ", open = "ɔuʔ" },
	["ok2"]   = { closed = "uʔ",  open = "ouʔ" },
	["o̤k"]    = { closed = "oʔ",  open = "ɔʔ" }, -- see entry 汝各儂
	["ong"]   = { closed = "ouŋ", open = "ɔuŋ" },
	["ong2"]  = { closed = "uŋ",  open = "ouŋ" },
	["u"]     = { closed = "u",   open = "ou" },
	["ṳ"]     = { closed = "y",   open = "øy" },
	["ua"]    = { closed = "ua",  open = "uɑ" },
	["uah"]   = { closed = "uaʔ", open = "uɑʔ" },
	["uai"]   = { closed = "uai", open = "uɑi" },
	["uak"]   = { closed = "uaʔ", open = "uɑʔ" },
	["uang"]  = { closed = "uaŋ", open = "uɑŋ" },
	["ui"]    = { closed = "uoi", open = "uɔi" },
	["uk"]    = { closed = "uʔ",  open = "ouʔ" },
	["ṳk"]    = { closed = "yʔ",  open = "øyʔ" },
	["ung"]   = { closed = "uŋ",  open = "ouŋ" },
	["ṳng"]   = { closed = "yŋ",  open = "øyŋ" },
	["uo"]    = { closed = "uo",  open = "uɔ" },
	["uoh"]   = { closed = "uoʔ", open = "uɔʔ" },
	["uoi"]   = { closed = "uoi", open = "uɔi" },
	["uok"]   = { closed = "uoʔ", open = "uɔʔ" },
	["uong"]  = { closed = "uoŋ", open = "uɔŋ" },
}

-- Superscript rendering for each tone label, plus the punctuation that can
-- appear in a sandhi string ("-", "(" and ")"). export.ipa passes this table
-- as the replacement argument of gsub.
--
-- Every entry sits on its own line so that the trailing comments cannot
-- comment out the entries (and the closing brace) that follow them.
--
-- NOTE(review): keys 1-8 are numbers, whereas gsub hands captures over as
-- strings; confirm the lookup resolves as intended under mw.ustring.gsub.
-- NOTE(review): tone_sandhi in this file can also produce the label "9",
-- which has no entry here and would therefore be left as a plain "9".
local tone_ipa = {
	[1] = "⁵³",    -- dark level (yin-ping)
	[2] = "³²",    -- rising (shang)
	[3] = "²¹",    -- dark departing (yin-qu)
	["4A"] = "²²", -- dark entering, type A (-k)
	["4B"] = "²²", -- dark entering, type B (-h)
	[5] = "⁵⁵",    -- light level (yang-ping)
	[6] = "⁴²",    -- light departing (yang-qu)
	[7] = "⁵",     -- light entering (yang-ru)
	[8] = "²¹",    -- sandhi tone
	["-"] = "⁻",
	["("] = "⁽",
	[")"] = "⁾",
}

-- Sandhi tone label(s) keyed by the tone categories of consecutive syllables
-- (categories come from sylcat). Two-part keys give one label for the first
-- syllable; three-part keys give "x-y", the labels for the first and second
-- syllables. The last syllable of a group keeps its own tone.
local tone_sandhi = {
	-- two syllables: <category of first>-<category of second>
	["A-I"] = "1", ["A-II"] = "1", ["A-III"] = "5", ["A-IV"] = "5",
	["B-I"] = "8", ["B-II"] = "8", ["B-III"] = "9", ["B-IV"] = "1",
	["C-I"] = "1", ["C-II"] = "2", ["C-III"] = "2", ["C-IV"] = "8",

	-- three syllables, second syllable in category A
	["A-A-I"] = "8-1", ["A-A-II"] = "8-1", ["A-A-III"] = "8-5", ["A-A-IV"] = "8-5",
	["B-A-I"] = "8-1", ["B-A-II"] = "8-1", ["B-A-III"] = "8-5", ["B-A-IV"] = "8-5",
	["C-A-I"] = "8-1", ["C-A-II"] = "8-1", ["C-A-III"] = "8-5", ["C-A-IV"] = "8-5",

	-- three syllables, second syllable in category B
	["A-B-I"] = "8-8", ["A-B-II"] = "8-8", ["A-B-III"] = "8-9", ["A-B-IV"] = "8-1",
	["B-B-I"] = "8-8", ["B-B-II"] = "8-8", ["B-B-III"] = "8-9", ["B-B-IV"] = "8-1",
	["C-B-I"] = "8-8", ["C-B-II"] = "8-8", ["C-B-III"] = "8-9", ["C-B-IV"] = "8-1",

	-- three syllables, second syllable in category C
	["A-C-I"] = "1-1", ["A-C-II"] = "1-1", ["A-C-III"] = "5-2", ["A-C-IV"] = "5-8",
	["B-C-I"] = "9-2", ["B-C-II"] = "9-2", ["B-C-III"] = "9-2", ["B-C-IV"] = "1-8",
	["C-C-I"] = "2-2", ["C-C-II"] = "2-2", ["C-C-III"] = "2-2", ["C-C-IV"] = "8-8",
}

-- Tone label -> sandhi category, used to build tone_sandhi keys.
--   sylcat[1]: category of a syllable that is followed by another one.
--   sylcat[2]: category of the syllable that follows.
local sylcat = {
	{
		["1"] = "A",
		["5"] = "B",
		["6"] = "B",
		["2"] = "C",
		["3"] = "D",
		["4"] = "E",
		["7"] = "F",
	},
	{
		["1"] = "I",
		["5"] = "II",
		["7"] = "II",
		["2"] = "III",
		["3"] = "IV",
		["6"] = "IV",
		["4A"] = "IV",
		["4B"] = "IV",
	},
}

-- Finals with two readings: export.ipa appends "2" to these (selecting the
-- "...2" row of final_ipa) when the syllable's tone label contains 3, 4 or 6.
local dual_rimes = {
	ong = true,
	ok = true,
	eng = true,
	ek = true,
	eu = true,
	oi = true,
}

-- Replacement for a syllable whose IPA final is the syllabic nasal, chosen by
-- the class of the sound that follows it.
local neg_assim = {
	labial = "(ŋ̍-)m̩",
	dental = "(ŋ̍-)n̩",
	velar = "(ŋ̍-)ŋ̍",
	alone = "(ŋ̍-)ŋ̍/m̩/n̩",
}

-- Class of the following syllable, keyed by its first character. "✘" is the
-- placeholder export.ipa uses when there is no following syllable; characters
-- missing from this table are treated as "velar" by export.ipa.
local neg_type = {
	b = "labial",
	p = "labial",
	m = "labial",
	d = "dental",
	t = "dental",
	n = "dental",
	l = "dental",
	s = "dental",
	c = "dental",
	["✘"] = "alone",
}

-- Pattern capturing the (possibly empty) initial at the start of a syllable:
-- one optional consonant letter followed by an optional "g" or "h".
local initial_string = "^([bpmdtnlgkhcs]?[gh]?)"

--- Tidy a romanisation string for display.
-- Pads every "/" with a space on each side, and replaces a ">" that is
-- followed by a run of characters other than ">", space and "-" with "→"
-- (the run itself is kept).
-- @param text romanisation string
-- @return the reformatted string
function export.rom(text)
	text = gsub(text, "/", " / ")
	-- ">" is not a pattern magic character and needs no escape; "-" is escaped
	-- with "%". The previous "\>" / "\-" were string escapes, which only
	-- Lua 5.1 tolerates and which do not escape anything at pattern level.
	text = gsub(text, ">([^> %-]+)", "→%1")
	return text
end

--- Transcribe a whole sentence word by word.
-- Removes the punctuation marks , . ? and !, lower-cases the text, splits it
-- on single spaces and runs each piece through export.ipa.
-- @param text sentence in romanisation
-- @return the transcriptions of all pieces, joined with single spaces
function export.sentence(text)
	local stripped = gsub(text, "[,%.%?!]", "")
	local pieces = {}
	for word in mw.text.gsplit(lower(stripped), " ", true) do
		pieces[#pieces + 1] = export.ipa(word)
	end
	return table.concat(pieces, " ")
end

--- Work out the tone label of a syllable (or of its final).
-- The text is first decomposed with split_tone. If it then consists of at
-- most one combining tone mark and at most one trailing "h"/"k" (with any
-- other characters around them), the whole text is replaced by the matching
-- tone_from_mark label. When the pattern does not match, or the mark/coda
-- combination is not in tone_from_mark, the decomposed text comes back
-- unchanged.
-- @param text syllable text, with or without precomposed tone letters
-- @return the tone label as a string (e.g. "1", "4A"), or the decomposed text
local function determ_tone(text)
	local decomposed = gsub(text, ".", split_tone)

	-- Called with the captured tone mark and stop coda; either may be "".
	local function label_for(mark, stop)
		return tone_from_mark[mark .. stop]
	end

	local label = gsub(decomposed, "^[^̆̄́̀̂hk]*([̆̄́̀̂]?)[^̆̄́̀̂hk]*([hk]?)$", label_for)
	return label
end

--- Convert romanised text to an IPA transcription.
--
-- Input layout:
--   "/" separates alternative readings (joined with "/, /" in the output),
--   " " separates parts within a reading (joined with " "),
--   "-" separates the syllables of a part (joined with " ").
-- Per-syllable markers:
--   "*"  anywhere in a syllable keeps its initial in the "unchanged" form;
--   "a>b" takes the citation tone from "a" and the pronounced initial, final
--        and sandhi category from "b".
--
-- @param text romanisation string, or a table whose args[1] holds it
--        (presumably a Scribunto frame - confirm against callers)
-- @param feature optional; "no_sandhi" skips tone sandhi
-- @return the IPA transcription as a string
-- Raises an error when a part has more than four syllables and sandhi is
-- enabled, because no sandhi rule is defined for that length.
function export.ipa(text, feature)
	if type(text) == "table" then
		text = text.args[1]
	end
	text = lower(text)

	local phrase_result = {}
	for _, word in ipairs(mw.text.split(text, "/")) do
		local word_result = {}
		for _, part in ipairs(mw.text.split(word, " ")) do
			local initial, final, tone, tone_conv, ipa, exc = {}, {}, {}, {}, {}, {}
			local lenition_blocked = {}
			local syllables = mw.text.split(part, "-")

			-- Pass 1: split every syllable into initial, final and tone.
			for index, syllable in ipairs(syllables) do
				-- Strip "*" markers and remember that lenition is blocked here.
				syllable = gsub(syllable, "%*", function()
					lenition_blocked[index] = true
					return ""
				end)

				if match(syllable, ">") then
					-- Citation tone comes from the text before ">" ...
					tone[index] = determ_tone(gsub(gsub(gsub(syllable, ">[^>]+$", ""), initial_string, ""), ".", split_tone))
					-- ... everything else from the text after it.
					syllable = gsub(syllable, "[^>]+>", "")
					exc[index] = determ_tone(gsub(syllable, initial_string, ""))
				end

				initial[index] = match(syllable, initial_string)
				final[index] = sub(syllable, len(initial[index]) + 1, -1)
				final[index] = gsub(final[index], ".", split_tone)
				tone[index] = exc[index] and tone[index] or determ_tone(final[index])
				-- Drop the combining tone marks; the final is now a final_ipa key.
				final[index] = gsub(final[index], "[̆̄́̀̂]", "")

				-- Finals with two readings use their "...2" row in tones 3, 4 and 6.
				if dual_rimes[final[index]] and match(tostring(tone[index]), "[346]") then
					final[index] = final[index] .. "2"
				end

				-- After d, t, n, l, c(h) or s(h), "io" is looked up as "uo".
				final[index] = match(initial[index] .. final[index], "[dtnlcs]h?io") and gsub(final[index], "io", "uo") or final[index]

				-- A bare "ng" is a syllabic nasal final, not an initial.
				if (initial[index] .. final[index]) == "ng" then
					initial[index], final[index] = "", "ng"
				end
			end

			-- Pass 2: attach sandhi tones as "<citation>-<sandhi>" strings.
			if #syllables == 1 or feature == "no_sandhi" then
				-- Same table, not a copy: the rewrites below also change `tone`.
				tone_conv = tone
			elseif #syllables == 2 then
				tone_conv = {
					tone[1] .. "-" .. (tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-" .. sylcat[2][tone[2]]]),
					tone[2],
				}
			elseif #syllables == 3 then
				-- Declared local: this used to be assigned to a global.
				local sandhi = mw.text.split(
					tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-" .. sylcat[1][exc[2] or tone[2]] .. "-" .. sylcat[2][tone[3]]],
					"-"
				)
				tone_conv = {
					tone[1] .. "-" .. sandhi[1],
					tone[2] .. "-" .. sandhi[2],
					tone[3],
				}
			elseif #syllables == 4 then
				-- Handled as two two-syllable groups; the second syllable gets
				-- an optional sandhi tone 8.
				tone_conv = {
					tone[1] .. "-" .. tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-" .. sylcat[2][tone[2]]],
					tone[2] .. "(-8)",
					tone[3] .. "-" .. tone_sandhi[sylcat[1][exc[3] or tone[3]] .. "-" .. sylcat[2][tone[4]]],
					tone[4],
				}
			else
				-- Without this branch tone_conv stays empty and the gsub calls
				-- below fail on a nil argument with an unhelpful message.
				error("tone sandhi is only defined for 1 to 4 syllables; got " .. #syllables .. " in \"" .. part .. "\"")
			end

			-- Pass 3: render every syllable as IPA.
			for index = 1, #syllables do
				-- Only the last syllable can take the "open" variant of a final.
				if match(tostring(tone_conv[index]), "[346][AB]?$") and (#syllables == 1 or index == #syllables) then
					final[index] = final_ipa[final[index]]["open"]
				else
					final[index] = final_ipa[final[index]]["closed"]
				end

				-- final[index - 1] already holds IPA at this point.
				local initial_state = (index == 1 or match(syllables[index - 1], "k$") or lenition_blocked[index]) and "unchanged"
					or (match(final[index - 1], "[ŋ̩̍]$") and "nasal" or "lenited")
				initial[index] = initial_ipa[initial[index]][initial_state]

				-- A syllabic nasal assimilates to the first character of the
				-- next raw syllable; "✘" stands for "no next syllable".
				if final[index] == "ŋ̍" then
					final[index] = neg_assim[neg_type[sub(syllables[index + 1] or "✘", 1, 1)] or "velar"]
				end

				-- Collapse "x-x" to "x" when sandhi leaves the tone unchanged;
				-- returning nil keeps the original "x-y" text.
				tone_conv[index] = gsub(tone_conv[index], "([1-9AB]+)%-([1-9AB]+)", function(citation, changed)
					if citation == changed then
						return citation
					end
				end)
				-- Turn tone labels and "-", "(", ")" into superscripts.
				tone_conv[index] = gsub(tone_conv[index], "(.[AB]?)", tone_ipa)

				ipa[index] = initial[index] .. final[index] .. tone_conv[index]
			end

			table.insert(word_result, table.concat(ipa, " "))
		end
		table.insert(phrase_result, table.concat(word_result, " "))
	end

	return table.concat(phrase_result, "/, /")
end

return export