-- Module:User:Manishearth/wuu-pron

-- Table of public entry points; functions are attached to it below and it is
-- returned at the end of the file.
local export = {}

--- Romanised syllable initial -> IPA string.
-- Looked up by generic_syl_conv with the initial captured from a syllable;
-- the empty key covers syllables that have no initial at all.
local ipa_initial = {
	p = "p", ph = "pʰ", b = "b̥", m = "m", ["'m"] = "ʔm", f = "f", v = "v̥",
	t = "t", th = "tʰ", d = "d̥", n = "n", ["'n"] = "ʔn", l = "l", ["'l"] = "ʔl",
	ts = "t͡s", tsh = "t͡sʰ", s = "s", z = "z̥",
	j = "t͡ɕ", q = "t͡ɕʰ", jj = "d̥͡ʑ", ny = "n̠ʲ", ["'ny"] = "ʔn̠ʲ", x = "ɕ", xx = "ʑ̥",
	k = "k", kh = "kʰ", g = "ɡ̊", ng = "ŋ", ["'ng"] = "ʔŋ",
	h = "h", ["'"] = "ʔ", hh = "ɦ",
	[""] = "",
}

--- Romanised syllable final -> IPA string.
-- Looked up by generic_syl_conv with the final captured from a syllable.
local ipa_final = {
	a = "a̱", o = "o", au = "ɔ", eu = "ɜ", e = "e̞", oe = "ø",
	i = "i", ia = "ia̱", io = "io", iau = "iɔ", ieu = "iɜ",
	u = "v̩ʷ", ua = "ɯa̱", ue = "ɯe̞", uoe = "v̩ʷø",
	y = "y", yoe = "yø",
	an = "ã", aan = "ɑ̃", en = "əɲ", on = "ʊŋ",
	aq = "a̱ʔ", oq = "ʊʔ", eq = "əʔ",
	-- `in` is a reserved word in Lua, so this key needs the bracket form.
	ian = "iã", iaan = "iɑ̃", ["in"] = "ɪɲ", ion = "i̯ʊŋ",
	iaq = "ia̱ʔ", ioq = "i̯ʊʔ", iq = "i̯ɪʔ",
	uan = "ɯã", uaan = "ɯɑ̃", un = "ɯə̯ɲ",
	uaq = "ɯa̱ʔ", ueq = "ɯə̯ʔ",
	yn = "ʏɲ", yq = "ɥ̯ɪʔ",
	er = "əɻ", r = "z̩",
}

--- Romanised syllabic consonant -> IPA string.
-- Consulted by generic_syl_conv for syllables that consist only of an
-- optional onset plus a nasal.
local ipa_syllabic = {
	mm = "m̩",
	ngg = "ŋ̍",
	["'mm"] = "ʔm̩",
	["'ngg"] = "ʔŋ̍",
}

--- Romanised syllable initial -> Wugniu spelling.
-- Carries the same key set as ipa_initial so that every initial accepted by
-- the IPA conversion is also accepted here. The glottalised initials
-- ('m, 'n, 'l, 'ny, 'ng) collapse onto their plain spelling, in line with the
-- bare "'" initial mapping to the empty string.
local wugniu_initial = {
	p = "p", ph = "ph", b = "b", m = "m", ["'m"] = "m", f = "f", v = "v",
	t = "t", th = "th", d = "d", n = "n", ["'n"] = "n", l = "l", ["'l"] = "l",
	ts = "ts", tsh = "tsh", s = "s", z = "z",
	j = "c", q = "ch", jj = "j", ny = "gn", ["'ny"] = "gn", x = "sh", xx = "zh",
	-- Plain ASCII "g": this table produces a romanisation, not IPA.
	k = "k", kh = "kh", g = "g", ng = "ng", ["'ng"] = "ng",
	h = "h", ["'"] = "", hh = "gh",
	[""] = "",
}

--- Romanised syllable final -> Wugniu spelling.
-- Looked up by generic_syl_conv (via wugniu_conv) with the captured final.
local wugniu_final = {
	a = "a", o = "o", au = "au", eu = "eu", e = "e", oe = "oe",
	i = "i", ia = "ia", io = "io", iau = "iau", ieu = "ieu",
	u = "u", ua = "ua", ue = "ue", uoe = "uoe",
	y = "iu", yoe = "ioe",
	an = "an", aan = "aon", en = "en", on = "on",
	aq = "aq", oq = "oq", eq = "eq",
	-- `in` is a reserved word in Lua, so this key needs the bracket form.
	ian = "ian", iaan = "iaon", ["in"] = "in", ion = "ion",
	iaq = "iaq", ioq = "ioq", iq = "iq",
	uan = "uan", uaan = "uaon", un = "uen",
	uaq = "uaq", ueq = "ueq",
	yn = "iun", yq = "iuq",
	er = "ei", r = "y",
}

--- Romanised syllabic consonant -> Wugniu spelling.
local wugniu_syllabic = {
	mm = "m",
	ngg = "ng",
	["'mm"] = "m",
	["'ngg"] = "ng",
}

--- Citation tone character (as written in the input) -> the number that
-- wugniu_tone_conv attaches to the first syllable of a word.
local wugniu_tone_map = {
	["1"] = 1,
	["2"] = 5,
	["3"] = 6,
	["4"] = 7,
	["5"] = 8,
}

--- Tone contour strings.
-- Two families of keys live in this table:
--   * "<syllable count>-<tone character>", read by ipa_tone_conv; the value
--     holds one contour per syllable, separated by single spaces.
--   * "<A|B|C|D>-single" / "<A|B|C|D>-multiple", read by convert_by_parts
--     when it overrides the contour of a word-final syllable; the letter
--     comes from tone_table.
local tone_contours = {
	-- one syllable
	["1-0"] = "",
	["1--"] = "³³",
	["1-1"] = "⁵³",
	["1-2"] = "³⁴",
	["1-3"] = "²³",
	["1-4"] = "⁵⁵",
	["1-5"] = "¹²",

	-- two syllables
	["2-1"] = "⁵⁵ ²¹",
	["2-2"] = "³³ ⁴⁴",
	["2-3"] = "²² ⁴⁴",
	["2-4"] = "³³ ⁴⁴",
	["2-5"] = "¹¹ ²³",

	-- three syllables
	["3-1"] = "⁵⁵ ³³ ²¹",
	["3-2"] = "³³ ⁵⁵ ²¹",
	["3-3"] = "²² ⁵⁵ ²¹",
	["3-4"] = "³³ ⁵⁵ ²¹",
	["3-5"] = "¹¹ ²² ²³",

	-- four syllables
	["4-1"] = "⁵⁵ ³³ ³³ ²¹",
	["4-2"] = "³³ ⁵⁵ ³³ ²¹",
	["4-3"] = "²² ⁵⁵ ³³ ²¹",
	["4-4"] = "³³ ⁵⁵ ³³ ²¹",
	["4-5"] = "²² ⁵⁵ ³³ ²¹",

	-- five syllables
	["5-1"] = "⁵⁵ ³³ ³³ ³³ ²¹",
	["5-2"] = "³³ ⁵⁵ ³³ ³³ ²¹",
	["5-3"] = "²² ⁵⁵ ³³ ³³ ²¹",
	["5-4"] = "³³ ⁵⁵ ³³ ³³ ²¹",
	["5-5"] = "²² ⁵⁵ ³³ ³³ ²¹",

	-- overrides for the last syllable of a word that is followed by another word
	["A-single"] = "⁴⁴",
	["B-single"] = "³³",
	["C-single"] = "⁴⁴",
	["D-single"] = "²²",
	["A-multiple"] = "³³",
	["B-multiple"] = "³³",
	["C-multiple"] = "³³",
	["D-multiple"] = "³³",
}

--- "<voicing>-<coda>" (as produced from determ_syl's two results) -> the
-- category letter used in the "<letter>-single/multiple" keys of tone_contours.
local tone_table = {
	["voiceless-unchecked"] = "A",
	["voiced-unchecked"] = "B",
	["voiceless-checked"] = "C",
	["voiced-checked"] = "D",
}

--- Classify a romanised syllable by voicing and by coda type.
-- @param text romanised syllable (no tone character)
-- @return "voiced" or "voiceless"
-- @return "checked" when the syllable ends in "q", otherwise "unchecked"
local function determ_syl(text)
	-- Any one of these patterns matching marks the syllable as voiced.
	-- The first three are anchored to the start; the doubled letters may
	-- match anywhere in the string.
	local voiced_patterns = { "^[bvdlzg]", "^m[^m]", "^n[^n]", "jj", "xx", "hh" }

	local voicing = "voiceless"
	for _, pattern in ipairs(voiced_patterns) do
		if text:find(pattern) then
			voicing = "voiced"
			break
		end
	end

	local coda = text:find("q$") and "checked" or "unchecked"
	return voicing, coda
end

--- Look up the tone category letter of a syllable.
-- @param text romanised syllable (no tone character)
-- @return the tone_table entry for the syllable's "<voicing>-<coda>" key
local function tone_determ(text)
	local voicing, coda = determ_syl(text)
	local key = ("%s-%s"):format(voicing, coda)
	return tone_table[key]
end

--- Validate a toned word before conversion.
-- Only the tone character and the first space-separated syllable are
-- inspected.
-- @param text a word: one tone character followed by its syllables
-- @return nil; raises an error when the first syllable should have been
--   written with a palatal initial, or when the tone character is
--   incompatible with the syllable's voicing or coda
local function rom_check(text)
	local tone = text:sub(1, 1)
	local first = mw.text.split(text:sub(2, -1), " ")[1]
	local voicing, coda = determ_syl(first)

	local unpalatalised = first:find("[kgs]h?[iy]")
		or first:find("^z[iy]")
		or first:find("^ni")
	if unpalatalised then
		error("Invalid syllable: " .. first .. ". Palatalisation expected.")
	end

	-- Voicing against tone.
	if voicing == "voiced" then
		if tone:find("[124]") then
			error("Invalid syllable: " .. first .. tone .. ". Voiced initials only occur in tones 3 and 5.")
		end
	elseif voicing == "voiceless" then
		if tone:find("[35]") then
			error("Invalid syllable: " .. first .. tone .. ". Voiceless initials only occur in tones 1, 2 and 4.")
		end
	end

	-- Coda against tone.
	if coda == "checked" then
		if tone:find("[123]") then
			error("Checked syllables only occur in tones 4 and 5.")
		end
	elseif tone:find("[45]") then
		error("Unchecked syllables only occur in tones 1, 2 and 3.")
	end

	return nil
end

--- Convert a full pronunciation string, syllable by syllable.
--
-- The input is lower-cased and then split three times: "," separates
-- readings, "&" separates components within a reading, and "+" separates
-- words within a component. Each word is one tone character followed by
-- space-separated syllables. Converted words and components are joined back
-- together with single spaces.
--
-- @param original_text the pronunciation string, or a table whose `args[1]`
--   holds it
-- @param syl_conv function(syllable) -> converted syllable
-- @param tone_conv function(syllable_count, tone_character) -> array holding
--   one tone string per syllable
-- @param apply_ipa_tone_change when truthy, the last syllable of a word that
--   is followed by another "+"-joined word gets the contour
--   tone_contours["<category>-single"] (or "-multiple" for words of more than
--   one syllable) instead of the one supplied by tone_conv, unless that
--   contour is "³³"
-- @return array with one converted string per reading
function convert_by_parts(original_text, syl_conv, tone_conv, apply_ipa_tone_change)
	if type(original_text) == "table" then
		original_text = original_text.args[1]
	end
	original_text = mw.ustring.lower(original_text)

	local reading = mw.text.split(original_text, ",", true)
	for reading_index = 1, #reading do
		local components = mw.text.split(reading[reading_index], "&", true)
		for component_index = 1, #components do
			local indep_words = mw.text.split(components[component_index], "+", true)
			for indep_index = 1, #indep_words do
				local word = indep_words[indep_index]

				-- One more syllable than there are spaces in the word.
				local _, space_count = word:gsub(" ", "")
				local no_syllables = space_count + 1

				rom_check(word)
				local syl_tone = tone_conv(no_syllables, word:sub(1, 1))
				local syllable = mw.text.split(word:sub(2, -1), " ", true)
				local has_following_word = indep_words[indep_index + 1] ~= nil

				for i = 1, no_syllables do
					-- This used to test an undefined global `tone`, which is
					-- always nil, so the "³³" exemption never took effect.
					-- The contour to compare is the one tone_conv assigned
					-- to this syllable.
					if apply_ipa_tone_change
						and i == no_syllables
						and has_following_word
						and syl_tone[i] ~= "³³"
					then
						-- The category must come from the unconverted syllable.
						local shape = no_syllables > 1 and "multiple" or "single"
						syl_tone[i] = tone_contours[tone_determ(syllable[i]) .. "-" .. shape]
					end
					syllable[i] = syl_conv(syllable[i]) .. syl_tone[i]
				end

				indep_words[indep_index] = table.concat(syllable, " ")
			end
			components[component_index] = table.concat(indep_words, " ")
		end
		reading[reading_index] = table.concat(components, " ")
	end

	return reading
end

--- Fetch the per-syllable IPA tone contours for a word.
-- @param no_syllables number of syllables in the word
-- @param citation_tone the word's tone character
-- @return array of contour strings, obtained by splitting the
--   tone_contours["<no_syllables>-<citation_tone>"] entry on spaces
-- Raises an error when tone_contours has no such entry.
function ipa_tone_conv(no_syllables, citation_tone)
	local key = no_syllables .. "-" .. citation_tone
	local tones = tone_contours[key]
	if not tones then
		error("Tone notation is incorrect. See WT:WUU.")
	end
	return mw.text.split(tones, " ", true)
end

--- Convert a pronunciation string to IPA.
-- Runs convert_by_parts with the IPA syllable and tone converters and with
-- the word-final tone override enabled.
-- @param original_text the pronunciation string, or a table whose `args[1]`
--   holds it
-- @return the converted readings joined with "/, /"
function export.ipa_conv(original_text)
	local readings = convert_by_parts(
		original_text,
		export.ipa_syl_conv,
		ipa_tone_conv,
		true
	)
	return table.concat(readings, "/, /")
end

--- Convert one romanised syllable (no tone character) to IPA.
-- @param text the syllable
-- @return the result of generic_syl_conv using the three IPA lookup tables
function export.ipa_syl_conv(text)
	return generic_syl_conv(
		text,
		ipa_initial,
		ipa_final,
		ipa_syllabic
	)
end

--- Convert one romanised syllable using the supplied lookup tables.
--
-- Three syllable shapes are recognised, tried in this order:
--   1. an optional "'" / "h" / "hh" onset followed by a syllabic nasal;
--   2. an initial "ny" or "'ny" followed by a final;
--   3. any other initial (possibly empty) followed by a final.
--
-- @param text romanised syllable without its tone character
-- @param initial table mapping romanised initials to output strings
-- @param final table mapping romanised finals to output strings
-- @param syllabic table mapping romanised syllabic consonants to output strings
-- @return the converted syllable
-- Raises an error when the syllable fits none of the shapes, or when its
-- initial, final or syllabic consonant is missing from the tables.
function generic_syl_conv(text, initial, final, syllabic)
	local function invalid()
		return error(("Invalid syllable: \"%s\""):format(text))
	end

	-- Shape 1: syllabic nasal. An onset missing from `initial` contributes
	-- nothing; the nasal is looked up alone first, then together with the onset.
	local onset, nasal = text:match("^(%'?h?h?)([mn][mg]?g?)$")
	if onset then
		local nucleus = syllabic[nasal] or syllabic[onset .. nasal] or invalid()
		return (initial[onset] or '') .. nucleus
	end

	-- Shapes 2 and 3 share the final pattern and differ only in the initial.
	local final_pattern = "([aeiouyr][aeou]?[aeu]?[nqr]?)$"
	local initial_pattern
	if text:find("^%'?ny") then
		initial_pattern = "^([\']?ny)"
	else
		initial_pattern = "^([\']?[pbmfvtdnlszjqxkghr%']?[sjgx]?[h]?)"
	end

	local ini, fin = text:match(initial_pattern .. final_pattern)
	if not ini then
		-- A malformed "ny..." syllable used to fall through and be returned
		-- unconverted; it is rejected like any other malformed syllable.
		return invalid()
	end

	return (initial[ini] or error(("Unrecognised initial: \"%s\""):format(ini)))
		.. (final[fin] or error(("Unrecognised final: \"%s\""):format(fin)))
end

--- Reformat a pronunciation string for display.
-- Readings are separated by "," and words within a reading by "+". A word
-- that starts with a digit or "-" has that character moved to the end as
-- " (T<character>)"; other words are left untouched.
-- @param text the pronunciation string, or a table whose `args[1]` holds it
-- @return the words of each reading joined with " + ", and the readings
--   joined with "; "
function export.rom(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local readings = mw.text.split(text, ",", true)
	for r = 1, #readings do
		local words = mw.text.split(readings[r], '+', true)
		for w = 1, #words do
			local tone, rest = words[w]:match('^([0-9%-])(.*)$')
			if tone then
				words[w] = rest .. ' (T' .. tone .. ')'
			end
		end
		readings[r] = table.concat(words, ' + ')
	end

	return table.concat(readings, "; ")
end

--- Build the per-syllable tone markers for a Wugniu word.
-- Only the first syllable carries a marker (the wugniu_tone_map entry for the
-- citation tone, as a string); every other syllable gets the empty string.
-- @param no_syllables number of syllables in the word
-- @param citation_tone the word's tone character
-- @return array of `no_syllables` strings
-- Raises an error when the tone character has no wugniu_tone_map entry.
function wugniu_tone_conv(no_syllables, citation_tone)
	local tones = {}
	for i = 1, no_syllables do
		tones[i] = ""
	end

	if no_syllables >= 1 then
		-- Report an unknown tone the same way ipa_tone_conv does, rather than
		-- failing on a nil concatenation.
		local marker = wugniu_tone_map[citation_tone]
			or error("Tone notation is incorrect. See WT:WUU.")
		tones[1] = tostring(marker)
	end

	return tones
end

--- Convert one romanised syllable (no tone character) to its Wugniu spelling.
-- @param text the syllable
-- @return the result of generic_syl_conv using the three Wugniu lookup tables
function wugniu_conv(text)
	return generic_syl_conv(
		text,
		wugniu_initial,
		wugniu_final,
		wugniu_syllabic
	)
end

--- Convert a pronunciation string to Wugniu romanisation.
-- Runs convert_by_parts with the Wugniu syllable and tone converters and
-- with the word-final tone override disabled.
-- @param original_text the pronunciation string, or a table whose `args[1]`
--   holds it
-- @return the converted readings joined with ", "
function export.wugniu(original_text)
	local readings = convert_by_parts(
		original_text,
		wugniu_conv,
		wugniu_tone_conv,
		false
	)
	return table.concat(readings, ", ")
end

-- Hand the table of public functions back to whatever loads this file.
return export