-- Module:cmn-pron-Nanjing

-- Nanjing Mandarin: converts romanized readings into display text, numbered text and IPA.

-- Table holding the functions this module exposes; it is returned at the end of the file.
local export = {}

-- Initial consonants: internally encoded romanization -> IPA.
-- The upper-case keys Z, C and S are the one-letter internal codes for the
-- digraphs zh, ch and sh (see digraph_encode / digraph_decode).
-- The empty-string key covers syllables that have no initial.
local initials = {
	b = "p", p = "pʰ", m = "m", f = "f",
	d = "t", t = "tʰ", l = "l",
	g = "k", k = "kʰ", h = "x",
	j = "t͡ɕ", q = "t͡ɕʰ", x = "ɕ",
	Z = "t͡ʂ", C = "t͡ʂʰ", S = "ʂ", r = "ʐ",
	z = "t͡s", c = "t͡sʰ", s = "s",
	[""] = "",
}

-- Finals (rhymes): internally encoded romanization -> IPA.
-- Internal codes used in the keys: U = ü, N = ng (see digraph_encode /
-- digraph_decode). Keys that are Lua keywords ("in", "or") must be written
-- in bracket form.
local finals = {
	-- open finals
	ii = "z̩", iU = "ʐ̩",
	i = "i", u = "u", U = "y",
	a = "a", ia = "ia", ua = "ua",
	o = "o", e = "e", E = "ə",
	ie = "ie", Ue = "ye",
	ai = "ɛ", iai = "iɛ", uai = "uɛ",
	ei = "əi", ui = "uəi",
	ao = "ɔ", iao = "iɔ",
	ou = "əɯ", iu = "iəɯ",
	-- finals written with -n
	an = "ã", ian = "iã", uan = "uã",
	ien = "iẽ", Uen = "yẽ",
	en = "ə̃", ["in"] = "ĩ", un = "uə̃", Un = "yĩ",
	on = "oŋ", ion = "ioŋ",
	-- finals written with -q (mapped to a glottal stop)
	iUq = "ʐ̩ʔ",
	iq = "iʔ", uq = "uʔ", Uq = "yʔ",
	aq = "aʔ", iaq = "iaʔ", uaq = "uaʔ",
	eq = "əʔ", ieq = "ieʔ", ueq = "ueʔ", Ueq = "yeʔ",
	oq = "oʔ", ioq = "ioʔ",
	-- finals written with -r
	er = "ər", ir = "iər", ur = "uər", Ur = "yər",
	ar = "ar", iar = "iar", uar = "uar",
	["or"] = "or", ior = "ior",
	ier = "ier",
	air = "ɛr", iair = "iɛr", uair = "uɛr",
	aor = "ɔr", iaor = "iɔr",
	anr = "ãr", ianr = "iãr", uanr = "uãr",
	-- NOTE(review): "en" maps to "ə̃" but "enr" uses "ɵ̃"; confirm this vowel is intended.
	enr = "ɵ̃r",
	-- syllabic nasals
	m = "m̩", n = "n̩", N = "ŋ̍",
}

-- Tone values written as digit strings (later turned into superscripts by
-- tone_superscript).
-- One-digit keys are the tone numbers used in the encoded romanization.
-- Two-digit keys are "own tone .. tone of the following syllable"; when such a
-- key exists, py_to_ipa appends its value after the base value as the
-- changed (sandhi) value.
local tones = {
	["1"] = "31", -- yinping, dark level (T1)
	["2"] = "24", -- yangping, light level (T2)
	["3"] = "11", -- shang, rising (T3)
	["4"] = "44", -- qu, departing (T4)
	["5"] = "5",  -- ru, entering (T5)
	["0"] = "",   -- toneless (T0)
	-- tone pairs with a changed value
	["11"] = "33",
	["25"] = "11",
	["20"] = "11",
	["31"] = "12",
	["33"] = "12",
	["45"] = "42",
	["55"] = "3",
}

-- Replace every ASCII digit 1-5 in `text` with its superscript form.
-- Other characters (including the digits 0 and 6-9) are left untouched.
-- Returns exactly one value: the converted string.
local function tone_superscript(text)
	local superscript_of = {
		['1'] = '¹',
		['2'] = '²',
		['3'] = '³',
		['4'] = '⁴',
		['5'] = '⁵',
	}
	-- The parentheses drop gsub's second result (the substitution count).
	return (text:gsub("[1-5]", superscript_of))
end

-- Tone number to show in the phonetic spelling for a syllable whose tone
-- changes before the next one.
-- Key: own tone digit followed by the tone digit of the following syllable.
-- Value: the tone digit written instead.
local tone_sandhi_num = {
	["11"] = "4",
	["20"] = "3",
	["25"] = "3",
	["31"] = "2",
	["33"] = "2",
	["45"] = "1",
}

-- Internal use: encode and decode digraphs and tone diacritics.

-- Two-byte sequences -> one-character internal codes.
-- The letter digraphs become upper-case letters; the "\204\1xx" entries are
-- the UTF-8 byte pairs of combining diacritics, which become tone digits 1-5.
local digraph_encode = {
	ng = "N", zh = "Z", ch = "C", sh = "S",
	["\204\128"] = "1",
	["\204\129"] = "2",
	["\204\140"] = "3",
	["\204\132"] = "4",
	["\204\138"] = "5",
}

-- Internal codes -> display form.
-- "6" and "7" are the markers py_process puts around a syllable whose
-- phonetic form differs from the written one.
-- NOTE(review): both markers decode to a single space here; confirm that no
-- markup was lost from these two strings.
local digraph_decode = {
	N = "ng", Z = "zh", C = "ch", S = "sh", U = "ü",
	["0"] = "",
	["1"] = "\204\128",
	["2"] = "\204\129",
	["3"] = "\204\140",
	["4"] = "\204\132",
	["5"] = "\204\138",
	["6"] = ' ',
	["7"] = " ",
}

-- Convert display romanization into the internal encoding.
-- The text is decomposed (NFD) first so that diacritics become separate
-- combining characters.
-- @param text  romanized input string
-- @return      the encoded string
local function encode(text)
	text = mw.ustring.toNFD(text)
		-- Upper-case ASCII letters become "^" + lower-case letter, freeing the
		-- upper-case letters for use as internal codes.
		:gsub("[A-Z]", function(c) return "^" .. c:lower() end)
		-- u + combining diaeresis (ü) -> U
		:gsub("u\204\136", "U")
		-- Digraphs and tone diacritics; pairs missing from the table are kept as is.
		:gsub("[bpnzcs\204][vfgh\128\129\132\138\140]", digraph_encode)
		-- "n<tone>g" -> "N<tone>"
		:gsub("n([1-5])g", "N%1")
	return text
end

-- Decode only the letter codes (N, Z, C, S, U), leaving tone digits alone.
-- Used to make encoded text readable inside error messages.
-- @param text  encoded string
-- @return      string with the letter codes expanded
local function decode_error(text)
	text = text:gsub("[NZCSU]", digraph_decode)
	return text
end

-- Convert the internal encoding back into display romanization (NFC).
-- @param text  encoded string
-- @return      the decoded, NFC-normalized string
local function decode(text)
	text = text
		-- "N<digit>" -> "n<digit>g", so the tone digit sits between n and g
		:gsub("N([0-5])", "n%1g")
		:gsub("[NZCSU1-7]", digraph_decode)
		-- "^x" -> "X"
		:gsub("%^([a-z])", string.upper)
	return mw.ustring.toNFC(text)
end

-- Check that encoded text consists only of well-formed syllables,
-- e.g. "^lan2jin1 ^beq5hua4".
-- Each syllable is: optional separator (space or "/"), optional "^" or ">",
-- optional initial, a final, and a tone digit 0-5.
-- @param text  encoded string
-- Raises an error listing whatever is left after all valid syllables have
-- been removed.
local function check_syllable_format(text)
	local check = text:gsub("[ /]?[%^>]?[bpmfdtlgkhjqxZCSrzcsyw]?[aeiouUmnN][aeiou]*[nq]?r?[0-5]", "")
	if check ~= "" then
		error("Nanjing: Invalid syllable(s): " .. check)
	end
end

-- Turn encoded, tone-numbered syllables back into display romanization.
-- TODO: inverse of py_divide_syllables
-- @param text  encoded string of syllables, each ending in a tone digit 0-5
-- @return      the decoded display string
local function py_join_syllables(text)
	text = text
		:gsub("([bpmfdtlgkhjqxZCSrzcsyw]?)([aeiouUmnN][aeiou]*[nq]?r?)([0-5])", function(a, b, c)
			-- d: optional medial (i/u/U) plus the vowel that carries the tone
			-- e: whatever lower-case letters follow it
			local d, e = b:match("^([iuU]?[aeiouU])(%l*)$")
			if d then
				-- Move the tone digit to directly after the tone-bearing
				-- vowel; tone 0 is dropped.
				return "'" .. a .. d .. (c ~= "0" and c or "") .. e
			else
				-- Finals that do not fit that shape keep the tone digit at the end.
				return "'" .. a .. b .. (c ~= "0" and c or "")
			end
		end)
		-- Every syllable got a leading apostrophe; remove it where the syllable
		-- starts with a consonant followed by a vowel ...
		:gsub("'([bpmfdtlgkhjqxZCSrzcsyw][aeiouU])", "%1")
		-- ... and at the start of the text or right after a space.
		:gsub("%f[^ %z]'", "")
	return decode(text)
end

-- Split display romanization into encoded syllables with trailing tone digits.
-- Example: Lánjìn Be̊qhuā --> ^lan2jin1 ^beq5hua4
-- @param text  display romanization
-- @return      the encoded, syllable-divided string
-- Raises an error if the result contains invalid syllables, or if joining the
-- result back together does not reproduce `text` exactly (the message then
-- shows the spelling that would round-trip).
local function py_divide_syllables(text)
	local res = encode(text)
		-- An encoded "ng" (N) standing before a vowel is re-split as n + 'g.
		:gsub("([aeiouU1-5])N%f[aeiouU]", "%1n'g")
		-- Mark each consonant+vowel onset with exactly one apostrophe.
		:gsub("'?([bpmfdtlgkhjqxZCSrzcsyw][aeiouU])", "'%1")
		-- Drop the marker and move the tone digit from after the tone-bearing
		-- vowel to the end of the syllable; a missing digit becomes "0".
		:gsub("'?([bpmfdtlgkhjqxZCSrzcsyw]?[iuU]?[aeiouUmnN])([1-5]?)([aeiou]*[nq]?r?)",
			function(a, b, c) return a .. c .. (b ~= "" and b or "0") end)
	check_syllable_format(res)
	local check = py_join_syllables(res)
	if text ~= check then
		error("Nanjing: input should be " .. check)
	end
	return res
end

-- Produce the tone-numbered display form of encoded text: tone digits stay
-- as digits, while the letter codes (N, Z, C, S, U) and the markers 6/7 are
-- expanded through digraph_decode.
-- @param text  encoded string
-- @return      the numbered display string
local function py_numbered(text)
	-- NOTE(review): replacing each digit with "%0" (itself) leaves the text
	-- unchanged as written; kept as is — confirm whether wrapping markup is missing.
	local digits_pass = text:gsub("[0-5]", "%0")
	local expanded = digits_pass:gsub("[NZCSU67]", digraph_decode)
	return expanded
end

-- Canonize encoded initial+final spelling so that it follows pinyin spelling
-- rules, e.g. jü -> ju.
-- "%f[%l%u]" matches at the start of a run of letters, i.e. at the beginning
-- of a syllable that has no initial.
-- @param text  encoded string in normalized (initial+final) form
-- @return      the canonical spelling
local function py_canonize(text)
	text = text
		-- ü is written u after j, q, x
		:gsub("([jqx])U", "%1u")
		-- syllable-initial u / ui / un -> w / wei / wen
		:gsub("%f[%l%u]u[in]?", {u = "w", ui = "wei", un = "wen"})
		-- a bare w (followed directly by q, r or a tone digit) -> wu
		:gsub("%f[%l%u]w%f[qr0-5]", "wu")
		-- syllable-initial i / iu / iU -> y / you / rii
		:gsub("%f[%l%u]i[uU]?", {i = "y", iu = "you", iU = "rii"})
		-- a bare y (followed directly by n, q, r or a tone digit) -> yi
		:gsub("%f[%l%u]y%f[nqr0-5]", "yi")
		:gsub("iU", "ii")
		-- syllable-initial ü -> yu
		:gsub("%f[%l%u]U", "yu")
		:gsub("([ZCSr])i%f[qr0-5]", "%1ii") -- give error for zhi
		:gsub("E", "e")
	return text
end

-- Normalize encoded pinyin-style spelling to plain initial+final form,
-- e.g. ju -> jü.
-- @param text  encoded string in canonical spelling
-- @return      the normalized string
-- Raises an error when canonizing the result does not give back `text`,
-- i.e. when the input was not written in canonical spelling; the message
-- shows the expected spelling.
local function py_normalize(text)
	local res = text
		-- u after j, q, x stands for ü
		:gsub("([jqx])u", "%1U")
		-- wu / wei / wen -> u / ui / un; other w+vowel pairs are kept here ...
		:gsub("w[ue][in]?", {wu = "u", wei = "ui", wen = "un"})
		-- ... and any remaining w becomes u
		:gsub("w", "u")
		-- syllable-initial yi / yu / you -> i / ü / iu
		:gsub("%f[%l%u]y[iuo]u?", {yi = "i", yu = "U", you = "iu"})
		-- any other syllable-initial y becomes i
		:gsub("%f[%l%u]y", "i")
		:gsub("([ZCSr])ii", "%1iU")
		:gsub("riU%f[q0-5]", "iU")
		-- toneless e after an initial uses the separate final E
		:gsub("([bpmfdtlgkhjqxZCSrzcs])e0", "%1E0")
	local check = py_canonize(res)
	if text ~= check then
		error("Nanjing: " .. decode_error(text) .. " should be " .. decode_error(check))
	end
	return res
end

-- Convert space-separated, normalized syllables into an IPA string wrapped
-- in slashes. Tone values are emitted as plain digits.
-- Each syllable is: optional initial, final, own tone digit and, optionally,
-- the tone digit of the following syllable.
-- @param text  normalized encoded string
-- @return      "/" .. converted text .. "/"
-- Raises an error for a syllable that does not parse, or whose initial or
-- final is not in the lookup tables.
local function py_to_ipa(text)
	local function syllable_to_ipa(syllable)
		local initial, final, tone, next_tone = syllable:match(
			"^([bpmfdtlgkhjqxZCSrzcs]?)([aeiouUEmnN][aeiouU]*[nq]?r?)([0-5])([0-5]?)$")
		if not initial then
			error("Nanjing: Invalid syllable: " .. decode_error(syllable))
		end

		-- Changed tone value, present only when a following tone was given
		-- and the pair has an entry in `tones`.
		local changed = next_tone ~= "" and tones[tone .. next_tone]

		local ipa_initial = initials[initial]
			or error("Nanjing: Invalid initial: " .. decode_error(initial))
		local ipa_final = finals[final]
			or error ("Nanjing: Invalid final: " .. decode_error(final))

		local suffix = ""
		if changed then
			suffix = "⁻" .. changed
		end
		return ipa_initial .. ipa_final .. tones[tone] .. suffix
	end

	local converted = text:gsub("[^ ]+", syllable_to_ipa)
	return "/" .. converted .. "/"
end

-- Process one or more "/"-separated readings in display romanization.
--
-- Inside a reading, "written>phonetic" marks a syllable whose pronunciation
-- differs from its written form. Internally the digits 6 and 7 are used as
-- opening and closing markers around syllables whose phonetic form differs.
--
-- @param text  readings separated by "/"
-- @return four strings; entries for the individual readings are joined with
--         " / " (the IPA with ", "):
--   1. display text (written>phonetic pairs shown with "→")
--   2. written text, followed by " [Phonetic: ...]" when the phonetic form differs
--   3. the same as 2, in tone-numbered form
--   4. IPA with superscript tone values
function export.py_process(text)
	local conv_display = {}
	local conv_hidden = {}
	local conv_numbered = {}
	local conv_ipa = {}
	local i = 0
	for reading in mw.text.gsplit(text, "/", true) do
		i = i + 1
		reading = py_divide_syllables(reading)
		-- Parentheses keep only gsub's first result.
		conv_display[i] = py_join_syllables((reading:gsub(">([a-zZCSUN]+[0-5])", "→%1")))

		-- Written form: keep the left side of each "a>b" pair.
		local original = reading:gsub("([a-zZCSUN]+[0-5])>[a-zZCSUN]+[0-5]", "%1")
		-- Phonetic form: keep the right side, wrapped in the 6...7 markers.
		local phonetic = reading:gsub("[a-zZCSUN]+[0-5]>([a-zZCSUN]+[0-5])", "6%17")
		phonetic = phonetic:gsub("%^", "")

		-- After each tone digit, insert the tone digit of the syllable that
		-- follows, so every non-final syllable carries "own tone, next tone".
		reading = phonetic:gsub("%f[^0-5](7?6?[a-zZCSUN]+)([0-5])", "%2%1%2")

		-- Phonetic spelling: apply the tone change where the pair is listed,
		-- otherwise drop the inserted digit again.
		phonetic = reading:gsub("([a-zZCSUN]+)([0-5])([0-5])", function(a, b, c)
			local d = tone_sandhi_num[b .. c]
			return d and ('6' .. a .. d .. '7') or (a .. b)
		end)
		-- Syllables ending in r with tone 3 are shown with tone 2.
		phonetic = phonetic:gsub("([a-zZCS][a-zU]+)r3", "6%1r27")
		-- Collapse doubled markers.
		phonetic = phonetic:gsub("6+", "6"):gsub("7+", "7")
		-- Same r3 -> r2 change for the IPA input; the following-tone digit is
		-- removed so no further change is looked up for that syllable.
		reading = reading:gsub("([a-zZCS][a-zU]+)r3", "%1r2"):gsub("r2[0-5]", "r2")

		-- Numbered forms: one space after every syllable, none at the end.
		local original_num = original:gsub("([0-5]) ?", "%1 "):gsub(" $", ""):gsub("%^", "")
		local phonetic_num = phonetic:gsub("([0-5]7?) ?", "%1 "):gsub(" $", "")

		if phonetic:find("6") then
			conv_hidden[i] = py_join_syllables(original) .. " [Phonetic: " .. py_join_syllables(phonetic) .. "]"
			conv_numbered[i] = py_numbered(original_num) .. " [Phonetic: " .. py_numbered(phonetic_num) .. "]"
		else
			conv_hidden[i] = py_join_syllables(original)
			conv_numbered[i] = py_numbered(original_num)
		end

		-- IPA input: strip the markers and separate the syllables with spaces.
		reading = reading:gsub("[67]", ""):gsub("([0-5][0-5]?) ?", "%1 "):gsub(" $", "")
		reading = py_normalize(reading)
		conv_ipa[i] = py_to_ipa(reading)
	end
	return table.concat(conv_display, " / "),
		table.concat(conv_hidden, " / "),
		table.concat(conv_numbered, " / "),
		tone_superscript(table.concat(conv_ipa, ", "))
end

-- Module result: the table of exported functions.
return export