-- Module:User:Littlepenny413/yue-pron

-- Table of public functions; it is returned at the bottom of the file.
local export = {}

-- Short local names for the MediaWiki string helpers used throughout the
-- module. `gsub` and `replace` are two names for the same mw.ustring.gsub.
local gsub, replace, match = mw.ustring.gsub, mw.ustring.gsub, mw.ustring.match
local split, itersplit = mw.text.split, mw.text.gsplit

-- Superscript form of each tone digit. The keys are strings because gsub looks
-- a table replacement up by the matched text ("1"), not by the number 1; an
-- integer-indexed array is only found when the gsub backend happens to coerce
-- numeric-string keys.
local super_numbers = {
	['1'] = '¹',
	['2'] = '²',
	['3'] = '³',
	['4'] = '⁴',
	['5'] = '⁵',
}

--- Replace every digit 1-5 in `text` with its superscript counterpart.
-- Other characters (including the digits 0 and 6-9) are left untouched.
-- @param text string to convert
-- @return the converted string, followed by the number of digits replaced
--         (the two results of gsub are passed through unchanged)
local function to_super(text)
	-- The pattern only matches ASCII digits, which never occur inside a
	-- multi-byte UTF-8 sequence, so the byte-oriented string.gsub is safe here.
	return string.gsub(text, '[1-5]', super_numbers)
end

-- Tone digit substitutions applied by the converters to a tone that directly
-- follows a final p/t/k.
local entering_tones = {
	["1"] = "7",
	["3"] = "8",
	["6"] = "9",
}

-- Ordered {pattern, replacement} pairs. yale_to_ipa runs them one after the
-- other over each syllable before the initial/nucleus/coda lookup, so the
-- order of the entries matters.
local ipa_preprocess = {
	{"a", "ă"},
	{"yu", "y"},
	{"ăă", "a"},
	{"uk", "ŭk"},
	{"ik", "ĭk"},
	{"ou", "ŏu"},
	{"eoi", "eoy"},
	{"ung", "ŭng"},
	{"ing", "ĭng"},
	{"ei", "ĕi"},
}

-- Romanised initial -> IPA. The empty key covers syllables with no initial.
local ipa_initial = {
	b = "p",
	p = "pʰ",
	m = "m",
	f = "f",
	d = "t",
	t = "tʰ",
	n = "n",
	l = "l",
	g = "k",
	k = "kʰ",
	ng = "ŋ",
	gw = "kʷ",
	kw = "kʷʰ",
	zh = "t͡ɕ",
	ch = "t͡ɕʰ",
	sh = "ɕ",
	z = "t͡s",
	c = "t͡sʰ",
	s = "s",
	h = "h",
	w = "w",
	j = "j",
	[""] = "",
}

-- Nucleus (after ipa_preprocess has marked the short vowels) -> IPA.
local ipa_nucleus = {
	a = "ɑː",
	["ă"] = "ɐ",
	e = "ɛː",
	["ĕ"] = "e",
	i = "iː",
	["ĭ"] = "ɪ",
	o = "ɔː",
	["ŏ"] = "o",
	oe = "œː",
	eo = "ɵ",
	u = "uː",
	["ŭ"] = "ʊ",
	y = "yː",
}

-- Coda -> IPA. The empty key covers open syllables.
local ipa_coda = {
	i = "i̯",
	u = "u̯",
	y = "y̯",
	m = "m",
	n = "n",
	ng = "ŋ",
	p = "p̚",
	t = "t̚",
	k = "k̚",
	[""] = "",
}

-- Tone digit -> superscript tone letters used in the IPA output. Digits 7-9
-- are the values produced by entering_tones; the empty key is used when a
-- syllable has no second (changed) tone.
-- Tone 1 previously carried a trailing space ("⁵⁵ "), unlike every other
-- entry, which produced doubled spaces between syllables and a stray space
-- before the sandhi marker.
local ipa_tone = {
	["1"] = "⁵⁵",
	["2"] = "³⁵",
	["3"] = "³³",
	["4"] = "²¹",
	["5"] = "²³",
	["6"] = "²²",
	["7"] = "⁵",
	["8"] = "³",
	["9"] = "²",
	[""] = "",
}

-- Marker written between an original tone and its changed tone in IPA output.
local ipa_tone_sandhi = {
	["-"] = "⁻",
	[""] = "",
}

-- IPA for the syllabic nasals.
local ipa_syllabic = {
	m = "m̩",
	ng = "ŋ̍",
}

-- Plain vowel -> vowel with acute accent (used by yale_tone for tones 2 and 5).
local acute_accents = {
	a = "á",
	e = "é",
	i = "í",
	o = "ó",
	u = "ú",
}

-- Plain vowel -> vowel with grave accent (used by yale_tone for tone 4).
local grave_accents = {
	a = "à",
	e = "è",
	i = "ì",
	o = "ò",
	u = "ù",
}

-- Plain vowel -> vowel with macron (used by yale_tone for tone 1).
local macrons = {
	a = "ā",
	e = "ē",
	i = "ī",
	o = "ō",
	u = "ū",
}

-- Finals whose spelling changes when converting to Jyutping. Finals that are
-- not listed are left as they are by gsub's table lookup.
local jyutping_final = {
	a = "aa",
	el = "eu",
	eu = "oe",
	eui = "eoi",
	eun = "eon",
	eung = "oeng",
	eut = "eot",
	euk = "oek",
	[""] = "",
}

-- Finals whose spelling changes when converting to the Sidney Lau scheme.
local sidney_final = {
	o = "oh",
	ou = "o",
	el = "eu",
	u = "oo",
	ui = "ooi",
	un = "oon",
	ut = "oot",
	eui = "ui",
	eun = "un",
	eut = "ut",
	eu = "euh",
	yu = "ue",
	yun = "uen",
	yut = "uet",
	[""] = "",
}

-- Finals whose spelling changes when converting to Cantonese Pinyin.
local canton_pinyin_final = {
	a = "aa",
	el = "eu",
	eu = "oe",
	eui = "oey",
	eun = "oen",
	eung = "oeng",
	eut = "oet",
	euk = "oek",
	[""] = "",
}

-- Syllabic "m" plus tone digit -> spelling with the tone written as a diacritic.
local m_tone = {
	["m1"] = "m̄", ["m2"] = "ḿ", ["m3"] = "m", ["m4"] = "m̀", ["m5"] = "ḿh", ["m6"] = "mh",
	["M1"] = "M̄", ["M2"] = "Ḿ", ["M3"] = "M", ["M4"] = "M̀", ["M5"] = "Ḿh", ["M6"] = "Mh",
}

-- Syllabic "ng" plus tone digit -> spelling with the tone written as a diacritic.
local ng_tone = {
	["ng1"] = "n̄g", ["ng2"] = "ńg", ["ng3"] = "ng", ["ng4"] = "ǹg", ["ng5"] = "ńgh", ["ng6"] = "ngh",
	["Ng1"] = "N̄g", ["Ng2"] = "Ńg", ["Ng3"] = "Ng", ["Ng4"] = "Ǹg", ["Ng5"] = "Ńgh", ["Ng6"] = "Ngh",
}

--- Write tone `b` onto a final using diacritics instead of a digit.
-- @param final the final (vowels plus optional coda), without tone digit
-- @param b     the tone digit as a string, "1".."6"
-- @return the final with the tone encoded: tones 5 and 6 get an "h" inserted
--         before the trailing consonant(s), tone 1 a macron, tone 4 a grave
--         accent and tones 2 and 5 an acute accent on the first vowel
local function yale_tone(final, b)
	if b == "5" or b == "6" then
		-- The "h" goes before the coda, or at the very end when there is none.
		final = mw.ustring.gsub(final, "([ptkmn]?g?)$", "h%1", 1)
	end
	if b == "1" then
		final = mw.ustring.gsub(final, "[aeiou]", macrons, 1)
	end
	if b == "4" then
		final = mw.ustring.gsub(final, "[aeiou]", grave_accents, 1)
	end
	if b == "2" or b == "5" then
		final = mw.ustring.gsub(final, "[aeiou]", acute_accents, 1)
	end
	return final
end

-- Convert one already-normalised syllable (lower case, Jyutping-like finals,
-- trailing tone digits) to IPA. Raises an error for a syllable that matches
-- neither accepted syllable shape.
local function yale_syllable_to_ipa(syl)
	syl = syl:gsub("([zcs])yu", "%1hyu")
	syl = syl:gsub("([zc])oe", "%1hoe")
	syl = syl:gsub("([zc])eo", "%1heo")

	local is_regular = mw.ustring.match(syl, "^[bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?[aeiouy]+[mnptk]?g?[1-9][%-%*]?[1-9]?$")
	local is_syllabic = mw.ustring.match(syl, "^h?[mn]g?[1-9][%-%*]?[1-9]?$")
	-- Strings without any letter or digit are passed through untouched.
	if not is_regular and not is_syllabic and mw.ustring.match(syl, "[a-z1-9]") then
		error("粵語拼音錯誤")
	end

	-- Syllabic nasals are converted in one step. Every table lookup is checked
	-- so that an unsupported value gives a readable message rather than an
	-- "attempt to concatenate a nil value" error.
	syl = syl:gsub("^(h?)([mn]g?)([1-6])([%-%*]?)([1-6]?)$",
		function(a, b, c, d, e)
			return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
				(ipa_syllabic[b] or error(("Unrecognised syllabic consonant: \"%s\""):format(b))) ..
				(ipa_tone[c] or error(("Unrecognised tone: \"%s\""):format(c))) ..
				(ipa_tone_sandhi[d] or error(("Unrecognised tone sandhi marker: \"%s\""):format(d))) ..
				(ipa_tone[e] or error(("Unrecognised tone: \"%s\""):format(e)))
		end)

	-- Renumber tones 1/3/6 after a final p/t/k, first for the original tone
	-- and then for the changed tone that follows a sandhi marker.
	syl = syl:gsub("([ptk])([136])",
		function(a, b) return a .. entering_tones[b] end)
	syl = syl:gsub("([ptk][1-9][%-%*])([136])$",
		function(a, b) return a .. entering_tones[b] end)

	for _, regex_pair in ipairs(ipa_preprocess) do
		syl = mw.ustring.gsub(syl, regex_pair[1], regex_pair[2])
	end

	syl = mw.ustring.gsub(syl, "([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)",
		function(a, b, c, d, e, f)
			return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
				(ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b))) ..
				(ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c))) ..
				(ipa_tone[d] or error(("Unrecognised tone: \"%s\""):format(d))) ..
				(ipa_tone_sandhi[e] or error(("Unrecognised tone sandhi marker: \"%s\""):format(e))) ..
				(ipa_tone[f] or error(("Unrecognised tone: \"%s\""):format(f)))
		end)
	return syl
end

--- Convert a romanised reading with tone digits to IPA.
-- @param text the romanisation, or a frame-like table whose args[1] holds it.
--             A comma that is not followed by a space separates alternative
--             readings; "..." is treated as a space.
-- @return the IPA readings joined with "/, /"
-- Raises an error when a syllable is malformed or contains an unknown
-- initial, nucleus, coda, tone or tone sandhi marker.
function export.yale_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end

	text = text:lower()
	text = text:gsub("ch", "c"):gsub("j", "z")
	text = text:gsub("y([^u])", "j%1")
	text = text:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)", jyutping_final)
	text = text:gsub("%.%.%.", " ")
	-- Only commas without a following space split readings; ", " is restored.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")

	local reading = mw.text.split(text, "隔")
	for i = 1, #reading do
		-- Put a space after every tone digit that is directly followed by a
		-- letter, then drop everything that is not a letter, tone digit,
		-- hyphen or space.
		local cleaned = reading[i]:gsub("([1-6])([a-z])", "%1 %2"):gsub("[^a-z1-6%- ]", "")
		local syllable = mw.text.split(cleaned, " ")
		for j = 1, #syllable do
			syllable[j] = yale_syllable_to_ipa(syllable[j])
		end
		reading[i] = table.concat(syllable, " ")
	end
	return table.concat(reading, "/, /")
end

--- Convert a Yale romanisation with tone digits to Jyutping.
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the converted string
function export.yale_to_jyutping(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- A sentinel character is prepended so that the "non-letter before yu"
	-- rule below also applies at the very start; it is removed again at the end.
	local s = ("隔" .. text):gsub("[1-6]%-", "")
	s = s:gsub("ch", "c"):gsub("Ch", "C")
	s = s:gsub("j", "z"):gsub("J", "Z")
	s = s:gsub("y([^u])", "j%1"):gsub("Y([^u])", "J%1")
	s = s:gsub("([^a-zA-Z])yu", "%1jyu"):gsub("([^a-zA-Z])Yu", "%1Jyu")
	s = s:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)", jyutping_final)
	return (s:gsub("隔", ""))
end

--- Convert a Yale romanisation with tone digits to the Sidney Lau scheme.
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the converted string
function export.yale_to_sidney(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local s = "隔" .. text
	s = s:gsub("([^a-zA-Z])yu", "%1yyu"):gsub("([^a-zA-Z])Yu", "%1Yyu")
	s = s:gsub("yu", "ue")
	s = s:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)", sidney_final)
	s = s:gsub("%-2", "*")
	s = s:gsub("%-1", "°")
	s = s:gsub("1", "1°")
	-- NOTE(review): this replaces each tone group with itself and therefore
	-- changes nothing; presumably markup around "%1" was lost - confirm.
	s = s:gsub("([°*1-6]+)", "%1")
	return (s:gsub("隔", ""))
end

--- Convert a Yale romanisation with tone digits to Cantonese Pinyin.
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the converted string
function export.yale_to_cantonese_pinyin(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local s = "隔" .. text
	s = s:gsub("ch", "ts"):gsub("Ch", "Ts")
	s = s:gsub("j", "dz"):gsub("J", "Dz")
	s = s:gsub("([^a-zA-Z])yu", "%1jyu"):gsub("([^a-zA-Z])Yu", "%1Jyu")
	s = s:gsub("y([^u])", "j%1"):gsub("Y([^u])", "J%1")
	s = s:gsub("yu", "y"):gsub("Yu", "Y")
	s = s:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)", canton_pinyin_final)
	-- Tones 1/3/6 directly after p/t/k are renumbered via entering_tones.
	s = s:gsub("([ptk])([136])", function(stop, digit)
		return stop .. entering_tones[digit]
	end)
	-- NOTE(review): self-replacement, no effect as written; presumably markup
	-- around "%1" was lost - confirm.
	s = s:gsub("([1-9][%-]?[1-9]?)", "%1")
	return (s:gsub("隔", ""))
end

--- Convert a Yale romanisation with tone digits to the Guangdong scheme.
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the converted string
function export.yale_to_guangdong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local s = ("隔" .. text):gsub("[1-6]%-", "")

	-- Vowel respelling; the order of these substitutions matters.
	s = s:gsub("eui", "êu")
	s = s:gsub("eu", "ê")
	s = s:gsub("e", "é")
	s = s:gsub('([^a])a([1-6])', "%1aa%2")
	s = s:gsub("([^a])a([^a1-6])", "%1e%2")
	s = s:gsub("aa", "a")

	s = s:gsub("j", "z"):gsub("J", "Z"):gsub("([Cc])h", "%1")
	s = s:gsub("([^a-zA-Z])yu", "%1yyu"):gsub("([^a-zA-Z])Yu", "%1Yyu")
	s = s:gsub("yu", "ü")

	-- z/c/s are written j/q/x when the pattern below matches them before i or ü.
	local palatalised = { ['z']='j', ['c']='q', ['s']='x', ['Z']='J', ['C']='Q', ['S']='X' }
	s = s:gsub('([zcsZCS])([iü])', function(sibilant, vowel)
		return palatalised[sibilant] .. vowel
	end)
	s = s:gsub('([jqxyêJQXY])ü', '%1u')
	s = s:gsub('([kg])w', '%1u')
	s = s:gsub('au', 'ao'):gsub('el', 'eo')

	-- A p/k/t directly before a digit is written b/g/d.
	local voiced_stop = { ['p']='b', ['k']='g', ['t']='d' }
	s = s:gsub('([pkt])(%d)', function(stop, digit)
		return voiced_stop[stop] .. digit
	end)
	s = s:gsub('%d%-(%d)', '%1')
	-- NOTE(review): self-replacement, no effect as written; presumably markup
	-- around "%1" was lost - confirm.
	s = s:gsub("([1-6])", "%1")
	return (s:gsub("隔", ""))
end

-- Patterns marking a boundary between two tone-numbered syllables where an
-- apostrophe is inserted (presumably because the boundary would otherwise be
-- ambiguous once the tone digits are replaced by diacritics - confirm).
-- They are applied strictly in this order; each one captures the left
-- syllable end and the right syllable start.
local yale_apostrophe_rules = {
	"([aeiou]ng[1-6])([aeouyw][1-6aeioumnptk])",
	"([aeiou][1-6])(ng[aeou][1-6aiumnptk])",
	"([aeiou]n[1-6])(g[aeouyw][1-6aeioumnptk])",
	"([^a][aeiu]m[1-6])([aeou][1-6aiumnptk])",
	"([aeiu][1-6])(m[aeou][1-6aiumnptk])",
	"([^a][aeiou]n[1-6])([aeouyg][1-6aeioumnptk])",
	"([aeiou][1-6])(n[aeouyg][1-6aeioumnptk])",
	"([^a][aeiou]p[1-6])([aeou][1-6aiunptk])",
	"([aeiu][1-6])(p[aeou][1-6aiunptk])",
	"([^a][aeiou]t[1-6])([aeouy][1-6aiumnptk])",
	"([^e][aeiou][1-6])(t[aouy][1-6aiumnptk])",
	"([^a][aeiou]k[1-6])([aeouyw][1-6aioumnptk])",
	"([aeiou][1-6])(k[aeouyw][1-6aioumnptk])",
}

--- Rewrite a Yale romanisation with tone digits so that tones are written
-- with diacritics (and "h" for tones 5 and 6).
-- Declared `local` so that it no longer leaks into the global environment;
-- the functions defined after it in this file still see it.
-- @param text Yale romanisation using tone digits 1-6
-- @return the romanisation with diacritic tone marks
local function yale_to_diacritic(text)
	-- Drop the original tone of "original-changed" tone pairs and prepend a
	-- sentinel so that a syllabic m/ng at the very start has a preceding
	-- non-letter to match against.
	text = "隔" .. text:gsub("[1-6]%-", "")

	for _, rule in ipairs(yale_apostrophe_rules) do
		text = text:gsub(rule, "%1'%2")
	end

	-- Stand-alone syllabic nasals. The patterns only capture keys that exist
	-- in m_tone / ng_tone, so a direct lookup is sufficient.
	text = text:gsub("([^A-Za-z])([Mm][1-6])", function(a, b) return a .. m_tone[b] end)
	text = text:gsub("([^A-Za-z])([Nn]g[1-6])", function(a, b) return a .. ng_tone[b] end)

	text = text:gsub("隔", "")

	text = text:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])", yale_tone)
	text = text:gsub("(yu[tn]?)([1-6])", yale_tone)
	return text
end

--- Validate a Yale romanisation with tone digits and return it as
-- "<diacritic form> (<tone-number form>)".
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the formatted string; comma-separated readings are re-joined with ", "
-- Raises an error when the input contains a tone digit 7-9 or any sequence
-- rejected by the checks below.
function export.yale_format(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	if text:match("[7-9]") then
		error("粵語拼音聲調錯誤")
	end

	-- Each pattern describes something that must not occur in the input.
	local forbidden = {
		"[^1-6a-zA-Z %-&,]",
		"[qrvxzQRVXZ]",
		"[cC][^h]",
		"aa[1-6]",
		"eo",
		"oe",
	}
	for _, pattern in ipairs(forbidden) do
		if text:match(pattern) then
			error("粵語拼音錯誤")
		end
	end

	local numbered = mw.clone(text)
	local with_marks = yale_to_diacritic(text)

	local reading = mw.text.split(with_marks, ",")
	local reading_num = mw.text.split(numbered, ",")
	for idx = 1, #reading_num do
		-- NOTE(review): the first substitution replaces every digit with
		-- itself, and in " -- " the first "-" is a lazy quantifier on the
		-- preceding space rather than a literal hyphen; presumably markup was
		-- lost from these strings - confirm.
		local formatted = reading_num[idx]:gsub("([1-6])", "%1"):gsub(" -- ", "-")
		reading_num[idx] = "" .. formatted .. ""
	end

	return table.concat(reading, ", ") .. " (" .. table.concat(reading_num, ", ") .. ")"
end

--- Public entry point for the tone-digit to diacritic conversion.
-- Like the other exported functions it now also accepts a frame-like table
-- and reads the romanisation from args[1]; plain strings behave as before.
-- @param text the romanisation, or a frame-like table whose args[1] holds it
-- @return the romanisation with diacritic tone marks
function export.yale_to_diacritic(text)
	if type(text) == "table" then text = text.args[1] end
	return yale_to_diacritic(text)
end

-- Correspondence tables used by export.hoi_convert.
-- Every row of `initial` and `final` lists the spelling in four schemes, in
-- this column order (taken from the original header comment):
--   ['wiktionary'] = { 'ipa', 'gene chin', 'DLI', "stephen li's simplified IPA" }
-- In the Gene and DLI columns of `final`, '符' marks the position where
-- hoi_convert substitutes the tone diacritic.
-- The empty-string keys and values had been stripped from this table (leaving
-- invalid syntax) and are restored here.
local corresp = {
	['initial'] = {
		['']   = { '',    '',   '',    ''   },
		['b']  = { 'p',   'b',  'p',   'p'  },
		['p']  = { 'pʰ',  'p',  'p’',  'p'  },
		['m']  = { 'm',   'm',  'm',   'm'  },
		['f']  = { 'f',   'f',  'f',   'f'  },
		['v']  = { 'v',   'v',  'w',   'v'  },

		['d']  = { 't',   'd',  't',   'd'  },
		['t']  = { 'tʰ',  't',  't’',  't'  },
		['n']  = { 'ⁿd',  'n',  'n',   'n'  },
		['l']  = { 'l',   'l',  'l',   'l'  },
		['x']  = { 'ɬ',   'x',  'lh',  'ɬ'  },

		['g']  = { 'k',   'g',  'k',   'g'  },
		['k']  = { 'kʰ',  'k',  'k’',  'k'  },
		['ng'] = { 'ŋ',   'ng', 'ng',  'ŋ'  },

		['j']  = { 't͡s',  'j',  'ch',  'dz' },
		['ch'] = { 't͡sʰ', 'ch', 'ch’', 'ts' },
		['y']  = { 'ʒ',   'y',  'y',   'y'  },

		['s']  = { 's',   's',  's',   's'  },
		['h']  = { 'h',   'h',  'h',   'h'  },
	},

	['final'] = {
		['']    = { '',    '',      '',       ''    },

		['a']   = { 'a',   'a符',   'a符',    'a'   },
		['ai']  = { 'ai',  'a符i',  'aai符',  'ai'  },
		['au']  = { 'au',  'a符o',  'aau符',  'ɔu'  },
		['am']  = { 'am',  'a符m',  'aa符m',  'am'  },
		['an']  = { 'an',  'a符n',  'aa符n',  'an'  },
		['ang'] = { 'aŋ',  'a符ng', 'aa符ng', 'aŋ'  },
		['ap']  = { 'ap̚',  'a符p',  'aa符p',  'ap'  },
		['at']  = { 'at̚',  'a符t',  'aa符t',  'at'  },
		['ak']  = { 'ak̚',  'a符k',  'aa符k',  'ak'  },

		['i']   = { 'i',   'i符',   'i符',    'i'   },
		['iu']  = { 'iu',  'iu符',  'iu符',   'iu'  },
		['im']  = { 'im',  'i符m',  'i符m',   'im'  },
		['in']  = { 'in',  'i符n',  'i符n',   'in'  },
		['ing'] = { 'iŋ',  'i符ng', 'i符ng',  'iŋ'  }, -- only DLI has this
		-- The two rows below had been swallowed by the comment above when the
		-- file's lines were merged; they are active entries again.
		['ip']  = { 'ip̚',  'i符p',  'i符p',   'ip'  },
		['it']  = { 'it̚',  'i符t',  'i符t',   'it'  },

		['ie']  = { 'iɛ',  'e符h',  'e符',    'ia'  },
		['iau'] = { 'iau', 'e符l',  'iau符',  'iau' },
		['iam'] = { 'iam', 'e符m',  'ie符m',  'iam' },
		['ian'] = { 'ian', 'e符n',  'ie符n',  'ian' },
		['iang'] = { 'iaŋ', 'e符ng', 'ia符ng', 'iaŋ' },
		['iap'] = { 'iap̚', 'e符p',  'ie符p',  'iap' },
		['iak'] = { 'iak̚', 'e符k',  'ia符k',  'iak' },

		['u']   = { 'u',   'u符',   'oo符',   'u'   },
		['ui']  = { 'ui',  'ui符',  'ooi符',  'ui'  },
		['un']  = { 'un',  'u符n',  'oo符n',  'un'  },
		['ung'] = { 'uŋ',  'u符ng', 'u符ng',  'ǝŋ'  },
		['ut']  = { 'ut̚',  'u符t',  'oo符t',  'ut'  },
		['uk']  = { 'uk̚',  'u符k',  'u符k',   'uk'  }, -- Stephen Li's scheme does not have this

		['ei']  = { 'ei',  'i符',   'ei符',   'i'   },
		['eu']  = { 'eu',  'e符o',  'aau符',  'ǝu'  },
		['em']  = { 'em',  'e符im', '?',      'ǝm'  },
		['en']  = { 'en',  'e符in', 'ie符n',  'ein' },
		['ep']  = { 'ep̚',  'e符ip', '?',      'ǝp'  },
		['et']  = { 'et̚',  'e符ik', '?',      'ɛt'  },

		['o']   = { 'ɔ',  'o符',   'o符',    'ɔ'   },
		['oi']  = { 'ɔi', 'o符i',  'oi符',   'ɔi'  },
		['on']  = { 'ɔn', 'o符n',  'o符n',   'ɔn'  },
		['ong'] = { 'ɔŋ', 'o符ng', 'o符ng',  'ɔŋ'  },
		['ot']  = { 'ɔt̚', 'o符t',  'o符t',   'ɔt'  },
		['ok']  = { 'ɔk̚', 'o符k',  'o符k',   'ɔk'  },

		['m']   = { 'm̩',   'm符',   'm符',    'm'   },
	},

	-- Tone marks / tone numbers, indexed by tone number 1-5.
	['tone_gene'] = { '̄', '̈', '̃', '̀', '̂'},
	['tone_dli'] = { '', '̀', '̄', '̂', '̣̄' },
	['tone_stephen'] = { '55', '33', '22', '32', '21' },
	['tone_change_stephen'] = { '55', '335', '225', '325', '215' },
}

-- Column of the `corresp` rows that belongs to each supported scheme name.
local hoi_scheme_column = { IPA = 1, Gene = 2, DLI = 3, Stephen = 4 }

-- Look up one cell of corresp[kind] ('initial' or 'final') and raise a
-- readable error when the key is unknown.
local function hoi_lookup(kind, key, column)
	local row = corresp[kind][key]
	if not row then
		error(('Unrecognised %s: "%s"'):format(kind, key))
	end
	return row[column]
end

--- Convert a romanisation with tone digits 1-5 into another scheme.
-- @param text   the romanisation, or a frame-like table whose args[1] holds
--               the romanisation and args[2] the scheme. Readings are
--               separated by ",", words by a space; a tone may be written
--               "original-changed" (only the changed tone is kept) and "*"
--               after a tone digit marks a changed tone.
-- @param scheme 'IPA', 'Gene', 'DLI' or 'Stephen'; for any other value no
--               syllable is converted and the result holds only separators
-- @return the converted readings joined with ", "
-- Raises an error for leading/trailing whitespace, for a syllable that cannot
-- be parsed, and for an initial or final missing from `corresp`.
function export.hoi_convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	-- Collapse runs of whitespace into one space. The previous replacement
	-- string '%s' is not a whitespace class on the replacement side: Lua 5.1
	-- inserts a literal "s" and later versions raise an error.
	text = text:gsub('%s%s+', ' ')
	text = text:gsub("[1-5]%-", "")
	-- Move the changed-tone marker in front of its digit: "3*" -> "*3".
	text = text:gsub("([1-5])%*", "*%1")

	-- This check only depends on `text`, so it is done once up front.
	if text:match('^%s') or text:match('%s$') then
		error('invalid spacing')
	end

	-- The IPA tone letters are currently the same with and without the
	-- changed-tone marker; both tables are kept so they can diverge.
	local ipa_tones = { "⁵⁵", "³³", "²²", "³²", "²¹" }
	local ipa_changed_tones = { "⁵⁵", "³³", "²²", "³²", "²¹" }

	local column = hoi_scheme_column[scheme]
	local result = {}
	for reading in itersplit(text, ',') do
		-- Allow readings written as "a, b": without trimming, the space after
		-- the comma would yield an empty word that cannot be parsed.
		reading = reading:gsub('^%s+', ''):gsub('%s+$', '')

		local word_result = {}
		for word in itersplit(reading, ' ') do
			local syllable_result = {}
			-- Every tone digit ends a syllable; split the word after each one.
			word = word:gsub("([1-5])", "%1 ")
			word = word:gsub('^%s', ''):gsub('%s$', '')

			for syllable in itersplit(word, ' ') do
				local uppercase = syllable:match('^%u') ~= nil
				syllable = syllable:lower()

				local initial, final, tone_ch, tone = mw.ustring.match(syllable, '([bpmfvdtnlxgkjcysh]?[hg]?)([aeioumngptk]+)([*]?)(%d)')
				if not initial then
					error(('invalid syllable: "%s"'):format(syllable))
				end
				if final == '' then
					final, initial = initial, ''
				end

				if column then
					local tone_index = tonumber(tone)
					initial = hoi_lookup('initial', initial, column)
					final = hoi_lookup('final', final, column)

					if scheme == 'IPA' then
						if tone_ch ~= '' then
							tone = ipa_changed_tones[tone_index]
						else
							tone = ipa_tones[tone_index]
						end
						table.insert(syllable_result, initial .. final .. tone)
					else
						-- Only the non-IPA schemes keep the capitalisation.
						if uppercase then
							initial = initial:gsub('^%l', mw.ustring.upper)
						end

						if scheme == 'Gene' then
							final = replace(final, '符', corresp.tone_gene[tone_index])
							if tone_ch == '*' then
								tone_ch = '/'
							end
							table.insert(syllable_result, initial .. final .. tone_ch)
						elseif scheme == 'DLI' then
							final = replace(final, '符', corresp.tone_dli[tone_index])
							table.insert(syllable_result, initial .. final .. tone_ch)
						else -- 'Stephen'
							if tone_ch ~= '' then
								tone = corresp.tone_change_stephen[tone_index]
							else
								tone = corresp.tone_stephen[tone_index]
							end
							table.insert(syllable_result, initial .. final .. to_super(tone))
						end
					end
				end
			end
			table.insert(word_result, table.concat(syllable_result, ''))
		end
		table.insert(result, table.concat(word_result, ' '))
	end
	return table.concat(result, ', ')
end

-- Hand the table of public functions to whoever loads this module.
return export