-- Module:yue-pron

local export = {} local m_string_utils = require("Module:string utilities")

local gsplit = m_string_utils.gsplit local gsub = m_string_utils.gsub local len = m_string_utils.len local lower = m_string_utils.lower local split = m_string_utils.split

-- Stop codas for which jyutping_to_ipa remaps the tone number through
-- `entering_tones`.
local entering = {
	["p"] = 1,
	["t"] = 1,
	["k"] = 1,
}

-- Tone number used in place of tones 1, 3 and 6 when the syllable ends in one
-- of the codas listed in `entering`.
local entering_tones = {
	["1"] = "7",
	["3"] = "8",
	["6"] = "9",
}

-- Finals whose nucleus is rewritten to an "r"-marked allophone key (looked up
-- in `ipa_nucleus`) before conversion to IPA.
local ipa_allophones = {
	["ei"] = "eri",
	["eoi"] = "eoy",
	["ing"] = "irng",
	["ik"] = "irk",
	["ou"] = "oru",
	["ung"] = "urng",
	["uk"] = "urk",
}

-- Jyutping initial -> IPA. The empty string stands for "no initial".
local ipa_initial = {
	["b"] = "p",
	["p"] = "pʰ",
	["m"] = "m",
	["f"] = "f",
	["d"] = "t",
	["t"] = "tʰ",
	["n"] = "n",
	["l"] = "l",
	["g"] = "k",
	["k"] = "kʰ",
	["ng"] = "ŋ",
	["gw"] = "kʷ",
	["kw"] = "kʷʰ",
	-- ["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
	["z"] = "t͡s",
	["c"] = "t͡sʰ",
	["s"] = "s",
	["h"] = "h",
	["w"] = "w",
	["j"] = "j",
	[""] = ""
}

-- Jyutping nucleus -> IPA.
-- vowels with "r" only appear as allophones and should not appear in the input
local ipa_nucleus = {
	["aa"] = "aː",
	["a"] = "ɐ",
	["e"] = "ɛː",
	["er"] = "e",
	["i"] = "iː",
	["ir"] = "ɪ",
	["o"] = "ɔː",
	["or"] = "o",
	["oe"] = "œː",
	["eo"] = "ɵ",
	["u"] = "uː",
	["ur"] = "ʊ",
	["yu"] = "yː"
}

-- Jyutping coda -> IPA. The empty string stands for "no coda".
local ipa_coda = {
	i = "i̯",
	u = "u̯",
	y = "y̯",
	m = "m",
	n = "n",
	ng = "ŋ",
	p = "p̚",
	t = "t̚",
	k = "k̚",
	[""] = "",
}

-- Tone number -> superscript tone letters. Keys 7-9 are the values produced by
-- `entering_tones`; the empty key is used when no second tone is given.
-- Tones 1 and 4 previously carried a trailing space, which leaked into the
-- generated IPA (e.g. a blank before the closing slash).
local ipa_tone = {
	["1"] = "⁵⁵",
	["2"] = "³⁵",
	["3"] = "³³",
	["4"] = "²¹",
	["5"] = "¹³",
	["6"] = "²²",
	["7"] = "⁵",
	["8"] = "³",
	["9"] = "²",
	[""] = ""
}

-- Symbol written between the original and the changed tone: the "-" of the
-- input becomes a superscript minus; no symbol stays empty.
local ipa_tone_sandhi = {
	[""] = "",
	["-"] = "⁻",
}

-- Syllabic nasals -> IPA.
local ipa_syllabic = {
	m = "m̩",
	ng = "ŋ̍",
}

-- display `main`, but show `option` on hovering
-- Both arguments are concatenated into the markup as-is, so they must be
-- strings (or numbers). `option` goes into the `title` attribute, which is
-- what provides the hover text.
local function alt(main,option)
	return '<span title="' .. option .. '">' .. main .. '</span>'
end

-- Accented forms of the letters that can carry a Yale tone mark
-- (the five vowels plus the syllabics "m" and "ng").

-- Acute accent.
local acute = {
	["a"] = "á",
	["e"] = "é",
	["i"] = "í",
	["o"] = "ó",
	["u"] = "ú",
	["m"] = "ḿ",
	["ng"] = "ńg",
}

-- Grave accent.
local grave = {
	["a"] = "à",
	["e"] = "è",
	["i"] = "ì",
	["o"] = "ò",
	["u"] = "ù",
	["m"] = "m̀",
	["ng"] = "ǹg",
}

-- Macron.
local macron = {
	["a"] = "ā",
	["e"] = "ē",
	["i"] = "ī",
	["o"] = "ō",
	["u"] = "ū",
	["m"] = "m̄",
	["ng"] = "n̄g",
}

-- Jyutping finals whose Yale spelling differs from the Jyutping one.
-- "?" indicates finals that are not supported by Yale
local yale_final = {
	["a"] = "?",
	["aa"] = "a",
	["eu"] = "?",
	["em"] = "?",
	["en"] = "?",
	["ep"] = "?",
	["et"] = "?",
	["om"] = "?",
	["op"] = "?",
	["um"] = "?",
	["up"] = "?",
	["oe"] = "eu",
	["oem"] = "?",
	["oen"] = "?",
	["oeng"] = "eung",
	["oep"] = "?",
	["oet"] = "?",
	["oek"] = "euk",
	["eoi"] = "eui",
	["eon"] = "eun",
	["eot"] = "eut",
}

-- The core function to handle conversion to Yale.
-- For non-syllabics, there is exactly one vowel cluster in the syllable:
-- the first vowel is inputted as `a`, and the rest of the vowels is `b`, and `t` is the tone.
--- (e.g. "keui" -> a="e",b="ui"; "keung" -> a="e",b="u")
-- Conversion rule:
--- if `t` is 4, 5, or 6, then "h" is inserted after `b`.
--- if `t` is 1, 2, 4, or 5, then the corresponding accent is put on `a`.
-- (finally, for syllabics, the whole syllabic is `a`, and `b` is empty)
--
-- `t` is compared against the strings "1".."6"; any other value leaves `a`
-- unaccented and adds no "h".
local function yale_tone(a,b,t)
	local h = ""
	if t == "4" or t == "5" or t == "6" then
		h = "h"
	end

	-- The three accent cases are mutually exclusive.
	if t == "1" then
		-- macron is displayed, the grave form is offered as the alternative
		a = alt(macron[a], grave[a])
	elseif t == "4" then
		a = grave[a]
	elseif t == "2" or t == "5" then
		a = acute[a]
	end

	return a..b..h
end

-- Convert Jyutping to IPA.
-- `text` is either the Jyutping string itself or a table whose `args[1]` is
-- that string. Every run of lowercase letters followed by a tone digit
-- (optionally "-" and a second tone digit) is converted as one syllable.
-- Raises an error for syllables, initials, nuclei, codas or tone digits that
-- cannot be looked up.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- Convert one matched syllable; used as the gsub callback below.
	local function convert_syllable(main, tone, symbol, tone2)
		-- try initial+final
		local initial, final = main:match("^([bpmfdtnlgknzcshwj]?[gw]?)([aeiouy]%l*)$")
		if not initial then
			-- otherwise try initial+syllabic
			local syllabic
			initial, syllabic = main:match("^(h?)([mn]g?)$")
			if not initial then
				error("Invalid Jyutping syllable: " .. main)
			end
			main = ipa_initial[initial]
				.. (ipa_syllabic[syllabic] or error("Unrecognised syllabic: " .. syllabic)) -- really?
		else
			-- e.g. convert "ei" (which would be */ɛːi̯/) to "eri" (/ei̯/)
			final = ipa_allophones[final] or final
			local nucleus, coda = final:match("^(y?[aeiou][aeor]?)([iuymnptk]?g?)$")
			if not nucleus then
				error("Invalid Jyutping final: " .. final)
			end
			if entering[coda] then
				tone = entering_tones[tone] or tone
				tone2 = entering_tones[tone2] or tone2
			end
			main = (ipa_initial[initial] or error(("Unrecognised initial: \"%s\""):format(initial)))
				.. (ipa_nucleus[nucleus] or error(("Unrecognised nucleus: \"%s\""):format(nucleus)))
				.. (ipa_coda[coda] or error(("Unrecognised coda: \"%s\""):format(coda)))
		end
		-- An unknown tone digit (e.g. "0") would otherwise fail with an opaque
		-- "attempt to concatenate a nil value".
		return main
			.. (ipa_tone[tone] or error(("Unrecognised tone: \"%s\""):format(tone)))
			.. ipa_tone_sandhi[symbol]
			.. (ipa_tone[tone2] or error(("Unrecognised tone: \"%s\""):format(tone2)))
	end

	-- Only the first gsub result (the string) is kept by the assignment.
	text = text:gsub(", "," ")
		:gsub("%.%.%.", " ")
		:gsub(" $","")
		:gsub(" / ","/, /")
		-- :gsub("([zcs])yu", "%1hyu")
		-- :gsub("([zcs])oe", "%1hoe")
		-- :gsub("([zcs])eo", "%1heo")
		:gsub("(%l+)(%d)(%-?)(%d?)", convert_syllable)
	return text
end

-- Convert Jyutping to Yale.
-- `text` is either the Jyutping string itself or a table whose `args[1]` is
-- that string. Returns the converted string, or `false` when any syllable has
-- a final marked "?" in `yale_final`.
function export.jyutping_to_yale(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	text = text:gsub("jy?","y")
		:gsub("[cz]",{z="j",c="ch"})
		--:gsub("[1-6]%-","")
		:gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
			-- when a changed tone is given, only that one is written
			if tone2 ~= "" then
				tone = tone2
			end
			-- find the first vowel letter
			local initial,final = main:match("^([^aeiou]*)([aeiou].*)$")
			local a,b,c
			if initial then
				final = yale_final[final] or final
				if final == "?" then
					return "?"
				end
				a,b,c = final:match("^([aeiou])([aiu]*)([mnptk]?g?)$")
			else -- otherwise it is a syllabic
				initial,a = main:match("(h?)([mn]g?)")
				b,c = "",""
			end
			return initial..yale_tone(a,b,tone)..c
		end)

	-- "?" is a magic character in Lua patterns, so search for it literally.
	if text:find("?", 1, true) then
		return false
	end
	return text
end

-- Convert Jyutping to Cantonese Pinyin.
-- `text` is either the Jyutping string itself or a table whose `args[1]` is
-- that string. Returns the converted string, or `false` when the input
-- contains "oe" + m/n/p/t followed by anything other than "g".
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	if text:find("oe[mnpt][^g]") then
		-- unsupported finals
		return false
	end

	text = text:gsub("yu","y")
		:gsub("eo[int]",{eoi="oey",eon="oen",eot="oet"})
		:gsub("[zc]",{z="dz",c="ts"})
		-- after a p/t/k coda, tones 1/3/6 are written as 7/8/9
		:gsub("([ptk])([1-6])(%-?)([1-6]?)",function(a,b,c,d)
			return a .. (entering_tones[b] or b) .. c .. (entering_tones[d] or d)
		end)
	return text
end

-- Convert Jyutping to Guangdong Romanization.
-- `text` is either the Jyutping string itself or a table whose `args[1]` is
-- that string. Returns the converted string, or `false` when the input
-- matches one of the "unsupported finals" patterns below.
-- The substitutions are order-dependent: later steps operate on the output of
-- earlier ones.
function export.jyutping_to_guangdong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end
	-- unsupported finals
	if text:find("%f[a]a%d")
		or text:find("oe[mnpt][^g]")
		or text:find("[ou][mp]")
		or text:find("e[un][^g]") then
		return false
	end

	text = text:gsub("yu","ü")
		:gsub("j","y")
		:gsub("[zcs]%f[iü]",{z="j",c="q",s="x"}) -- ü=\xC3\xBC
		:gsub("([jqxy])ü","%1u")
		:gsub("eoi","êu")
		:gsub("[aeo][aeo]?",{aa="a",a="e",e="é",oe="ê",eo="ê"})
		:gsub("([ae])u","%1o")
		:gsub("([gk])w","%1u")
		:gsub("[ptk]%f[%d]",{p="b",t="d",k="g"})
	return text
end

-- substitute changed tones for finding homophones
-- A tone pair such as "6-2" is reduced to its second digit, then the text is
-- split on " / " into a list of readings.
function export.jyutping_format(text)
	text = text:gsub("[1-6]%-([1-6])", "%1")
	return split(text," / ")
end

-- Return `text` with a single space appended.
-- NOTE(review): the name suggests this once wrapped the text in font markup;
-- none is present here — confirm against the upstream module.
local function Consolas(text)
	local padded = "" .. text .. " "
	return padded
end

-- Return `text` surrounded by one space on each side.
-- NOTE(review): surrounding markup may have been lost here — confirm against
-- the upstream module.
local function format_IPA(text)
	local before, after = " ", " "
	return before .. text .. after
end

-- Wrap every tone number in `text` in <sup>...</sup>.
-- A tone number is a digit followed by any run of digits, "*" or "-", ending
-- right after a digit or "*" (so a dangling "-" is left outside the tag).
-- The previous replacement was "%0", which re-inserted the match unchanged and
-- therefore produced no superscript at all.
-- The parentheses drop gsub's second return value (the match count).
local function make_superscript(text)
	return (text:gsub("%d[%d%*%-]*%f[^%d%*]", "<sup>%0</sup>"))
end

-- Patterns that must not occur anywhere in the raw Jyutping input, paired with
-- the error raised when they do. Checked in order by `validate`.
local jyutping_checks = {
	{ "[7-9]", "Invalid tone in Jyutping." },
	{ "[A-Z]", "Please do not capitalize the Jyutping." },
	{ "%-[a-z]", "Please do not hyphenate the Jyutping." },
	{ "[0-9][a-z]", "Error in Jyutping: please use spaces to separate syllables." },
	{ "[zcs]h", "'zh'/'ch'/'sh' are non-valid Jyutping, use 'z'/'c'/'s' instead." },
	{ "y[^u]", "Wrong usage of 'y' in Jyutping." },
	{ "oei", "Invalid rime oei in Jyutping. Did you mean eoi?" },
	{ "eong", "Invalid rime eong in Jyutping. Did you mean oeng?" },
	{ "eok", "Invalid rime eok in Jyutping. Did you mean oek?" },
	{ "r", "Invalid letter \"r\" in Jyutping." },
	{ "%d%d", "Invalid Jyutping: please use a hyphen to indicate a changed tone." },
}

-- Raise an error if `c_rom` is not well-formed Jyutping; returns nothing.
-- the only allowed punctuations are:
--- ", ": represents a comma (or a break of any sort)
--- "...": represents a slot where a text can go to (e.g. 一……就……)
--- ",": separates alternate readings
local function validate(c_rom)
	for _, check in ipairs(jyutping_checks) do
		if c_rom:find(check[1]) then
			error(check[2])
		end
	end

	-- Strip the allowed punctuation so that only space-separated syllables
	-- remain: leading "..." and "..." before the end or a comma are dropped,
	-- other "..." and commas become a single space.
	c_rom = c_rom:gsub("^%.%.%.","")
		:gsub("%.%.%.%f[%z,]","")
		:gsub("%.%.%."," ")
		:gsub(", ?"," ")

	-- A single space is the normal syllable separator, so an empty syllable
	-- shows up as a leading, trailing or doubled space. (Testing for one
	-- space here would reject every multi-syllable input.)
	if c_rom:find("^ ") or c_rom:find("  ") or c_rom:find(" $") then
		error("Empty syllable detected.")
	end
	if c_rom:find("[^a-z1-6%- ]") then
		error("Invalid character found.")
	end

	-- ensure that each syllable matches `^%l+%d%-?%d?$`
	for syllable in c_rom:gmatch("%S+") do
		if not syllable:match("^%l+%d%-?%d?$") then
			error("Invalid Jyutping syllable: " .. syllable)
		end
	end
end

-- generate the shown text of Standard Cantonese
-- if the pagename is one character long, then generate links to all the readings
--
-- `c_rom` is validated first (errors propagate), then every "," that is
-- directly followed by a non-space, non-comma character becomes " / ".
function export.show_standard(c_rom, is_single_hanzi)
	validate(c_rom)
	c_rom = c_rom:gsub(",%f[^ ,]"," / ")
	if is_single_hanzi then
		local track = require('Module:debug').track
		for reading in c_rom:gmatch("[^ ,./]+") do
			track('yue-pron/'..reading)
		end
		-- NOTE(review): as written this replacement re-inserts both captures
		-- unchanged, so no link is produced; the link markup mentioned in the
		-- header comment appears to be missing — confirm against upstream.
		c_rom = c_rom:gsub("(%l+)(%d%-?%d?)","%1%2")
	else
		c_rom = make_superscript(c_rom)
	end
	return c_rom
end

-- generate the collapsed text of Standard Cantonese
-- generate all the different romanisations, as well as homophones
--
-- Returns a wikitext fragment with one list item per romanisation. Yale,
-- Cantonese Pinyin and Guangdong Romanization are omitted when their
-- converter returns false.
function export.hide_standard(c_rom, is_single_hanzi)
	c_rom = c_rom:gsub(",%f[^ ,]"," / ")

	-- generate IPA first because the error-catching is located there
	local c_ipa = export.jyutping_to_ipa(c_rom)
	local c_yale = export.jyutping_to_yale(c_rom)
	local c_cp = export.jyutping_to_cantonese_pinyin(c_rom)
	local c_gd = export.jyutping_to_guangdong(c_rom)

	-- `c_hom_exists` ends up as the first reading that has an entry in the
	-- homophone data, or false when none has.
	local c_hom = mw.loadData("Module:yue-pron/hom")
	local c_hom_exists = false
	for _,c_first in ipairs(export.jyutping_format(c_rom)) do
		if c_hom[c_first] then
			c_hom_exists = c_first
			break
		end
	end

	-- Collect the pieces and join once instead of repeated concatenation.
	local parts = {}
	local function add(piece)
		parts[#parts + 1] = piece
	end

	add("\n** (Standard Cantonese, Guangzhou–Hong Kong) ")
	if not c_hom_exists and not is_single_hanzi then
		-- edit link to the homophone data page
		add('  [')
		add(tostring(mw.uri.fullUrl("Module:yue-pron/hom",{["action"]="edit"})))
		add(" +] ")
	end

	add("\n*** Jyutping : ")
	add(Consolas(make_superscript(c_rom)))
	if c_yale then
		add("\n*** Yale : ")
		add(Consolas(c_yale))
	end
	if c_cp then
		add("\n*** Cantonese Pinyin : ")
		add(Consolas(make_superscript(c_cp)))
	end
	if c_gd then
		add("\n*** Guangdong Romanization : ")
		add(Consolas(make_superscript(c_gd)))
	end
	add("\n*** Sinological IPA (key) : ")
	add(format_IPA("/" .. c_ipa .. "/"))

	if c_hom_exists then
		-- The string literal here was unterminated (opened with ' and never
		-- closed), which made the whole module fail to compile.
		-- NOTE(review): the code that rendered the homophone list (presumably
		-- from c_hom[c_hom_exists]) is not present here; only the label is
		-- emitted — restore from upstream.
		add('\n*** Homophones : ')
	end

	return table.concat(parts)
end

-- Template entry point: build the headword line for a Jyutping entry.
-- Reads the optional `head` parameter from the parent frame's arguments and
-- falls back to the page name taken from Module:headword/data.
function export.jyutping_headword(frame)
	-- `getParent` is a method and must be called; `frame:getParent.args`
	-- is a syntax error.
	local args = require("Module:parameters").process(frame:getParent().args, {
		["head"] = {},
	})
	return require("Module:headword").full_headword{
		lang = require("Module:languages").getByCode("yue"),
		sc = require("Module:scripts").getByCode("Latn"),
		heads = {make_superscript(args.head or mw.loadData("Module:headword/data").pagename)},
		pos_category = "jyutping"
	}
end

-- Hand the table of public functions back to whoever loads this module.
return export