-- Module:zhx-sic-pron

local export = {} local m_string_utils = require("Module:string utilities")

local find = m_string_utils.find local gsub = m_string_utils.gsub local match = m_string_utils.match local gmatch = m_string_utils.gmatch local gsplit = mw.text.gsplit local lower = m_string_utils.lower local upper = m_string_utils.upper

-- Sichuanese Pinyin initial -> IPA, used by export.convert for the 'IPA'
-- scheme and by warn() to validate initials.
local initialConv = {
	-- plosives
	["b"] = "p", ["d"] = "t", ["g"] = "k",
	["p"] = "pʰ", ["t"] = "tʰ", ["k"] = "kʰ",
	-- affricates
	["z"] = "t͡s", ["j"] = "t͡ɕ",
	["c"] = "t͡sʰ", ["q"] = "t͡ɕʰ",
	-- nasals
	["m"] = "m", ["n"] = "n", ["ny"] = "nʲ", ["ng"] = "ŋ",
	-- fricatives
	["f"] = "f", ["s"] = "s", ["x"] = "ɕ", ["h"] = "x",
	["w"] = "v", ["r"] = "z",
	-- no initial
	[""] = "",
}

-- Sichuanese Pinyin final -> IPA, used by export.convert for the 'IPA'
-- scheme and by warn() to validate finals.
-- note that 'ir' is for internal use by the code and not used in actual
-- sichuanese pinyin (fix() produces it for "i" after z/c/s/r)
local finalConv = {
	["ir"] = "z̩", ["er"] = "ɚ",

	-- finals without a medial
	["a"] = "a", ["o"] = "o", ["e"] = "ɛ",
	["ai"] = "ai", ["ei"] = "ei", ["ao"] = "au", ["ou"] = "əu",
	["an"] = "an", ["en"] = "ən", ["ang"] = "aŋ", ["ong"] = "oŋ",

	-- finals beginning with i
	["i"] = "i", ["ia"] = "ia", ["ie"] = "iɛ",
	["iai"] = "iɛi", ["iao"] = "iau", ["iu"] = "iəu",
	["ian"] = "iɛn", ["in"] = "in", ["iang"] = "iaŋ",

	-- finals beginning with u
	["u"] = "u", ["ua"] = "ua", ["ue"] = "uɛ",
	["uai"] = "uai", ["ui"] = "uei",
	["uan"] = "uan", ["un"] = "uən", ["uang"] = "uaŋ",

	-- finals pronounced with initial /y/
	["ü"] = "y", ["üo"] = "yo", ["üe"] = "ye",
	["üan"] = "yan", ["ün"] = "yn", ["iong"] = "yoŋ",
}

-- Tone-field character -> superscript form used in the IPA output.
-- export.convert applies this character by character, so a hyphenated tone
-- field such as "2-1" is converted piecewise.
local toneConv = {
	["1"] = "⁵⁵",
	["2"] = "²¹",
	["3"] = "⁵³",
	["4"] = "²¹³",
	["-"] = "⁻",
}

-- Overrides applied to initials for the 'SWZ' scheme. Only initials whose
-- spelling changes are listed; export.convert keeps any other initial as is.
local initialConv_swz = {
	["j"] = "g",
	["q"] = "k",
	["n"] = "l",
	["ny"] = "n",
	["h"] = "x",
	["w"] = "",
	["r"] = "rh",
}

-- Overrides applied to finals for the 'SWZ' scheme. Only finals whose
-- spelling changes are listed; export.convert keeps any other final as is.
local finalConv_swz = {
	["ir"] = "",
	["er"] = "r",
	["ong"] = "ung",
	["uang"] = "uong",
	["ü"] = "y",
	["üo"] = "iuo",
	["üe"] = "ye",
	["üan"] = "uan",
	["ün"] = "un",
	["iong"] = "yng",
}

--- Rewrite a spelled (initial, final) pair into the keys used by the
-- conversion tables.
--
-- The spelling uses y-/w- and a bare "u" after y/j/q/x, none of which appear
-- as such in the lookup tables; this function undoes those conventions.
--
-- @param initial  lower-case initial as split off by export.convert
-- @param final    lower-case final as split off by export.convert
-- @return initial, final  the normalised pair
local function fix(initial, final)
	-- ju /tɕy/: after y/j/q/x a leading "u" in the final stands for "ü"
	if find(initial, '^[yjqx]$') and find(final, '^u') then
		final = gsub(final, '^u', 'ü')
	end

	-- "y" is only a spelling device: drop it and make sure the final starts
	-- with i or ü.
	if initial == 'y' then
		initial = ''
		if final == 'ou' then
			final = 'iu'
		elseif not find(final, '^[iü]') then
			-- yin /in/, yuan /yan/, ya /ia/
			final = 'i' .. final
		end
	end

	-- wei /uei/ (/-uei/ is usually spelled <-ui> but /uei/ is not)
	-- wu /vu/
	-- wai /uai/
	-- wen /uən/
	if initial == 'w' then
		-- "w" survives as a real initial only in "wu"
		initial = (final == 'u') and 'w' or ''
		if final == 'ei' then
			final = 'ui'
		elseif final == 'en' then
			final = 'un'
		elseif final ~= 'u' then
			final = 'u' .. final
		end
	end

	-- distinguish the two 'i's: after z/c/s/r use the internal final 'ir'
	if find(initial, '^[zcsr]$') and final == 'i' then
		final = 'ir'
	end

	return initial, final
end

--- Validate one parsed syllable and raise an error describing the first
-- problem found. Returns nothing when the syllable is acceptable.
--
-- @param initial  lower-case initial, before fix() is applied
-- @param final    lower-case final, before fix() is applied
-- @param tone     the tone field of the syllable, as a string
local function warn(initial, final, tone)
	-- A syllable with no initial must not start with i/u.
	local bare_glide = initial == "" and find(final, "^[iu]")
	if bare_glide then
		error("Syllables in Sichuanese Pinyin do not begin with i-/u-. Add y-/w-.")
	end

	-- "y" has no entry in initialConv but is accepted here.
	local initial_ok = initial == "y" or initialConv[initial]
	if not initial_ok then
		error("Invalid initial: " .. initial)
	end

	-- "uo" has no entry in finalConv but is accepted here.
	local final_ok = final == "uo" or finalConv[final]
	if not final_ok then
		error("Invalid final: " .. final)
	end

	if tone == "5" then
		error("Chengdu does not have the fifth tone anymore. Use 2.")
	end
end

--- Convert Sichuanese Pinyin text into IPA or into the 'SWZ' spelling.
--
-- Can be called as convert(text, scheme), or with a single table that has an
-- `args` field (presumably a Scribunto frame -- confirm against callers), in
-- which case args[1] is the text and args[2] the scheme.
--
-- @param text    input string; "/" separates alternative readings, and each
--                syllable is letters followed by a tone field of digits/hyphens
-- @param scheme  'IPA' or 'SWZ'
-- @return the converted string (readings joined with ", " for IPA and " / "
--         otherwise), or false when 'SWZ' is requested for an erhua syllable
-- Raises an error for an unparsable syllable, an invalid initial/final/tone
-- (see warn), or an unknown scheme.
function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}

		-- Non-letter material before the first syllable; only the SWZ output
		-- re-emits it.
		local extra2 = match(word, '^[^A-Za-zü]*')
		for syllable in gmatch(word, '[A-Za-zü]+[%d%-]+[^A-Za-zü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQMNFSXHVRWYbdgptkzjcqmnfsxhvrwy]?[yg]?)([AEOaiueoüng]+)(r?)([%d%-]+)([^A-Za-zü]*)$')
			-- The outer pattern is looser than this one, so the split can
			-- fail; report that instead of crashing on a nil value.
			if not initial then
				error("Invalid syllable: " .. syllable)
			end
			local caps = false

			if find(initial .. final, '[A-Z]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end

			warn(initial, final, tone)

			initial, final = fix(initial, final)
			-- "er" is a final in its own right, not "e" plus erhua
			if final == 'e' and erhua == 'r' then
				final, erhua = 'er', ''
			end

			if scheme == 'IPA' then
				local ipa_initial = initialConv[initial]
				local ipa_final = finalConv[final]
				-- warn() lets some spellings through that have no table
				-- entry after fix(); fail with a readable message.
				if not ipa_initial then
					error("Invalid initial: " .. initial)
				end
				if not ipa_final then
					error("Invalid final: " .. final)
				end
				initial, final = ipa_initial, ipa_final
				-- characters without a toneConv entry are left unchanged
				tone = gsub(tone, '.', function(char) return toneConv[char] end)

				if erhua == 'r' then
					if find(final, '^y') then
						-- finals beginning with y
						final = 'yɚ'
					elseif find(final, '^i') then
						-- finals beginning with i
						final = 'iɚ'
					elseif find(final, '^u') then
						-- finals beginning with u
						final = 'uɚ'
					elseif (final == 'o' or final == 'oŋ') and find(initial, '^[pmfv]') then
						final = 'ɚ'
					elseif final == 'o' or final == 'oŋ' then
						final = 'uɚ'
					else
						-- remaining finals (no medial)
						final = 'ɚ'
					end
				end

				syllable = initial .. final .. tone

				table.insert(converted, syllable)
			elseif scheme == 'SWZ' then
				initial = initialConv_swz[initial] or initial
				final = finalConv_swz[final] or final

				-- for a "digit-digit" tone field keep only the second digit
				tone = gsub(tone, '(%d)%-(%d)', '%2')

				-- XXX: what happens with erhua? (disabled output for now)
				-- cf the given example 貓(mer)
				if erhua == 'r' then
					return false
				end

				if tone == '3' and (final == 'a' or final == 'ai') then
					final = 'a' .. final
				end

				syllable = initial .. final

				if caps then
					syllable = gsub(syllable, '^.', upper)
				end

				-- '@' marks the start of each syllable for the join rules below
				table.insert(converted, '@' .. syllable .. extra)
			else
				error('Convert to what representation?')
			end
		end

		if scheme == 'IPA' then
			local ipa = '/' .. table.concat(converted, ' ') .. '/'
			table.insert(result, ipa)
		elseif scheme == 'SWZ' then
			local joined = table.concat(converted, '')
			-- a syllable-initial u/i directly after a letter becomes w/j
			joined = gsub(joined, '([a-z])@(u)', '%1w')
			joined = gsub(joined, '([a-z])@(i)', '%1j')
			-- a syllable-initial y directly after n or g becomes j
			joined = gsub(joined, '([ng])@(y)', '%1j')
			-- any remaining syllable starting with "un" is written "wen"
			joined = gsub(joined, '@un', 'wen')
			joined = gsub(joined, '@', '')
			table.insert(result, extra2 .. joined)
		end
	end

	if scheme == 'IPA' then
		return table.concat(result, ', ')
	else
		return table.concat(result, ' / ')
	end
end

return export