-- Module:xh-common

-- Table of everything this module exposes; returned at the end of the file.
local export = {}

-- Shorthand: builds a string from Unicode code points.
local u = mw.ustring.char

-- Combining marks used throughout the module.
local ACUTE = u(0x0301) -- combining acute accent, written for tone "H"
local CIRC  = u(0x0302) -- combining circumflex accent, written for tone "F"
-- Syllabic mark. This must be U+0329 COMBINING VERTICAL LINE BELOW, the same
-- character that apply_tone removes from its result; U+0324 is COMBINING
-- DIAERESIS BELOW and would leave the syllabic mark unrecognised as a nucleus.
local SYLL  = u(0x0329)

-- The strings below are spliced into "[...]" character classes.

-- The two tone diacritics.
export.diacritic = ACUTE .. CIRC
-- Characters that can receive a tone diacritic: plain vowels, the '.'
-- syllable marker and the syllabic mark.
export.toneless_vowel = "aeiouAEIOU." .. SYLL
-- Every character that can be part of a syllable nucleus: the toneless set,
-- the precomposed toned vowels and the bare tone diacritics.
export.vowel = export.toneless_vowel .. "áéíóúâêîôûÁÉÍÓÚÂÊÎÔÛ" .. export.diacritic

-- A '.' marks the preceding consonant(s) as a syllable of their own,
-- e.g. the syllabic m in "um.ntu".

-- Splits a word into its syllables.
--
-- A syllable is a (possibly empty) run of characters that are not in
-- export.vowel, followed by a run of characters that are in export.vowel
-- or are '.'. One trailing '.' is dropped from each syllable. Whatever
-- follows the last nucleus is attached to the last syllable.
--
-- A non-empty word that contains no nucleus at all is returned as a single
-- syllable, and the empty string yields an empty table. Previously both
-- cases failed with "attempt to concatenate a nil value".
--
-- @param word  string to split
-- @return      array of syllable strings
function export.split_syllables(word)
	local syllables = {}

	for syll in mw.ustring.gmatch(word, "[^" .. export.vowel .. "]*[" .. export.vowel .. "%.]+") do
		-- The '.' only marks the syllable boundary; it is not kept.
		if mw.ustring.sub(syll, -1) == "." then
			syll = mw.ustring.sub(syll, 1, -2)
		end
		syllables[#syllables + 1] = syll
	end

	-- No nucleus found: there is no last syllable to extend.
	if #syllables == 0 then
		if word ~= "" then
			syllables[1] = word
		end
		return syllables
	end

	-- Attach any trailing non-vowel characters to the final syllable.
	local tail = mw.ustring.match(word, "[^" .. export.vowel .. "]*$")
	syllables[#syllables] = syllables[#syllables] .. tail
	return syllables
end

-- Writes tone diacritics onto a word according to a tone pattern.
--
-- `pattern` holds one letter per syllable of `word`, as counted by
-- export.split_syllables: "H" appends an acute accent and "F" a circumflex
-- to every character of that syllable found in export.toneless_vowel, while
-- "L" leaves the syllable untouched. When `pattern` is nil, every syllable
-- is treated as "L".
--
-- Raises an error when the pattern length differs from the syllable count
-- or when the pattern contains any letter other than "H", "F" or "L".
--
-- @param word     string to mark
-- @param pattern  optional tone string, e.g. "LHF"
-- @return         the NFC-normalised word with every U+0329 removed
function export.apply_tone(word, pattern)
	local syllables = export.split_syllables(word)
	-- Splitting on "" yields one entry per character.
	local tones = mw.text.split(pattern or mw.ustring.rep("L", #syllables), "")

	if #syllables ~= #tones then
		error("The word \"" .. table.concat(syllables) .. "\" and the tone pattern " .. table.concat(tones) .. " have different numbers of syllables.")
	end

	-- Tone letter -> combining mark to append; "L" has no mark.
	local marks = { F = CIRC, H = ACUTE }

	for idx, tone in ipairs(tones) do
		local mark = marks[tone]
		if mark then
			local target = "([" .. export.toneless_vowel .. "])"
			-- Assignment to one target keeps only gsub's first result.
			syllables[idx] = mw.ustring.gsub(syllables[idx], target, "%1" .. mark)
		elseif tone ~= "L" then
			error("Invalid character \"" .. tone .. "\" in tone pattern string.")
		end
	end

	local joined = mw.ustring.toNFC(table.concat(syllables))
	-- Drop the syllabic mark from the result (count from gsub is discarded).
	local cleaned = mw.ustring.gsub(joined, "̩", "")
	return cleaned
end

-- Separates a tone-marked word into its toneless spelling and tone pattern.
--
-- The word is split with export.split_syllables. Each syllable has its '.'
-- characters removed and is decomposed to NFD so that tone diacritics become
-- separate combining characters. The last acute or circumflex found in the
-- syllable decides its tone letter ("H" for acute, "F" for circumflex, "L"
-- when there is none), and every acute and circumflex is removed from the
-- syllable.
--
-- The mark is searched for anywhere in the syllable rather than only at its
-- end, so a final syllable that carries trailing consonants, or a syllable
-- with several marked vowels, is still recognised and fully stripped.
--
-- @param word  tone-marked string
-- @return      two-element table: { stripped word in NFC, tone string }
function export.split_tone(word)
	local tone_mark = "[" .. ACUTE .. CIRC .. "]"
	local tones = {}
	local stripped = {}

	for i, syll in ipairs(export.split_syllables(word)) do
		-- Remove any '.' char and convert to NFD. The extra parentheses keep
		-- only gsub's first result (the string), not its match count.
		syll = mw.ustring.toNFD((mw.ustring.gsub(syll, "%.", "")))

		-- Greedy ".*" makes the capture the last tone mark in the syllable.
		local mark = mw.ustring.match(syll, ".*(" .. tone_mark .. ")")
		if mark == ACUTE then
			tones[i] = "H"
		elseif mark == CIRC then
			tones[i] = "F"
		else
			tones[i] = "L"
		end

		stripped[i] = (mw.ustring.gsub(syll, tone_mark, ""))
	end

	return {mw.ustring.toNFC(table.concat(stripped)), table.concat(tones)}
end

-- Hand the table of exported values and functions back to whoever loads this module.
return export