-- Module:urk-common

-- Table collecting everything this module exposes.
local export = {}

-- Short local names for the mw.ustring functions used below.
local find, len, match, sub =
	mw.ustring.find,
	mw.ustring.len,
	mw.ustring.match,
	mw.ustring.sub

-- Pattern describing one syllable. It has six captures, which
-- export.syllabise reads as v_pref, i, m, v_suf, f_pref and f_suf.
export.syllable_pattern =
	-- optional leading "เ", "แ" or "โ"
	"([เแโ]?)"
	-- one consonant, optionally followed by "ฺ"
	.. "([กคงจชซฌญดตทนบปพฟมยรลวอฮ]ฺ?)"
	-- optional "ร" or "ล"
	.. "([รล]?)"
	-- optional vowel character, optionally followed by "ว"
	.. "([อาัิีึึืุู]?ว?)"
	-- optional "ย" or "ะ"
	.. "([ยะ]?)"
	-- optional closing character
	.. "([กงจดนบวมลฮํ]?)"

-- Tokenise an entry into its syllables.
--
-- The entry is scanned left to right, one mw.ustring character at a time.
-- A space, or any character outside the range "[ก-๎]", is emitted unchanged
-- as a token of its own. Anywhere else a syllable is read with
-- export.syllable_pattern, anchored at the current position. A character in
-- that range which cannot begin a syllable (the pattern does not match there)
-- is likewise emitted unchanged, so the scan always advances.
--
-- Parameters:
--   entry            the string to split
--   perform_respell  when truthy, syllables are rewritten with the default
--                    vowel / syllable-final segments made explicit
-- Returns:
--   an array of strings: the syllables and pass-through characters, in order
function export.syllabise(entry, perform_respell)
	local syllables = {}
	local entry_len = len(entry)
	-- anchor the pattern: a syllable may only be read at the current position,
	-- never further along the remaining text
	local anchored_pattern = "^" .. export.syllable_pattern
	local idx = 1

	while idx <= entry_len do
		local char = sub(entry, idx, idx)
		local v_pref, i, m, v_suf, f_pref, f_suf

		-- leave non-thai characters alone
		if char ~= " " and match(char, "[ก-๎]") then
			v_pref, i, m, v_suf, f_pref, f_suf = match(sub(entry, idx), anchored_pattern)
		end

		if not i then
			-- either a non-thai character, or a thai character that cannot
			-- start a syllable: pass it through as is
			syllables[#syllables + 1] = char
			idx = idx + 1
		else
			-- prevent initial consonant in the next syllable being
			-- misinterpreted as the final consonant in the current syllable
			local next_pos = idx + len(v_pref .. i .. m .. v_suf .. f_pref .. f_suf)
			if next_pos <= entry_len and find(sub(entry, next_pos, next_pos), "[อาัิีึึืุู]") then
				f_pref = ""
				f_suf = ""
			end

			-- "ะ" can only have "ฮ" as its second segment
			if f_pref == "ะ" and f_suf ~= "ฮ" then
				f_suf = ""
			end

			-- "ว" cannot be part of the vowel if "ั" does not precede it.
			-- Only a two-character vowel segment (vowel character + "ว") is
			-- cut; a lone "ว" is kept. Whatever was matched after the removed
			-- "ว" belongs to the text that follows, so it is dropped here too,
			-- otherwise it would be emitted now and scanned again next round.
			if len(v_suf) > 1 and v_suf ~= "ัว" then
				v_suf = sub(v_suf, 1, 1)
				f_pref = ""
				f_suf = ""
			end

			-- number of characters of the entry this syllable covers; taken
			-- before respelling, which may add characters that are not in
			-- the entry
			local consumed = len(v_pref .. i .. m .. v_suf .. f_pref .. f_suf)

			-- perform respellings
			if perform_respell then
				if v_pref == "" and v_suf == "" and find(f_suf, "[กงดนบม]") then
					-- syllables with non-approximant syllable-final have
					-- vowel "โ" by default
					v_pref = "โ"
				elseif v_pref == "" and v_suf == "" and f_pref == "ะ" and f_suf == "" then
					-- syllables with syllable-final "ะ" have vowel "ั" by default
					v_suf = "ั"
				elseif find(v_suf, "[ัิุ]") and f_pref == "" and f_suf == "" then
					-- syllables with explicitly short vowel have
					-- syllable-final "ะ" by default
					f_pref = "ะ"
				elseif v_suf == "ว" and f_pref ~= "" then
					-- syllables with "ว" and syllable-final actually have
					-- vowel "ัว"
					v_suf = "ัว"
				end
			end

			-- construct respelt syllable
			syllables[#syllables + 1] = v_pref .. i .. m .. v_suf .. f_pref .. f_suf
			idx = idx + consumed
		end
	end

	return syllables
end

-- Return the table holding this module's exported fields.
return export