-- Module:User:AmazingJus/lo

local export = {}

local find = mw.ustring.find local len = mw.ustring.len local match = mw.ustring.match local gmatch = mw.ustring.gmatch local gsub = mw.ustring.gsub local sub = mw.ustring.sub

-- Mapping of initial consonants (and initial clusters) to their romanisation.
-- Keys are Lao strings; a key of more than one character is a cluster that is
-- looked up as a whole.
local initial_conv = {
	-- Single characters.
	['ກ'] = 'k', ['ຂ'] = 'kh', ['ຄ'] = 'kh', ['ງ'] = 'ng',
	['ຈ'] = 'ch', ['ສ'] = 's', ['ຊ'] = 's', ['ຍ'] = 'ny',
	['ດ'] = 'd', ['ຕ'] = 't', ['ຖ'] = 'th', ['ທ'] = 'th',
	['ນ'] = 'n', ['ບ'] = 'b', ['ປ'] = 'p', ['ຜ'] = 'ph',
	['ຝ'] = 'f', ['ພ'] = 'ph', ['ຟ'] = 'f', ['ມ'] = 'm',
	['ຢ'] = 'y', ['ຣ'] = 'r', ['ລ'] = 'l', ['ວ'] = 'w',
	['ຫ'] = 'h', ['ອ'] = 'ʼ', ['ຮ'] = 'h', ['ຼ'] = 'r',

	-- Clusters led by ຫ (and the ligatures ໜ / ໝ); the ຫ itself is not
	-- written in the romanisation.
	['ຫງ'] = 'ng', ['ຫຍ'] = 'ny', ['ຫນ'] = 'n', ['ໜ'] = 'n',
	['ຫມ'] = 'm', ['ໝ'] = 'm', ['ຫຣ'] = 'r', ['ຫລ'] = 'l',
	['ຫຼ'] = 'l', ['ຫວ'] = 'w',

	-- Clusters whose second element is ຣ or ລ.
	['ກຣ'] = 'kr', ['ກລ'] = 'kl', ['ຂຣ'] = 'khr', ['ຄຣ'] = 'khr',
	['ຂລ'] = 'khl', ['ຄລ'] = 'khl', ['ປຣ'] = 'pr', ['ປລ'] = 'pl',
	['ພຣ'] = 'phr', ['ຟຣ'] = 'fr', ['ພລ'] = 'phl', ['ຟລ'] = 'fl',
	['ດຣ'] = 'dr', ['ຕຣ'] = 'tr',
}

-- Mapping of vowel combinations to their romanisation.
-- Keys are the vowel characters of a syllable concatenated in the order they
-- appear in the text (prefix vowel first, then marks / vowel letters).
local vowel_conv = {
	-- Combinations romanised without a macron.
	['ະ'] = 'a', ['ັ'] = 'a', ['ິ'] = 'i', ['ຶ'] = 'ư',
	['ຸ'] = 'u', ['ຸຍ'] = 'ui', ['ເະ'] = 'e', ['ເັ'] = 'e',
	['ແະ'] = 'æ', ['ແັ'] = 'æ', ['ໂະ'] = 'o', ['ົ'] = 'o',
	['ເາະ'] = 'ǫ', ['ັອ'] = 'ǫ', ['ເິ'] = 'œ', ['ເັຍ'] = 'ia',
	['ັຽ'] = 'ia', ['ເຶອ'] = 'ưa', ['ົວະ'] = 'ua', ['ັວ'] = 'ua',
	['ວັ'] = 'ua', ['ໄ'] = 'ai', ['ໃ'] = 'ai', ['ັຍ'] = 'ai',
	['ເົາ'] = 'ao', ['ົາວ'] = 'uau', ['ຳ'] = 'am', ['ໍາ'] = 'am',
	['ວຳ'] = 'uam',

	-- Combinations whose romanisation (mostly) carries a macron.
	['າ'] = 'ā', ['າວ'] = 'āo', ['ີ'] = 'ī', ['ື'] = 'ư̄',
	['ູ'] = 'ū', ['ເ'] = 'ē', ['ແ'] = 'ǣ', ['ໂ'] = 'ō',
	['ໂຍ'] = 'ōi', ['ໂຽ'] = 'ōi', ['ໍ'] = 'ǭ', ['ອ'] = 'ǭ',
	['ອຍ'] = 'ǭi', ['ອຽ'] = 'ǭi', ['ເີ'] = 'œ̄', ['ເີຽ'] = 'œ̄i',
	['ເີຍ'] = 'œ̄i', ['ເຍ'] = 'īa', ['ເັຽ'] = 'īa', ['ຽ'] = 'īa',
	['ເືອ'] = 'ư̄a', ['ເືອຍ'] = 'ư̄ai', ['ົວ'] = 'ūa', ['ວ'] = 'ūa',
	['ວຍ'] = 'uāi', ['ວຽ'] = 'uāi', ['ວຽນ'] = 'uīan', ['າຍ'] = 'āi',
	['າຽ'] = 'āi', ['ວາ'] = 'uā', ['ວາຍ'] = 'uāi', ['ວາຽ'] = 'uāi',
	-- ແ_ວ can be either ǣu or uǣ; the former is more common.
	['ແວ'] = 'ǣu',
	['ີວ'] = 'īu', ['ິວ'] = 'iu', ['ຽວ'] = 'iāu', ['ວີວ'] = 'uīu',
}

-- Mapping of coda (syllable-final) consonants to their romanisation.
-- Several distinct letters share one final value (e.g. ຣ and ລ both give 'n').
local coda_conv = {
	['ກ'] = 'k', ['ຂ'] = 'k', ['ຄ'] = 'k', ['ງ'] = 'ng',
	['ຈ'] = 't', ['ຊ'] = 't', ['ດ'] = 't', ['ຕ'] = 't',
	['ຖ'] = 't', ['ທ'] = 't', ['ສ'] = 's', ['ນ'] = 'n',
	['ບ'] = 'p', ['ປ'] = 'p', ['ພ'] = 'p', ['ຟ'] = 'p',
	['ມ'] = 'm', ['ຢ'] = 'y', ['ຣ'] = 'n', ['ລ'] = 'n',
	['ວ'] = 'w',
}

-- Special symbols: the two repetition/ellipsis marks, the cancellation mark
-- (mapped to the empty string) and the Lao digits (mapped to ASCII digits).
local sp_symbols = {
	['ຯ'] = '〃', ['ໆ'] = '〃', ['໌'] = '',
	['໐'] = '0', ['໑'] = '1', ['໒'] = '2', ['໓'] = '3', ['໔'] = '4',
	['໕'] = '5', ['໖'] = '6', ['໗'] = '7', ['໘'] = '8', ['໙'] = '9',
}

-- List of character types, keyed by a single Lao character.
--   'coda'        consonant that may start or end a syllable
--   'cons'        consonant that may only start a syllable
--   'ambig'       character that may act as a consonant or as part of a vowel
--   'glide'       mark attached to the initial consonant
--   'pref_vowel'  vowel written before the initial consonant
--   'suf_vowel' / 'vowel_let'  vowel mark / vowel letter after the initial
--   'tone', 'iter_symbol', 'canc_symbol', 'number'  as named
local char_type = {
	-- Consonants.
	['ກ'] = 'coda', ['ຂ'] = 'coda', ['ຄ'] = 'coda', ['ງ'] = 'coda',
	['ຈ'] = 'coda', ['ຊ'] = 'coda', ['ຍ'] = 'ambig', ['ດ'] = 'coda',
	['ຕ'] = 'coda', ['ຖ'] = 'coda', ['ທ'] = 'coda', ['ນ'] = 'coda',
	['ບ'] = 'coda', ['ປ'] = 'coda', ['ຜ'] = 'cons', ['ຝ'] = 'cons',
	['ພ'] = 'coda', ['ຟ'] = 'coda', ['ມ'] = 'coda', ['ຢ'] = 'coda',
	['ຣ'] = 'coda', ['ລ'] = 'coda', ['ວ'] = 'ambig', ['ສ'] = 'coda',
	['ຫ'] = 'cons', ['ອ'] = 'ambig', ['ຮ'] = 'cons', ['ໜ'] = 'cons',
	['ໝ'] = 'cons',

	-- Vowels, glide and symbols.
	['ຯ'] = 'iter_symbol',
	['ະ'] = 'vowel_let', ['ັ'] = 'suf_vowel', ['າ'] = 'vowel_let',
	['ຳ'] = 'suf_vowel', ['ິ'] = 'suf_vowel', ['ີ'] = 'suf_vowel',
	['ຶ'] = 'suf_vowel', ['ື'] = 'suf_vowel', ['ຸ'] = 'suf_vowel',
	['ູ'] = 'suf_vowel', ['ົ'] = 'suf_vowel', ['ຼ'] = 'glide',
	['ຽ'] = 'vowel_let',
	['ເ'] = 'pref_vowel', ['ແ'] = 'pref_vowel', ['ໂ'] = 'pref_vowel',
	['ໃ'] = 'pref_vowel', ['ໄ'] = 'pref_vowel',
	['ໆ'] = 'iter_symbol',
	['່'] = 'tone', ['້'] = 'tone', ['໊'] = 'tone', ['໋'] = 'tone',
	['໌'] = 'canc_symbol', ['ໍ'] = 'suf_vowel',

	-- Digits.
	['໐'] = 'number', ['໑'] = 'number', ['໒'] = 'number', ['໓'] = 'number',
	['໔'] = 'number', ['໕'] = 'number', ['໖'] = 'number', ['໗'] = 'number',
	['໘'] = 'number', ['໙'] = 'number',
}

-- List of consonant classes ('high', 'mid' or 'low'), keyed by consonant.
local cons_class = {
	['ກ'] = 'mid', ['ຂ'] = 'high', ['ຄ'] = 'low', ['ງ'] = 'low',
	['ຈ'] = 'mid', ['ສ'] = 'high', ['ຊ'] = 'low', ['ຍ'] = 'low',
	['ດ'] = 'mid', ['ຕ'] = 'mid', ['ຖ'] = 'high', ['ທ'] = 'low',
	['ນ'] = 'low', ['ບ'] = 'mid', ['ປ'] = 'mid', ['ຜ'] = 'high',
	['ຝ'] = 'high', ['ພ'] = 'low', ['ຟ'] = 'low', ['ມ'] = 'low',
	['ຢ'] = 'mid', ['ຣ'] = 'low', ['ລ'] = 'low', ['ວ'] = 'low',
	['ຫ'] = 'high', ['ອ'] = 'mid', ['ຮ'] = 'low',
}

-- Create a fresh syllable record. `curr` holds every character of the
-- syllable in text order; the other fields hold the same characters sorted by
-- the role they play.
local function reset_syllable()
	return { curr = {}, initial = {}, vowel = {}, tone = {}, coda = {} }
end

-- Split the entry into individual syllables.
--
-- @param text  string to split, or (temporary) a frame-like table whose
--              `args[1]` holds the string.
-- @return      the syllables of every run of Lao characters in `text`, joined
--              with '-'. Characters outside the range ກ-ໝ only separate runs
--              and are not copied to the result; characters inside the range
--              that have no `char_type` entry are skipped. Lao digits are
--              emitted as their `sp_symbols` value. Returns '' when `text` is
--              nil or empty.
function export.split_syll(text)
	-- (temp function)
	if type(text) == 'table' then
		text = text.args[1]
	end
	if text == nil or text == '' then
		return ''
	end

	-- Store split syllables.
	local syllables = {}
	local curr_syll = reset_syllable()

	-- Append `ch` to the current syllable, and to its `field` list if given.
	local function add(field, ch)
		if field then
			table.insert(curr_syll[field], ch)
		end
		table.insert(curr_syll.curr, ch)
	end

	-- Close the current syllable (if it has any content) and start a new one.
	local function flush()
		if #curr_syll.curr ~= 0 then
			table.insert(syllables, table.concat(curr_syll.curr))
		end
		curr_syll = reset_syllable()
	end

	-- Iterate through runs of Lao characters.
	for lao_text in gmatch(text, '[ກ-ໝ]+') do
		local c, c_types = {}, {}

		-- Classify each character in the run.
		for i = 1, len(lao_text) do
			c[i] = sub(lao_text, i, i)
			c_types[i] = char_type[c[i]]
		end

		-- Parse the run by identifying each character's type.
		for i = 1, #c do
			local ch = c[i]
			-- Look-ahead types are nil past the end of the run or for
			-- unclassified characters; they are only ever compared, never
			-- passed to a string function.
			local type_curr, type_next, type_after =
				c_types[i], c_types[i + 1], c_types[i + 2]
			local curr_vowel_full = table.concat(curr_syll.vowel)
			-- True when the only vowel seen so far is a single prefix vowel,
			-- i.e. the initial consonant is still being read.
			local only_pref_vowel = char_type[curr_vowel_full] == 'pref_vowel'
			-- True when the next character attaches to this one, so this one
			-- must be an initial rather than a coda / vowel component.
			local next_is_dependent = type_next == 'glide'
				or type_next == 'suf_vowel'
				or type_next == 'vowel_let'
				or type_next == 'tone'

			-- Prefix vowels are always the start of a new syllable.
			if type_curr == 'pref_vowel' then
				flush()
				add('vowel', ch)

			-- Glide consonants always follow the initial consonant.
			elseif type_curr == 'glide' then
				add('initial', ch)

			-- Suffix vowels and vowel letters are always part of the same syllable.
			elseif type_curr == 'suf_vowel' or type_curr == 'vowel_let' then
				add('vowel', ch)

			-- Same with tone marks.
			elseif type_curr == 'tone' then
				add('tone', ch)

			-- 'coda' consonants can start or end a syllable; 'cons' consonants
			-- can only start one.
			elseif type_curr == 'coda' or type_curr == 'cons' then
				local joins_initial = #curr_syll.coda == 0
					and initial_conv[table.concat(curr_syll.initial) .. ch]
					and (#curr_syll.vowel == 0 or only_pref_vowel)

				if joins_initial then
					-- Extends (or begins) the initial cluster.
					add('initial', ch)
				elseif type_curr == 'coda'
					and #curr_syll.coda == 0
					and #curr_syll.initial ~= 0
					and not next_is_dependent
					and not (type_next == 'ambig'
						and (type_after == 'coda' or type_after == 'cons')) then
					-- Closes the current syllable as its final consonant.
					add('coda', ch)
				else
					flush()
					add('initial', ch)
				end

			-- Ambiguous characters can be either the start or end of a syllable.
			elseif type_curr == 'ambig' then
				if #curr_syll.initial == 0 or only_pref_vowel then
					add('initial', ch)
				elseif ch == 'ຍ' and #curr_syll.vowel == 0 then
					flush()
					add('initial', ch)
				-- An initial is guaranteed here (first test above failed).
				elseif #curr_vowel_full == 0
					or (vowel_conv[curr_vowel_full .. ch] and not next_is_dependent) then
					add('vowel', ch)
				else
					flush()
					add('initial', ch)
				end

			-- The iteration symbol starts a new syllable.
			-- NOTE(review): characters that follow it in the same run are
			-- still appended to that syllable - confirm this is intended.
			elseif type_curr == 'iter_symbol' then
				flush()
				add(nil, ch)

			-- The cancel symbol stays with the current syllable.
			elseif type_curr == 'canc_symbol' then
				add(nil, ch)

			-- Numbers are appended in their converted form.
			elseif type_curr == 'number' then
				add(nil, sp_symbols[ch])
			end
		end

		-- The end of a run always ends the syllable in progress.
		flush()
	end

	return table.concat(syllables, '-')
end

return export