-- Module:mnw-pron

-- `export` is the table handed back by the final `return export`; the public
-- entry points are attached to it further down.
-- `gsub`, `sub` and `match` are local aliases for the corresponding
-- mw.ustring functions. `sub` is not referenced anywhere in the visible code.
local export = {} local gsub = mw.ustring.gsub local sub = mw.ustring.sub local match = mw.ustring.match

-- Transcription systems, one record each. Array slot 1 of a record is the
-- ordinal the list is sorted by before use; `type` selects which input
-- ("phonetic" or "orthographic") is romanised; `name` is the label printed
-- in the output. The position of a record (1..5) is the column index used in
-- every data table below.
local system_list = {
	{ 1, type = "phonetic", name = "IPA" },
	{ 2, type = "orthographic", name = "MLCTS" },
	{ 3, type = "orthographic", name = "ALA-LC" },
	{ 4, type = "phonetic", name = "BGN/PCGN" },
	{ 5, type = "phonetic", name = "Okell" },
}

-- Lookup used by process() for the initial part of a syllable. Keys are
-- native-script spellings; each value is a five-element list indexed by
-- system_index (same order as system_list).
-- NOTE(review): several keys spell what look like whole syllables rather than
-- bare initials - confirm that this is intended.
local initial_table = {
	["က"] = { "k", "k", "k", "k", "k" },
	["ခ"] = { "kʰ", "hk", "kh", "hk", "hk" },
	["ခက်"] = { "kʰ", "hk", "kh", "hk", "hk" },
	["ခါက်"] = { "khac", "khac", "khac", "khac", "khac" },
	["ကက်"] = { "kɛk", "kɛk", "kɛk", "kɛk", "kɛk" },
	["ကာက်"] = { "kac", "kac", "kac", "kac", "kac" },
	["ကိက်"] = { "koc", "koc", "koc", "koc", "koc" },
	["ကုက်"] = { "kɤk", "kɤk", "kɤk", "kɤk", "kɤk" },
	["ကေက်"] = { "kɔc", "kɔc", "kɔc", "kɔc", "kɔc" },
	["ကောက်"] = { "kòk", "kòk", "kòk", "kòk", "kòk" },
	["ကံက်"] = { "kɔk", "kɔk", "kɛ̀k", "kɛ̀k", "kɛ̀k" },
	["ကအ်"] = { "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ" },
	["ကေအ်"] = { "keˀ", "keˀ", "keˀ", "keˀ", "keˀ" },
	["ကောအ်"] = { "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ" },
	["ကိုအ်"] = { "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ" },
}

-- Renderings for the final part of a syllable, consulted by process().
-- Keys are native-script finals (the empty string is the bare final); each
-- value is indexed by system_index.
local final_table = {
	[""] = { "a̰", "a.", "a", "a.", "á" },
	["က်"] = { "ɛk", "ɛk", "ɛk‘", "ɛk", "ɛk" },
}

-- Vowel-nucleus renderings tried first by the fallback branch of process()
-- when a final has to be split into nucleus + closing consonant. Values are
-- indexed by system_index.
local nucleus_table = {
	[""] = { "à", "a", "a", "a", "a" },
	["ိ"] = { "ò", "ò", "ò", "ò", "ò" },
}

-- Renderings for spellings that process() treats as standing on their own:
-- when the initial is a key of this table, it is moved into the final and the
-- lookup happens here. Values are indexed by system_index.
local indep_letter_table = {
	["အာ"] = { "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə" },
	["ဣ"] = { "ḭ", "i.", "i", "i.", "í" },
	["ဣဳ"] = { "ɒə", "ɒə", "ɒə", "ɒə", "ɒə" },
	["ဥ"] = { "ṵ", "u.", "u", "u.", "aoˀ" },
	["ဥူ"] = { "ù", "u", "ū", "u", "u" },
	["ဨ"] = { "ey", "ɛ", "ɛ", "ɛ", "èy" },
	["အဲ"] = { "ɔə", "uə", "ɔ̀ə", "ɔə.", "ùə" },
	["ဩ"] = { "ò", "èə:", "o", "èə:", "ò" },
	["အဴ"] = { "ɤ̀", "ao", "o‘", "ao", "ò" },
	["အံ"] = { "ɔm", "ɔ̀m.", "ɔˀ", "ɔ̀m", "ɔ̀m" },
	["အး"] = { "ɛ̀h", "ah", "ɛ̀h", "ɛ̀h.", "ɛ̀h" },
}

-- Tone-mark renderings. Keys are the two tone signs that get_romanisation()
-- captures at the end of a syllable; each value is indexed by system_index
-- and is inserted into or appended to the final by process().
local tone_table = {
	["း"] = { "́", ":", "″", ":", "̀" },
	["့"] = { "̰", ".", "′", ".", "́" },
}

-- Pattern applied repeatedly by syllabify(): capture 1 is a consonant with
-- optional medial signs and vowel/tone signs, capture 2 is the next consonant
-- (optionally followed by the dot-below sign) plus one character that is not
-- a dot-below, asat or stacking sign. A space is inserted between the two.
local repl_string = "([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][ျြွှ]*[ံါဲါါဴေး]*)([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][့]?[^့်္])"

--- Break a word into space-separated syllables.
-- @param text  string to split (native script, possibly carrying the
--              markers ', +, - and *)
-- @return the same text with single spaces between syllables and no leading
--         or trailing space
function syllabify(text)
	-- Put a space between an apostrophe and any run of +, -, * markers.
	-- Returning nothing leaves the (empty) match untouched.
	local function space_markers(apostrophe, markers)
		if apostrophe == "" and markers == "" then
			return
		end
		return apostrophe .. " " .. markers
	end

	-- Isolate a letter of the bracket class below together with up to two
	-- following characters; where the space goes depends on whether the
	-- second following character is the stacking sign or the asat.
	-- NOTE(review): a bracket class matches any single one of its code
	-- points, so multi-character sequences listed inside it are not matched
	-- as units - confirm this is intended.
	local function isolate_letter(letter, following, after)
		if after == "္" then
			return " " .. letter .. following .. " " .. after
		elseif after == "်" then
			return " " .. letter .. following .. after
		else
			return " " .. letter .. " " .. following .. after
		end
	end

	text = gsub(text, "('?)([%+%-%*]*)", space_markers)
	text = gsub(text, "([အာဣဣဳဥဥူဨအဲသြအဴအံအး][့း်]?)(.?)(.?)", isolate_letter) .. " "

	-- A syllable ends after an asat (plus optional visarga / apostrophe).
	text = gsub(text, "(်း?'?)", "%1 ")
	-- Move a tone sign that precedes "consonant + asat" to after it.
	text = gsub(text, "([း့])([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝ]်)", "%2%1")

	-- One pass can miss overlapping matches, so repeat until none remain.
	while match(text, repl_string) do
		text = gsub(text, repl_string, "%1 %2")
	end

	-- Stacking signs become breaks, then whitespace is normalised.
	text = gsub(text, "္", ", ")
	text = gsub(text, " +", " ")
	text = gsub(text, "^ ?(.*[^ ]) ?$", "%1")
	text = gsub(text, ", ", " ")
	text = gsub(text, " ([23])", "%1")
	return text
end

--- Look up a string one character at a time and join the results.
-- @param initial_string  string whose characters are looked up individually
-- @param system_index    column to pick from each entry; when the entry has
--                        no such column the entry itself is used
-- @param ref_table       table mapping single characters to entries
-- @return concatenation of the per-character results
-- Raises "Initial data not found." for a character missing from ref_table.
function initial_by_char(initial_string, system_index, ref_table)
	local pieces = {}
	for character in mw.text.gsplit(initial_string, "") do
		local entry = ref_table[character]
		if not entry then
			error("Initial data not found.")
		end
		pieces[#pieces + 1] = entry[system_index] or entry
	end
	return table.concat(pieces)
end

--- Normalise a respelling and, when it is written in Latin letters, convert
-- it to native script through reverse_table.
-- @param text  respelling or page title
-- @return the normalised (and possibly converted) string
-- NOTE(review): reverse_table is not defined in the visible part of this
-- file; the Latin branch needs it - confirm where it comes from.
function generate_respelling(text)
	-- Pad every space with an ideographic space on its inner side and unify
	-- the two spellings of the long-a vowel sign.
	local respelled = gsub(text, " ", " 　 ")
	respelled = gsub(respelled, "ါ", "ာ")

	-- Anything containing a character from these ranges is returned as is.
	if match(respelled, "[က-႟အဴ-ꩻ]") then
		return respelled
	end

	-- Convert one Latin syllable: optional "+" mark, initial, optional "/"
	-- separator, then the final.
	local function convert_syllable(voicing_mark, latin_initial, opt_sep, latin_final)
		local native_initial = reverse_table[latin_initial]
		if not native_initial then
			native_initial = initial_by_char(latin_initial, nil, reverse_table)
		end
		return voicing_mark .. native_initial .. opt_sep .. reverse_table[latin_final]
	end

	respelled = gsub(respelled, "(%+?)([^%?%+'/\\~aeiou　]*)(/?)([%?'/\\~aeiou]+)", convert_syllable)
	return respelled
end

--- Romanise one syllable that has already been split into its parts.
-- All working variables are declared local; the function no longer writes
-- any globals.
-- @param initial       initial part, possibly prefixed by +, - or * markers
--                      and possibly ending in "/"
-- @param final         final part (may be empty)
-- @param tone          tone sign, or "" when the syllable has none
-- @param schwa         "'" when the syllable carries the apostrophe marker,
--                      otherwise ""
-- @param system        record from system_list
-- @param system_index  column to read from the data tables
-- @return the romanised syllable, NFC-normalised
-- Raises "Initial data not found." or "Tone data not found." when a lookup
-- fails.
function process(initial, final, tone, schwa, system, system_index)
	-- In phonetic systems a medial sign directly before one of the listed
	-- finals is moved from the initial onto the final.
	if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then
		initial = gsub(initial, "[ွ/]", "")
		final = "ွ" .. final
	else
		initial = gsub(initial, "/", "")
	end

	-- NOTE(review): initial_voicing is not defined in the visible part of
	-- this file; the phonetic branch depends on it - confirm its source.
	local initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial
	if indep_letter_table[initial_new] then
		-- The letter is handled as a final; only a placeholder initial remains.
		initial_new = match(initial_new, "[ဨအဴ]") and "-" or ""
		final = initial .. final
	end

	-- Try the initial as written, then without markers; orthographic systems
	-- may finally fall back to a character-by-character lookup.
	local initial_data = initial_table[initial_new]
		or initial_table[gsub(initial_new, "[%+%-%*]", "")]
		or (system["type"] == "orthographic"
			and initial_by_char(initial_new, system_index, initial_table)
			or error("Initial data not found."))
	local initial_value = initial_data[system_index] or initial_data

	-- System 5 shows a "+"-marked initial in its unmarked form, set off by
	-- spaces.
	if match(initial, "^%+") and system_index == 5 then
		initial_value = initial_table[gsub(initial, "%+", "")][system_index]
		initial_value = gsub(initial_value, "^([^rwy]+)", " %1 ")
	end

	-- Final lookup order: direct table hit (keyed by the schwa marker for
	-- phonetic + "'"), then the asat-closed / independent-letter forms, and
	-- as a last resort a split into "nucleus + closing consonant".
	local final_data = final_table[system["type"] .. schwa == "phonetic'" and schwa or final]
		or (system["type"] == "phonetic"
			and (final_table[final .. "်"] or indep_letter_table[final])
			or indep_letter_table[final])
		or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third)
			local first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first
			local second_data = initial_table[second] or second
			first = first_data ~= first and first_data[system_index] or first
			second = second_data ~= second
				and second_data[system_index] .. ((system_index == 3 and third ~= "") and "‘" or "")
				or second
			-- Drop the first "." or ":" found in the joined string.
			return (gsub(first .. second, "([%.:])(.*)", "%2"))
		end)
	local final_value = type(final_data) == "table" and final_data[system_index] or final_data
	-- Decompose so that combining marks can be matched individually below.
	final_value = mw.ustring.toNFD(final_value)

	local tone_value
	if tone == "" then
		tone_value = ""
	else
		-- Strip tone marks already present in the table value before the
		-- explicit tone is applied.
		if system_index ~= 4 then
			final_value = gsub(final_value, "̀", "")
		end
		final_value = gsub(final_value, "[́:%.]", "")
		if system["type"] .. schwa == "phonetic'" then
			tone_value = ""
		else
			local tone_data = tone_table[tone] or error("Tone data not found.")
			tone_value = tone_data[system_index]
		end
	end

	-- Where the tone mark goes depends on the system: after the leading vowel
	-- (1), after the last vowel (5), or at the very end (others).
	if system_index == 1 then
		final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value)
	elseif system_index == 5 then
		final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2")
	else
		final_value = final_value .. tone_value
	end
	return mw.ustring.toNFC(initial_value .. final_value)
end

--- Delete every ideographic space (U+3000) from a string.
-- @param text  input string
-- @return the string without ideographic spaces (a single return value)
function remove_wide_space(text)
	local stripped = gsub(text, "　", "")
	return stripped
end

--- Join the romanised syllables of one phrase.
-- System 1 simply concatenates. The other systems join with spaces and then
-- replace each space either by nothing or by a hyphen.
-- @param set           array of romanised syllables
-- @param system_index  index of the transcription system
-- @return the joined string
function concatenate(set, system_index)
	if system_index == 1 then
		return remove_wide_space(table.concat(set))
	end

	-- NOTE(review): ambig_intersyl is not defined in the visible part of this
	-- file. It is treated as optional here so that a missing table (or a
	-- missing row for this system) means "no extra ambiguous pairs" instead
	-- of raising an "attempt to index a nil value" error.
	local ambiguous_pairs = ambig_intersyl and ambig_intersyl[system_index] or nil
	local uses_pattern_rule = system_index == 2 or system_index == 4

	local result_text = remove_wide_space(table.concat(set, " "))
	-- A single gsub pass cannot handle overlapping matches, so run it three
	-- times.
	for _ = 1, 3 do
		result_text = gsub(result_text, "(.) (.)([^ ]?)",
			function(previous, following, after_next)
				if (ambiguous_pairs and ambiguous_pairs[previous .. following])
				or (uses_pattern_rule and (match(previous .. " " .. following, "[ptkgmngy] [aeiou]")
					or (match(previous .. following .. after_next, "[aeiou][ptkmn][rwyg]")
						and not match(after_next, "[aeiou]")))) then
					return previous .. "-" .. following .. after_next
				end
				return previous .. following .. after_next
			end)
	end
	return result_text
end

--- Romanise a word or phrase in one transcription system.
-- @param word            text to romanise; spaces separate phrases
-- @param pronunciations  table of result lists, one per system index
--                        (not touched when mode is "translit_module")
-- @param system          record from system_list
-- @param system_index    index of that record
-- @param mode            optional; "translit_module" makes the function
--                        return the romanised string directly
-- @return the romanised string in "translit_module" mode, otherwise
--         pronunciations[system_index] after the new result was appended
function export.get_romanisation(word, pronunciations, system, system_index, mode)
	-- Syllable shape: markers + initial (with optional medials and "/"),
	-- final, optional tone sign, optional apostrophe.
	local syllable_pattern = "^([%+%-%*]*[ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿအာဣဣဳဥဥူဨအဲသြအဴအံအး][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$"

	local function romanise(initial, final, tone, schwa)
		return process(initial, final, tone, schwa, system, system_index)
	end

	-- Phrase boundaries are protected as "|" while the text is syllabified.
	local prepared = gsub(word, " ", "|")
	prepared = syllabify(prepared)
	prepared = gsub(prepared, "ါ", "ာ")
	if system["type"] == "phonetic" then
		prepared = gsub(prepared, "ဝ([တနပမံ])", "ဝွ%1")
	end

	local sentences = {}
	for phrase in mw.text.gsplit(prepared, "|", true) do
		local romanised = {}
		for position, raw_syllable in ipairs(mw.text.split(phrase, " ", true)) do
			-- Put the asat before a tone sign that precedes it.
			local reordered = gsub(raw_syllable, "([း့])(်)", "%2%1")
			-- A syllable that does not fit the pattern passes through as is.
			local converted = gsub(reordered, syllable_pattern, romanise)
			romanised[position] = converted
		end
		sentences[#sentences + 1] = concatenate(romanised, system_index)
	end

	local joined = table.concat(sentences, " ")
	if mode == "translit_module" then
		return joined
	end
	local bucket = pronunciations[system_index]
	table.insert(bucket, joined)
	return bucket
end

--- Build the display string for the list of phonetic respellings.
-- Syllables of a respelling that differ from the page title are wrapped in
-- spaces, provided the respelling (markers removed) spells the same text as
-- the title.
-- `text` is a local here (it used to leak into the global environment), and
-- the normalised page title is computed once instead of once per respelling.
-- @param phonetic    array of respelling strings
-- @param page_title  title of the current page
-- @return comma-separated display string
function respelling_format(phonetic, page_title)
	local page_title_set = mw.text.split(syllabify(page_title), " ")
	-- Title with the long-a vowel sign unified, used for the comparison below.
	local normalised_title = gsub(table.concat(page_title_set), "ါ", "ာ")

	local new_respellings = {}
	for _, respelling in ipairs(phonetic) do
		local respelling_set = mw.text.split(syllabify(respelling), " ")
		local bare_respelling = gsub(table.concat(respelling_set), "[%+%-%*']", "")
		if bare_respelling == normalised_title then
			for index, element in ipairs(respelling_set) do
				if element ~= page_title_set[index] then
					respelling_set[index] = ' ' .. element .. ' '
				end
			end
		end
		table.insert(new_respellings, table.concat(respelling_set))
	end

	local text = table.concat(new_respellings, ", ")
	text = remove_wide_space(text)
	-- NOTE(review): initial_voicing is not defined in the visible part of
	-- this file - confirm where it comes from.
	text = gsub(text, "[%+%-].", initial_voicing)
	-- After the listed consonants the tall form of the vowel sign is used.
	text = gsub(text, "([ခဂၚဒပဝ]ေ?)ာ", "%1ါ")
	return text
end

--- Produce all five romanisations of a word as one string, for test pages.
-- @param word        orthographic spelling
-- @param respelling  phonetic respelling
-- @return the per-system results joined by " | ", with the two final
--         substitutions applied
function export.generate_tests(word, respelling)
	respelling = generate_respelling(respelling)
	word = generate_respelling(word)

	local pronunciations = { {}, {}, {}, {}, {} }
	local inputs = {
		["orthographic"] = word,
		["phonetic"] = respelling or word,
	}

	local function by_ordinal(first, second)
		return first[1] < second[1]
	end
	table.sort(system_list, by_ordinal)

	for system_index, system in ipairs(system_list) do
		pronunciations[system_index] = export.get_romanisation(
			inputs[system["type"]], pronunciations, system, system_index)
	end

	local result = {}
	for system_index = 1, 5 do
		result[#result + 1] = table.concat(pronunciations[system_index])
	end

	-- NOTE(review): both substitutions search for a plain space, so the
	-- second one can never match after the first - possibly markup that was
	-- lost from the patterns; confirm against the original module.
	local joined = table.concat(result, " | ")
	local opened = gsub(joined, " ", "(")
	local closed = gsub(opened, " ", ")")
	return closed
end

--- Template entry point: build the pronunciation section for a page.
-- Reads the arguments of the parent frame: the named argument `word`
-- (defaults to the page title) and the positional arguments, which are the
-- phonetic respellings (default: the word itself).
-- @param frame  Scribunto frame object
-- @return wikitext with an optional "phonetic" line, the IPA line and the
--         romanisation line
function export.make(frame)
	-- getParent and getCurrentTitle are functions and must be called; the
	-- previous code indexed them directly.
	local args = frame:getParent().args
	local page_title = mw.title.getCurrentTitle().text
	local title = generate_respelling(args["word"] or page_title)
	local p, result = { ["orthographic"] = { title }, ["phonetic"] = {} }, {}
	local pronunciations = {
		[1] = {},
		[2] = {},
		[3] = {},
		[4] = {},
		[5] = {},
	}

	if not args[1] then
		args = { title }
	end
	-- Empty positional arguments are skipped.
	for _, item in ipairs(args) do
		if item ~= "" then
			table.insert(p["phonetic"], (generate_respelling(item)))
		end
	end

	table.sort(system_list, function(first, second) return first[1] < second[1] end)
	for system_index, system in ipairs(system_list) do
		for _, word in ipairs(p[system["type"]]) do
			pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index)
		end
	end

	-- Show the respellings only when they are not simply the title itself.
	if title ~= table.concat(args) then
		table.insert(result,
			"* phonetic" .. (#p["phonetic"] > 1 and "s" or "") .. ": " ..
			tostring(mw.html.create("span")
				:attr("lang", "mnw")
				:attr("class", "mnw")
				:wikitext(respelling_format(p["phonetic"], page_title))) .. "\n")
	end

	-- Two adjacent glottal stops are separated by a dot.
	local ipa = gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ")
	table.insert(result,
		'* IPA' ..
		'(key): ' ..
		(tostring(mw.html.create("span")
			:attr("class", "IPA")
			:wikitext("/" .. ipa .. "/"))) ..
		'\n* Romanization: ')

	for system_index = 2, 5 do
		table.insert(result,
			(system_index ~= 2 and " • " or "") ..
			"" .. system_list[system_index]["name"] .. ": " ..
			table.concat(pronunciations[system_index], "/"))
	end
	return table.concat(result)
end

-- Hand the public interface (get_romanisation, generate_tests, make) to the
-- caller of this module.
return export