-- Module:jje-pron

-- Public interface table; it is returned at the end of the module.
local export = {}

-- Data and helper modules used throughout this file.
local m_data = mw.loadData("Module:jje-pron/data")
local m_jje_utilities = require("Module:jje")
local m_str_utils = require("Module:string utilities")

-- Created from Module:ko.pron

-- Local aliases for the library functions used below.
local codepoint = m_str_utils.codepoint
local concat = table.concat
local floor = math.floor
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local pattern_escape = m_str_utils.pattern_escape
local remove = table.remove
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char
local upper = m_str_utils.upper

-- Name of the current page; used as the default text in export.make.
local PAGENAME = mw.loadData("Module:headword/data").pagename

-- Suffix appended to the display names of the romanisation systems.
local question_mark = "?"

-- Transcription systems in display order. The position of an entry in this
-- list is its numeric system index.
--   abbreviation: short name accepted in place of the numeric index
--   display:      label shown in front of the output
--   separator:    string placed between alternative outputs of one word
local system_list = {
	{
		abbreviation = "ph",
		display = "Phonetic hangul: ",
		separator = "/",
	},
	{
		abbreviation = "rr",
		display = "Revised Romanization" .. question_mark,
		separator = "/",
	},
	{
		abbreviation = "rrr",
		display = "Revised Romanization (translit.)" .. question_mark,
		separator = "/",
	},
	{
		abbreviation = "yr",
		display = "Yale Romanization" .. question_mark,
		separator = "/",
	},
	{
		abbreviation = "ipa-p",
		display = "(Morphophonemic) IPA(key): ",
		separator = " ~ ",
	},
	{
		abbreviation = "ipa",
		display = "(Jeju City) IPA(key): ",
		separator = " ~ ",
	},
}

-- Maps each system abbreviation to its index in system_list.
local system_lookup = {}
for position, entry in ipairs(system_list) do
	system_lookup[entry.abbreviation] = position
end

--[[

vowel_variation: rules for vowel transformation.
	key: the number of a syllable's vowel (vowel_id):
		floor(((codepoint('가') - 0xAC00) % 588) / 28) = 0
		floor(((codepoint('개') - 0xAC00) % 588) / 28) = 1
	value: an integer that is added to the decimal codepoint of the syllable:
		u(codepoint('개') + 112) = '게'

allowed_vowel_scheme: a list of which systems vowel transformation is reflected in.
	key: vowel_id .. "-" .. system_index
		system_index: see system_list above. IPA is #6
	value: 1, representing true

]]

-- Normalises the `final` field of a decomposed syllable before it is used to
-- build a boundary key: an empty final becomes "Ø" and the placeholder "X"
-- becomes the empty string.
local final_syllable_conversion = { [""] = "Ø", ["X"] = "" }

-- Replacement letters applied to the last letter of a junction when the `com`
-- parameter marks the syllable and the system index is 4.
local com_mc = { ["g"] = "k", ["d"] = "t", ["b"] = "p", ["j"] = "ch", ["sy"] = "s", ["s"] = "ss" }

-- Plain initial consonant -> doubled initial consonant, applied to the next
-- syllable when the `com` parameter marks the syllable (systems 1 and 6).
local com_ph = { ["ᄀ"] = "ᄁ", ["ᄃ"] = "ᄄ", ["ᄇ"] = "ᄈ", ["ᄉ"] = "ᄊ", ["ᄌ"] = "ᄍ" }

-- vowel_id -> offset added to the syllable's codepoint to get the variant.
local vowel_variation = {
	[1] = 112, -- 개→게
	[7] = -56, -- 계→게
}

-- vowel_id .. "-" .. system_index -> 1 for every system in which the vowel
-- variation above is shown.
-- This table must stay separate from vowel_variation: a trailing line comment
-- used to swallow the closing brace and this declaration, which left
-- allowed_vowel_scheme undefined.
local allowed_vowel_scheme = {
	["1-1"] = 1, ["1-6"] = 1,
	["7-1"] = 1, ["7-6"] = 1,
}

-- Letter pairs that receive an apostrophe between them when they meet across
-- a syllable boundary (used for system indexes 2 and 3).
local ambiguous_intersyllabic_rr = { ["oe"] = 1, ["eo"] = 1, ["eu"] = 1, ["ae"] = 1, ["ui"] = 1 }
local ambiguous_intersyllabic_mr = { ["oe"] = 1, ["ae"] = 1 }

-- Splits `word` into single characters and decomposes each of them with
-- m_jje_utilities.decompose_jamo.
-- Returns a list holding one decomposition result per character, in order.
local function decompose_syllable(word)
	local jamo_sets = {}
	for character in mw.text.gsplit(word, "") do
		insert(jamo_sets, m_jje_utilities.decompose_jamo(character))
	end
	return jamo_sets
end

-- Compares the phonetic respelling `romanised` with the spelling `original`
-- character by character and returns the respelling with every changed
-- character wrapped in bold markup.
--
-- Parameters:
--   original:  the word as spelled
--   romanised: the phonetic hangul respelling of the same word
-- Returns: a single string built from the pieces of the respelling.
--
-- NOTE(review): the "<b>"/"</b>" wrappers were reconstructed. The copy of this
-- function under review had empty operands around `..`, which is a syntax
-- error. Bold is inferred from the "non-bold" wording of the original
-- comments; confirm against the deployed module.
local function tidy_phonetic(original, romanised)
	local ori_index, rom_index, result = 1, 1, {}
	for _ = 1, len(romanised) do
		local romanised_syllable = sub(romanised, rom_index, rom_index)
		local original_syllable = sub(original, ori_index, ori_index)
		if romanised_syllable ~= original_syllable then
			-- Handle cases of non-precomposed choseong character changing
			if match(romanised_syllable, "[ᄀ-ᄒ]") then
				-- Insert choseong
				insert(result, "<b>" .. romanised_syllable .. "</b>")
				-- Insert jungseong
				romanised_syllable = sub(romanised, rom_index + 1, rom_index + 1)
				insert(result, "<b>" .. romanised_syllable .. "</b>")
				-- Check if next character is jongseong; if so, insert, if not
				-- end iteration and continue accordingly
				romanised_syllable = sub(romanised, rom_index + 2, rom_index + 2)
				-- The character class needs its brackets: without them the
				-- pattern is not a range and the branch is effectively dead.
				if match(romanised_syllable, "[ᆨ-ᇝ]") then
					-- This used to insert into an undefined table `w`.
					insert(result, "<b>" .. romanised_syllable .. "</b>")
					ori_index, rom_index = ori_index + 2, rom_index + 2
					if match(original_syllable, "[^ ]") then rom_index = rom_index + 1 end
					if match(romanised_syllable, "[^ ]") then ori_index = ori_index + 1 end
				else
					ori_index, rom_index = ori_index + 1, rom_index + 1
					if match(original_syllable, "[^ ]") then rom_index = rom_index + 1 end
					if match(romanised_syllable, "[^ ]") then ori_index = ori_index + 1 end
				end
			-- Handle cases of non-precomposed jongseong character changing
			elseif match(romanised_syllable, "[ᆨ-ᇝ]") then
				-- Remove previous non-bold choseong & jungseong
				remove(result, #result - 1)
				remove(result, #result)
				-- Insert choseong character (kept local; it used to leak as a global)
				local romanised_syllable_cho = sub(romanised, rom_index - 2, rom_index - 2)
				insert(result, "<b>" .. romanised_syllable_cho .. "</b>")
				-- Insert jungseong character (kept local; it used to leak as a global)
				local romanised_syllable_jung = sub(romanised, rom_index - 1, rom_index - 1)
				insert(result, "<b>" .. romanised_syllable_jung .. "</b>")
				-- Insert jongseong character
				insert(result, "<b>" .. romanised_syllable .. "</b>")
				if match(original_syllable, "[^ ]") then rom_index = rom_index + 1 end
				if match(romanised_syllable, "[^ ]") then ori_index = ori_index + 1 end
			-- Handle precomposed characters per typical method
			else
				insert(result, "<b>" .. romanised_syllable .. "</b>")
				if match(original_syllable, "[^ ]") then rom_index = rom_index + 1 end
				if match(romanised_syllable, "[^ ]") then ori_index = ori_index + 1 end
			end
		else
			-- Unchanged character.
			-- NOTE(review): the literal spaces around the character are kept
			-- exactly as found; they may be residue of stripped markup (for
			-- example a non-bold wrapper) -- confirm against the deployed module.
			insert(result, ' '..romanised_syllable..' ')
			ori_index, rom_index = ori_index + 1, rom_index + 1
		end
	end
	return concat(result)
end

-- Post-processes an IPA string by applying a fixed, ordered list of
-- substitutions, then rewrites "ɥi" as "y" in the second half of the string.
--
-- Parameters:
--   ipa: the IPA string produced for system index 6
-- Returns: the adjusted IPA string.
local function tidy_ipa(ipa)
	-- Each entry is { pattern, replacement }. The order matters because every
	-- rule runs on the output of the previous one. A table replacement is
	-- looked up by the matched text.
	local rewrites = {
		{ "ʌ̹ː", "ɘː" },
		{ "ɭɭ([ji])", "ʎʎ%1" },
		{ "s([ʰ͈])ɥi", "ʃ%1ɥi" },
		{ "ss͈([ji])", "ɕɕ͈%1" },
		{ "s([ʰ͈])([ji])", "ɕ%1%2" },
		{ "nj", "ɲj" },
		{ "([ʑɕ])([ʰ͈]?)j", "%1%2" },
		{ "kʰ[ijɯ]", {
			["kʰi"] = "kçi",
			["kʰj"] = "kçj",
			["kʰɯ"] = "kxɯ",
		} },
		{ "[hɦ][ijɯouw]", {
			["hi"] = "çi",
			["hj"] = "çj",
			["hɯ"] = "xɯ",
			["ho"] = "ɸʷo",
			["hu"] = "ɸʷu",
			["hw"] = "ɸw",
			["ɦi"] = "ʝi",
			["ɦj"] = "ʝj",
			["ɦɯ"] = "ɣɯ",
			["ɦo"] = "βo",
			["ɦu"] = "βu",
			["ɦw"] = "βw",
		} },
	}
	for _, rewrite in ipairs(rewrites) do
		ipa = gsub(ipa, rewrite[1], rewrite[2])
	end

	if not match(ipa, "ɥi") then
		return ipa
	end

	-- Only occurrences after the midpoint of the string are rewritten.
	local midpoint = floor(len(ipa) / 2)
	local first_half = sub(ipa, 1, midpoint)
	local second_half = gsub(sub(ipa, midpoint+1, -1), "ɥi", "y")
	return first_half .. second_half
end

-- Transcribes every run of Hangul in `text_param` into one transcription
-- system and returns the text with each run replaced by its transcription(s).
--
-- Parameters:
--   text_param:   the text to transcribe. When a table is passed it is treated
--                 as a frame: the text is read from the parent frame's first
--                 argument and the system from its second (default 2).
--   system_index: numeric index into system_list, or one of its abbreviations.
--   args:         table of optional parameters. The keys read here are
--                 nn, l, com, cap, ni (comma-separated position lists) and
--                 ui, uie, nobc, bcred, svar, iot, yeored (single positions).
-- Returns: the transcribed text. Alternative respellings of one word are
--   joined with the system's separator. Returns nil if a syllable boundary has
--   no entry in the boundary data.
-- Raises: an error for system 2 when a boundary value lacks a semicolon.
--
-- NOTE(review): three tokens were reconstructed because the copy under review
-- had lost them: the call parentheses on getParent(), the bold marker "'''"
-- and the position capture "()" in the formatting pattern. Confirm against the
-- deployed module.
function export.romanise(text_param, system_index, args)
	if type(text_param) == "table" then
		-- getParent is a method and must be called; indexing the method itself
		-- is a syntax error.
		args = text_param:getParent().args
		system_index = args[2] or 2
		text_param = args[1]
	end

	-- Turn each comma-separated list parameter into a set keyed by position
	-- (numeric where possible, otherwise the raw string such as "y").
	local p, optional_params = {}, { "nn", "l", "com", "cap", "ni" }
	for _, pm in ipairs(optional_params) do
		p[pm] = { }
		if args[pm] then
			for pp in mw.text.gsplit(args[pm], ",") do
				p[pm][tonumber(pp) or pp] = 1
			end
		end
	end

	local vowel_ui_i, vowel_ui_e, no_batchim, batchim_reduce, s_variation, iotation, yeo_reduce =
		args.ui, args.uie, args.nobc, args.bcred, args.svar, args.iot, args.yeored

	system_index = system_lookup[system_index] or system_index

	text_param = gsub(text_param, '["](.)', "%1")

	for primitive_word in gmatch(text_param, "[%-ᄀ-ᄒ".."ᅡ-ᆢ".."ᆨ-ᇝ" .. "ퟍ" .. "ᆦힲ" .. " ㄱ-ㆎ가-힣' 􀀀-􏿽]+") do
		local the_original = primitive_word

		-- Replace the bold marker with a single placeholder character so that
		-- it can be located by position and removed before transcription.
		primitive_word = gsub(primitive_word, "'''", "ß")
		local formatting_position, formatting_count = {}, 0
		-- The leading "()" is a position capture: m1 is the character index of
		-- the formatting character and m2 the character itself.
		primitive_word = gsub(primitive_word, "()([ß􀀀-􏿽])", function(m1, m2)
			formatting_position[m1 + formatting_count] = m2 == "ß" and "'''" or m2
			return ""
		end)

		-- Record which vowel ids occur among the precomposed syllables.
		local has_vowel = {}
		for ch in gmatch(primitive_word, ".") do
			local jungseong = floor(((codepoint(ch) - 0xAC00) % 588) / 28)
			if not match(ch, "[예옛례롄]") and match(ch, "[가-힣]") then has_vowel[jungseong] = true end
		end

		-- word_set holds the word plus every alternative respelling of it.
		local word_set = { primitive_word }

		-- For systems 1 and 6 only: for every spelling already in word_set, add
		-- a copy in which the character at position `variable` is passed
		-- through `modification` and the following character through
		-- `modification2` (identity by default).
		local function add_respelling(variable, modification, modification2)
			modification2 = modification2 or function(x) return x end
			if variable and match(system_index, "[16]") then
				variable = tonumber(variable)
				local pre_length = #word_set
				for i = 1, pre_length do
					local item = mw.text.split(word_set[i], "")
					item[variable] = modification(item[variable])
					item[variable + 1] = modification2(item[variable + 1])
					word_set[pre_length + i] = concat(item)
				end
			end
		end

		add_respelling(vowel_ui_i, function(x) return "이" end)
		add_respelling(vowel_ui_e, function(x) return "에" end)
		add_respelling(no_batchim,
			function(x) return u(codepoint(x) - (codepoint(x) - 0xAC00) % 28) end,
			function(y) return u(codepoint(y) + 588) end)
		add_respelling(s_variation, function(x) return u(codepoint(x) - 12) end)
		add_respelling(iotation, function(x) return u(codepoint(x) + 56) end)
		add_respelling(yeo_reduce, function(x) return u(codepoint(x) - 56) end)

		-- Add the vowel variants listed in vowel_variation for the systems
		-- enabled in allowed_vowel_scheme.
		for vowel_id, vowel_variation_increment in pairs(vowel_variation) do
			if has_vowel[vowel_id] and allowed_vowel_scheme[vowel_id .. "-" .. system_index] then
				local pre_length = #word_set
				for i = 1, pre_length do
					local item = mw.text.split(word_set[i], "")
					for num, it in ipairs(item) do
						if floor(((codepoint(it) - 0xAC00) % 588) / 28) == vowel_id then
							item[num] = u(codepoint(it) + vowel_variation_increment)
						end
					end
					if vowel_id == 11 then
						insert(word_set, i, concat(item))
					else
						insert(word_set, concat(item))
					end
				end
			end
		end

		local word_set_romanisations = {}
		for _, respelling in ipairs(word_set) do
			local decomposed_syllables = decompose_syllable(respelling)
			local romanisation = {}
			local formatting_insert_count = 0

			-- Index 0 is a virtual empty syllable in front of the word, so the
			-- junction into the first real syllable is handled like any other.
			for index = 0, #decomposed_syllables, 1 do
				local this_syllable_text = index ~= 0 and sub(respelling, index, index) or ""
				if this_syllable_text == "-" then
					-- skip it, it will be handled below
				else
					local syllable = decomposed_syllables[index] or { initial = "Ø", vowel = "Ø", final = "X" }

					-- Find the next non-hyphen character, remembering whether
					-- a hyphen was skipped on the way.
					local next_index = index
					local next_syllable_text
					local saw_hyphen_after = false
					while true do
						next_index = next_index + 1
						next_syllable_text = next_index > #decomposed_syllables and "" or sub(respelling, next_index, next_index)
						if next_syllable_text ~= "-" then break end
						saw_hyphen_after = true
					end
					local next_syllable = decomposed_syllables[next_index] or { initial = "Ø", vowel = "Ø", final = "Ø" }

					syllable.final = final_syllable_conversion[syllable.final] or syllable.final

					if system_index == 4 and syllable.vowel == "ᅮ" and match(syllable.initial, "[ᄆᄇᄈᄑ]") then
						syllable.vowel = "ᅳ"
					end

					if match(system_index, "[126]") then
						if match(syllable.initial, "[ᄌᄍᄎ]") then
							if syllable.vowel == "ᅣ" then
								syllable.vowel = "ᅡ"
							elseif syllable.vowel == "ᅤ" then
								syllable.vowel = "ᅢ"
							elseif syllable.vowel == "ᅧ" then
								syllable.vowel = "ᅥ"
							elseif syllable.vowel == "ᅨ" then
								syllable.vowel = "ᅦ"
							elseif syllable.vowel == "ᅭ" then
								syllable.vowel = "ᅩ"
							elseif syllable.vowel == "ᅲ" then
								syllable.vowel = "ᅮ"
							elseif syllable.vowel == "ᆢ" then
								syllable.vowel = "ᆞ"
							end
						end
					end

					if match(system_index, "[16]") then
						if syllable.vowel == "ᅴ" and this_syllable_text ~= "의" then
							syllable.vowel = "ᅵ"
						end
					end

					if match(system_index, "[126]") then
						if this_syllable_text == "넓" then
							if match(next_syllable.initial, "[ᄌᄉ]") then
								syllable.final = "ᆸ"
							elseif next_syllable.initial == "ᄃ" then
								if match(next_syllable.vowel, "[^ᅡᅵ]") then
									syllable.final = "ᆸ"
								end
							end
						end
					end

					local vowel = m_data.vowels[syllable.vowel][system_index]

					if p.nn[next_index] then
						next_syllable.initial = "ᄂ"
					end
					if p.com[index] and match(system_index, "[16]") then
						next_syllable.initial = com_ph[next_syllable.initial] or next_syllable.initial
					end
					if p.ni[next_index] and system_index ~= 3 then
						next_syllable.initial = (system_index == 4 and syllable.final == "ᆯ") and "ᄅ" or "ᄂ"
					end

					if match(system_index, "[^34]") then
						if tonumber(batchim_reduce or -1) == index then
							syllable.final = m_data.boundary[syllable.final .. "-Ø"][1]
						end
						if index ~= 0 and this_syllable_text == "밟" then
							syllable.final = "ᆸ"
						end
						if match(syllable.final .. next_syllable.initial .. next_syllable.vowel, "[ᇀᆴ][ᄋᄒ]ᅵ") then
							syllable.final = "ᆾ"
						elseif match(syllable.final .. next_syllable.initial .. next_syllable.vowel, "ᆮ이") then
							syllable.final = "ᆽ"
						elseif match(syllable.final .. next_syllable.initial .. next_syllable.vowel, "ᆮ히") then
							syllable.final = "ᆾ"
						elseif match(syllable.final .. next_syllable.initial .. next_syllable.vowel, "[ퟍ][ㅇㅎ]ㅣ") then
							syllable.final = "ퟹ"
						elseif syllable.final .. next_syllable.initial == "ᆺᄋ" and not match(next_syllable_text, "[아앗야어엇에예으은을음읍의이인일임입잇]") then
							syllable.final = "ᆮ"
						end
					end

					-- Look up how this final and the next initial combine.
					local bound = syllable.final .. "-" .. next_syllable.initial
					if not m_data.boundary[bound] then
						require("Module:debug").track("jje-pron/no boundary data")
						mw.log("No boundary data for " .. bound .. ".")
						return nil
					end
					local junction = m_data.boundary[bound][system_index]

					-- Re-insert a formatting marker that was removed from this
					-- position (system 2 only).
					if formatting_position[index + formatting_insert_count + 1] and system_index == 2 then
						junction = gsub(junction, "^.*$", function(matched)
							local a, b = match(matched, "^(ng%-?)(.?)$")
							if not a or not b then a, b = match(matched, "^(.?%-?)(.*)$") end
							return match(syllable.final .. next_syllable.initial, "^Ø?[ᄀ-ᄒ]$")
								and formatting_position[index + formatting_insert_count + 1] .. (a or "") .. (b or "")
								or (a or "") .. formatting_position[index + formatting_insert_count + 1] .. (b or "")
						end)
						formatting_insert_count = formatting_insert_count + 1
					end

					-- Vowel length marking.
					if p.l[index] or (p.l["y"] and index == 1) then
						-- FIXME, verify this code still works with final/initial cons changes
						if system_index == 1 then
							if #junction == 0 then
								junction = junction .. "ː"
							else
								junction = gsub(junction, "^(.)(.?)$", function(a, b)
									return match(a, "[ᆨ-ᇂ]") and a .. "ː" .. b or "ː" .. a .. b
								end)
							end
						elseif system_index == 5 then
							vowel = gsub(vowel, "([aeiou])", "%1̄")
						elseif system_index == 6 then
							vowel = vowel .. "ː"
						end
					end

					if (p.l["y"] or p.l[1]) and index == 0 and system_index == 6 and #decomposed_syllables > 1 then
						vowel = vowel .. "ˈ"
					end

					if p.com[index] then
						-- FIXME, verify this code still works with final/initial cons changes
						junction = gsub(junction, "(.)$", function(next_letter)
							return
								(system_index == 5 and "q" or "") ..
								(system_index == 4 and (com_mc[next_letter..(p.cap["y"] or "")] or com_mc[next_letter] or next_letter) or next_letter)
						end)
					end

					if p.ni[next_index] and system_index == 4 then
						-- FIXME, verify this code still works with final/initial cons changes
						junction = gsub(junction, "([nl])$", "%1")
					end

					-- Boundary values have the form "<final>;<initial>".
					local final_cons, initial_cons = match(junction, "^(.*);(.*)$")
					if not final_cons then
						if system_index == 2 then
							error("Need a semicolon in the boundary value for " .. bound)
						end
						-- FIXME, throw an error for all systems once we've added semicolons everywhere
						final_cons = junction
						initial_cons = ""
					end

					if system_index == 2 then
						insert(romanisation, vowel .. final_cons .. (saw_hyphen_after and "-" or "") .. initial_cons)
					else
						insert(romanisation, vowel .. junction)
					end
				end
			end

			local temp_romanisation = concat(romanisation)

			if p.cap["y"] and match(system_index, "[^16]") then
				temp_romanisation = upper(sub(temp_romanisation, 1, 1)) .. sub(temp_romanisation, 2, -1)
			end

			if system_index == 1 then
				temp_romanisation = tidy_phonetic(primitive_word, toNFC(temp_romanisation))
			elseif match(system_index, "[23]") then
				-- Two passes, because neighbouring matches share a letter and a
				-- single pass cannot replace overlapping matches.
				for _ = 1, 2 do
					temp_romanisation = gsub(temp_romanisation, "(.)…(.)", function(a, b)
						return a .. (ambiguous_intersyllabic_rr[a .. b] and "'" or "") .. b
					end)
					temp_romanisation = gsub(temp_romanisation, "wo'e", "woe")
					temp_romanisation = gsub(temp_romanisation, "yo'e", "yoe")
					temp_romanisation = gsub(temp_romanisation, "we'o", "weo")
					temp_romanisation = gsub(temp_romanisation, "we'u", "weu")
					temp_romanisation = gsub(temp_romanisation, "ye'u", "yeu")
					temp_romanisation = gsub(temp_romanisation, "yu'i", "yui")
				end
			elseif system_index == 5 then
				temp_romanisation = "⫽" .. temp_romanisation .. "⫽"
			elseif system_index == 6 then
				temp_romanisation = "[" .. temp_romanisation .. "]"
			end

			insert(word_set_romanisations, temp_romanisation)
		end

		-- Replace the first occurrence of the original run with its
		-- transcription(s).
		text_param = gsub(
			text_param,
			pattern_escape(the_original),
			concat(word_set_romanisations, system_list[system_index].separator),
			1
		)
	end

	if system_index == 6 then
		text_param = tidy_ipa(text_param)
	end
	return text_param
end

-- Template entry point: renders the pronunciation section for the words given
-- to the invoking template.
--
-- Parameters:
--   frame:  the Scribunto frame; arguments are read from its parent frame.
--   scheme: not used by this function.
-- Returns: wikitext made of a list (morphophonemic IPA, IPA with an optional
--   audio item, phonetic hangul), a collapsed table holding systems 2 to 4,
--   and the TemplateStyles tag for "Template:jje-IPA/style.css".
function export.make(frame, scheme)
	local params = {
		[1] = { default = PAGENAME, list = true },
		["a"] = {},
		["audio"] = { alias_of = "a" },
		["nn"] = {},
		["l"] = {},
		["com"] = {},
		["cap"] = {},
		["ui"] = {},
		["uie"] = {},
		["nobc"] = {},
		["ni"] = {},
		["bcred"] = {},
		["svar"] = {},
		["iot"] = {},
		["yeored"] = {},
	}
	-- getParent and done are methods and must be called with "()"; without the
	-- parentheses the chained calls below do not parse.
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- results[word][system_index] = transcription of that word in that system.
	local results = {}
	for _, text_param in ipairs(args[1]) do
		local current_word_dataset = {}
		-- ipairs guarantees list order. Storing by index keeps every system in
		-- its own slot even if romanise returns nil for one of them.
		for system_index in ipairs(system_list) do
			current_word_dataset[system_index] = export.romanise(text_param, system_index, args)
		end
		insert(results, current_word_dataset)
	end

	-- output_result[system_index] = list of transcriptions, one per word.
	local output_result = { [1] = {}, [2] = {}, [3] = {}, [4] = {}, [5] = {}, [6] = {} }
	for _, result in ipairs(results) do
		for result_index = 1, #system_list do
			local value = result[result_index]
			if value ~= nil then
				insert(output_result[result_index], value)
			end
		end
	end

	local html_ul = mw.html.create( "ul" )
		:done()
	local html_li_ipa_p = mw.html.create( "li" )
		:wikitext( system_list[5].display )
		:tag( "span" )
			:addClass( "IPA" )
			:wikitext( concat(output_result[5], system_list[5].separator) )
		:done()
	:done()
	local html_li_ipa = mw.html.create( "li" )
		:wikitext( system_list[6].display )
		:tag( "span" )
			:addClass( "IPA" )
			:wikitext( concat(output_result[6], system_list[6].separator) )
		:done()
	:done()
	local html_li_ph = mw.html.create( "li" )
		:addClass( "jje-pron__ph" )
		:wikitext( system_list[1].display )
		:tag( "span" )
			:addClass( "Kore" )
			:attr( "lang", "jje" )
			:wikitext( "[" .. concat(output_result[1], system_list[1].separator) .. "]" )
		:done()
	:done()

	-- Optional audio file, nested under the IPA item. "y" selects the default
	-- file name derived from the page name.
	if args.a then
		html_li_ipa
			:tag( "ul" )
				:tag( "li" )
					:wikitext( require("Module:audio").format_audio {
						lang = require("Module:languages").getByCode("jje"),
						file = args.a == "y" and "Jje-" .. PAGENAME .. ".ogg" or args.a,
					})
				:done()
			:done()
		:done()
	end

	-- Disabled vowel-length note, kept commented out as found:
	--if args.l then
	--	html_li_ph
	--		:tag( "ul" )
	--			:tag( "li" )
	--				:addClass( "ko-pron__note-vowel-length" )
	--				:wikitext( 'Though still prescribed in Standard Korean, the great majority of speakers (in both Koreas) no longer distinguish vowel length.' )
	--			:done()
	--		:done()
	--	:done()
	--end

	html_ul
		:node( html_li_ipa_p )
		:node( html_li_ipa )
		:node( html_li_ph )
	:done()

	local html_table = mw.html.create( "table" )
		:addClass( "jje-pron" )
		:addClass( "mw-collapsible" )
		:addClass( "mw-collapsed" )
		:tag( "tr" )
			:tag( "th" )
				:attr( "colspan", 2 )
				:wikitext( "Romanizations" )
			:done()
		:done()
	:done()
	-- One row per romanisation system (system indexes 2 to 4).
	for roman_index = 2, 4 do
		html_table
			:tag( "tr" )
				:tag( "th" )
					:wikitext( system_list[roman_index].display )
				:done()
				:tag( "td" )
					:addClass( "IPA" )
					:wikitext( concat(output_result[roman_index], system_list[roman_index].separator) )
				:done()
			:done()
		:done()
	end

	return tostring(html_ul) .. tostring(html_table) .. require("Module:TemplateStyles")("Template:jje-IPA/style.css")
end

--[[function export.make_hanja(frame, scheme)	local params = {		[1] = { list = true },

["l"] = {}, }

local args = require("Module:parameters").process(frame:getParent.args, params)

local results = { [1] = {},		[6] = {},	}	for _, text_param in ipairs(args[1]) do		for _, system_index in pairs({1, 6}) do			local romanised = export.romanise(text_param, system_index, args) insert(results[system_index], romanised) end end

local html_ul = mw.html.create( "ul" ) :done local html_li_ipa = mw.html.create( "li" ) :wikitext( system_list[6].display ) :tag( "span" ) :addClass( "IPA" ) :wikitext( concat(results[6], system_list[6].separator) ) :done :done local html_li_ph = mw.html.create( "li" ) :addClass( "ko-pron__ph" ) :wikitext( system_list[1].display ) :tag( "span" ) :addClass( "Kore" ) :attr( "lang", "ko" ) :wikitext( "[" .. concat(results[1], system_list[1].separator) .. "]" ) :done :done

if args.l then html_li_ph :tag( "ul" ) :tag( "li" ) :addClass( "ko-pron__note-vowel-length" ) :wikitext( 'Long vowel distinction only applies at the initial position. Most speakers no longer distinguish vowel length at any position.' ) :done :done :done end

html_ul :node( html_li_ipa ) :node( html_li_ph ) :done

return tostring(html_ul) .. require("Module:TemplateStyles")("Template:ko-IPA/style.css") end]]

return export