-- Module:Hrkt-translit

local concat = table.concat local insert = table.insert local load_data = mw.loadData local toNFD = mw.ustring.toNFD local umatch = mw.ustring.match

local m_ja = require("Module:ja") local kata_to_hira = m_ja.kata_to_hira local normalize_kana = m_ja.normalize_kana

-- Shared data table from "Module:Hrkt-translit/data"; left nil here and loaded
-- on first use by export.process_data and get_data.
local data_common
-- Placeholder control characters inserted while a transliteration is being
-- built; export.tr replaces both of them with "'" just before returning.
local glottal = "\1"
local disambig = "\2"
-- Body of a pattern character class (used as "[" .. cons .. "]"): every
-- lowercase ASCII letter except a, e, i, o, u, w and y.
local cons = "b-df-hj-np-tvxz"

local export = {}

--- Return the leading part of `text` that precedes its final run of the last
-- character, or `text` itself when no such leading part exists.
--
-- The last character of `text` is used to build a frontier pattern
-- ("%f[<last>]"); the longest prefix that ends right before an occurrence of
-- that character (and does not itself end in it) is returned.
-- e.g. "ka" -> "k", "shi" -> "sh", "a" -> "a".
--
-- @param text string to inspect
-- @return the prefix described above, or `text` unchanged (including when
--         `text` is empty)
local function get_initial(text)
	local last = umatch(text, ".$")
	-- Empty input has no last character; concatenating nil into the pattern
	-- would raise, so fall back to returning the input as-is.
	if not last then
		return text
	end
	-- A lone ASCII punctuation character such as "^" or "%" would corrupt the
	-- "[...]" set below, so escape it. Multi-byte characters never match
	-- "^%p$" and are left untouched.
	if last:match("^%p$") then
		last = "%" .. last
	end
	return umatch(text, "(.+)%f[" .. last .. "]") or text
end

--- Collect consonant-only initials from a romanisation table.
--
-- For every not-yet-checked entry of `data` whose value consists solely of
-- letters, the value's initial (see get_initial) is recorded in `initials`
-- when it is made up only of characters from `cons`; the same is done for the
-- entries found under that initial in `d_voicing` and `d_semivoicing`.
-- Every key visited is marked in `checked`, whether or not it qualified.
--
-- @param data          table of key -> romanisation, or nil (then a no-op)
-- @param d_voicing     table indexed by initial
-- @param d_semivoicing table indexed by initial
-- @param initials      set (initial -> true) that is filled in place
-- @param checked       set (key -> true) of keys already processed
local function handle_initials(data, d_voicing, d_semivoicing, initials, checked)
	if not data then
		return
	end

	local consonants_only = "^[" .. cons .. "]+$"

	-- Records `candidate` when it exists and is purely consonantal.
	local function record(candidate)
		if candidate and candidate:match(consonants_only) then
			initials[candidate] = true
		end
	end

	for key, romaji in pairs(data) do
		if not checked[key] and umatch(romaji, "^%a+$") then
			local initial = get_initial(romaji)
			record(initial)
			local voiced = d_voicing[initial]
			local semivoiced = d_semivoicing[initial]
			record(voiced)
			record(semivoiced)
		end
		checked[key] = true
	end
end

--- Attach an `initials` set to a data table and return the table.
--
-- The set is built by handle_initials from `data.rom`. Unless `common` is
-- truthy, the shared data module is loaded (once), its voicing and
-- semivoicing tables are used wherever `data` does not supply its own, and
-- the shared `rom` table is scanned as well.
--
-- @param data   data table; receives the field `initials`
-- @param common truthy when `data` is itself the shared data table
-- @return `data`
function export.process_data(data, common)
	local voicing = data.tr_voicing
	local semivoicing = data.tr_semivoicing
	local found, seen = {}, {}
	data.initials = found

	if common then
		handle_initials(data.rom, voicing, semivoicing, found, seen)
		return data
	end

	if not data_common then
		data_common = load_data("Module:Hrkt-translit/data")
	end
	if not voicing then
		voicing = data_common.tr_voicing
	end
	if not semivoicing then
		semivoicing = data_common.tr_semivoicing
	end

	handle_initials(data.rom, voicing, semivoicing, found, seen)
	handle_initials(data_common.rom, voicing, semivoicing, found, seen)
	return data
end

--- Build a lookup function over the transliteration data.
--
-- The returned function takes a chain of keys (table name first) and walks
-- the data tables with them. When `lang` is given and a module named
-- "Module:Hrkt-translit/data/<lang>" can be found through package.loaders[2],
-- the language table is consulted alongside the shared table, and the
-- language value wins unless it is nil. Otherwise only the shared table is
-- consulted.
--
-- @param lang language code, or nil
-- @return function(...) performing the lookup
local function get_data(lang)
	if not data_common then
		data_common = load_data("Module:Hrkt-translit/data")
	end

	-- Follows the given keys down from `node`; yields nil as soon as a
	-- non-table is reached while keys are still left.
	local function descend(node, ...)
		local depth = select("#", ...)
		for level = 1, depth do
			if type(node) ~= "table" then
				return nil
			end
			node = node[(select(level, ...))]
		end
		return node
	end

	-- Lookup that only knows about the shared data.
	local function lookup_common(...)
		return descend(data_common[...], select(2, ...))
	end

	if not lang then
		return lookup_common
	end

	local page = "Module:Hrkt-translit/data/" .. lang
	if not package.loaders[2](page) then
		return lookup_common
	end

	local data_lang = load_data(page)
	return function(...)
		local from_lang, from_common = data_lang[...], data_common[...]
		for depth = 2, select("#", ...) do
			local key = select(depth, ...)
			-- Language branch ran out: finish the walk in the shared data,
			-- starting with the current key.
			if type(from_lang) ~= "table" then
				return descend(from_common, select(depth, ...))
			end
			from_lang = from_lang[key]
			-- Shared branch ran out: the language side has already consumed
			-- the current key, so continue from the next one.
			if type(from_common) ~= "table" then
				return descend(from_lang, select(depth + 1, ...))
			end
			from_common = from_common[key]
		end
		if from_lang == nil then
			return from_common
		end
		return from_lang
	end
end

--- Replace the initial of an already-emitted result entry through a lookup
-- table obtained from `d(key)`.
--
-- When not in historical mode and the entry is tagged "historical w" in
-- `result_sp`, a "w" is put in front of the entry before the substitution.
--
-- @param i_last    index of the entry in `result`
-- @param result    list of emitted romanisation pieces
-- @param result_sp per-index tags parallel to `result`
-- @param hist      truthy in historical mode
-- @param d         lookup function as returned by get_data
-- @param key       name of the data table passed to `d`
-- @return the values returned by string.gsub (new string, substitution count)
local function do_voicing(i_last, result, result_sp, hist, d, key)
	local piece = result[i_last]
	local needs_w = not hist and result_sp[i_last] == "historical w"
	if needs_w then
		piece = "w" .. piece
	end
	local anchored_initial = "^" .. get_initial(piece)
	return piece:gsub(anchored_initial, d(key))
end

--- Transliterate kana text into Latin script.
--
-- The text is normalised (normalize_kana, kata_to_hira, NFD), converted one
-- UTF-8 character at a time through the data tables returned by get_data, and
-- then post-processed in two further passes (gemination / disambiguation /
-- long vowels, and capitalisation).
--
-- @param text    the text to transliterate
-- @param lang    language code handed to get_data (may be nil)
-- @param sc      not used by this function
-- @param options optional table; the fields read here are:
--                  hist          - use the "rom_hist" table first and skip the
--                                  はへ handling and the long-vowel merge
--                  keep_dot      - emit "." for an input "."
--                  phonetic      - affects the はへ handling and disables the
--                                  long-vowel merge with the previous entry
--                  no_diacritics - disables the long-vowel merge with the
--                                  previous entry
-- @return the transliteration, with the internal glottal/disambig
--         placeholders replaced by "'"
function export.tr(text, lang, sc, options)
	-- Track (but do not reject) input that contains kanji.
	if umatch(text, "[" .. load_data("Module:ja/data/range").kanji .. "]") then
		require("Module:debug").track("ja/invalid Hrkt")
	end

	options = options or {}
	-- result[0] is an empty sentinel so that index 0 (returned by getlast
	-- when nothing is found) can always be read as a string.
	local result = {[0] = ""}
	-- Tags parallel to `result` (e.g. "stop", "coda", "gem", "cap", "'").
	local result_sp = {}
	local d = get_data(lang)

	-- Walk `result` backwards from i_start (default: the last index) down to
	-- index 1 and return the first index accepted by predicate_good, or 0.
	-- Entries between a ">" entry and the matching "<" entry are skipped.
	-- The walk is abandoned (returning 0) as soon as predicate_bad accepts an
	-- index. Defaults: predicate_bad = tagged "stop"; predicate_good =
	-- non-empty and not tagged "'".
	local function getlast(i_start, predicate_good, predicate_bad)
		local in_xml = false
		for i = i_start or #result, 1, -1 do
			if in_xml then
				if result[i] == "<" then
					in_xml = false
				end
			elseif result[i] == ">" then
				in_xml = true
			else
				if (predicate_bad or function(index)
					return result_sp[index] == "stop"
				end)(i) then
					break
				end
				if (predicate_good or function(index)
					return result[index]:len() > 0 and result_sp[index] ~= "'"
				end)(i) then
					return i
				end
			end
		end
		return 0
	end

	-- normalize long vowels and iteration marks
	text = toNFD(kata_to_hira(normalize_kana(text)))

	-- First pass: one iteration per UTF-8 character.
	for c in text:gmatch(".[\128-\191]*") do
		local rc = options.hist and d("rom_hist", c) or d("rom", c) or c
		local rc_sp = d("rom_sp", c)
		local i_last = getlast()
		if options.keep_dot and c == "." then
			rc = "."
		elseif c:match("%a") then
			-- ASCII letters in the input act as a barrier for getlast.
			rc_sp = "stop"
		end

		-- A digraph replaces the previous entry and emits nothing itself.
		local repl_digraph = d("digraph", c, result[i_last])
		if repl_digraph then
			result[i_last], rc = repl_digraph, ""
			result_sp[i_last], rc_sp = nil, nil
		end

		if not options.hist then
			-- はへ: entries flagged by "flag_hahe" carry an alternative
			-- reading in their tag, which is swapped in under the
			-- conditions below.
			if d("flag_hahe", result_sp[i_last]) and (umatch(c, "[-~%.゙゚]") or rc:match("[-~%a" .. glottal .. "]")) then
				result[i_last] = result_sp[i_last]
				result_sp[i_last] = nil
			end
			if d("flag_hahe", rc_sp) and (options.phonetic or result_sp[getlast(nil, function(i) return result[i]:len() > 0 and result_sp[i] ~= "'" or result_sp[i] == "stop" end, function() return false end)] == "stop" or result[i_last]:match("[-~%a" .. glottal .. "]")) then
				rc = rc_sp
				rc_sp = nil
			end
		end

		-- space and punctuations
		if rc:match("%a") and umatch(result[i_last], "^[,%.?!:)”†]$") then
			result[i_last] = result[i_last] .. " "
		elseif umatch(rc, "^[(“]$") and result[i_last]:match("%a") then
			rc = " " .. rc
		end

		-- voicing
		if rc_sp == "voiced" then
			result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_voicing")
		elseif rc_sp == "semivoiced" then
			result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_semivoicing")
		end

		if rc:match("[" .. cons .. "]+" .. "$") and rc_sp ~= "stop" then
			rc_sp = "coda"
		end

		local r_last = result[i_last]
		-- Last letter of the previous entry plus any trailing non-letters.
		local r_lastlast = r_last:match"^.*(%a%A*)$"
		-- vowel clusters or stop consonants
		if r_lastlast and r_lastlast:match("[aiueo]") then
			if rc:match("^%-[yw]") and r_last:match("^[" .. cons .. "yw]") then
				local rc_first = rc:sub(2, 2)
				r_last = #r_last > 1 and r_last:sub(1, -2) or r_last
				if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
					r_last = r_last .. rc_first
				end
				result[i_last] = r_last
				rc = rc:sub(3)
			elseif options.hist and r_last:match("^[" .. cons .. "]") and (
				r_lastlast == "i" and rc:sub(1, 1) == "y" or
				r_lastlast == "u" and rc:sub(1, 1) == "w"
			) then
				local rc_first = rc:sub(1, 1)
				r_last = r_last:sub(1, -2)
				if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
					r_last = r_last .. rc_first
				end
				result[i_last] = r_last
				rc = rc:sub(2)
			elseif rc:match"^%-[yw]?[aiueo]$" then
				rc = rc:sub(2)
				if r_lastlast == rc then
					result[i_last] = r_last .. r_lastlast
					rc = ""
				elseif d("flag_specialconsonant", r_last) then
					result[i_last] = r_last:sub(1, -2)
				elseif r_lastlast == "i" then
					result[i_last] = r_last:sub(1, -2) .. "y"
				elseif r_lastlast:match("[ou]") and rc ~= "u" then
					result[i_last] = r_last:sub(1, -2) .. "w"
				elseif #r_last > 1 then
					result[i_last] = r_last:sub(1, -2)
				end
			end
		end

		insert(result, rc)
		result_sp[#result] = rc_sp
	end

	if not options.hist then
		-- isolated はへ: a flagged entry with nothing usable before it.
		local i_last = getlast()
		if d("flag_hahe", result_sp[i_last]) and getlast(i_last - 1) == 0 then
			result[i_last] = result_sp[i_last]
		end
	end

	-- Second pass: gemination, disambiguating apostrophes, long vowels.
	local has_gem = false
	for i, v in ipairs(result) do
		--gemination
		if has_gem then
			local apos, consonant, remainder = v:match("^(" .. glottal .. "*)([" .. cons .. "yw]+)(.*)")
			if consonant then
				local init, c_gem = apos .. consonant
				-- Look the initial up in "tr_gem", dropping trailing y/w one
				-- at a time until a single character is left or the initial
				-- no longer ends in y/w.
				while true do
					c_gem = d("tr_gem", init)
					if #init == 1 or not init:match("[yw]$") then
						break
					end
					init = init:sub(1, -2)
				end
				c_gem = c_gem or init:sub(1, 1)
				v = consonant .. remainder
				local i_gem = getlast(i)
				-- Fill every pending "gem" entry walking backwards; entries
				-- tagged "allow gem" are passed over. The glottal prefix is
				-- moved in front of the entry where the walk ends.
				while true do
					i_gem = getlast(i_gem - 1)
					if result_sp[i_gem] == "gem" then
						result[i_gem] = c_gem
					elseif result_sp[i_gem] ~= "allow gem" then
						i_gem = getlast(i_gem + 1)
						result[i_gem] = apos .. result[i_gem]
						break
					end
				end
				has_gem = false
			end
		elseif result_sp[i] == "gem" then
			has_gem = true
		end

		-- FIXME: ng/nw should be determined automatically by a disambiguation model.
		local v_first = v:match("^[aiueoyw]") or v:match("^n[gw]")
		if v_first then
			local i_last
			if v_first == "y" or v_first == "w" or v_first == "ng" or v_first == "nw" then
				i_last = getlast(i - 1, function(index)
					local res, res_sp = result[index], result_sp[index]
					return res ~= "" and res ~= "." and res_sp ~= "'" and res_sp ~= "gem"
				end, function() end)
			else
				i_last = getlast(i - 1, nil, function() end)
			end
			if v_first:sub(1, 1) == "n" then
				if umatch(result[i_last], "%a") and not (v_first == "nw" and result[i_last]:match("n$")) then
					v = disambig .. v
				end
			elseif result_sp[i_last] == "coda" then
				local coda = d("tr_coda_apos", v_first, result[i_last])
				if coda == nil or options.hist and coda == "hist" then
					v = disambig .. v
				end
			end
		end

		--Diacritics (long vowels and others).
		v = v:gsub("[aiueo][aiueo%A]*", d("tr_long")) -- From small kana.
		local i_last = getlast(i - 1)
		local r_last = result[i_last]
		-- From digraphs.
		if r_last and not (options.hist or options.phonetic or options.no_diacritics) then
			local r_lastlast = r_last:match"^.*(%a%A*)$" --vowel clusters or stop consonants
			if r_lastlast and d("tr_long", r_lastlast .. v) and not r_last:match("[aiueo][aiueo]$") then
				result[i_last] = (r_last .. v):gsub("[aiueo][aiueo%A]*", d("tr_long"))
				v = ""
			end
		end
		result[i] = v
	end

	-- Third pass: each "cap" tag requests that one later character be
	-- upper-cased; characters that have no distinct upper-case form do not
	-- use up a request.
	local num_cap = 0
	for i, v in ipairs(result) do
		--uppercase
		if result_sp[i] == "cap" then
			num_cap = num_cap + 1
		end
		if num_cap > 0 then
			result[i] = v:gsub(".[\128-\191]*", function(c)
				if num_cap <= 0 then return c end
				local uc = c:uupper()
				if c ~= uc then num_cap = num_cap - 1 end
				return uc
			end)
		end
	end

	-- Parentheses drop gsub's second return value (the substitution count).
	return (concat(result):gsub("[" .. glottal .. disambig .. "]", "'"))
end

return export