-- Module:ja-pron

local m_str_utils = require("Module:string utilities")

-- Local aliases for the string and table helpers used throughout this module.
local concat = table.concat
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local split = m_str_utils.split
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC

-- Language object, transliterator and qualifier formatter used by the entry points below.
local lang = require("Module:languages").getByCode("ja")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_accent = require("Module:accent qualifier") -- also Module:qualifier
-- Kept on its own line: a trailing comment on the previous statement must not
-- swallow this declaration, which export.show uses as the default for parameter 1.
local PAGENAME = mw.loadData("Module:headword/data").pagename

-- Character sets loaded from the shared range data; they are spliced into
-- bracketed character classes in the patterns further down.
local range = mw.loadData("Module:ja/data/range")
local a_kana = range.vowels.a
local i_kana = range.vowels.i
local u_kana = range.vowels.u
local e_kana = range.vowels.e
local o_kana = range.vowels.o
local n_kana = range.vowels.n
local submoraic_kana = range.submoraic_kana

-- Table of public entry points; returned at the end of the module.
local export = {}

-- Maps the short reference keys accepted in the accent reference parameters
-- to the name of the reference template each one stands for.
local ref_template_name_data = {
	DJR = "R:Daijirin",
	DJR4 = "R:Daijirin4",
	DJS = "R:Daijisen",
	KDJ = "R:Kokugo Dai Jiten",
	NHK = "R:NHK Hatsuon",
	NKD2 = "R:Nihon Kokugo Daijiten 2 Online",
	SMK2 = "R:Shinmeikai2",
	SMK5 = "R:Shinmeikai5",
	SMK7 = "R:Shinmeikai7",
	ZAJ = "R:Zenkoku Akusento Jiten",
	JEL = "R:Kenkyusha JEL Pocket",
	JAC = "R:ja:JAccent",
}

--- Build the reference markup for one accent entry.
-- @param text  comma-separated list of reference keys (see ref_template_name_data)
--              and/or raw wikitext containing "ref"
-- @return      the concatenated markup for every recognised item ("" if none)
local function add_acc_refs(text)
	local output = {}
	-- mw.getCurrentFrame is a function and has to be called to obtain the frame object.
	local frame = mw.getCurrentFrame()
	for ref_name in gsplit(text, ",") do
		mw.log(ref_name)
		local ref_template_name = ref_template_name_data[ref_name]
		if ref_template_name then
			-- Known key: emit a named ref tag whose content transcludes the
			-- reference template registered for that key.
			insert(output, frame:extensionTag{
				name = "ref",
				args = { name = ref_name },
				content = "{{" .. ref_template_name .. "}}",
			})
		elseif match(ref_name, "ref") then
			-- Anything else that mentions "ref" is passed through the preprocessor as-is.
			insert(output, frame:preprocess(ref_name))
		else
			-- Special:WhatLinksHere/Wiktionary:Tracking/ja-pron/unrecognized ref
			require("Module:debug").track("ja-pron/unrecognized ref")
		end
	end
	return concat(output)
end

--- Template entry point: renders the pronunciation section as an HTML list.
-- Reads the arguments of the parent frame and emits, in this order, one item
-- per accent specification, one IPA item per kana spelling, and an optional
-- audio item.
-- @param frame  the invoking frame; arguments are taken from frame:getParent()
-- @return       a newline followed by the serialised <ul> element
function export.show(frame)
	local params = {
		[1] = {default = PAGENAME, list = true},
		["accent"] = {list = true},
		["accent\1_loc"] = {list = true},
		["accent\1_ref"] = {list = true, allow_holes = true},
		["accent\1_note"] = {list = true, allow_holes = true},
		["acc"] = {alias_of = "accent", list = true},
		["acc\1_loc"] = {alias_of = "accent\1_loc", list = true},
		["acc\1_ref"] = {alias_of = "accent\1_ref", list = true},
		["acc\1_note"] = {alias_of = "accent\1_note", list = true},
		["dev"] = {},
		["dev2"] = {},
		["devm"] = {},
		["a"] = {alias_of = "audio"},
		["audio"] = {}
	}
	-- getParent is a method and must be called before its result is indexed.
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local au = args.audio
	local dev = args.dev or args.devm
	local dev2 = args.dev2
	-- args[1] is declared without allow_holes, so the length operator applies.
	local maxindex = #args[1]
	local html_list_main = mw.html.create("ul")

	-- Deals with the accents
	local a, al, ar, an = args.accent, args.accent_loc, args.accent_ref, args.accent_note
	local no_acc = true
	for i, position in ipairs(a) do
		local result
		no_acc = false

		-- When there are more accents than spellings, reuse the last spelling.
		local text = args[1][math.min(maxindex,i)]
		if not al[i] then
			al[i] = "Tokyo"
		end
		result = m_accent.format_qualifiers(lang, {al[i]}) .. " "
		result = result .. export.accent(text, position, dev, dev2)
		if ar[i] then
			result = result .. add_acc_refs(ar[i])
		else
			require("Module:debug").track("ja-pron/unsourced accent")
		end
		result = result .. (an[i] and (" " .. an[i]) or "")

		html_list_main:tag("li"):wikitext(
			result
		)
	end
	if no_acc then
		-- track when entries have no pitch information
		require("Module:debug").track("ja-pron/no accent")
	end

	-- Deals with the IPA
	local m_IPA = require("Module:IPA")
	for _, text in ipairs(args[1]) do
		local sortkey = (lang:makeSortKey(text))
		html_list_main:tag("li"):wikitext(
			m_IPA.format_IPA_full {
				lang = lang,
				-- One bracketed transcription per spelling.
				-- NOTE(review): item shape follows Module:IPA's documented
				-- {pron = ...} entries; confirm against the deployed module.
				items = {{pron = "[" .. export.ipa(text, dev, dev2) .. "]"}},
				sort_key = sortkey,
			}
		)
	end

	-- Deals with the audio
	if au then
		html_list_main:tag("li"):wikitext(
			require("Module:audio").format_audio {
				lang = lang,
				file = au,
				sort = (lang:makeSortKey(args[1][1]))
			}
		)
	end

	return "\n" .. tostring(html_list_main)
end

--- Convert kana text into an IPA transcription string.
-- The text is romanised with kana_to_romaji and then rewritten by an ordered
-- chain of substitutions; the order of the steps matters because later
-- patterns match the output of earlier ones.
-- @param text  kana string, or a frame-like table whose args[1], args.dev and
--              args.dev2 supply the three parameters
-- @param dev   optional comma-separated list of 1-based mora positions to
--              mark as devoiced
-- @param dev2  obsolete; any non-empty value raises an error asking the
--              caller to merge it into dev
-- @return      the transcription, without surrounding brackets
function export.ipa(text, dev, dev2)
	if type(text) == "table" then
		text, dev, dev2 = text.args[1], text.args["dev"], text.args["dev2"]
	end
	dev = dev or ""
	dev2 = dev2 or ""
	if dev2 ~= "" then
		error("Please remove parameter dev2 and change parameter dev to \"dev=" .. dev .. "," .. dev2 .. "\"")
	end

	-- Convert 〜 and 〰 to a regular ー.
	text = gsub(text, "[〜〰]", "ー")

	-- Character index at which each mora ends: a character that is not a
	-- space, dot or submoraic kana opens a mora, and an immediately following
	-- submoraic kana is counted as part of it.
	local position_mora = {}
	for i = 1, len(text) do
		if not match(sub(text, i, i), "[ " .. submoraic_kana .. "%.]") then
			local nxt = sub(text, i + 1,i + 1)
			if nxt and match(nxt, "[" .. submoraic_kana .. "]") then
				insert(position_mora, i + 1)
			else
				insert(position_mora, i)
			end
		end
	end

	-- insert @ to stand for devoicing
	if dev ~= "" then
		for position in gsplit(dev, ",") do
			position = tonumber(position)
			if #position_mora == position then
				text = text .. "@"
			else
				local position_devspace = position_mora[position]
				text = sub(text, 1, position_devspace) .. "@" .. sub(text, position_devspace+1, -1)
			end
			-- Every later mora has moved one character to the right.
			for i = position + 1, #position_mora do
				position_mora[i] = position_mora[i] + 1
			end
		end
	end

	text = toNFC(kana_to_romaji(text, "ja", nil, {keep_dot = true, disambig = true}))
	text = gsub(text, "[lv'@]", {
		["l"] = "r", ["v"] = "b", ["'"] = "ʔ", ["@"] = "̥"
	})

	-- Hyphens which have been geminated over are removed; otherwise converted to dots.
	text = gsub(text, "([bcdfghjkmnprstvw])%-%1", "%1%1")
		:gsub("-", ".")

	-- Doubled consonants (optionally separated by whitespace).
	text = text:gsub("([kprt])(%s*)%1", "%1̚%2%1")
		:gsub("t(%s*)ch", "t̚%1ch")
		:gsub("([bd])(%s*)%1", "%1̚%2%1̥")
		:gsub("g(%s*)g", "g̚%1g̊")
		:gsub("([jz])(%s*)%1", "d̚%2%1")
		:gsub("s(%s*)sh", "ɕ%1ɕ")

	-- Long vowels and single-letter consonant substitutions.
	text = gsub(text, "ei", "ē")
	text = gsub(text, "[āēīōūfvjryz]", {
		["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː",
		["f"] = "ɸ", ["j"] = "d͡ʑ", ["r"] = "ɾ", ["y"] = "j", ["z"] = "d͡z"
	})
	text = gsub(text, "[sct][hs]", {
		["sh"] = "ɕ",
		["ch"] = "t͡ɕ",
		["ts"] = "t͡s"
	})
	text = gsub(text, "([aeiouː̥])d͡([zʑ])", "%1%2")

	-- Secondary articulation marks.
	text = gsub(text, "([bdɸgkmnpɾstz][̥̊]*)i", "%1ʲi")
	text = gsub(text, "([bdɸgkmnpɾstwz][̥̊]*)j", "%1ʲ")
	text = gsub(text, "([bɕdɸghjkmpɾstzʑʲ][̥̊]*)w", "%1ᵝ")

	-- Rewrites of "n" depending on what follows it.
	text = gsub(text, "nʲ", "ɲ̟")
	text = gsub(text, "n+$", function(n)
		return ("ɴ"):rep(#n)
	end)
	text = gsub(text, "([^ ː_nɴ])(ː*)ɴ", "%1̃%2ɴ")
	text = gsub(text, "([^ ː_n])(ː*)n([^aeou%s])", "%1̃%2n%3")
	text = gsub(text, "n[n ]*[bmp]", function(m)
		return m:gsub("n", "m")
	end)
	text = gsub(text, "n[n ]*.͡[ɕʑ]", function(m)
		return m:gsub("n", "ɲ̟")
	end)
	text = gsub(text, "n[n ]*ɲ̟", function(m)
		return m:gsub("n", "ɲ̟")
	end)
	text = gsub(text, "n[n ]*ɾ", function(m)
		return m:gsub("n", "n̺")
	end)
	text = gsub(text, "_ng", "ŋ")
	text = gsub(text, "(n[n ]*)([kg])([ʲᵝ]*)", function(m1, m2, m3)
		return m1:gsub("n", "ŋ" .. m3) .. m2 .. m3
	end)
	text = gsub(text, "_nw", "nᵝ")
	text = gsub(text, "n[n ]*[ɸszɕhjw]", function(m)
		return m:gsub("n", "ɰ̃")
	end)
	text = gsub(text, "([n ]*n)[ʔ_]", function(m)
		-- ʔ to be removed once Hrkt-translit `disambig` flag is implemented, as ʔ will always represent a glottal
		return m:gsub("n", "ɰ̃")
	end)
	text = gsub(text, "n[n ]* [aeiou]", function(m)
		return m:gsub("n", "ɰ̃")
	end)

	text = gsub(text, "h[iju]", {
		["hi"] = "çi", ["hj"] = "ç",
		["hu"] = "ɸu"
	})
	text = gsub(text, "h([çɸ])", "%1%1")

	-- Collapse a run of one consonant into the consonant plus one length mark
	-- per extra copy. The two position captures "()" give the callback the
	-- start and end of the extra copies; without them i and j would be nil.
	for _, cons in ipairs{"ç", "ɕ", "ɸ", "h", "j", "m", "n", "ɴ", "ŋ", "ɾ", "s", "w", "z", "ʑ"} do
		text = gsub(text, "(" .. cons .. ")()" .. cons .. "+()", function(first, i, j)
			return first .. ("ː"):rep(j - i)
		end)
	end
	-- Same for the two-character consonants; each extra copy spans two
	-- characters, hence the division by 2.
	for _, cons in ipairs{"n̺", "nᵝ", "ɲ̟", "ŋʲ", "ŋᵝ", "ɰ̃"} do
		local char1, char2 = cons:match("(.[\128-\191]*)(.*)")
		text = gsub(text, "(" .. cons .. ")()" .. char1 .. "[" .. char1 .. char2 .. "]*" .. char2 .. "()", function(first, i, j)
			return first .. ("ː"):rep((j - i) / 2)
		end)
	end

	text = gsub(text, "(ː+)([ʲᵝ]+)", "%2%1")
	text = gsub(text, "̚(.[̥̊]*)([ʲᵝ]+)", "̚%2%1%2")

	-- Narrow vowel qualities.
	text = gsub(text, "[aeiouw]", {
		["a"] = "a̠",
		["e"] = "e̞",
		["o"] = "o̞",
		["u"] = "ɯ̟",
		["w"] = "β̞"
	})
	text = gsub(text, "([szɕʑɲçʲ][̟̥̊]*ː*)ɯ̟", "%1ɨ")

	-- Where a vowel carries both of the combining marks in the bracket set,
	-- replace its quality diacritic with the matching spacing modifier.
	text = gsub(text, "̠[̥̃][̥̃]", "̥̃˗")
	text = gsub(text, "̞[̥̃][̥̃]", "̥̃˕")
	text = gsub(text, "̟[̥̃][̥̃]", "̥̃˖")

	-- Final clean-up: swap the ring placement after a quality diacritic, drop
	-- the dot and underscore markers, and substitute the letter "ɡ" for "g".
	text = gsub(text, "([̠̞̟])̥", "%1̊")
		:gsub("[%._]", "")
		:gsub("g", "ɡ")

	return text
end

-- Accent marks per pitch direction: a lookup table applied to every
-- character of the romanised word, plus the marked form of "n".
local tone_marks = {
	rise = {
		vowels = {
			["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú",
			["ā"] = "áá", ["ē"] = "éé", ["ī"] = "íí", ["ō"] = "óó", ["ū"] = "úú"
		},
		nasal = "ń",
	},
	fall = {
		vowels = {
			["a"] = "à", ["e"] = "è", ["i"] = "ì", ["o"] = "ò", ["u"] = "ù",
			["ā"] = "àà", ["ē"] = "èè", ["ī"] = "ìì", ["ō"] = "òò", ["ū"] = "ùù"
		},
		nasal = "ǹ",
	},
}

--- Romanise a kana fragment and decorate it with rising or falling marks.
-- @param word    kana text
-- @param rftype  "rise" or "fall"; any other value raises "Type not recognised."
-- @return        the romanised text with accented vowels and accented "n"
function export.rise_and_fall(word, rftype)
	-- Put a dot between a kana from the o_kana set and a following う/ウ, and
	-- between a kana from the e_kana set and a following い/イ, before romanising.
	word = gsub(word, "([" .. o_kana .. "][゙゚]?)([うウ])", "%1.%2")
	word = gsub(word, "([" .. e_kana .. "][゙゚]?)([いイ])", "%1.%2")
	word = kana_to_romaji(word, "ja")

	local marks = tone_marks[rftype]
	if not marks then
		return error("Type not recognised.")
	end

	-- Vowels first, then any "n" that stands before a consonant letter,
	-- apostrophe or space, and finally an "n" at the very end.
	word = gsub(word, ".", marks.vowels)
	word = gsub(word, "n([bcdfghjkmnprstvw%'z ])", marks.nasal .. "%1")
	word = gsub(word, "n$", marks.nasal)
	return word
end

-- Module:ja-ojad and Module:ja-infl-demo rely on the output format of this function
--- Format one pitch-accent line: the kana, a bracketed romanisation with
--- rise/fall marks, and a label naming the accent type.
-- @param text   kana string (spaces allowed), or a frame-like table whose
--               args[1], args[2], args.dev and args.dev2 supply the parameters
-- @param class  accent class: "h"/"0", "a"/"1", "o", or the downstep
--               position as a string of digits
-- @param dev    optional comma-separated list of 1-based mora positions to
--               mark as devoiced
-- @param dev2   accepted for signature compatibility; not used by this function
-- @return       the formatted string; raises an error for an unrecognised
--               class or a downstep position beyond the mora count
function export.accent(text, class, dev, dev2)
	local result

	if(type(text)) == "table" then
		text, class, dev, dev2 = text.args[1], text.args[2], text.args["dev"], text.args["dev2"]
	end
	-- Rewrite う/ウ after an o_kana character and い/イ after an e_kana
	-- character as the long-vowel mark, then drop dots.
	text = gsub(text, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
	text = gsub(text, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
	text = gsub(text, "%.", "")
	if dev == "" then
		dev = false
	end

	-- Wrapper strings placed around the kana and romaji segments.
	-- NOTE(review): several of these are empty or whitespace-only here, which
	-- suggests markup was meant to go in them — confirm against the deployed module.
	local down_first = ""
	local down_last = " "
	local high_first = ""
	local start = ""
	local romaji_start = "  ["
	local romaji_last = "] "
	local last = " "

	--position of each mora (ぁ not counted), text with space
	local position_mora_space = {}
	for i=1, len(text) do
		if not match(sub(text,i,i), "[ " .. submoraic_kana .. "]") then
			local extra = len(match(sub(text,i+1), "^[" .. submoraic_kana .. "]*"))
			insert(position_mora_space, i+extra)
		end
	end

	-- Work out the accent type ("h", "a", "o" or "n") and the downstep position.
	local acc_type, acc_number
	if match(class, "^[h0]$") then
		acc_type, acc_number = "h", 0
	elseif match(class, "^[a1]$") then
		acc_type, acc_number = "a", 1
	elseif match(class, "^o$") then
		acc_type = "o"
		acc_number = len(gsub(text, "[ " .. submoraic_kana .. "]", ""))
	end
	if match(class, "^[0-9]+$") and not match(class,"^[01]$") then
		acc_number = tonumber(class)
		local morae_count = len(gsub(text, "[ " .. submoraic_kana .. "]", ""))
		if morae_count == acc_number then
			acc_type = "o"
		elseif morae_count < acc_number then
			return error(("Mora count (%d) is smaller than position of downstep mora (%d).")
				:format(morae_count, acc_number))
		else
			acc_type = "n"
		end
	elseif not acc_number then
		-- Unrecognised class: keep it so the failure is reported further down.
		acc_number = class
	end

	-- Index of the last character of the first mora.
	local start_index = 1
	while match(sub(text, start_index+1, start_index+1), "[" .. submoraic_kana .. "]") do
		start_index = start_index + 1
	end

	-- Split the text into morae (a character plus any following submoraic kana).
	local kanas = {}
	local single_mora
	for i=1, len(text) do
		if not match(sub(text,i,i), "[ " .. submoraic_kana .. "]") then
			-- Assign first: gsub also returns a count, which must not be
			-- forwarded to insert() as an extra argument.
			single_mora = gsub(sub(text, i, -1), "^(.[" .. submoraic_kana .. "]*).*", "%1")
			insert(kanas, single_mora)
		end
	end

	local function kana_devoice(text)
		return "" .. text .. " "
	end
	if dev then
		for position in gsplit(dev, ",") do
			position = tonumber(position)
			kanas[position] = kana_devoice(kanas[position])
		end
	end

	-- For the romanisation, spell each long-vowel mark out as the vowel kana
	-- matching the preceding character's set.
	local romaji_text = gsub(text, "([" .. o_kana .. "][゙゚]?)ー", "%1お")
	romaji_text = gsub(romaji_text, "([" .. e_kana .. "][゙゚]?)ー", "%1え")
	romaji_text = gsub(romaji_text, "([" .. u_kana .. "][゙゚]?)ー", "%1う")
	romaji_text = gsub(romaji_text, "([" .. i_kana .. "][゙゚]?)ー", "%1い")
	romaji_text = gsub(romaji_text, "([" .. a_kana .. "][゙゚]?)ー", "%1あ")
	romaji_text = gsub(romaji_text, "([" .. n_kana .. "][゙゚]?)ー", "%1%1")
	local romajis = split(romaji_text, "")

	-- Number of spaces in the shortest prefix of `text` that contains
	-- `index` non-space characters.
	local function count_nspaces(text, index)
		local i, sample, nspaces = 0, "", 0
		while len(sample) < index do
			i = i + 1
			sample, nspaces = gsub(sub(text, 1, i), " ", "")
		end
		return nspaces
	end

	-- use @ instead of ̥
	local function romaji_devoice(text)
		return text .. "@"
	end
	if dev then
		for position in gsplit(dev,",") do
			position = position_mora_space[tonumber(position)]
			romajis[position] = romaji_devoice(romajis[position])
		end
	end

	if acc_type == "n" then
		-- Three segments: first mora, up to the downstep, after the downstep.
		local r_start_index = start_index + count_nspaces(romaji_text, start_index)
		local r_index = position_mora_space[acc_number]
		local k_index = acc_number
		local r_parts = {
			[1] = concat(romajis, "", 1, r_start_index),
			[2] = concat(romajis, "", r_start_index + 1, r_index),
			[3] = concat(romajis, "", r_index + 1, #romajis)
		}
		local k_parts = {
			[1] = concat(kanas, "", 1, 1),
			[2] = concat(kanas, "", 2, k_index),
			[3] = concat(kanas, "", k_index + 1, #kanas)
		}
		-- Re-emit a segment's leading space explicitly.
		local space2 = ""
		local space3 = ""
		if sub(r_parts[2], 1, 1) == " " then
			space2 = " "
		end
		if sub(r_parts[3], 1, 1) == " " then
			space3 = " "
		end
		result = start .. k_parts[1] .. down_first .. k_parts[2] .. down_last .. k_parts[3] .. last ..
			romaji_start .. export.rise_and_fall(r_parts[1], "fall") .. space2 ..
			export.rise_and_fall(r_parts[2], "rise") .. "ꜜ" .. space3 ..
			export.rise_and_fall(r_parts[3], "fall") .. romaji_last ..
			"(Nakadaka – [" .. acc_number .. "])"
	else
		-- Two segments: first mora and everything after it.
		local r_start_index = start_index + count_nspaces(romaji_text, start_index)
		local r_parts = {
			[1] = concat(romajis, "", 1, r_start_index),
			[2] = concat(romajis, "", r_start_index + 1, #romajis)
		}
		local k_parts = {
			[1] = concat(kanas, "", 1, 1),
			[2] = concat(kanas, "", 2, #kanas)
		}
		local space2 = ""
		if sub(r_parts[2], 1, 1) == " " then
			space2 = " "
		end
		if acc_type == "h" then
			result = start .. k_parts[1] .. high_first .. k_parts[2] .. last .. last ..
				romaji_start .. export.rise_and_fall(r_parts[1], "fall") .. space2 ..
				export.rise_and_fall(r_parts[2], "rise") .. romaji_last ..
				"(Heiban – [" .. acc_number .. "])"
		elseif acc_type == "a" then
			result = start .. down_first .. k_parts[1] .. down_last .. k_parts[2] .. last ..
				romaji_start .. export.rise_and_fall(r_parts[1], "rise") .. "ꜜ" .. space2 ..
				export.rise_and_fall(r_parts[2], "fall") .. romaji_last ..
				"(Atamadaka – [" .. acc_number .. "])"
		elseif acc_type == "o" then
			result = start .. k_parts[1] .. down_first .. k_parts[2] .. down_last .. last ..
				romaji_start .. export.rise_and_fall(r_parts[1], "fall") .. space2 ..
				export.rise_and_fall(r_parts[2], "rise") .. "ꜜ" .. romaji_last ..
				"(Odaka – [" .. acc_number .. "])"
		else
			return error("Accent type not recognised.")
		end
	end

	-- Replace each "@" devoicing marker together with the character before it.
	result = gsub(result, "(.)@", " %1 ")
	return result
end

-- Module result: the table holding show, ipa, rise_and_fall and accent.
return export