-- Module:och-pron

-- Table of functions handed back to callers at the end of the module.
local export = {}

-- String helpers shared by every function below.
local m_string_utils = require("Module:string utilities")

local codepoint = m_string_utils.codepoint
local gsub = m_string_utils.gsub
local len = m_string_utils.len
-- Loader used by get_data to fetch per-character data modules.
local safe_require = require("Module:utilities").safe_require
local u = m_string_utils.char

-- Background colours used by export.ipa for, respectively, the title row,
-- the row labels and the data cells of the generated table.
local colour_1 = "#ccece6"
local colour_2 = "#E0F3F3"
local colour_3 = "#F7FCFB"

--- Pad a piece of text with one space on each side.
-- @param text string to pad
-- @return the padded text, or "" when `text` is the empty string
local function zh_fmt(text)
	if text == "" then
		return ""
	end
	return ' ' .. text .. ' '
end

--- Turn one stored reading into a fragment of a slash-delimited transcription.
-- @param reading_temp stored reading; its first space-separated token is the
--   transcription, any further tokens are treated as a note
-- @param text the whole term; its character count decides which delimiters
--   are emitted
-- @param system unused here
-- @param i position of the current character within `text`
-- @param return_note when truthy, the note tokens are appended after a space
-- @param index ordinal of this reading for the character (nil counts as 1)
-- @return the formatted fragment
local function insert_pron(reading_temp, text, system, i, return_note, index)
	-- Re-attach whatever follows either combining mark in the bracket class by
	-- dropping the space after it, so it is not mistaken for a token boundary.
	local cleaned = gsub(reading_temp, "([̥̊]) ", "%1")
	local pieces = mw.text.split(cleaned, " ")

	-- The first token is the transcription; strip its leading asterisk.
	local ipa = gsub(pieces[1], "^%*", "")
	table.remove(pieces, 1)

	local char_count = len(text)
	local out = {}

	-- Opening "/*": only for the first character, and for a multi-character
	-- term only in front of the first reading.
	if i == 1 and (char_count == 1 or (index or 1) == 1) then
		out[#out + 1] = "/*"
	end

	out[#out + 1] = ipa

	-- Closing "/": only after the last character of the term.
	if char_count == i then
		out[#out + 1] = "/"
	end

	if #pieces > 0 and return_note then
		out[#out + 1] = " " .. table.concat(pieces, " ")
	end

	return table.concat(out)
end

--- Build one table row (a list of cell strings) for a single reading.
--
-- NOTE(review): the previous text of this function did not parse. The string
-- delimiters and `..` operators around several cells were unbalanced, which
-- looks like markup literals having been lost. The cells below are rebuilt
-- from the surviving expressions with that markup treated as empty -- TODO
-- confirm against the intended wrappers and restore them if needed.
--
-- @param titlechar the character the reading belongs to
-- @param reading one entry of the character's data module; the "BS" branch
--   reads fields 1-4, the other branch reads fields 1-7
-- @param system "BS" selects the Baxter-Sagart layout; anything else gets the
--   Zhengzhang layout
-- @param reading_index ordinal of the reading, shown as "index/count"
-- @param count total number of readings for the character
-- @param i unused here
-- @return a table whose slot 1 is nil and whose slots 2..n hold the cells, so
--   that cell n lines up with label n of the `fields` lists in export.ipa
local function pron_table(titlechar, reading, system, reading_index, count, i)
	if system == "BS" then
		return {
			nil,
			zh_fmt(titlechar),
			reading_index .. "/" .. count,
			reading[1],
			'‹ ' .. gsub(reading[2], '([XH])', '%1') .. ' ›',
			' ' .. gsub(insert_pron(reading[3], titlechar, "BS", 1, true), "ˤ", "ˁ") .. ' ',
			reading[4],
		}
	end

	-- Notes column: pad runs of Han characters / fullwidth commas, then add a
	-- space after a fullwidth comma that follows six characters other than
	-- ">" and "，". The result is held in a local so that only the string
	-- (and not gsub's substitution count) ends up in the row.
	local notes = gsub(reading[7], "([一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏，]+)", zh_fmt("%1"))
	notes = gsub(notes, "([^>，][^>，][^>，][^>，][^>，][^>，]，)", "%1 ")
	notes = gsub(notes, "([一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏])", "%1")

	return {
		nil,
		zh_fmt(titlechar),
		reading_index .. "/" .. count,
		reading[1],
		zh_fmt("" .. reading[2] .. ""),
		zh_fmt("" .. reading[3] .. ""),
		reading[4],
		zh_fmt("" .. reading[5] .. ""),
		' /*' .. reading[6] .. '/ ',
		notes,
	}
end

--- Load the data module for one character under one reconstruction system.
-- @param system system code used in the module path (e.g. "BS" or "ZS")
-- @param ch the character whose data is wanted
-- @return whatever safe_require yields for that page, or nil when it yields a
--   false value
local function get_data(system, ch)
	local page = string.format("Module:zh/data/och-pron-%s/%s", system, ch)
	local data = safe_require(page)
	if not data then
		return nil
	end
	return data
end

--- Build the collapsible Old Chinese tables for the characters of the current
-- page title, one table per reconstruction system that has data.
--
-- @param index_text reading selectors. Entries are separated by ";" (one per
--   character); an entry may be split by "," into one selector per system
--   ("BS" first, "ZS" second). A selector is "y" (all readings), "n" (stop
--   handling the remaining characters for that system) or reading numbers
--   joined by "+". Characters without an entry behave as "y".
-- @param preview text shown after the "Old Chinese▼" label; nil counts as "".
-- @return the wikitext, or "" when no table was produced
function export.ipa(index_text, preview)
	-- getCurrentTitle is a function: it has to be called before `.text` is read.
	local titlechar = mw.title.getCurrentTitle().text
	local selectors = mw.text.split(index_text, ";")
	local output_text = {}
	local systems = { "BS", "ZS" }
	local indiv_pronunciation = { ["BS"] = {}, ["ZS"] = {} }
	local rand = ""

	-- Entry 1 of each list is the table title; entries 2..n are row labels and
	-- line up with slots 2..n of the rows returned by pron_table.
	local fields = {
		["BS"] = { "Baxter–Sagart system 1.1 " .. "(2014)", " Character ", " Reading # ", " Modern Beijing (Pinyin) ", " Middle Chinese ", " Old Chinese ", " English " },
		["ZS"] = { "Zhengzhang system (2003)", " Character ", " Reading # ", " No. ", " Phonetic component ", " Rime group ", " Rime subdivision ", " Corresponding MC rime ", " Old Chinese ", " Notes " }
	}

	for system_seq, system in ipairs(systems) do
		-- Collect one row per selected reading of every character.
		for i, cp in ipairs { codepoint(titlechar, 1, -1) } do
			local ch = u(cp)
			local data_module = get_data(system, ch)
			if data_module then
				local count = 0
				for _ in ipairs(data_module) do
					count = count + 1
				end

				local selector = selectors[i]
				local reading_number = selector and (mw.text.split(selector, ',')[system_seq] or selector) or "y"

				if reading_number == "y" then
					for reading_no, reading in ipairs(data_module) do
						table.insert(indiv_pronunciation[system], pron_table(ch, reading, system, reading_no, count, i))
					end
				elseif reading_number == "n" then
					break
				else
					for indiv_number in mw.text.gsplit(reading_number, '%+') do
						table.insert(indiv_pronunciation[system], pron_table(ch, data_module[tonumber(indiv_number)], system, indiv_number, count, i))
					end
				end
			end
		end

		if indiv_pronunciation[system][1] then
			-- Drop duplicate rows. The key is built from slots 2..n without
			-- touching the row, so the nil in slot 1 stays where it is and no
			-- length operator is applied to a table with a hole.
			local seen, results = {}, {}
			local width = #fields[system]
			local key
			for _, row in ipairs(indiv_pronunciation[system]) do
				local cells = {}
				for col = 2, width do
					cells[#cells + 1] = tostring(row[col] or "")
				end
				key = table.concat(cells)
				if not seen[key] then
					seen[key] = true
					results[#results + 1] = row
				end
			end

			-- The id suffix is derived once, from the first system that yields
			-- rows, and reused for any later table.
			if rand == "" then
				rand = (gsub("oc-" .. key, "[^A-Za-z0-9]", codepoint))
			end

			local span = #results + 1
			local fmt = {
				header = '\n{| class="wikitable mw-collapsible mw-collapsed" id="mw-customcollapsible-oc' .. rand ..
					'" style="width:100%; margin:0; text-align:center; border-collapse: collapse; border-style: hidden; display: table;"',
				lv1 = '\n|-\n! style="background-color:' .. colour_1 .. '" colspan=' .. span .. '|',
				lv2 = '\n|-\n! style="background-color:' .. colour_2 .. '; width:8em"|',
				lv3 = '\n| style="background-color:' .. colour_3 .. '"|',
				closing = '\n|}',
				BS_note = '\n|-\n|colspan=' .. span .. [=[ style="text-align:left; font-size:90%"| Notes for Old Chinese notations in the Baxter–Sagart system: * Parentheses "()" indicate uncertain presence; * Square brackets "[]" indicate uncertain identity, e.g. *[t] as coda may in fact be *-t or *-p; * Angle brackets "&lt;>" indicate infix; * Hyphen "-" indicates morpheme boundary; * Period "." indicates syllable boundary. ]=]
			}

			table.insert(output_text, fmt.header)
			for field_index, field in ipairs(fields[system]) do
				if field_index == 1 then
					-- Title row spanning the whole table.
					table.insert(output_text, fmt.lv1 .. field)
				else
					-- Label plus one cell per row. Choosing by position (not by
					-- markup inside the label text) keeps this branch reachable;
					-- nil cells become "" so the columns stay aligned.
					local field_set = {}
					for _, result in ipairs(results) do
						field_set[#field_set + 1] = result[field_index] or ""
					end
					-- A field that is empty for every row is left out.
					if table.concat(field_set) ~= "" then
						table.insert(output_text, fmt.lv2 .. field .. fmt.lv3 .. table.concat(field_set, fmt.lv3))
					end
				end
			end
			table.insert(output_text, (system == "BS" and fmt.BS_note or "") .. fmt.closing)
		end
	end

	if not output_text[1] then
		return ""
	end

	local fold = '\n* ' .. 'Old Chinese▼ ' .. (preview or "") .. ' '
	-- Strip empty "[[]]" links; the parentheses discard gsub's match count.
	return fold .. (gsub(table.concat(output_text), "%[%[%]%]", ""))
end

--- Produce a short Zhengzhang ("ZS") transcription string for a term.
--
-- @param text the term, or a frame-like table whose `args[1]` is the term;
--   links are removed before the characters are looked up
-- @param reconstruction when truthy it is returned (after the intro) instead
--   of looking anything up
-- @param no_intro when truthy the leading "OC " is omitted
-- @param index optional reading selectors, one per character, separated by
--   ","; each is "y" (all readings) or reading numbers joined by "+"
-- @return the transcription string, or nil when some character has no data
function export.retrieve_pron(text, reconstruction, no_intro, index)
	if type(text) == "table" then
		text = text.args[1]
	end
	text = require("Module:links").remove_links(text)

	local retrieve_result = {}
	local intro = no_intro and "" or "OC "

	if not reconstruction then
		local index_set
		if index and index ~= "y" then
			index_set = mw.text.split(index, ",")
		end

		for char_index, cp in ipairs { codepoint(text, 1, -1) } do
			local char_pronunciation = {}
			local ch = u(cp)
			local data_module = get_data("ZS", ch)
			if not data_module then
				return nil
			end

			local reading_no = index_set and index_set[char_index] or "y"
			if reading_no ~= "y" then
				-- "+" is a pattern quantifier, so it is escaped to split on the
				-- literal character, as the other selector parsers in this
				-- module do.
				for number in mw.text.gsplit(reading_no, "%+") do
					table.insert(char_pronunciation, data_module[tonumber(number)][6])
				end
			else
				for _, reading in ipairs(data_module) do
					table.insert(char_pronunciation, reading[6])
				end
			end

			-- Alternative readings are joined with ", *" for a one-character
			-- term and with "/" otherwise.
			table.insert(retrieve_result, table.concat(char_pronunciation, len(text) == 1 and ", *" or "/"))
		end
	end

	return intro .. (reconstruction or "*" .. table.concat(retrieve_result, " "))
end

--- Build the short per-system pronunciation lines for a term.
--
-- @param text the term whose characters are looked up
-- @param index reading selectors: entries separated by ";" (one per
--   character), each optionally split by "," into one selector per system;
--   a selector is "y", "n" or reading numbers joined by "+"
-- @return the combined lines for the systems that produced output, or nil
--   when none did
function export.generate_show(text, index)
	local selectors = mw.text.split(index, ";")
	-- Field of a data entry that holds the reading, per system.
	local column = { ["BS"] = 3, ["ZS"] = 6 }
	local opening = {
		["BS"] = "\n*: (Baxter–Sagart) : " .. ' ',
		["ZS"] = "\n*: (Zhengzhang) : " .. ' ',
	}
	local closing = { ["BS"] = " ", ["ZS"] = " " }
	local per_system = { ["BS"] = {}, ["ZS"] = {} }
	local lines = {}

	for system_seq, system in ipairs({ "BS", "ZS" }) do
		for i, cp in ipairs { codepoint(text, 1, -1) } do
			local shown = {}
			local data_module = get_data(system, u(cp))

			-- A character without data discards the whole line for this system.
			if not data_module then
				per_system[system] = {}
				break
			end

			local seen = {}
			-- Append a reading unless the same stored string was already used
			-- for this character.
			local function add(reading_temp, seq)
				if not seen[reading_temp] then
					table.insert(shown, insert_pron(reading_temp, text, system, i, false, seq))
					seen[reading_temp] = true
				end
			end

			local selector = selectors[i]
			local reading_number = "y"
			if selector then
				reading_number = mw.text.split(selector, ',')[system_seq] or selector
			end

			if reading_number == "n" then
				break
			elseif reading_number == "y" then
				for seq, reading in ipairs(data_module) do
					add(reading[column[system]], seq)
				end
			else
				local seq = 0
				for indiv_number in mw.text.gsplit(reading_number, '%+') do
					seq = seq + 1
					add(data_module[tonumber(indiv_number)][column[system]], seq)
				end
			end

			local separator = len(text) == 1 and ", " or "｜"
			table.insert(per_system[system], table.concat(shown, separator))
		end

		if per_system[system][1] then
			table.insert(lines, opening[system] .. ' ' .. table.concat(per_system[system], " ") .. ' ' .. closing[system])
		end
	end

	if not lines[1] then
		return nil
	end

	-- Tidy the delimiters around the alternative-reading separator, then
	-- replace the separator itself.
	local shown_text = gsub(table.concat(lines), "｜%*", "｜")
	shown_text = gsub(shown_text, "/｜", "｜")
	shown_text = gsub(shown_text, "｜", ' | ')
	return shown_text
end

--- Create a link to a term, with its transcription filled in.
--
-- @param frame the invoking frame; its parent's arguments are used when
--   `arg` is not given
-- @param arg optional argument table that replaces the parent frame's args
-- @return whatever `link` from "Module:zh/link" returns
function export.link(frame, arg)
	-- getParent is a method and must be called; `frame:getParent.args`
	-- does not parse.
	local args = arg or frame:getParent().args
	local text = args[1]
	local meaning = args[2] or args['gloss'] or nil
	local lit = args['lit'] or nil

	return require("Module:zh/link").link(frame, nil, {
		"*" .. text,
		tr = export.retrieve_pron(text, args["tr"] or false, args["no_intro"] or false, args["id"] or false),
		gloss = meaning,
		lit = lit,
	}, mw.title.getCurrentTitle().subpageText) -- getCurrentTitle must be called too
end

-- Hand the table of public functions to whoever loads this module.
return export