-- Module:zh-translit

local m_str_utils = require("Module:string utilities") local m_utils = require("Module:utilities")

local findTemplates = require("Module:template parser").findTemplates local get_section = m_utils.get_section local gsub = string.gsub local insert = table.insert local safe_require = m_utils.safe_require local sub = string.sub local toNFD = mw.ustring.toNFD local trim = mw.text.trim local ugsub = m_str_utils.gsub local ulen = m_str_utils.len local ulower = m_str_utils.lower local usub = m_str_utils.sub local uupper = m_str_utils.upper

-- Current frame object, used below for `frame:preprocess(...)` when expanding
-- template argument names and values. `mw.getCurrentFrame` is a function and
-- has to be called: a bare function value cannot be indexed with `:preprocess`.
local frame = mw.getCurrentFrame()
-- Cache for the `MT` table of "Module:zh/data/cmn-tag"; export.tr fills it in
-- the first time it is needed.
local tag

-- Maps a language code to the abbreviation that is compared against the
-- (preprocessed) parameter names of {{zh-pron}}.
local lect_code = mw.loadData("Module:zh/data/lect codes").langcode_to_abbr

local export = {}

-- Reports that no automatic transliteration is available for `lang`.
-- Calls "Module:debug/track" with the key
-- "zh-translit/needs manual translit/<lang>" and returns nil so that callers
-- can write `return fail(lang)`.
-- @param lang     language code appended to the tracking key
-- @param request  unused; kept so the signature stays unchanged
-- @return nil
local function fail(lang, request)
	local track = require("Module:debug/track")
	track("zh-translit/needs manual translit/" .. lang)
	return nil
end

-- Fetches the level-2 "Chinese" section of the page called `title`.
-- @param title  page name to look up
-- @return whatever `get_section` returns for that page's wikitext, or false
--         when the title object cannot be created or the page has no content
local function get_content(title)
	local page = mw.title.new(title)
	if not page then
		return false
	end
	-- getContent has to be *called*; it yields nil for a page that does not
	-- exist, which is treated the same as an unusable title.
	local wikitext = page:getContent()
	if not wikitext then
		return false
	end
	return get_section(wikitext, "Chinese", 2)
end

-- Language codes for which a comma that is not followed by a space ends a
-- reading (in addition to "/", which ends a reading for every language).
local comma_delimited_lects = {
	["cmn"] = true,
	["csp"] = true,
	["wuu"] = true,
	["yue"] = true,
	["zhx-tai"] = true,
}

-- Checks whether byte position `i` of `readings` ends a reading and, if so,
-- returns that reading.
-- @param readings  raw readings string
-- @param lang      language code
-- @param i         byte position being examined
-- @param i_end     position one past the last byte (counts as a boundary)
-- @param start     byte position at which the current reading begins
-- @return the substring from `start` to `i - 1` when `i` is a boundary;
--         nothing (nil) otherwise
local function get_reading(readings, lang, i, i_end, start)
	local at_boundary = i == i_end
	if not at_boundary then
		local ch = sub(readings, i, i)
		if ch == "/" then
			at_boundary = true
		elseif ch == "," and comma_delimited_lects[lang] then
			-- A comma followed by a space does not split the reading.
			at_boundary = sub(readings, i + 1, i + 1) ~= " "
		end
	end
	if at_boundary then
		return sub(readings, start, i - 1)
	end
end

-- Folds the readings found in one {{zh-pron}} argument into the running
-- transliteration `tr`.
--
-- For "ltc" and "och" the raw string is used as-is. For every other language
-- the string is scanned byte by byte and split with `get_reading`:
--   * an item without "=" is a reading; it must agree with `tr` (see below);
--   * for "cmn", the item "cap=y" prefixes `tr` with "^";
--   * any other item containing "=" is ignored.
--
-- @param readings  raw (already preprocessed) argument value
-- @param lang      language code
-- @param tr        transliteration found so far, or nil
-- @return the resulting transliteration (nil if none was found), or false
--         when the readings conflict with each other or with `tr`
local function handle_readings(readings, lang, tr)
	if lang == "ltc" or lang == "och" then
		if tr and readings ~= tr then
			return false
		end
		return readings
	end

	local tr_orig = tr
	local start = 1
	local i_end = #readings + 1
	for i = 1, i_end do
		local reading = get_reading(readings, lang, i, i_end, start)
		if not reading then
			-- Position `i` is not a boundary; keep scanning.
		elseif not reading:find("=") then
			if (
				not tr or
				tr == reading or
				gsub(ulower(tr), "%^", "") == reading
			) then
				-- First reading, or one that matches `tr` once `tr` is
				-- lowercased and stripped of "^" markers.
				tr = reading
			elseif ulower(reading) ~= tr then
				return false
			end
			-- Note that `start` only advances past items without "=".
			start = i + 1
		elseif lang == "cmn" and reading == "cap=y" then
			-- Without a reading there is nothing to capitalise; skipping here
			-- avoids concatenating "^" with nil.
			if tr then
				local tr_cap = "^" .. tr
				if not tr_orig or tr_orig == tr_cap then
					tr = tr_cap
				end
			end
		end
	end
	return tr
end

-- Walks the templates in `content`, updating the transliteration from
-- {{zh-pron}} and collecting the targets of {{zh-see}}.
-- @param content  wikitext to scan with `findTemplates`
-- @param lang     language code; selects the {{zh-pron}} parameter via
--                 `lect_code`
-- @param see      list that receives {{zh-see}} targets not yet in `seen`
-- @param seen     set of page names that should not be queued
-- @param tr       transliteration found so far, or nil
-- @return the updated transliteration, or false as soon as it is false after
--         a {{zh-pron}} has been handled
local function iterate_content(content, lang, see, seen, tr)
	for name, params in findTemplates(content) do
		if name == "zh-see" then
			local target = trim(frame:preprocess(params[1]))
			if not seen[target] then
				insert(see, target)
			end
		elseif name == "zh-pron" then
			for key, value in pairs(params) do
				-- Only non-empty values under string keys are candidates.
				local usable = #value > 0 and type(key) == "string"
				if usable and frame:preprocess(key) == lect_code[lang] then
					tr = handle_readings(frame:preprocess(value), lang, tr)
					break
				end
			end
			if tr == false then
				return false
			end
		end
	end
	return tr
end

-- Transliterates the Chinese term `text` for the lect `lang` by reading the
-- {{zh-pron}} data on the term's own entry page (and on pages it points to
-- with {{zh-see}}).
-- @param text  term to transliterate; nil and "" are returned unchanged
-- @param lang  language code; "zh" and "lzh" are treated as "cmn", and a code
--              missing from `lect_code` is replaced by its full code
-- @param sc    unused
-- @return the transliteration followed by one space, or nil (via `fail`) when
--         none can be produced automatically
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end

	if lang == "zh" or lang == "lzh" then
		lang = "cmn"
	end

	if not lect_code[lang] then
		-- getFullCode is a method and has to be called.
		lang = require("Module:languages").getByCode(lang, nil, true):getFullCode()
	end

	local content = get_content(text)
	if not content then
		return fail(lang)
	end

	local see = {}
	local seen = { [text] = true }
	local tr = iterate_content(content, lang, see, seen)

	if tr == nil then
		-- Nothing on the page itself: follow the {{zh-see}} targets. `see` can
		-- grow while it is being walked, so its length is re-read every pass.
		local i, title = 1
		while i <= #see do
			title = see[i]
			content = get_content(title)
			if content then
				tr = iterate_content(content, lang, see, seen, tr)
				if tr == false then
					return fail(lang)
				end
				seen[title] = true
			end
			i = i + 1
		end
	end

	-- From here on `tr` is always a non-empty-or-empty string, never nil/false.
	if not tr then
		return fail(lang)
	end

	if lang == "cmn" then
		tr = tr:gsub("#", "")
		-- Only touch the character data if there is any non-ASCII lead byte.
		if tr:match("[\194-\244]") then
			tag = tag or mw.loadData("Module:zh/data/cmn-tag").MT
			-- The pattern matches one UTF-8 character at a time; characters
			-- for which the callback returns nothing are left as they are.
			tr = tr:gsub(".[\128-\191]*", function(m)
				if m == "一" then
					return "yī"
				elseif m == "不" then
					return "bù"
				else
					m = tag[m] and tag[m][1]
					if m then
						return toNFD(m):gsub("^[aeiou]", "'%0")
					end
				end
			end)
				:gsub("^'", "") -- remove initial apostrophe inserted by previous function
		end
		-- "^" marks the following character for uppercasing.
		tr = ugsub(tr, "%^(.)", uupper)
	elseif lang == "csp" or lang == "yue" or lang == "zhx-tai" then
		-- NOTE(review): the replacement "%0" re-inserts each match unchanged,
		-- so this substitution currently leaves the string as it is; wrapper
		-- markup around the match may have been lost — confirm.
		tr = tr:gsub("%d[%d%*%-]*%f[^%d%*]", "%0")
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then
			return fail(lang)
		end

		-- One entry per character of `text`: "," separates them for "ltc",
		-- ";" for "och".
		local index = mw.text.split(tr, lang == "ltc" and "," or ";")

		-- The data module path differs only in its last component.
		local module_type = lang .. "-pron"
		if lang == "och" then
			module_type = module_type .. "-ZS"
		end
		local module_prefix = "Module:zh/data/" .. module_type .. "/"

		for i = 1, ulen(text) do
			local data_module = safe_require(module_prefix .. usub(text, i, i))
			-- Give up when there is no data for this character, or when there
			-- are several readings and no explicit number picks one.
			if not data_module or (((not index[i]) or index[i] == "y") and #data_module > 1) then
				return fail(lang)
			end
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			-- Falls back to the first reading if the entry is missing or the
			-- lookup yields nothing.
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				-- NOTE(review): concatenating with empty strings leaves a
				-- non-empty tone textually unchanged; wrapper markup may have
				-- been lost — confirm.
				tone = tone ~= "" and ("" .. tone .. "") or tone
				index[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
			else
				index[i] = index[i][6]
			end
		end

		tr = table.concat(index, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "nan-tws" then
		tr = require("Module:nan-pron").pengim_display(tr)
	elseif lang == "wuu" then
		local w_pron = require("Module:wuu-pron")
		if tr:match(';') then
			-- TODO
			return fail(lang)
		elseif tr:match(':') then
			-- Drop the first three bytes before formatting.
			tr = w_pron.wugniu_format(tr:sub(4))
		else
			tr = w_pron.wugniu_format(w_pron.wikt_to_wugniu(tr))
		end
	elseif lang == "zhx-sic" then
		-- NOTE(review): as above, the trailing "%0" substitution is an
		-- identity replacement — confirm whether markup was intended.
		tr = ugsub(tr, "([%d-])(%a)", "%1 %2")
			:gsub("%d[%d%*%-]*%f[^%d%*]", "%0")
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end

	-- End with a space so that adjacent parts of running text that are
	-- transliterated separately (e.g. because of links) stay separated.
	return tr .. " "
end

return export