-- Module:zh-new

local M = require('Module:zh')

-- Local shortcuts for the mw.ustring functions used throughout this module.
local len, sub, gsub, match, find =
	mw.ustring.len,
	mw.ustring.sub,
	mw.ustring.gsub,
	mw.ustring.match,
	mw.ustring.find

-- Short part-of-speech code -> section heading title (see M.postitle).
local pos_aliases_title = {
	n = "Noun",
	pn = "Proper noun",
	propn = "Proper noun",
	pron = "Pronoun",
	v = "Verb",
	a = "Adjective",
	adj = "Adjective",
	adv = "Adverb",
	prep = "Preposition",
	postp = "Postposition",
	conj = "Conjunction",
	part = "Particle",
	suf = "Suffix",
	prov = "Proverb",
	id = "Idiom",
	ch = "Idiom",
	cy = "Idiom",
	ph = "Phrase",
	intj = "Interjection",
	cl = "Classifier",
	num = "Numeral",
	abb = "Abbreviation",
	deter = "Determiner",
}

-- This is now used only as an inverse alias table (checkpos looks a value up
-- here and returns the matching short code).
-- The comment must stay on its own line: when it shares a line with the
-- definition, the `--` comments the whole table out.
local pos_aliases_head = {
	["n"] = "noun",
	["pn"] = "proper noun",
	["propn"] = "proper noun",
	["v"] = "verb",
	["a"] = "adj",
	["postp"] = "post",
	["conj"] = "con",
	["part"] = "particle",
	["pron"] = "pronoun",
	["prov"] = "proverb",
	["id"] = "idiom",
	["ch"] = "idiom",
	["cy"] = "idiom",
	["ph"] = "phrase",
	["intj"] = "interj",
	["abb"] = "abbr",
	["cl"] = "cls",
	["num"] = "numeral",
	["deter"] = "det",
}

-- Short part-of-speech code -> value returned by M.poshead_vi.
local pos_aliases_head_vi = {
	n = "noun",
	pn = "proper noun",
	propn = "proper noun",
	v = "verb",
	a = "adj",
	postp = "post",
	conj = "con",
	part = "part",
	pron = "pronoun",
	prov = "proverb",
	id = "idiom",
	ch = "idiom",
	cy = "idiom",
	ph = "phrase",
	intj = "interj",
	abb = "abbr",
	cl = "classifier",
	num = "num",
	deter = "determ",
}

--- Fetch the 'nan-hbl' result of M.check_pron for `title` and post-process it.
-- A falsy result is returned unchanged. Otherwise the rewrites below are
-- applied in order: an "á" syllable delimited by hyphens (or followed by "/"
-- or the end of the string) becomes "仔", and a syllable repeated three times
-- ("X-X-X") is collapsed to "(X)".
-- @param title value forwarded to M.check_pron
-- @return the rewritten string, or the falsy value M.check_pron returned
local function check_pron_nan(title)
	local pron = M.check_pron(title, 'nan-hbl')
	if not pron then
		return pron
	end

	-- { pattern, replacement } pairs; order matters.
	local rewrites = {
		{ "%-á%-", "-仔-" },
		{ "%-á/", "-仔/" },
		{ "%-á$", "-仔" },
		{ "^(.+)%-%1%-%1$", "(%1)" },
		{ "^(.+)%-%1%-%1([%-%/])", "(%1)%2" },
		{ "([%-%/])(.+)%-%1%-%1$", "%1(%2)" },
		{ "([%-%/])(.+)%-%1%-%1([%-%/])", "%1(%2)%3" },
	}
	for _, rule in ipairs(rewrites) do
		pron = gsub(pron, rule[1], rule[2])
	end
	return pron
end

--- Build a romanised reading for `text`, one unit per character.
-- For every character position the output takes, in order of preference:
-- the caller-supplied p[i]; the character itself if it is in the keep-list
-- below; the i-th entry of the word-list reading for the whole word; the
-- per-character reading from Module:zh/data/cmn-pron; or the character itself.
-- @param text  string, or a frame-like table whose args[1], args[2], args[3]
--              supply text, comp and pos (pos then defaults to 'n')
-- @param comp  optional string of digits; each digit is a group length and a
--              space is emitted after each group boundary. Ignored when it is
--              '', '12' or '21', when is_erhua is set, or when pos is 'cy'
-- @param pos   part-of-speech code; 'pn'/'propn' capitalise every
--              space-separated word of the result
-- @param p     optional table of per-position overrides (empty strings ignored)
-- @param is_erhua when true the final character of the input is dropped
-- @return the assembled string
function M.pytemp(text,comp,pos,p,is_erhua)
	local m_cmn_pron = require("Module:zh/data/cmn-pron")
	local wordlist_1, wordlist_2, wordlist_3 =
		require("Module:zh/data/wordlist/1"),
		require("Module:zh/data/wordlist/2"),
		require("Module:zh/data/wordlist/3")
	if not is_erhua then is_erhua = false end
	if type(text) == 'table' then
		text,comp,pos = text.args[1],text.args[2],text.args[3] or 'n'
	end
	comp = comp or ''

	-- Characters that are copied to the output as-is instead of being
	-- romanised. NOTE(review): presumably characters whose reading must be
	-- chosen by hand -- confirm against the module documentation.
	local keep_as_is = '一不期績绩蹟跡迹嵌框微突帆藩擊击夾夹鞠拈夕汐矽昔惜息危椰濤涛叔寂馴驯築筑質质播究菌矻識识穴膜餾馏企辱署偽伪蹈諷讽斂敛坊樸朴儲储剖檔档髮轍辙賜赐堤壑酵括懾慑蝸蜗淆攜携崖癌暫暂蟄蛰驟骤液血酪嘌覲幀蕁曳室癬癣亞亚穹褐貯贮淑場场踮鱒跌擁綏胺翕煦伐髮眶湮櫛栉萎閩闽銨铵鑿凿鈸钹謅诌雌綜综摑掴癖梵'

	local q = {}
	local sum = 0
	local wordlist_result = wordlist_1[text] or wordlist_2[text] or wordlist_3[text] or nil
	local moe_pron = wordlist_result and mw.text.split(wordlist_result, " ") or {}
	local textconv = M.ts(text)
	local length = len(text)
	if is_erhua == true then
		length = length - 1
		textconv = sub(textconv, 1, length)
	end
	text = ''

	-- q[k] == 'y' marks that a space follows position k.
	if comp ~= '' and comp ~= '12' and comp ~= '21' and not is_erhua then
		for i = 1, len(comp) do
			sum = sum + tonumber(sub(comp,i,i))
			q[sum] = 'y'
		end
	end

	if not p then p={} end
	for i = 1, length do
		if p[i] and p[i] ~= '' then
			-- pronunciation supplied
			text = text .. p[i]
		else
			local char = sub(textconv,i,i)
			-- Byte-wise plain search is sufficient for a whole UTF-8 character.
			if keep_as_is:find(char, 1, true) then
				text = text .. char
			else
				char = moe_pron[i] or m_cmn_pron.py[char] or char
				-- A non-initial unit starting with a/o/e gets an apostrophe.
				if i ~= 1 and find(char,'^[aoeāōēáóéǎǒěàòè]') then
					char = "'" .. char
				end
				text = text .. char
			end
		end
		if q[i] == 'y' and i ~= length and not is_erhua and pos ~= 'cy' then
			text = text .. ' '
		end
	end
	-- The apostrophe is redundant directly after a space.
	text = gsub(text," '"," ")

	if pos == 'pn' or pos == 'propn' then
		local characters = mw.text.split(text,' ')
		-- getContentLanguage is a function and must be called to obtain the
		-- language object that provides ucfirst.
		local content_lang = mw.language.getContentLanguage()
		for i = 1, #characters do
			characters[i] = content_lang:ucfirst(characters[i])
		end
		text = table.concat(characters,' ')
	end
	return text
end

--- Erhua variant of M.pytemp: forwards its arguments with is_erhua = true.
-- @return whatever M.pytemp returns
function M.pytemp_er(text, comp, pos, p)
	local is_erhua = true
	return M.pytemp(text, comp, pos, p, is_erhua)
end

--- Hanzi-box helper.
-- Accepts either positional values or a frame-like table as the first
-- argument, in which case title and comp are read from its args[1]/args[2].
-- NOTE(review): as visible here the function only calls M.ts_determ(title)
-- and returns nothing; the rest of the body (which presumably built `text`)
-- appears to be missing -- confirm against the upstream module.
function M.hzbox(title,comp,e,alt,gloss,lit,t2,t3,delink)
	if type(title) == 'table' then
		local frame_args = title.args
		title, comp = frame_args[1], frame_args[2]
	end
	local id = M.ts_determ(title)
	local text = ''
end

-- Erhua counterpart of M.hzbox. Visible logic: accepts a string or a
-- frame-like table (title taken from args[1]); sets id to 'trad' when the last
-- character of title is '兒' and to 'simp' otherwise; then drops that last
-- character from title.
-- NOTE(review): the remainder of this definition is not well-formed as shown
-- (a stray ')' after `local text = ''` and an `else` with no matching `if`);
-- part of the original text appears to be missing. Restore it from the
-- upstream module before changing this function.
function M.hzbox_er(title) if type(title) == 'table' then title = title.args[1] end local length = len(title) local id	if sub(title, length, length) == '兒' then id = 'trad' else id = 'simp' end title = sub(title, 1, length-1) local text = '')	else		text = (text .. '' .. M.ts(title) .. '儿|' .. title .. '兒}}')	end	return text end

--- Erhua variant of M.create: forwards `f` with is_erhua = true.
-- @return whatever M.create returns
function M.create_er(f)
	local is_erhua = true
	return M.create(f, is_erhua)
end

-- Appends a "====<name>====" subsection to `text` when sem[1] is non-empty and
-- returns the result; otherwise `text` is returned unchanged. For the names
-- 'Derived terms' and 'Compounds' there is a special branch when sem[1] is 'a'
-- or starts with 'a,'; other names add one "* " bullet line per element of sem.
-- NOTE(review): this definition is not well-formed as shown (the argument of
-- preprocess and the right-hand side of `zh_der ==` are missing, and
-- getCurrentFrame lacks its call parentheses); the string literals appear to
-- have been lost. Restore them from the upstream module before editing.
function M.semantics(text,name,sem) local orig_text = text if sem[1] and sem[1] ~= '' then text = (text .. '\n\n====' .. name .. '====') if name == 'Derived terms' or name == 'Compounds' then if sem[1] == 'a' or find(sem[1], '^a,') then local zh_der = mw.getCurrentFrame:preprocess('") .. )				if zh_der ==  then return orig_text end				text = text .. '\n' .. zh_der			else				text = text .. '\n'			end		else			for i = 1, #sem do				text = text .. '\n* '			end		end	end	return text end

--- Map a part-of-speech value back to its short alias code.
-- pos_aliases_head is searched first, then pos_aliases_title; if neither
-- contains `pos` as a value, `pos` is returned unchanged.
-- @param pos value to look up
-- @return the matching alias code, or `pos` itself
local function checkpos(pos)
	-- Reverse lookup: a key of `aliases` whose value equals pos, else nil.
	local function code_for(aliases)
		for code, name in pairs(aliases) do
			if name == pos then
				return code
			end
		end
		return nil
	end

	return code_for(pos_aliases_head) or code_for(pos_aliases_title) or pos
end

--- Section-heading title for a part-of-speech code.
-- A missing or empty `pos` is treated as 'n'. Codes not present in
-- pos_aliases_title are returned unchanged.
function M.postitle(pos)
	if not pos or pos == '' then
		pos = 'n'
	end
	local title = pos_aliases_title[pos]
	if title then
		return title
	end
	return pos
end

--- Lower-cased form of M.postitle(pos), via mw.ustring.lower.
function M.poshead(pos)
	local title = M.postitle(pos)
	return mw.ustring.lower(title)
end

--- Look up a part-of-speech code in pos_aliases_head_vi.
-- A missing or empty `pos` is treated as 'n'. Codes not present in the table
-- are returned unchanged.
function M.poshead_vi(pos)
	if not pos or pos == '' then
		pos = 'n'
	end
	local head = pos_aliases_head_vi[pos]
	if head then
		return head
	end
	return pos
end

--- Collect words from the word-list data modules that contain the current
-- page's subpage title.
-- Positional template arguments are taken over first. Then every word that
-- contains the title, differs from it, and (for one-character titles) is at
-- most `limit` characters long (default 4) is added. With `big` set, the list
-- modules "big1" and "big2" are scanned; otherwise modules "1".."3" are
-- scanned, optionally keeping only words whose reading at the title's position
-- equals the `p` argument. Finally entries are de-duplicated on the text
-- before the first ":".
-- @param frame Scribunto frame; arguments are read from its parent frame
-- @return string
-- NOTE(review): the visible return statement yields an empty string, and
-- `fold`, `hide_pron` and the de-duplicated list `res` are computed but not
-- used; the output expression appears to have been lost -- confirm against the
-- upstream module.
function M.newDer(frame)
	-- getCurrentTitle and getParent are functions and must be called.
	local title = mw.title.getCurrentTitle().subpageText
	local prefix = "Module:zh/data/wordlist/"
	local args = frame:getParent().args
	local limit = args["limit"] and tonumber(args["limit"]) or false
	local char_pronunciation = args["p"] or false
	local fold = args["fold"] or false
	local hide_pron = args["hide_pron"] or false
	local big = args["big"] or false

	local result = {}
	for _, arg in ipairs(args) do
		table.insert(result, arg)
	end

	local title_len = len(title)
	local max_len = limit or 4

	-- The title is literal text, not a pattern, so search in plain mode;
	-- otherwise magic characters in a title would be misinterpreted.
	local function is_candidate(word)
		return find(word, title, 1, true)
			and word ~= title
			and not (title_len == 1 and len(word) > max_len)
	end

	if big then
		for i = 1, 2 do
			local wordlist = require(prefix .. 'big' .. tostring(i)).list
			for _, word in ipairs(wordlist) do
				if is_candidate(word) then
					table.insert(result, word)
				end
			end
		end
	else
		for i = 1, 3 do
			local wordlist = require(prefix .. tostring(i))
			for word, pronunciation in pairs(wordlist) do
				if is_candidate(word) then
					if char_pronunciation then
						-- Reading of the syllable at the position where the
						-- title starts inside the word.
						local start = find(word, title, 1, true)
						if mw.text.split(pronunciation, " ")[start] == char_pronunciation then
							table.insert(result, word)
						end
					else
						table.insert(result, word)
					end
				end
			end
		end
	end

	local hash, res = {}, {}
	for _, element in ipairs(result) do
		local section = mw.text.split(element, ":")[1]
		if not hash[section] then
			res[#res + 1] = element
			hash[section] = true
		end
	end
	return ""
end

--- Cross-check a supplied reading `c` against the single-character pages that
-- make up `title`.
-- `c` is normalised (", " and "..." become a space) and split on "," into
-- alternatives; each alternative is split on spaces into per-character
-- readings. For each character of `title` (ignoring "…" and "，") whose page
-- exists, the page text is scanned for "|c=" values; if the reading at that
-- position is not among them, one entry is appended to the result.
-- @param title page title (string)
-- @param c     reading string
-- @return '' when nothing was flagged, otherwise the entries joined by
--         newlines followed by a blank line
-- NOTE(review): the entry appended for each mismatch is an empty string as
-- shown; the original message text appears to have been lost -- confirm
-- against the upstream module.
function M.check_yue(title, c)
	local ret = {}
	-- do not do anything on hanzi pages. cf. 宑&diff=49855439
	if mw.ustring.len(title) > 1 then
		c = gsub(c, ", ", " ")
		c = gsub(c, " *%.%.%. *", " ")
		-- Loop-invariant: the title with placeholder punctuation removed.
		local stripped_title = mw.ustring.gsub(title, "[…，]", "")
		for phrase in mw.text.gsplit(c, ",") do
			local c_set = mw.text.split(phrase, " ")
			-- Must be local: a bare `i` would leak into the global table.
			local i = 0
			for ch in mw.text.gsplit(stripped_title, "") do
				i = i + 1
				-- mw.title.new returns nil for an invalid title.
				local page = mw.title.new(ch)
				if page and page.exists then
					local content = page:getContent() or ""
					local prons = {}
					for template in mw.ustring.gmatch(content, "|c=([^};|\n]+)") do
						for indiv_pron in mw.text.gsplit(template, ",") do
							prons[indiv_pron] = true
						end
					end
					if not prons[c_set[i]] then
						table.insert(ret, "")
					end
				end
			end
		end
	end
	return #ret > 0 and (table.concat(ret, '\n') .. '\n\n') or ''
end

-- Entry generator. Visible logic: reads the current page title, processes the
-- parent frame's arguments through Module:parameters (many parameters are
-- declared as aliases of a shorter canonical name), pairs up part-of-speech
-- and definition values from the positional arguments or the pos/def lists,
-- and assembles wikitext that starts with a "==Chinese==" heading followed by
-- pronunciation, definition, semantic-relation (via M.semantics), "See also",
-- category, and optional Korean / Vietnamese sections.
-- is_erhua selects M.hzbox_er instead of M.hzbox and skips the glyph-origin /
-- etymology sections and the M.check_yue call.
-- NOTE(review): this definition is not well-formed as shown: several string
-- literals are missing (e.g. the operand before `):format(` and the right-hand
-- side of `wp ~=`), call parentheses are missing on getCurrentTitle and
-- getParent, and an inline `--` comment sits in the middle of a collapsed
-- line. The code is left untouched; restore it from the upstream module
-- before editing.
function M.create(f,is_erhua) if not is_erhua then is_erhua = false end local title = mw.title.getCurrentTitle.text local params = { ["type"] = {}, ["comp"] = {alias_of = "type"}, [1] = {list = true, allow_holes = true}, ["pos"] = {list = true, allow_holes = true}, ["def"] = {list = true, allow_holes = true}, ["e1"] = {list = "e", allow_holes=true}, ["etym1"] = {list = "etym", allow_holes=true}, ["etymology1"] = {list = "etymology", allow_holes=true}, ["meaning"] = {list = true, allow_holes=true}, ["k"] = {}, ["ko"] = {alias_of = "k"}, ["korean"] = {alias_of = "k"}, ["kt"] = {}, ["tr"] = {alias_of = "kt"}, ["transcription"] = {alias_of = "kt"}, ["ktr"] = {alias_of = "kt"}, ["kotr"] = {alias_of = "kt"}, ["koreantr"] = {alias_of = "kt"}, ["ktrans"] = {alias_of = "kt"}, ["kotrans"] = {alias_of = "kt"}, ["ke"] = {}, ["kodef"] = {alias_of = "ke"}, ["kodefinition"] = {alias_of = "ke"}, ["koreandef"] = {alias_of = "ke"}, ["v"] = {}, ["vi"] = {alias_of = "v"}, ["vietnam"] = {alias_of = "v"}, ["ve"] = {}, ["videf"] = {alias_of = "ve"}, ["videfinition"] = {alias_of = "ve"}, ["vietnamdef"] = {alias_of = "ve"}, ["vietnamdefinition"] = {alias_of = "ve"}, ["p"] = {list = true, allow_holes=true}, ["pron"] = {list = true, allow_holes=true}, ["pronunciation"] = {list = true, allow_holes=true}, ["go"] = {}, ["e"] = {}, ["etym"] = {alias_of = "e"}, ["etymology"] = {alias_of = "e"}, ["origin"] = {alias_of = "e"}, ["ori"] = {alias_of = "e"}, ["o"] = {alias_of = "e"}, ["syn"] = {list = true}, ["synonym"] = {list = true}, ["ant"] = {list = true}, ["antonym"] = {list = true}, ["hyper"] = {list = true}, ["hypernym"] = {list = true}, ["hypo"] = {list = true}, ["hyponym"] = {list = true}, ["coo"] = {list = true}, ["coord"] = {list = true}, ["coordinate"] = {list = true}, ["der"] = {list = true}, ["deriv"] = {list = true}, ["derived"] = {list = true}, ["derivedterm"] = {list = true}, ["rel"] = {list = true}, ["related"] = {list = true}, ["also"] = {list = true}, ["see"] = {list = true}, 
["seealso"] = {list = true}, ["alsosee"] = {list = true}, ["wp"] = {}, ["wiki"] = {alias_of = "wp"}, ["wikipedia"] = {alias_of = "wp"}, ["cat"] = {list = true}, ["poscat"] = {list = true}, ["rawcat"] = {list = true}, ["pic"] = {}, ["file"] = {alias_of = "pic"}, ["image"] = {alias_of = "pic"}, ["picture"] = {alias_of = "pic"}, ["piccap"] = {}, ["caption"] = {alias_of = "piccap"}, ["description"] = {alias_of = "piccap"}, ["desc"] = {alias_of = "piccap"}, ["er"] = {}, ["erhua"] = {alias_of = "er"}, ["tl"] = {}, ["toneless"] = {alias_of = "tl"}, ["tonelessvariant"] = {alias_of = "tl"}, ["variant"] = {alias_of = "tl"}, ["variation"] = {alias_of = "tl"}, ["tonelessvariation"] = {alias_of = "tl"}, ["a"] = {}, ["audio"] = {alias_of = "a"}, ["listen"] = {alias_of = "a"}, ["sound"] = {alias_of = "a"}, ["pronounced"] = {alias_of = "a"}, ["alt"] = {}, ["alter"] = {alias_of = "alt"}, ["altern"] = {alias_of = "alt"}, ["alternate"] = {alias_of = "alt"}, ["alternative"] = {alias_of = "alt"}, ["c"] = {}, ["cant"] = {alias_of = "c"}, ["cantonese"] = {alias_of = "c"}, ["mn"] = {}, ["nan"] = {alias_of = "mn"}, ["minnan"] = {alias_of = "mn"}, ["w"] = {}, ["wu"] = {alias_of = "w"}, ["shanghai"] = {alias_of = "w"}, ["m"] = {}, ["m-s"] = {}, ["m-x"] = {}, ["m-nj"] = {}, ["dg"] = {}, ["c-t"] = {}, ["g"] = {}, ["h"] = {}, ["j"] = {}, ["mb"] = {}, ["md"] = {}, ["mn-t"] = {}, ["mn-l"] = {}, ["x"] = {}, ["mc"] = {}, ["oc"] = {}, ["ts"] = {}, ["gloss"] = {}, ["lit"] = {}, ["t2"] = {}, ["t3"] = {}, ["delink"] = {}, ["vtype"] = {}, ["tlb"] = {}, }	local args = require("Module:parameters").process(f:getParent.args, params) local comp = args["type"] or "" local pos = {} local def = {} for i=1,math.max(args[1].maxindex/2, args["pos"].maxindex, args["def"].maxindex) do		table.insert(pos, args[1][2*i-1] or args["pos"][i] or "") table.insert(def, args[1][2*i] or args["def"][i] or "") end local function length(array) return array.maxindex or #array end local function expand(arg) local result = {} local 
maximum = 0 for i=1,#arg do if length(arg[i]) > maximum then maximum = length(arg[i]) end end local current = nil for i=1,maximum do			current = nil for j=1,#arg do				if current then break else current = arg[j][i] end end current = current or "" table.insert(result, current) end return result end local etyms = expand({args["e1"], args["etym1"], args["etymology1"], args["meaning"]}) local ko = args["k"] or "" local kotrans = args["kt"] or "" -- currently unused local kodef = args["ke"] or def[1] or "" local vi = args["v"] or "" local videf = args["ve"] or def[1] or "" local p = expand({args["p"], args["pron"], args["pronunciation"]}) local glyph_origin = args["go"] or "" local etym = args["e"] or "" local syn = expand({args["syn"], args["synonym"]}) local ant = expand({args["ant"], args["antonym"]}) local hyper = expand({args["hyper"], args["hypernyms"]}) local hypo = expand({args["hypo"], args["hyponyms"]}) local coo = expand({args["coord"], args["coo"], args["coordinate"]}) local der = expand({args["der"], args["deriv"], args["derived"], args["derivedterm"]}) local rel = expand({args["rel"], args["related"]}) local also = expand({args["also"], args["see"], args["alsosee"], args["seealso"]}) local wp = args["wp"] or "" local cat = args["cat"] local poscat = args["poscat"] local rawcat = args["rawcat"] local pic = args["pic"] or "" local piccap = args["piccap"] or "" local er = args["er"] or "" local tl = args["tl"] or "" local audio = args["a"] or "" local alt = args["alt"] or "" local m = args["m"] or "" local m_s = args["m-s"] or "" local m_x = args["m-x"] or "" local m_nj = args["m-nj"] or "" local dg = args["dg"] or "" local c = args["c"] or "" local c_t = args["c-t"] or "" local g = args["g"] or "" local h = args["h"] or "" local j = args["j"] or "" local mb = args["mb"] or "" local md = args["md"] or "" local mn = args["mn"] or "" local mn_t = args["mn-t"] or "" local mn_l = args["mn-l"] or "" local w = args["w"] or "" local x = args["x"] or "" local mc = 
args["mc"] or "" local oc = args["oc"] or "" local ts = args["ts"] or "" local gloss = args["gloss"] or "" local lit = args["lit"] or "" local t2 = args["t2"] or "" local t3 = args["t3"] or "" local delink = args["delink"] or "" local vtype = args["vtype"] or "" local tlb = args["tlb"] or "" local text = '' if not pos[1] or pos[1] == '' then pos[1] = 'n' end for i=1,#pos do pos[i] = checkpos(pos[i]) end text = (text .. '==Chinese==\n') if M.ts_determ(title) == 'simp' and ts ~= "trad" then return text .. ):format(mw.title.getCurrentTitle.text)	end	length = len(title)	local noerhua = sub(title,1,length-1) -- currently unused	local erhua = sub(title,length,length) -- currently unused	text = text .. (is_erhua and M.hzbox_er(title) or M.hzbox(title,comp,table.concat(etyms,'|'),alt,gloss,lit,t2,t3,delink))	if wp ~=  then text = (text .. '\n') end	if pic ~= '' then text = (text .. '\n')	end

if is_erhua then text = (text .. '===Pronunciation===\n\n\n') else text = (text .. '\n\n') if glyph_origin ~= '' then text = (text .. '===Glyph origin===\n' .. glyph_origin .. '\n\n') end if etym ~= '' then text = (text .. '===Etymology===\n' .. etym .. '\n\n') end text = (text .. '===Pronunciation===\n\n\n') local pcall_success, yue_check = pcall(M.check_yue, title, c) if pcall_success and c and c ~= '' and c ~= '-' then text = text .. yue_check end end text = (text .. '===' .. (length == 1 and "Definitions" or M.postitle(pos[1])) .. '===\n') if length == 1 then text = (text .. '') -- an alias for 'Han character', see Module:headword/data else if M.poshead(pos[1]) == "verb" then text = (text .. '') else text = (text .. '') end end if tlb ~= "" then text = (text .. ' ') end text = (text.. '\n\n') if is_erhua then text = text .. '# '	else text = (text .. '# ' .. ((def[1] and def[1] ~= "") and def[1] or "")) end if syn[1] then if match(syn[1], "^dial") then text = text .. "\n\n====Synonyms====\n" else text = M.semantics(text,'Synonyms',syn) end end text = M.semantics(text,'Antonyms',ant) text = M.semantics(text,'Hypernyms',hyper) text = M.semantics(text,'Hyponyms',hypo) text = M.semantics(text,'Coordinate terms',coo) text = M.semantics(text,length == 1 and 'Compounds' or 'Derived terms',der) text = M.semantics(text,'Related terms',rel) for i=2,#pos do text = text .. '\n\n===' .. M.postitle(pos[i]) .. '===\n' text = text .. '\n\n' if is_erhua then text = text .. '# '		else text = text .. '# ' .. def[i] end end if #also > 0 then text = (text .. '\n\n====See also====') for i=1,#also do text = (text .. '\n* ') end end if #cat > 0 or #poscat > 0 or #rawcat > 0 then text = text .. 
"\n" end local chinese_rawcats = {} local mandarin_rawcats = {} local mandarin_poscats = {} if #rawcat > 0 then for _, rc in ipairs(rawcat) do			local c = rc:match("^Mandarin (.*)$") if c then table.insert(mandarin_poscats, c)			end if not c then c = rc:match("^Chinese (.*)$") if c then table.insert(poscat, c)				end end if not c then c = rc:match("^(.*Mandarin.*)$") if c then table.insert(mandarin_rawcats, c)				end end if not c then table.insert(chinese_rawcats, rc) end end end if #cat > 0 then text = (text .. "\n") end if #poscat > 0 then text = (text .. "\n") end if #mandarin_poscats > 0 then text = (text .. "\n") end if #chinese_rawcats > 0 then text = (text .. "\n") end if #mandarin_rawcats > 0 then text = (text .. "\n") end if ko ~= '' then text = text .. '\n\n==Korean==\n\n\n===Noun===\n\n\n# ' end if vi ~= '' then text = text .. '\n\n==Vietnamese==\n\n\n===' .. M.postitle(pos[1]) .. '===\n\n\n# ' end return text end

return M