-- Module:zh-forms

local export = {} local links = require("Module:links") local lang = require("Module:languages") local m_data = require("Module:zh-forms/data") local m_scripts = require("Module:scripts") local m_str_utils = require("Module:string utilities")

local concat = table.concat local explode = m_str_utils.explode_utf8 local find = m_str_utils.find local get_section = require("Module:utilities").get_section local gsub = m_str_utils.gsub local insert = table.insert local len = m_str_utils.len local match = m_str_utils.match local new_title = mw.title.new local sub = m_str_utils.sub local u = m_str_utils.char

-- Character set of the "Hani" script as reported by Module:scripts. It is spliced
-- into bracket classes such as "^[" .. Hani_chars .. "]+$" further down, so it
-- must be the string result of the method call, not the method itself.
local Hani_chars = m_scripts.getByCode("Hani"):getCharacters()

-- U+2011; substituted for a leading "-" in glosses so a suffix is not split
-- from its hyphen at a line break.
local nonbreaking_hyphen = u(0x2011)

-- Script code for each kind of form; used to build the `lang="zh-…"` and
-- `class="…"` attributes of the character cells.
local sc = {
	trad = "Hant",
	simp = "Hans",
	both = "Hani",
}

--- Swap a single variant character in `text` for its counterpart.
-- `m_data.chars_variant_both` is read as a sequence of adjacent pairs: the
-- character at an odd position is exchanged with the one after it, and the
-- character at an even position with the one before it.
-- @param text string to convert
-- @return the converted string when exactly one character was swapped;
--   otherwise the empty string (no swappable character, or more than one --
--   in that case |t2= has to be supplied by hand).
function export.change_to_variant(text)
	local variants = m_data.chars_variant_both
	local hits = 0

	local function swap(char)
		hits = hits + 1
		if hits > 1 then
			-- add |t2= manually, please
			return ''
		end
		local pos = find(variants, char)
		local partner
		if pos % 2 == 1 then
			partner = pos + 1
		else
			partner = pos - 1
		end
		return sub(variants, partner, partner)
	end

	local swapped = gsub(text, ('([%s])'):format(variants), swap)
	if hits == 1 then
		return swapped
	end
	return ''
end

-- Cached Module:zh/link table; stays nil until zh_link is first called.
local zh_link_impl = nil

--- Call `link` from Module:zh/link, loading that module on first use.
-- @param ... arguments handed to `link` unchanged
-- @return whatever `link` returns
local function zh_link(...)
	if zh_link_impl == nil then
		zh_link_impl = require("Module:zh/link")
	end
	-- Forward `...` directly. The implicit `arg` table combined with
	-- `unpack(arg)` depends on `#arg`, which is unspecified when arguments are
	-- nil (callers here pass leading nils), and `arg` is a deprecated relic.
	return zh_link_impl.link(...)
end

--- Template entry point. Reads the arguments of the parent frame and returns
-- the wikitext table assembled by the rest of this function.
-- This first part collects the simplified (`s`) and traditional (`t`) forms.
-- @param frame frame object; the arguments are taken from frame:getParent()
function export.make(frame)
	local params = {
		[1] = { list = true, allow_holes = true, allow_empty = true },
		["s"] = { list = true },
		["t"] = { list = true },
		["ss"] = {},
		["ns"] = {},
		["alt"] = {},
		["type"] = {},
		["delink"] = {},
		["lit"] = {},
		["note"] = {},
		["gloss"] = {},
		["align"] = {}
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, params)
	local comp_type = args["type"]
	local s, t = {}, {}
	local annotation = {}
	local pagename = mw.loadData("Module:headword/data").pagename
	local current_title = mw.title.getCurrentTitle()
	local content = current_title:getContent()

	-- Without an explicit |t=, the page's own subpage text is the first
	-- traditional form.
	if not parent_args["t"] then
		insert(t, 1, current_title.subpageText)
	end

	for i = 1, #args.s do
		if (#args.s == 1) and pagename == args.s[i] then
			error('Redundant text in |s=.')
		end
		insert(s, args.s[i])
	end
	for i = 1, #args.t do
		if (#args.t == 1) and pagename == args.t[i] then
			error('Redundant text in |t=.')
		end
		insert(t, args.t[i])
	end

	local t1 = t[1]
	local t1_len = len(t1)

	-- temp tracking
	if #s == 0 and require("Module:zh").ts(t1) ~= t1 then
		require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
	end

	if #t == 1 then
		local to_variant = export.change_to_variant(t1)
		if (to_variant ~= '') then
			-- automatically generate a |t2= (reuse the value computed above
			-- instead of running the conversion a second time)
			insert(t, to_variant)
		elseif find(t1, ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then
			require('Module:debug').track('zh-forms/entry possibly missing a variant form')
		end
	end

	-- The names are used later as keys into char_set.
	s.name = "simp"
	t.name = "trad"

	if #t ~= 1 and #s == 0 then
		insert(s, t1)
	end

--- Marker appended after a linked term.
-- Returns "*" when the page for `term` contains more than one occurrence of
-- "zh-pron"/"zh-see" in total, otherwise "".
-- @param term page name to inspect
-- @param iscomp true when `term` is a component of the headword; components
--   are only inspected for single-character headwords (t1_len == 1)
local function asterisk(term, iscomp)
	if iscomp and t1_len > 1 then
		return ""
	end
	if term == current_title.subpageText then
		return ""
	end
	-- Build the title object once and reuse it for both the existence check
	-- and the content fetch.
	local title = new_title(term)
	if not (title and title.exists) then
		return ""
	end
	local page_text = title:getContent()
	if not page_text then
		return ""
	end
	-- Collapse every marker to one placeholder character, drop everything
	-- else, and count what remains.
	page_text = gsub(page_text, "zh%-pron", "Ꙁ")
	page_text = gsub(page_text, "zh%-see", "Ꙁ")
	page_text = gsub(page_text, "[^Ꙁ]", "")
	return len(page_text) > 1 and '*' or ''
end

-- Wikitext fragments for the rows and cells of the box. In all four helpers,
-- `length` selects the cell padding: 0.3em when it exceeds 8, else 0.5em.

--- Opening of a two-column header cell on a new row.
-- @param color background colour as hex digits without "#" (default 'E0FFFF')
local function var_fmt(length, color)
	local pad = '5'
	if length > 8 then
		pad = '3'
	end
	local background = color or 'E0FFFF'
	return '\n|-\n! style="padding: 0.' .. pad ..
		'em;border: 1px solid #aaa;background: #' .. background ..
		';font-weight: normal;font-size: smaller;" colspan="2" |'
end

--- Opening of a character cell.
-- @param script key into `sc` ("trad", "simp" or "both")
-- @param last truthy for the final cell of a row, which also gets a right border
local function char_gap(length, script, last)
	local pad = '5'
	if length > 8 then
		pad = '3'
	end
	local border
	if last then
		local side = ''
		if length ~= 1 then
			side = '-bottom'
		end
		border = 'border-right: 1px solid #aaa;border' .. side .. ': 1px solid #aaa; '
	else
		border = 'border-bottom: 1px solid #aaa; '
	end
	local script_code = sc[script]
	return '\n| style="padding: 0.' .. pad .. 'em; background-color:white;' .. border ..
		'font-size:x-large" lang="zh-' .. script_code .. '" class="' .. script_code .. '" | '
end

--- Opening of the table. The "floatright" class is omitted when length > 3,
-- when var_count * length > 5, or when |align=left was given.
local function header(length, var_count)
	local opening
	if length > 3 or var_count * length > 5 or args.align == 'left' then
		opening = '{|'
	else
		opening = '{| class="floatright"'
	end
	local first_row = ''
	if length ~= 1 then
		first_row = '\n|-\n! colspan=2|'
	end
	return opening ..
		' style="clear: right;margin: 1em 0 1em 1em;border-collapse: collapse;text-align: center"' ..
		first_row
end

--- Opening of a gloss cell whose pixel width scales with `word`.
-- @param colspan column span (default 1)
local function gloss_fmt(word, colspan, length)
	local pad, width
	if length > 8 then
		pad, width = '3', 25 * word + 25
	else
		pad, width = '5', 40 * word + 40
	end
	return '\n! style="padding: 0.' .. pad ..
		'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
		width .. 'px" colspan=' .. (colspan or 1) ..'|'
end

-- form_fmt(text, length, script): links every form in `text` with
-- links.language_link (language "zh"), appends asterisk(form, false) and joins
-- the results with "/". `script` is not referenced in the visible body.
-- NOTE(review): the return expression has empty operands (`and  or ' '` and
-- `or  end`), so it does not parse as written; string literals appear to have
-- been lost here -- TODO restore them from the upstream module.
-- char_fmt(text, length, script): emits the cells in `text`, each preceded by
-- char_gap(); the final cell uses the `last` variant of char_gap.
-- word_division is filled below with { first, last } index pairs into t1; `i`
-- is the running position used while filling it, and `decomposable` is only
-- assigned when no |type= is given.
local function form_fmt(text, length, script) local fmtd_text = {} for i,value in ipairs(text) do fmtd_text[i] = links.language_link{ lang = lang.getByCode("zh"), term = value } .. asterisk(value, false) end return length ~= 1 and ((length > 8 and '' or ' ') ..			'(' .. concat(fmtd_text, "/") .. ' )' ..			(length > 8 and  or ' ')) or  end local function char_fmt(text, length, script) return (#text ~= 1 and (char_gap(length, script) .. concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text] end local word_division = {} local i = 1 local decomposable

-- Split t1 into components. With |type= each digit gives the length of the
-- next component; "，" and "-" become one-character divisions of their own and
-- "…" a two-character one. Without |type= every character is its own division.
if comp_type then
	for index in mw.text.gsplit(comp_type, "", true) do
		if find(sub(t1, i, i), '[，%-]') then
			insert(word_division, { i, i } )
			i = i + 1
		elseif sub(t1, i, i) == '…' then
			insert(word_division, { i, i + 1 } )
			i = i + 2
		end
		insert(word_division, { i, i + index - 1 } )
		i = i + index
	end
	-- Entries containing ideographic description characters are exempt from
	-- the length check.
	if i - 1 ~= len(gsub(t1, '…+$', '')) and not find(concat(t) .. concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
		error("'type' parameter does not match word length.")
	end
else
	for pos = 1, t1_len do
		insert(word_division, { pos, pos } )
	end
	decomposable = len(gsub(t1, '…+$', '')) > 2
end

-- delink[n] is set for divisions whose forms are linked character by
-- character: |delink=y selects all of them, otherwise a comma-separated list
-- of division numbers.
local delink = {}
if args["delink"] then
	if args["delink"] == "y" then
		for del_index, _ in ipairs(word_division) do
			delink[del_index] = "yes"
		end
	else
		for position in mw.text.gsplit(args["delink"], ",") do
			local division = tonumber(position)
			if not division then
				-- Indexing with nil would raise an opaque "table index is
				-- nil"; report the offending value instead.
				error("Invalid position in |delink=: " .. position)
			end
			delink[division] = "yes"
		end
	end
end

-- Linked text of each division for every form list, plus the forms whose
-- pages do not exist yet.
local char_set = { ['simp'] = {}, ['trad'] = {} }
local identity = #s == 0 and {t} or {s,t}
local uncreated = {}
local zh = lang.getByCode("zh") -- loop-invariant, fetched once
for _, id in ipairs(identity) do
	for division, position in ipairs(word_division) do
		local separator = delink[division] and "" or "/"
		-- Collect the distinct spellings of this division across all forms.
		local char_string = ""
		for j = 1, #id do
			local word_form = sub(id[j], position[1], position[2])
			if not find(char_string, word_form) then
				char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
			end
		end
		if not find(char_string, '[，%-]') then
			local hash = {}
			for thing in mw.text.gsplit(char_string, separator) do
				insert(hash, links.language_link{ lang = zh, term = thing } .. asterisk(thing, true))
			end
			char_string = concat(hash, separator)
		end
		insert(char_set[id.name], char_string)
	end
	for _, item in ipairs(id) do
		-- Compare against the current page first so the title lookup is
		-- skipped for it.
		if item ~= current_title.subpageText and not (new_title(item) or {}).exists then
			insert(uncreated, '"' .. item .. '"')
		end
	end
end

-- Detect which scripts occur in t1; each key is the body of a bracket class.
local scripts = {
	[m_scripts.getByCode("Hani"):getCharacters()] = "Hani",
	[m_scripts.getByCode("Latn"):getCharacters()] = "Latn",
	["0-9０-９"] = "Numb",
	[m_scripts.getByCode("Polyt"):getCharacters()] = "Grek",
	[m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo"
}
local script = {}
for range, script_name in pairs(scripts) do
	if find(t1, '[' .. range .. ']') then
		insert(script, script_name)
	end
end

-- Annotation for headwords containing an immediately repeated sequence
-- (t1 shorter than 7 characters, |gloss= not "-").
-- NOTE(review): both insert() calls below add an empty string; the original
-- text appears to have been lost -- TODO restore from the upstream module.
if find(t1, "([^─…]+)%1") and args['gloss'] ~= '-' and t1_len < 7 then
	if gsub(comp_type or "", "1", "") == "" then
		-- |type= is absent or consists only of "1"s.
		insert(annotation, '')
	elseif find(t1, "([^…][^…]+)%1") or find(concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
		-- Every multi-character component must have a page with a Chinese
		-- section that does not carry |gloss=-.
		local evil
		for _, component in ipairs(char_set['trad']) do
			if len(component) > 1 then
				local title = new_title(links.remove_links(component))
				local comp_content = title and title:getContent() or false
				-- Plain search: as a pattern, "=-" means "zero or more '='",
				-- so "|gloss=-" would match any "|gloss".
				if not comp_content
					or find(comp_content, "|gloss=-", 1, true)
					or not find(comp_content, "==Chinese==", 1, true) then
					evil = true
					break
				end
			end
		end
		if not evil then
			insert(annotation, '')
		end
	end
end

-- Builds the optional rows below the main forms:
--   * three conditional entries appended to `annotation` (mixed scripts,
--     decomposable headword without |gloss=-/|note=/|lit=, uncreated forms in
--     the main namespace);
--   * `ss` / `ns`: rows for |ss= and |ns=, linked through zh_link with a "*"
--     prefix and tr = "-";
--   * `altforms`: |alt= split on ","; each item is split on ":" (the second
--     part is passed as tr) and then on "-" (the second part is appended, with
--     "‡" replaced by the expansion of Template:zh-historical-dict);
--   * `anagrams`: only for 2-5 character headwords; generate_anagrams permutes
--     the characters of t1 and keeps permutations whose page has a level-2
--     "Chinese" section according to get_section;
--   * `literal`: row for |lit= or |note=;
--   * `gloss`: start of the gloss row (continued below).
-- In each row, sub(char_gap(...), 1, -45) drops the last 44 characters of the
-- cell opening so that further style declarations can be appended.
-- NOTE(review): several literals in this span look damaged -- the '' operands
-- in the annotation inserts, `title:getContent` without call parentheses,
-- `Literally: “'` and `phonetic' }` outside string quotes -- TODO restore from
-- the upstream module.
insert(annotation, #script > 1 and '' or nil) insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '' or nil) insert(annotation, (#uncreated > 0 and current_title.nsText == "") and '' .. 		'(' .. (#uncreated == 1 and 'This form' or 'These forms') .. ' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' .. concat(uncreated, ", ") .. '.) ' or nil) local ss = "" if args["ss"] then ss = var_fmt(t1_len, 'FFFFE0') .. '2nd round simp.' .. sub(char_gap(t1_len, 'simp', true), 1, -45) .. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'			.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ss"], tr = "-" }, current_title.subpageText)	end	local ns = ""	if args["ns"] then		ns = var_fmt(t1_len, 'FFFFE0') .. 'nonstandard simp.' .. sub(char_gap(t1_len, 'simp', true), 1, -45)			.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' .. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ns"], tr = "-" }, current_title.subpageText) end local altforms = "" if args["alt"] then local altform_list = {} for altform in mw.text.gsplit(args["alt"], ",") do			local altdecomp = mw.text.split(altform, ":") local altdecomp2 = mw.text.split(altdecomp[1], "-") local altdecomp3 = altdecomp2[2] and ' ' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{					title = "Template:zh-historical-dict",					args = { type = "form", nocat = "1" }				}) .. ' ' or '' insert(altform_list, ' ' ..				zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, current_title.subpageText) .. 				altdecomp3 .. ' ') end if #altform_list > 5 then altforms = ' ' .. ' ' .. concat(altform_list, " ", 1, 5) .. ' ' .. concat(altform_list, " ") .. '  '		else altforms = concat(altform_list, " ") end altforms = var_fmt(t1_len, 'F0FFE0') .. 'alternative forms' .. 
sub(char_gap(t1_len, "trad", true), 1, -45) .. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'			.. ' colspan="' .. #word_division .. '"|' .. altforms	end	local anagrams = ""	if t1_len >= 2 and t1_len <= 5 then		local function generate_anagrams(term, n, anagrams, checked)			if n == 0 then				local anagram = concat(term)				if checked[anagram] then					return				end				local title = new_title(anagram)				if not title then					return				end				title = title:getContent				if title and get_section(title, "Chinese", 2) then					insert(anagrams, anagram)				end				checked[anagram] = true			else				for i = 1, n do					generate_anagrams(term, n - 1, anagrams, checked)					local i = n % 2 == 0 and i or 1					term[i], term[n] = term[n], term[i]				end			end			return anagrams		end		local term, checked = explode(t1), {[t1] = true}		anagrams = generate_anagrams(term, #term, {}, checked)		local anagrams_num = #anagrams		for i = 1, anagrams_num do			anagrams[i] = "" ..				zh_link(nil, nil, {anagrams[i], tr = "-"}, current_title.subpageText) .. " "		end if anagrams_num == 0 then anagrams = "" else anagrams = concat(anagrams, " ") local label = anagrams_num == 1 and "anagram" or "anagrams" anagrams = var_fmt(t1_len, 'F0FFE0') .. label .. sub(char_gap(t1_len, "trad", true), 1, -45) .. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'				.. ' colspan="' .. #word_division .. '"|' .. anagrams		end	end	local literal = (args["lit"] or args["note"]) and '\n|-' .. 		gloss_fmt(t1_len, #word_division + 2, t1_len) .. 		(args["lit"] and Literally: “' .. args["lit"] .. 		(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or ""	local gloss = {}	if args['gloss'] == '-' then		gloss = { gloss_fmt(t1_len * 1.6, #word_division, t1_len) .. phonetic' }	elseif t1_len == 1 then		gloss = {}--{ gloss_fmt(1.6, #word_division) .. 
-- Gloss cell for each word division (multi-character headwords only):
--   1. take the positional argument args[1][i];
--   2. if absent and the division is one character, look it up in
--      Module:zh/data/glosses and expand embedded taxlink calls;
--   3. "-" is shown as "phonetic";
--   4. a leading or post-whitespace "-" becomes a non-breaking hyphen;
--   5. if the gloss is still empty and the division is longer than one
--      character, extract it from that page via Module:zh/extract;
--   6. ";"-separated glosses are tidied, and the cell width is derived from
--      the number of characters and "/"-separated variants.
-- Afterwards, tracking checks run against the current page's `content`.
-- NOTE(review): this span looks damaged -- gsub(gloss_text, "", ...) has an
-- empty pattern, `mw.getCurrentFrame` and `:getContent` lack call parentheses,
-- `insert(annotation, )` and `gsub(gloss_text, '[^;]', )` are missing an
-- argument, and the body of the final `for equals, heading_text` loop is cut
-- off after `local current_level =` -- TODO restore from the upstream module.
'-' }	else		for i, position in ipairs(word_division) do			local character = sub(t1, position[1], position[2])			local gloss_text = args[1][i]			-- Load glosses module if no gloss was supplied and the word is a single character.			if not gloss_text and position[1] == position[2] then				local glosses = mw.loadData("Module:zh/data/glosses")				gloss_text = glosses.glosses[character] or ""				gloss_text = gsub(gloss_text, "",					function (taxlink_text) local taxlink_args, argi = {}, 1 for arg in mw.text.gsplit(taxlink_text, "|") do							local arg_split = mw.text.split(arg, "=") if arg_split[2] then taxlink_args[arg_split[1]] = arg_split[2] else taxlink_args[argi] = (arg ~= "" and arg or nil) argi = argi + 1 end end local frame = mw.getCurrentFrame return frame:expandTemplate{ title = 'taxlink', args = taxlink_args }					end)				if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") and not find(glosses.nonlemma, character) then					require('Module:debug').track('zh-forms/no gloss found for Chinese character')				end			end			if gloss_text == "-" then gloss_text = "phonetic" end			if not gloss_text then				gloss_text = ""			end			--				To ensure that suffixes are not broken up between lines, like this:				-				ist						if find(gloss_text, "-", nil, true) then				gloss_text = gsub(gloss_text, "^%-", nonbreaking_hyphen)				gloss_text = gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)			end			if gloss_text == "" and position[2] > position[1] then				local content = new_title(character):getContent or false				if content then					gloss_text = require("Module:zh/extract").extract_gloss(content, false)					if gloss_text == "" and find(character, "^[" .. Hani_chars .. 
"]+$") then						require('Module:debug').track('zh-forms/no gloss found but entry exists')					end					--					if not string.match(content, character) then						require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages')					end					--				else					if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then						insert(annotation, )					end				end			end			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]+;", ";")			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]*$", "")			gloss_text = gsub(gloss_text, ";+", ";")			if len(gsub(gloss_text, '[^;]', )) > 2 then				gloss_text = ' ' ..					match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. ' ' .. gloss_text .. '   '			end			local word_length = match(character, '[，…%-]') and 0 or 				(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1))			insert(gloss, gloss_fmt(word_length, 1, t1_len) .. gloss_text)		end	end	if content then		--		local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1, 			["Definitions"] = 1, ["Pronunciation"] = 1 }		local previous_level = 2		local subheading_wanted		--		--[=[		-- Used under headers for Chinese varieties, for instance in āu-piah		local Chinese_section = string.match(content, "\n==Chinese==\n(.-)\n==[^=]")		if not Chinese_section then			error("No Chinese section found.")		end		--]=]		if t1_len == 1 and not string.match(content, "===Definitions===") then			require('Module:debug').track('zh-forms/no definitions section found')		end		-- disable, has been deprecated so this expression will always be false		if t1_len == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then			require('Module:debug').track('zh-forms/derived terms probably needing renaming')		end		-- -- comment out relatively expensive parsing		for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do			local current_level = -- end local simp_note = "" 
-- Final assembly. When only one form list exists (#identity == 1) and t1
-- contains a character from m_data.chars_unified, a note is attached to the
-- simplified row and the simplified cells reuse the traditional ones.
-- The returned string is the concatenation of: table header, gloss cells,
-- either one combined "simp. and trad." row or separate "trad." and "simp."
-- rows, the ss / ns / altforms / anagrams / literal rows, the closing "|}",
-- and the collected annotations.
-- NOTE(review): the `simp_note = ' 1 and 's' or '') .. ...` assignment is
-- missing part of its string literal and does not parse as written -- TODO
-- restore from the upstream module.
--This is not a complete list! if #identity == 1 and find(m_data.chars_unified, '[' .. t1 .. ']') then simp_note = ' 1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#' char_set['simp'] = char_set['trad'] end return concat{ header(t1_len, math.max(#t, #s)), concat(gloss, ""), ((#identity == 1 and simp_note == '') and			(var_fmt(t1_len) .. 'simp. and trad. ' .. form_fmt(t, t1_len, 'both') .. char_fmt(char_set['trad'], t1_len, 'both'))		or			 var_fmt(t1_len) .. 'trad. ' .. 			 	form_fmt(t, t1_len, 'trad') .. char_fmt(char_set['trad'], t1_len, 'trad') ..

(var_fmt(t1_len) .. 'simp. ' .. simp_note ..				form_fmt(#s == 0 and t or s, t1_len, 'simp') .. char_fmt(char_set['simp'], t1_len, 'simp'))

), ss, ns, altforms, anagrams, literal, '\n|}', concat(annotation)	} end

return export