-- Module:User:Justinrleung

-- Table of functions made available to callers of this module.
local export = {}

-- Short local aliases for the Scribunto library functions used throughout the file.
local gsub, find = mw.ustring.gsub, mw.ustring.find
local gsplit = mw.text.gsplit

--- Build a short gloss string from the raw wikitext of an entry.
--
-- The Chinese level-2 section is located (falling back to, or combined with,
-- the Translingual section), up to the first two "# " definition lines that
-- do not contain "rfdef" or "defn" are collected, and a "lit=" value found in
-- a zh-forms template is put in front of them. Templates and bracketed
-- material are then stripped from the result.
--
-- @param content  wikitext of the whole page (string)
-- @param useetc   when truthy, "; etc." is appended if senses were left out
-- @return the gloss string; "" when no usable section exists or when
--         unprocessed "{{" remains (and "; etc." was not appended)
function export.extract_gloss(content, useetc)
	-- Bound locally: the rest of the file does not define `match`.
	local match = mw.ustring.match
	local len = mw.ustring.len
	local senses = {}
	local literally = match(content, 'zh%-forms[^}]*|lit=([^{|}]+)[|}]')
	local sense_id = 0
	local etc = false
	local translingual_section, zh_section, pos

	while true do
		-- Find language sections beginning with ==...== and ending with the same
		-- or an empty string. Grab the Chinese and Translingual ones.
		local _, j, language_name, section =
			content:find("%f[=]==%s*([^=]+)%s*==(\n.-)\n==%f[^=]", pos)
		if j == nil then
			-- Last section on the page: it runs to the end of the text.
			_, j, language_name, section =
				content:find("%f[=]==%s*([^=]+)%s*==(\n.+)", pos)
		end
		if j == nil then
			break
		else
			-- Move to the beginning of "==" at the end of the current match.
			pos = j - 1
		end
		if language_name == 'Translingual' then
			translingual_section = section
		elseif language_name == 'Chinese' then
			zh_section = section
			break
		end
	end

	if not zh_section then
		zh_section = translingual_section
		if not zh_section then
			return ""
		end
	elseif translingual_section then
		-- also use translingual section if Chinese section contains only rfdef
		zh_section = zh_section .. translingual_section
	end

	-- Delete etymology sections, because they sometimes contain ordered lists,
	-- which would then be interpreted as definitions.
	zh_section = zh_section:gsub("\n===+Etymology.-(\n==)", "%1")

	for sense in zh_section:gmatch('\n# ([^\n]+)') do
		if not sense:match('rfdef') and not sense:match('defn') then
			sense_id = sense_id + 1
			if sense_id > 2 then
				-- A third usable sense exists; it is not included.
				etc = true
				break
			end
			table.insert(senses, sense)
		end
	end

	local gloss_text = (literally and literally .. "; " or "") .. (senses[1] or "")
	local gloss_text_extend = gloss_text .. (senses[2] and "; " .. senses[2] or "")
	-- Only keep the second sense when both the short and the extended text are short enough.
	gloss_text = (len(gloss_text) < 80 and len(gloss_text_extend) < 160) and gloss_text_extend or gloss_text
	if gloss_text ~= gloss_text_extend then
		etc = true
	end

	-- Strip or unwrap the templates that commonly occur in definition lines.
	local function replace_gloss(text)
		local function replace_wp(text)
			return text:gsub('{{w|([^|}]+)|?([^|}]*)}}',
				function(w_link, w_display)
					-- NOTE(review): the literal pieces of this return value were lost in
					-- the source; reconstructed as a [[w:...]] link whose separator is a
					-- fullwidth bar (turned back into "|" further down) - confirm.
					return '[[w:' .. w_link .. '｜' .. (w_display ~= '' and w_display or w_link) .. ']]'
				end)
		end

		if text:find("{{") then
			text = replace_wp(text)
			text = text:gsub(' %({{taxlink[^}%)]+}}%)', '')
				:gsub('{{zh%-l|%*([^}]*)}}', '%1')
				:gsub('{{lb|zh|[^}]*}}', '')
				:gsub('{{zh%-erhua form of|word=[^}]+}}', '')
				:gsub('{{zh%-erhua form of|([^}]+)}}', '%1')
				:gsub('{{zh%-alt%-name|[^}]+|([^\n]+)}}', '%1')
				:gsub('{{zh%-short%-comp|[^}]+|t=([^\n}|]+)[^}]*}}', '%1')
				:gsub('{{zh%-short%-comp|[^}]+}}', '')
				:gsub('{{zh%-classifier|[^}]+|t=([^\n}|]+)[^}]*}}', '%1')
				:gsub('{{zh%-classifier|[^}]+}}', '')
				:gsub('{{zh%-alt%-form|[^}]+}}', '')
				:gsub('{{zh%-[^dm|}][^|}]+|[^|}]+|([^\n}|]+)}}', '%1')
				:gsub('{{place|zh|[^}]*t=([^\n}|]+)[^}]*}}', '%1')
				:gsub('{{vern', '{{w')
				:gsub('｜', "|")
		end

		-- Drop remaining brace/parenthesis groups, keeping only the taxon name of
		-- a taxlink (italicised) or a whole {{w|...}} template.
		text = text:gsub('( ?)([{%(]+[^}%){%(]+[}%)]+)', function(space, captured)
			local taxlink = captured:match("{{taxlink|([^|}]+)")
			local wiki_link = taxlink and "''" .. taxlink .. "''" or
				(match(captured, "({{w|.+}})") or false)
			return wiki_link and space .. wiki_link or ""
		end)

		-- Re-join the ";"-separated parts that still contain an alphanumeric
		-- character, trimmed of surrounding whitespace.
		local text_sec = {}
		for _, s in ipairs(mw.text.split(text, ';')) do
			if s:find('%w') then
				table.insert(text_sec, (s:gsub('^%s+', ''):gsub('%s+$', '')))
			end
		end
		return table.concat(text_sec, '; ')
	end

	-- Two passes: the first pass can produce {{w|...}} (from {{vern), which
	-- the second pass then converts.
	gloss_text = replace_gloss(gloss_text)
	gloss_text = replace_gloss(gloss_text)

	if etc and useetc and gloss_text ~= "" then
		gloss_text = gloss_text .. "; etc."
	elseif gloss_text:find("{{") then --temporary solution to suppress wikitext issues
		gloss_text = ""
	end
	return gloss_text
end

--- Report whether the page named by the first frame argument is a redirect.
--
-- @param frame  frame object; `frame.args[1]` is the page name
-- @return 1 if the page is a redirect, otherwise 0 (also 0 when the argument
--         is missing or does not form a valid title)
function export.is_redirect(frame)
	local page = frame.args[1]
	-- mw.title.new yields nil for an invalid title, so guard before indexing.
	local title = page and mw.title.new(page)
	if title and title.isRedirect then
		return 1
	end
	return 0
end

--- Pass `text` to Module:links' `language_link`, using the language object
-- that Module:languages returns for the code "zh".
-- @param text  value handed to `language_link` as its first argument
-- @return whatever `language_link` returns
function export.link(text)
	local links = require("Module:links")
	local zh = require("Module:languages").getByCode("zh")
	return links.language_link(text, nil, zh)
end

--- "If not empty": map the empty string to nil and pass anything else through.
-- @param var  any value
-- @return nil when `var` is "", otherwise `var` unchanged
local function ine(var)
	if var ~= "" then
		return var
	end
	return nil
end

-- Character class holding the four combining diacritics treated as tone marks.
local tones = '[̄́̌̀]'

-- Tone digit for each combining diacritic in `tones`.
local py_tone = {
	['̄'] = '1',
	['́'] = '2',
	['̌'] = '3',
	['̀'] = '4',
}

--- Validate a Pinyin string and split it into space-separated syllables.
--
-- The text is decomposed (ê and ü are kept precomposed), checked for a missing
-- apostrophe and for misplaced tone marks, lower-cased and then broken into
-- syllables. Input that has digits 1-5 and no tone marks is only spaced out
-- after each digit and returned as is.
--
-- @param text        Pinyin string, or a frame whose args 1-3 supply the
--                    three parameters
-- @param detone      non-empty: replace tone marks by trailing digits, using
--                    5 for syllables without a mark
-- @param not_spaced  non-empty: remove the spaces again before returning;
--                    also disables the missing-apostrophe check
-- @return the transformed string (NFC-normalised on the main path)
-- Raises an error for a U+200B character, a missing apostrophe, or a tone
-- mark placed on the wrong vowel.
function export.py_transform(text, detone, not_spaced)
	if type(text) == 'table' then
		text, detone, not_spaced = text.args[1], text.args[2], text.args[3]
	end

	-- "\226\128\139" is U+200B (zero-width space) in UTF-8, written as byte
	-- escapes so the invisible character can be seen in the source.
	if find(text, '\226\128\139') then
		error("Pinyin contains the hidden character: \226\128\139 (U+200B). Please remove that character from the text.")
	end

	detone = ine(detone)
	not_spaced = ine(not_spaced)

	-- Decompose, then put ê and ü back together so only tone marks stay separate.
	text = gsub(gsub(mw.ustring.toNFD(text), mw.ustring.toNFD('ê'), 'ê'), mw.ustring.toNFD('ü'), 'ü')

	if find(mw.ustring.lower(text), '[aeiouêü]' .. tones .. '[aeiou]?[aeiouêü]' .. tones) and not not_spaced then
		local suggestion = gsub(text, '([aeiouêü]' .. tones .. '[aeiou]?)([aeiouêü]' .. tones .. ')', "%1'%2")
		error(("Missing apostrophe before null-initial syllable - should be \"%s\" instead."):format(suggestion))
	end

	-- Kept local; it used to leak into the global environment.
	local original_text = text
	text = gsub(text, '([aoeAOE])([iou])(' .. tones .. ')', '%1%3%2')
	text = gsub(text, '([iuü])(' .. tones .. ')([aeiou])', '%1%3%2')
	if text ~= original_text then
		error("Incorrect diacritic placement in Pinyin - should be \"" .. text .. "\" instead.")
	end

	text = mw.ustring.lower(text)
	if not find(text, tones) and text:find('[1-5]') then
		-- Parenthesised to drop gsub's substitution count: Scribunto concatenates
		-- every return value when a function is called through #invoke.
		return (gsub(text, '(%d)(%l)', '%1 %2'))
	end

	text = gsub(text, "#", " #")
	if find(text, '[一不,.?]') then
		text = gsub(text, '([一不])$', {['一'] = ' yī', ['不'] = ' bù'})
		text = gsub(text, '([一不])', ' %1 ')
		text = gsub(text, '([,.?])', ' %1 ')
		text = gsub(text, ' +', ' ')
		text = gsub(text, '^ ', '')
		text = gsub(text, ' $', '')
		text = gsub(text, '%. %. %.', '...')
	end

	text = gsub(text, "['%-]", ' ')
	-- Insert a space between a syllable's end and the next initial consonant.
	text = gsub(text, '([aeiouêümn]' .. tones .. '?n?g?r?)([bpmfdtnlgkhjqxzcsywr]h?)', '%1 %2')
	-- A lone g/r/n split off by the previous step belongs to the preceding syllable.
	text = gsub(text, ' ([grn])$', '%1')
	text = gsub(text, ' ([grn]) ', '%1 ')

	if detone then
		text = gsub(text, tones, py_tone)
		-- Move the digit to the end of its syllable.
		text = gsub(text, '([1234])([^ ]*)', '%2%1')
		-- Syllables still ending in a letter had no mark: give them tone 5.
		text = gsub(text, '([%lüê]) ', '%15 ')
		text = gsub(text, '([%lüê])$', '%15')
	end

	if not_spaced then
		text = gsub(text, ' ', '')
	end
	return mw.ustring.toNFC(text)
end

--- Rewrite a Pinyin string with the letter substitutions and tone diacritics
-- defined below.
--
-- Each space-separated word is run through `export.py_transform` with
-- detoning enabled, every resulting syllable has its letters rewritten, and
-- the tone digit is turned back into a combining mark placed on a/e/ê, else
-- on o, else on i/u. Tone 1 carries no mark.
--
-- @param text  Pinyin string, or a frame whose first argument is the string
-- @return the converted string, NFC-normalised
function export.py_tongyong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	-- Combining marks as UTF-8 byte escapes, keyed by tone digit.
	local ty_tone = {
		["1"] = "",
		["2"] = "\204\129",
		["3"] = "\204\140",
		["4"] = "\204\128",
		["5"] = "\204\138",
	}

	local function num_to_mark(syllable, tone)
		tone = ty_tone[tone]
		if tone ~= "" then
			-- The first test uses the Unicode-aware find/gsub: with the byte-oriented
			-- string functions the class [aeê] matches the two bytes of "ê"
			-- separately and a mark would be inserted inside the character.
			if find(syllable, '[aeê]') then
				syllable = gsub(syllable, "([aeê])", "%1" .. tone)
			elseif syllable:find('o') then
				syllable = syllable:gsub("(o)", "%1" .. tone)
			elseif syllable:find('[iu]') then
				syllable = syllable:gsub("([iu])", "%1" .. tone)
			end
		end
		return syllable
	end

	local words = {}
	for word in gsplit(text, " ") do
		-- Remember capitalisation; py_transform lower-cases its input.
		local cap = word:find("^[A-Z]")
		word = export.py_transform(word, true)
		local syllables = {}
		for syllable in gsplit(word, " ") do
			syllable = syllable:gsub("([zcs]h?)i", "%1ih")
			syllable = syllable:gsub("ü", "yu")
			syllable = syllable:gsub("([jqx])u", "%1yu")
			syllable = syllable:gsub("iu", "iou")
			syllable = syllable:gsub("ui", "uei")
			syllable = syllable:gsub("([wf])eng", "%1ong")
			syllable = syllable:gsub("wen", "wun")
			syllable = syllable:gsub("iong", "yong")
			syllable = syllable:gsub("^zh", "jh")
			syllable = syllable:gsub("^q", "c")
			syllable = syllable:gsub("^x", "s")
			-- Non-initial syllables starting with a, e or o get an apostrophe.
			syllable = #syllables ~= 0 and syllable:gsub("^([aeo])", "'%1") or syllable
			syllable = syllable:gsub("^([^1-5]+)([1-5])$", num_to_mark)
			table.insert(syllables, syllable)
		end
		word = table.concat(syllables, "")
		word = cap and word:gsub("^.", string.upper) or word
		table.insert(words, word)
	end
	return mw.ustring.toNFC(table.concat(words, " "))
end

--- Return `text` with a tone mark moved from "a"/"e" onto a directly
-- preceding "o" when the pair is followed by ⁿ, a hyphen, a slash, a space or
-- the end of the string.
--
-- The text is decomposed (NFD) for the rewrite and recomposed (NFC) before it
-- is returned. The remaining rewrite rules and the final error check are
-- disabled and kept below as comments.
--
-- @param text  string to normalise
-- @return the corrected string
function export.pfs_check_invalid(text)
	local tone_mark = "[́̀̂̍]"
	local reorder = "%1%3%2%4"

	-- A trailing hyphen lets the end of the string be handled like any other
	-- boundary; it is removed again before returning.
	local fixed = mw.ustring.toNFD(text) .. "-"
	fixed = gsub(fixed, "(o)([ae])(" .. tone_mark .. ")([ⁿ%-/ ])", reorder)

	--fixed = gsub(fixed, "(o)(" .. tone_mark .. ")([ae])([imnptkh][gh]?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(oa)(i)(" .. tone_mark .. ")(h?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(a)([iu])(" .. tone_mark .. ")(h?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(i)(" .. tone_mark .. ")([aou])(u?[mnptkh]?g?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(ia)(u)(" .. tone_mark .. ")(h?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(u)(i)(" .. tone_mark .. ")([hⁿ]?)", reorder)
	--fixed = gsub(fixed, "(e)(e)(" .. tone_mark .. ")(h?ⁿ?)", reorder)
	--fixed = gsub(fixed, "(o" .. tone_mark .. ")[ou·]", "%1͘")

	fixed = gsub(fixed, "-$", "")
	fixed = mw.ustring.toNFC(fixed)

	--if text ~= fixed then
		--error("invalid poj \"" .. gsub(text, "-$", "") .. "\": correct poj is \"" .. fixed .. "\"")
	--end
	return fixed
end

--- Convert space-separated romanised syllables (each ending in a tone digit
-- 1-6) to IPA using the lookup tables below.
--
-- Each syllable is split into initial, final and tone. A leading "y"/"yi" in
-- the final is normalised, "i" after z/c/s becomes "ii", and a syllable made
-- only of an initial is treated as a bare final. Initials g/k/ng/h take their
-- palatal form before a final starting with "i". Tones 1 and 4 use the "*"
-- variant when the following syllable has tone 2, 3, 4 or 5.
--
-- @param text  the romanised string (a "gd=" prefix is removed), or a frame
--              whose first argument is that string
-- @return the IPA syllables joined by spaces
-- Raises an error for an unrecognised initial or final, or a syllable
-- without a tone digit.
function export.gd_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "ʋ",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h", [""] = "",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ"
	}
	local final_conv = {
		["ii"] = "z̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["ê"] = "e", ["iê"] = "ie", ["uê"] = "ue",
		["o"] = "o", ["io"] = "io", ["uo"] = "uo",
		["m"] = "m̩", ["n"] = "n̩",
		["ai"] = "aɪ", ["iai"] = "iaɪ", ["uai"] = "uaɪ",
		["oi"] = "oɪ", ["ui"] = "uɪ", ["iui"] = "iuɪ",
		["au"] = "au", ["iau"] = "iau", ["êu"] = "eu", ["iu"] = "iu",
		["em"] = "əm", ["im"] = "im", ["am"] = "am", ["iam"] = "iam", ["êm"] = "ɛm",
		["en"] = "ən", ["in"] = "in", ["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["ên"] = "ɛn", ["iên"] = "iɛn", ["uên"] = "uɛn",
		["on"] = "ɔn", ["ion"] = "iɔn", ["uon"] = "uɔn",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "ɔŋ", ["iong"] = "iɔŋ", ["uong"] = "uɔŋ",
		["ung"] = "ʊŋ", ["iung"] = "iʊŋ",
		["eb"] = "əp̚", ["ib"] = "ip̚", ["ab"] = "ap̚", ["iab"] = "iap̚", ["êb"] = "ɛp̚",
		["ed"] = "ət̚", ["id"] = "it̚", ["ad"] = "at̚", ["iad"] = "iat̚", ["uad"] = "uat̚",
		["êd"] = "ɛt̚", ["iêd"] = "iɛt̚", ["uêd"] = "uɛt̚",
		["od"] = "ɔt̚", ["ud"] = "ut̚", ["iud"] = "iut̚",
		["ag"] = "ak̚", ["iag"] = "iak̚", ["uag"] = "uak̚",
		["og"] = "ɔk̚", ["iog"] = "iɔk̚", ["uog"] = "uɔk̚",
		["ug"] = "ʊk̚", ["iug"] = "iʊk̚"
	}
	local tone_conv = {
		["1"] = "⁴⁴",
		["2"] = "¹¹",
		["3"] = "³¹",
		["4"] = "⁵³",
		["5"] = "¹",
		["6"] = "⁵",
		["1*"] = "⁴⁴⁻³⁵",
		["4*"] = "⁵³⁻⁵⁵"
	}
	local palatal = {
		['g'] = 'c', ['k'] = 'cʰ', ['ng'] = 'ɲ', ['h'] = 'ç'
	}

	if type(text) == 'table' then
		text = text.args[1]
	end

	local syllables = mw.text.split(mw.ustring.gsub(text, 'gd=', ''), ' ')
	local initial, final, tone, ipa = {}, {}, {}, {}

	-- Pass 1: split every syllable into initial, final and tone digit.
	for i, syllable in ipairs(syllables) do
		initial[i] = mw.ustring.match(syllable, "^[bpmfvdtnlgkhzcsjqx]?g?")
		-- The final is what follows this syllable's own initial, up to the tone digit.
		final[i] = mw.ustring.match(mw.ustring.sub(syllable, mw.ustring.len(initial[i]) + 1, -1), "^[^1-6]*")
		final[i] = mw.ustring.gsub(mw.ustring.gsub(final[i], "^yi", "i"), "^y", "")
		if mw.ustring.find(initial[i], "[zcs]") and final[i] == "i" then
			final[i] = "ii"
		end
		if final[i] == "" then
			-- Nothing after the consonant(s): treat them as the final.
			final[i] = initial[i]
			initial[i] = ""
		end
		tone[i] = mw.ustring.match(syllable, "[1-6]$")
			or error(("Unrecognised tone in syllable: \"%s\""):format(syllable))
	end

	-- Pass 2: look up the IPA values. tone[i + 1] is still the raw digit here
	-- because each entry is only converted when its own turn comes.
	for i = 1, #syllables do
		initial[i] = (mw.ustring.find(final[i], "^i") and palatal[initial[i]] or initial_conv[initial[i]])
			or error(("Unrecognised initial: \"%s\""):format(initial[i]))
		final[i] = final_conv[final[i]]
			or error(("Unrecognised final: \"%s\""):format(final[i]))
		if mw.ustring.match(tone[i], "[14]") and mw.ustring.match(tone[i + 1] or "", "[2345]") then
			tone[i] = tone[i] .. "*"
		end
		tone[i] = tone_conv[tone[i]]
		ipa[i] = initial[i] .. final[i] .. tone[i]
	end

	return table.concat(ipa, " ")
end

--- Convert a romanised string from the form given as `pfs=` into the
-- spelling produced by the replacement rules below, with a tone sign
-- appended to each syllable.
--
-- The input is lower-cased, a "pfs=" prefix is removed and the text is split
-- on spaces and hyphens. For every syllable the consonants are respelled, a
-- tone sign is chosen from the diacritic (or from a final b/d/g without the
-- vertical-line mark), and the diacritics are stripped.
--
-- @param text  the string to convert, or a frame whose first argument is it
-- @return the converted syllables joined by spaces
function export.pfs_to_hrs(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local ugsub, ufind = mw.ustring.gsub, mw.ustring.find

	-- Replacement for z/c/s/i directly before an i (with or without tone mark).
	local palatal = {['z']='j',['c']='q',['s']='x',['i']=''}
	-- Toned vowels and bare combining marks mapped to their unmarked form.
	local final_conv = {
		['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u', ['́'] = '',
		['à'] = 'a', ['è'] = 'e', ['ì'] = 'i', ['ò'] = 'o', ['ù'] = 'u', ['̀'] = '',
		['â'] = 'a', ['ê'] = 'e', ['î'] = 'i', ['ô'] = 'o', ['û'] = 'u', ['̂'] = '',
		['ń'] = 'n', ['ǹ'] = 'n', ['̍'] = '',
		['ṳ'] = 'ii',
	}

	local function palatalise(a, b)
		return palatal[a]..b
	end

	-- Pick the tone sign for an already-respelled syllable.
	local function tone_of(syl)
		if ufind(syl, '[âêîôû̂]') then
			return '´'
		end
		if ufind(syl, '[àèìòùǹ̀]') then
			return 'ˇ'
		end
		if ufind(syl, '[áéíóúń́]') then
			return '`'
		end
		if ufind(syl, '[aeiouṳ][bdg]$') and not ufind(syl, '̍') then
			return '`'
		end
		return ''
	end

	local normalised = mw.ustring.lower(text)
	normalised = ugsub(normalised, 'pfs=', '')
	normalised = ugsub(normalised, ' ', '-')
	local syllables = mw.text.split(normalised, "-")

	for i = 1, #syllables do
		local syl = syllables[i]
		-- change consonants
		syl = ugsub(syl,'[ptky]',{['p']='b',['t']='d',['k']='g',['y']='i'})
		syl = ugsub(syl,'[bdgc]h',{['bh']='p',['dh']='t',['gh']='k',['ch']='z'})
		syl = ugsub(syl,'zh','c')
		syl = ugsub(syl,'([zcsi])([iíìî])', palatalise)
		-- find tones
		local tone = tone_of(syl)
		-- remove tone marks and fix vowels
		syl = ugsub(syl, '[âêîôû̂àèìòù̀áéíóú́ńǹ̍ṳ]', final_conv)
		syl = ugsub(syl, 'o([ae])', 'u%1')
		-- add new tone marks
		syllables[i] = syl .. tone
	end

	return table.concat(syllables, " ")
end

--- Scratch check that two equal string values address the same table slot.
-- @return true, since `c[b]` and `c[a]` read the entry stored under "abc"
function export.test()
	local a = "abc"
	local b = "abc"
	local c = {}
	c[a] = 5
	return (c[b] == c[a])
end

return export