-- Module:User:Suzukaze-c/02

--[==[

mostly unrelated things, in one single module bc i can.

Function index:
  show                 format simple word lists
  rpre                 convert to using data given
  extract_ja_readings  extract readings from a japanese entry
  r                    convert to using data in the linked entry
  hzreport             generate hanzi entry report
  newja                new japanese entry generator
  newhz                new hanzi entry generator
  newhzmul             new hanzi translingual entry generator, from a copy-paste
                       of the unihan page. how horrifying
  test_2               create a special:search link
  test_3               newhz wu
  test_9               ltc/och attention; note to self: consider retracting
  test_10              determine ?action=edit&section=x
  test_11              HSK list words missing appropriate category
                       → https://en.wiktionary.org/?oldid=46157868
  test_12              jouyou kanji that need to be updated (User:Suzukaze-c/cleanup/12)
  test_13              process/update
  test_14              generate list of s from a copy-paste of the Category: page
  make_map, map_test_data  kartographer test
  test_16              kun readings that may be a verb with a '-' in the wrong place
                       https://en.wiktionary.org/w/index.php?title=%E6%89%93&diff=prev&oldid=50292857
  test_17              look for missing japanese entries, based on jouyou kanji data
                       https://en.wiktionary.org/?oldid=52318414
  test_18              extract entry names from a copy-paste of the Category: page
  test_19              do things from a copy-paste of

=p.newja{['args']={['testing_pagename']='プログレッシブロック', 'プログレッシブ ロック', 'n'}}

]==]

-- Exported module table, plus local aliases for the Scribunto string
-- helpers used throughout (locals avoid repeated global table lookups).
local export = {}

local replace   = mw.ustring.gsub
local find      = mw.ustring.find
local match     = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split     = mw.text.split
local itersplit = mw.text.gsplit
local trim      = mw.text.trim
local lower     = mw.ustring.lower
local sub       = mw.ustring.sub
local len       = mw.ustring.len

-- Format a simple word list (frame.args[2]) in language frame.args[1] as a
-- single linked term. For Japanese, `￥` marks a rubied word and `・`
-- separates lemma and kana within it.
-- Returns: '«lang» ' followed by the rendered link.
function export.show(frame)
	local text = trim(frame.args[2])
	local lang = frame.args[1]
	local out = {}

	-- punctuation characters treated as word separators, pattern-escaped
	local non = require('Module:string utilities').pattern_escape('＊！？．，. 、：；…《》「」【】（）!?.,:;"—·☆[] ')
	local rubied = false

	text = replace(text, '\n+', '＊') -- newlines become a visible separator
	text = replace(text, '　', ' ')   -- fullwidth space → ASCII space
	text = replace(text, '%s+', ' ')  -- collapse runs of whitespace

	if not find(text, '[ ＊]') then
		-- single word: space out each character
		for char in itermatch(text, '(.)') do
			table.insert(out, '' .. char .. ' ')
		end
	else
		for punca, word, puncb in itermatch(text, '([' .. non .. ']*)([^' .. non .. ']+)([' .. non .. ']*)') do
			if (lang == 'ja') and match(word, '[￥・]') then
				rubied = true
				word = replace(word, '￥', ' ')
				local lemma, kana = match(word, '(.+)・(.+)')
				if not kana then
					lemma = word
					-- no explicit kana: stub out kanji so ja-link still rubies
					kana = replace(word, '([㐀-鿕々])', '!')
				end
				word = require('module:ja-link').link({lemma = lemma, kana = kana})
				table.insert(out, punca .. word .. puncb)
			else
				-- FIX: original had mangled `punca....word....puncb`
				-- (a broken `..`), which is not valid Lua
				table.insert(out, punca .. word .. puncb)
			end
		end
	end

	out = table.concat(out)

	-- wrap everything in one full_link; trailing '//' and tr='-' suppress
	-- automatic transliteration of the assembled string
	if rubied then
		out = require('module:links').full_link({term = out .. '//', tr = '-', lang = require('module:languages').getByCode(lang), sc = require('module:scripts').getByCode('Jpan')})
	else
		out = require('module:links').full_link({term = out .. '//', tr = '-', lang = require('module:languages').getByCode(lang)})
	end

	return '«' .. lang .. '» ' .. out
end

-- Pick apart positional args (with optional leading 'ja' language code),
-- clean up a manual transliteration, and return the gloss suffix.
-- NOTE(review): the `'' .. gloss` return and the unused jp/tr/linktitle
-- results suggest template text was lost from this function in the
-- corrupted source — confirm against the page history before relying on it.
function export.rpre(frame) -- --
	local one = frame.args[1] or ''
	local two = frame.args[2] or ''
	local three = frame.args[3] or ''
	local four = frame.args[4] or ''

	local jp = ''
	local tr = frame.args['tr'] or ''
	local gloss = frame.args['gloss'] or ''
	local linktitle = '' -- FIX: was assigned without `local`, leaking a global

	if one == 'ja' then
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		jp = one
		linktitle = two
		gloss = three
	end

	-- keep only kana, the '￥' word separator and a few markers in tr
	tr = replace(tr, '[^￥.^、ぁ-ー]+', '')
	tr = replace(tr, '￥', ' ')

	if gloss ~= '' then gloss = ': ' .. gloss end

	if tr ~= '' then tr = '|' .. tr end

	-- no manual reading but the term contains kanji: reuse the term itself
	if tr == '' and find(jp, '[㐀-鿕𠀀-𬺡]') then tr = '|' .. jp end

	if linktitle ~= '' then jp = 'linkto=' .. jp .. '|' .. linktitle end

	return '' .. gloss
end

-- Extract kana readings from a Japanese entry's wikitext, following one
-- level of {{ja-see}} redirects. Returns a deduplicated list of readings;
-- an empty list if the input contains embedded wikilinks.
function export.extract_ja_readings(pagename)
	if match(pagename, "%[%[") then
		--error("Cannot process Japanese text with embedded wikilinks.")
		return {}
	end

	local readings
	local content

	-- Pull the kana parameters out of one ja-* headword template's arguments.
	local function process(text)
		-- strip named parameters that are not readings
		text = replace(text, 'hhira=[^|}]+', '')
		text = replace(text, 'decl=[^|}]+', '')
		text = replace(text, 'infl=[^|}]+', '')
		text = replace(text, 'kyu=[^|}]+', '')
		text = replace(text, 'head=[^|}]+', '')
		text = replace(text, 'hira=', '')
		if text == 'proper' or text == 'proper noun' then
			table.insert(readings, '^' .. pagename)
		end
		-- proper nouns: mark each reading with a leading '^'
		if find(text, 'proper') and not find(text, '%^') then
			text = '^' .. replace(text, '([ |])', '%1^')
		end
		-- split off inflectional endings for i-adjectives and verbs
		if find(content, 'infl=い') then text = replace(text, 'しい', 'し.い') end
		if find(content, 'ja%-verb') then text = replace(text, 'おう', 'お.う') end
		for parameter in itersplit(text, '|') do
			if find(parameter, '[あ-ー]') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Scan `content` for ja-* headword templates and process each one.
	-- FIX: original was a mangled `local function go` with no parameter list.
	local function go()
		for parameters in itermatch(content, '{{ja%-adj|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-noun|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-verb|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-verb%-suru|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-phrase|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-pos|([^}]+)}}') do process(parameters) end
		for parameters in itermatch(content, '{{ja%-altread|([^}]+)}}') do process(parameters) end
	end

	readings = {}

	-- FIX: `getContent` is a method and must be called; the mangled source
	-- referenced it without `()` and never invoked `go`.
	content = mw.title.new(pagename):getContent()
	if content then go() else return readings end

	for pagename in itermatch(content, '{{ja%-see|([^|}]+)') do
		-- FIX: original aliased the table (`readings_old = readings`), making
		-- the length comparison below always true; remember the count instead.
		local count_before = #readings

		content = mw.title.new(pagename):getContent()
		if content then go() end

		if count_before == #readings then
			-- 解れる→ほぐれる: scanning ほぐれる will find nothing,
			-- so fall back to the redirect target itself
			table.insert(readings, pagename)
		end
	end

	readings = require("Module:table").removeDuplicates(readings)

	return readings
end

-- Like rpre, but looks up the reading from the linked entry via
-- extract_ja_readings. An optional leading numeric argument chooses among
-- multiple readings; with several readings and no choice, a debug dump of
-- the candidates is returned instead.
function export.r(frame)
	local one = frame.args[1] or ''
	local two = frame.args[2] or ''
	local three = frame.args[3] or ''
	local four = frame.args[4] or ''

	local jp = ''
	local tr = ''
	local gloss = frame.args['gloss'] or ''
	local choice = ''
	local linktitle = '' -- FIX: was assigned without `local`, leaking a global

	if find(one, '[0-9]') then
		choice = one
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	elseif one == 'ja' then
		choice = ''
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		choice = ''
		jp = one
		linktitle = two
		gloss = (gloss ~= '' and gloss or three)
	end

	local readings = export.extract_ja_readings(jp)

	if #readings > 1 then
		if choice ~= '' then
			tr = readings[tonumber(choice)]
		else
			-- ambiguous: show the candidates so the editor can pick a number
			return '\n' .. require("Module:debug").highlight_dump(readings)
		end
	else
		tr = (readings[1] and readings[1] or jp)
	end

	-- if term is pure kana and the found reading is identical, drop it
	-- FIX: the two '' literals here were stripped in the mangled source
	if replace(jp, '[あ-ー]', '') == '' and tr == jp then tr = '' end

	if gloss ~= '' then gloss = ': ' .. gloss end

	if tr ~= '' then tr = '|' .. tr end

	if linktitle ~= '' then jp = 'linkto=' .. jp .. '|' .. linktitle end

	-- NOTE(review): as in rpre, template text appears to have been lost
	-- from this return in the corrupted source.
	return '' .. gloss

	-- (IME tip: to re-convert already-converted words, select them and
	-- press Space, or select them and press Win+C)
end

-- Generate a wikitable report over a stored list of hanzi candidates
-- (Module:User:Suzukaze-c/02/hz), one row per character, with ✔️/❌️ flags
-- for the presence of various sections/templates in each entry.
function export.hzreport(frame)
	local text = {}
	local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz

	local rows = {}
	local y, n = '✔️', '❌️' -- taking advantage of colored emoji. with vs16

	-- NOTE(review): the separator literal was lost in the corrupted source;
	-- '' splits into single characters, which matches the per-hanzi loop body.
	for hz in itersplit(candidates[frame.args[1]], '') do
		-- FIX: `getContent` must be invoked; '' fallback for missing pages
		local content = mw.title.new(hz):getContent() or ''

		local zh = find(content, '==Chinese==') and y or n
		local def = find(content, '\n\n#') and y or n
		local der = find(content, '===Compounds===+\n{{zh%-der') and y or n
		local uns = match(content, '|sim=(.)') or ''
		local unt = match(content, '|tra=(.)') or ''
		local ufs = match(content, '{{zh%-forms|s=(.)') or ''
		local uft = match(content, '{{zh%-see|([^}]+)}}') or ''
		local goh = find(content, '===Glyph origin===') and y or n
		local histf = find(content, '{{[Hh]an[_ ]ety[ml]}}') and y or n
		local ids = find(content, '|ids=[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]') and y or n
		local yue = match(content, '|c=([a-z0-9,]+)') or ''
		local prc_tw = find(content, '|m=[㐀-鿕𠀀-𬺡]') and y or n

		-- truncate long cell values; link the character to its edit page
		uft = replace(uft, "[{|']", '.')
		if len(uft) > 6 then uft = sub(uft, 1, 5) .. '◆' end
		if len(yue) > 6 then yue = sub(yue, 1, 5) .. '◆' end
		hz = '[' .. tostring(mw.uri.canonicalUrl(hz, 'action=edit')) .. ' ' .. hz .. ']'

		local cells = { hz, zh, def, der, uns, unt, ufs, uft, goh, histf, ids, yue, prc_tw }
		table.insert(rows, '| ' .. table.concat(cells, ' || '))
	end

	table.insert(text, '')
	table.insert(text, '{| class="wikitable sortable Hani"')
	table.insert(text, '! hz || zh? || def || der || unS || unT || ufS || ufT || goh || histF || ids || yue || prc/tw')
	table.insert(text, '|-')
	table.insert(text, table.concat(rows, '\n|-\n'))
	table.insert(text, '\n|}')

	return table.concat(text, '\n')
end

-- Generate a complete new Japanese entry (wikitext) from compact arguments.
-- NOTE(review): this block is whitespace-mangled source. Several method
-- calls below lost their `()` (`frame:getParent`, `mw.title.getCurrentTitle`,
-- `:getContent`), some '' string literals were stripped, and many lines
-- beginning with `--` have swallowed code from following lines. The code is
-- left byte-identical; compare with the live module before reusing.
function export.newja(frame) local a = frame.args[1] and frame.args or frame:getParent.args

local m_languages = require('Module:languages') local m_scripts = require('Module:scripts')

local lang_ja = m_languages.getByCode('ja') local sc_Jpan = m_scripts.getByCode('Jpan') local sc_Hrkt = m_scripts.getByCode('Hrkt') local sc_Hani = m_scripts.getByCode('Hani')

-- page being generated (overridable via |testing_pagename=) and current depth
-- NOTE(review): `getCurrentTitle` here should presumably be called with ()
local pagename = a['testing_pagename'] or mw.title.getCurrentTitle.text local header_level = 2 -- header level

-- wikitext of the current page, used to detect existing sections
local this_content = mw.title.new(mw.title.getCurrentTitle.text):getContent or ''

-- part-of-speech code → list of 'Header name:headword-template' entries
local pos_datas = { ['a'] = {'Adjective:adj'}, ['n'] = {'Noun:noun'}, ['p'] = {'Phrase:phrase'}, ['v'] = {'Verb:verb'}, ['suru'] = {'Verb:verb-suru'}, ['vf'] = {'Verb:verb form'},

['ad'] = {'Adverb:pos|adverb'}, ['adv'] = {'Adverb:pos|adverb'}, ['i'] = {'Interjection:pos|interjection'}, ['pn'] = {'Proper noun:pos|proper noun'}, ['af'] = {'Adjective:pos|adjective form'}, ['c'] = {'Conjunction:pos|conjunction'},

['s'] = {'Noun:noun', 'Verb:verb-suru'}, ['an'] = {'Adjective:adj', 'Noun:noun'}, }	local pos_aliases = { [''] = 'n', }	for pos_codes, array in pairs(pos_datas) do		for i, name_and_template in ipairs(array) do			name_and_template = split(name_and_template, ':') pos_datas[pos_codes][i] = { ['name'] = name_and_template[1], ['template'] = name_and_template[2], }		end end

-- map user-facing verb group numbers to template |type= values
local verb_types_corresp = { ['1'] = '2', -- ichidan is type=2 ['5'] = '1', -- godan is type=1 }

-- backslash shortcuts expanded inside |e= (etymology) and |un= (usage notes)
local etymology_magic_words = { ['der ([^.,{}\\]+)'] = function(a) return '{{der|ja|' .. replace(a, '^([a-z]+) ', '%1|') .. '}}'		end, ['bor ([^.,{}\\]+)'] = function(a) return '{{bor|ja|' .. replace(a, '^([a-z]+) ', '%1|') .. '}}'		end, ['b ([^.,{}\\]+)'] = function(a) return '{{bor|ja|' .. replace(a, '^([a-z]+) ', '%1|') .. '}}'		end, ['obor ([^.,{}\\]+)'] = function(a) return '{{obor|ja|' .. replace(a, '^([a-z]+) ', '%1|') .. '}}'		end, ['int'] = function(a) return '{{internationalism|ja}}; see {{cog|en|}}' end, ['rd ([a-z_]+) ([a-z_]+)'] = function(a, b) return '{{rendaku2|' .. replace(a, '_', '̄') .. '|' .. replace(b, '_', '̄') .. '}}'		end, ['rfe (.+)'] = function(a) return '{{rfe|ja' .. (a and '|' .. a or '') .. '}}'		end, ['ltc'] = function(a) return '{{der|ja|ltc|-}} {{ltc-l|' .. pagename .. '}}'		end, }	local usage_notes_magic_words = { ['bio'] = '{{U:ja:biology}}' }

-- accumulated output lines
local output = {}

-- parameter spec for Module:parameters; see inline comments for each key
-- parameters sorted by appearance local params = { [2] = {list = true, allow_holes = true}, -- pos and def

['en'] = {type = 'number'}, -- etymology number ['e'] = {}, -- etymology text ['eid'] = {}, -- t:etymid ['l'] = {}, -- alt in header ['ate'] = {allow_empty = true}, -- ateji ['y'] = {allow_empty = true}, -- yomi ['r'] = {allow_empty = true}, -- rendaku ['ll'] = {}, -- alt in t:ja-kanjitab ['w'] = {allow_empty = true}, -- wikipedia ['wen'] = {allow_empty = true}, -- en.wikipedia ['file'] = {}, -- file ['caption'] = {}, -- file caption

['nop'] = {allow_empty = true}, -- no t:ja-pron ['a'] = {}, -- accent ['dev'] = {}, -- devoicing

['head'] = {}, -- head [1] = {allow_empty = true, default = ''}, -- kana ['tr'] = {}, -- transivity ['ak'] = {}, -- alternate kana ['mw'] = {}, -- measure word/counter ['kyu'] = {}, -- kyuujitai ['hh'] = {}, -- historical hiragana

['un'] = {}, -- usage notes ['sy'] = {}, -- synonyms ['an'] = {}, -- antonyms ['hsup'] = {}, -- hypernyms ['hsub'] = {}, -- hyponyms. super- and sub- bc i'm big dumb ['co'] = {}, -- coordinate terms ['de'] = {}, -- derived terms ['re'] = {}, -- related terms ['desc'] = {}, -- descendants ['al'] = {}, -- see also

['reflist'] = {allow_empty = true}, -- references header

['c'] = {}, -- t:C ['cln'] = {}, -- t:cln

['testing_pagename'] = {},

-- `allow_empty = true` -- instead of `type = 'boolean'` -- `|r=` is sufficient for saying 'rendaku yes' -- instead of `|r=y`

-- "typo" "correction" also takes up time ['ateji'] = {alias_of = 'ate'}, ['yomi'] = {alias_of = 'y'}, ['h'] = {alias_of = 'hh'}, ['s'] = {alias_of = 'sy'}, ['syn'] = {alias_of = 'sy'}, ['ant'] = {alias_of = 'an'}, ['d'] = {alias_of = 'de'}, ['also'] = {alias_of = 'al'}, ['cat'] = {alias_of = 'c'}, }	a = require('Module:parameters').process(a, params)

-- convert a wāpuro-romaji-ish input to kana via Module:typing-aids
local function waapuro_to_kana(text) if text == '' then return text end -- with just one parameter, mod:typing-aids will fallback to language 'all' and treat parameter 1 as the text to convert. for us that will return 'ja'

-- helper definitions: wr appends a line, wrh appends a header at the current
-- level, hl adjusts the level, needs_reading tests if a term needs kana,
-- generate_tab_params builds {{ja-kanjitab}} args (examples in comments)
text = replace(text, '-', '@@@') -- preserve hyphen text = replace(text, '_', '-') -- instead of hyphen, use underscore for chouonpu --text = require('Module:typing-aids').replace({'ja', text}) text = replace(text, '@@@', '-') -- restore hyphen text = replace(text, '　', ' ') -- replace fullwidth space return text end local function wr(text) table.insert(output, text) end local function wrh(text) wr('\n' .. mw.ustring.rep('=', header_level) .. text .. mw.ustring.rep('=', header_level)) end local function hl(n) header_level = header_level + n	end local function needs_reading(pagename, kana) return not (kana or find(pagename, '^[ぁ-ー ^%%.%[%]%-]+$')) end local function generate_tab_params(kana, a)		-- 銀行 -- gin,kou -- gin|kou|yomi=o -- 文字 -- mon;mo,ji -- mon|k1=mo|ji|yomi=o -- 送り仮名 -- [oku]ri[ka;ga][na] -- oku|ri|ka|k3=ga|na|yomi=k -- 送仮名 -- [oku:ri][ka;ga][na] -- oku|o1=ri|ka|k3=ga|na|yomi=k -- 満漢全席 -- man-kan zen,seki -- man|kan|zen|seki|yomi=o -- 犬 -- inu -- inu

-- because pressing shift is effort

local yomi

local params = {}

-- '[...]' marks kun readings; otherwise split okurigana-style on , . - space
if find(kana, '%[') then yomi = 'k'			for yomigana in itermatch(kana, '%[(.-)%]') do				table.insert(params, yomigana) end elseif sc_Hani:countCharacters(pagename) > 0 then if find(kana, ',') then yomi = 'o'			end kana = replace(kana, '%^', '') kana = replace(kana, '[ .-]', ',') if kana ~= '' then params = split(kana, ',') end end

-- ';' introduces per-kanji |kN= and ':' per-kanji |oN= parameters
for i, yomigana in ipairs(params) do yomigana = replace(yomigana, ';', '|k' .. i .. '=') yomigana = replace(yomigana, ':', '|o' .. i .. '=') params[i] = yomigana end

for i, _ in ipairs(params) do			params[i] = require('Module:ja').kata_to_hira(params[i]) end

if a['r'] then table.insert(params, 'r=y') end

if a['ate'] then table.insert(params, 'ateji=' .. (a['ate'] == '' and 'y' or a['ate'])) end

yomi = a['y'] or yomi if yomi then table.insert(params, 'yomi=' .. yomi) end

if a['ll'] then table.insert(params, 'alt=' .. replace(a['ll'], '、', ',')) end

-- generate_links_list: expand a comma-separated list of terms (with optional
-- q=/sense= markers and readings) into '* {{ja-r|…}}' / '* {{ja-l|…}}' lines
return params end local function generate_links_list(text) -- 3密 -- * {{ja-l|3密}} -- 3密 sanmitu -- * {{ja-r|3密|さんみつ}} -- 3密 sanmitu,sense=Sense\q=Qualifier\三つの密 mittu no mitu\gloss=Gloss -- * {{ja-r|3密|さんみつ}} -- * {{sense|Sense}} {{q|Qualifier}} {{ja-r|三つの密|みっつ の みつ|gloss=Gloss}} -- いい_ね -- * {{ja-r|いい ね}}

local params = {}

for i, item in ipairs(split(text, '[,、]')) do			item = split(item, '[\\￥]')

local q			local sense local output_link = {}

-- first non-q/sense segment is the term; auto-look up its reading if absent
local main_found = false local pagename, kana local r_or_l while #item > 0 do				if find(item[1], '^q=') then q = replace(item[1], '^q=', '') elseif find(item[1], '^sense=') then sense = replace(item[1], '^sense=', '') elseif not main_found then pagename, kana = match(item[1], '^([^ ]-)[ 　](.+)$') pagename = pagename or item[1] -- if match returns nil pagename = (pagename and replace(pagename, '_', ' ') or pagename) kana = (kana and replace(kana, '　', ' ') or kana) kana = (kana and waapuro_to_kana(kana) or kana) if not kana then kana = export.extract_ja_readings(pagename) if #kana == 1 then kana = kana[1] else kana = nil end end

r_or_l = needs_reading(pagename, kana) and 'l' or 'r'

table.insert(output_link, 'ja-' .. r_or_l) table.insert(output_link, pagename) table.insert(output_link, kana)

main_found = true else table.insert(output_link, item[1]) end table.remove(item, 1) end

table.insert(				params,				'* ' .. (sense and '{{sense|' .. sense .. '}} ' or '') .. (q and '{{q|' .. q .. '}} ' or '') .. '{{' .. table.concat(output_link, '|') .. '}}'			) end return params end

-- main body: normalize the kana argument
local kana = a[1] local kana_no_hyphens

-- convert kana from romaji to kana kana = waapuro_to_kana(kana) kana = mw.ustring.toNFC(kana) -- wtf?

-- convert fullwidth CJK symbols to halfwidth kana = replace(kana, '(.)', {		['「'] = '[',		['；'] = ';',		['：'] = ':',		['」'] = ']',		['、'] = ',',		['. '] = '.',		['　'] = ' ',	})

-- generate t:ja-kanjitab local tab if a['ll'] or sc_Hani:countCharacters(pagename) > 0 then local tab_params = generate_tab_params(kana, a)		tab_params = table.concat(tab_params, '|') tab = '{{ja-kanjitab' .. (tab_params and '|' .. tab_params) .. '}}'	end

-- remove markup for generating t:ja-kanjitab from kana kana = replace(kana, '[0-9]', '') -- kun kana = replace(kana, '(%[)([^%[%]]-):([^%[%]]-)(%])', '%1%2%4%3') kana = replace(kana, '(%[)([^%[%]]-);([^%[%]]-)(%])', '%3') kana = replace(kana, '(%[)([^%[%]]-)(%])', '%2') -- on	kana = replace(kana, '([ .,-])([^ .,-]-;)', '%1') kana = replace(kana, '^([^ .,-]-;)', '') kana = replace(kana, ',', '')

-- for t:ja-pron kana_no_hyphens = replace(kana, '[-^]', '')

-- blank if it's the same as the pagename. avoid unnecessary template input if kana == pagename then kana = '' end if kana_no_hyphens == pagename then kana_no_hyphens = '' end

-- NOTE(review): the '' literals around the replace() below were stripped
-- automatic |head= using |1= (kana) if replace(kana, '[ %^%-%.]+', '') == pagename and not a['head'] then a['head'] =  .. replace(kana, '([ %^%-%.]+)', '%1') ..  a['head'] = replace(a['head'], '%[%[([はがをにのもで])%]%]', '%1') end

-- process etymology -- process usage notes if a['e'] then -- expand shortcuts for magic_word, f in pairs(etymology_magic_words) do a['e'] = replace(a['e'], '\\' .. magic_word, f)			a['e'] = replace(a['e'], ' }}{{', '}} {{') end -- automatically fill empty {{bor}}/{{der}}/{{cog}} if a[2][2] and match(a[2][2], '%[%[(.-)%]%]') then a['e'] = replace(a['e'], '({{[a-z]+[+]?|ja|[a-z]+)(}})', function(template_head, template_tail) return template_head .. '|' .. match(a[2][2], '%[%[(.-)%]%]') .. template_tail end) a['e'] = replace(a['e'], '(see {{cog|en|)(}})', function(template_head, template_tail) return template_head .. match(a[2][2], '%[%[(.-)%]%]') .. template_tail end) end -- add final period if not find(a['e'], '%.$') and not find(a['e'], 'UNIQ.-QINU') and not find(a['e'], '{{rendaku2[^}]+}}$') and not find(a['e'], '{{pedia[^}]+}}$') and not find(a['e'], '%-%->$') and not find(a['e'], '{{rfe[^\n]+$') then a['e'] = a['e'] .. '.'		end -- add leading 'From' if find(a['e'], '^{{compound|') then a['e'] = 'From ' .. a['e'] end -- change leading t:bor, t:der to t:bor+, t:der+ if find(a['e'], '^{{bor|') or find(a['e'], '^{{der|') then a['e'] = replace(a['e'], '^{{bor|', '{{bor+|') a['e'] = replace(a['e'], '^{{der|', '{{der+|') end end if a['un'] then -- expand shortcuts a['un'] = replace(a['un'], '\\([a-z]+)', usage_notes_magic_words) end

-- automatically make an {{etymid}} if a['en'] and not a['eid'] then if match(pagename, '^[㐀-鿕]+$') then a['eid'] = kana_no_hyphens elseif a['ll'] then a['eid'] = match(a['ll'], '^([^、,]+)') end end

-- write Japanese header -- write etymology section -- write etymology -- write t:wikipedia -- write t:ja-kanjitab -- write alternative forms

if match(this_content, 'ja%-readings') and not a['en'] then a['en'] = 0 end if a['en'] then -- en = 0 -- for adding new sections under a single character entry like 字, where you want an Etymology section for sanity, but just one, and no Japanese header

-- NOTE(review): several comparisons like `a['w'] ==  and  or` below have
-- lost their '' string literals in the corrupted source
if a['en'] == 1 and not match(this_content, 'ja%-readings') then wrh('Japanese') end hl(1) wrh('Etymology' .. (a['en'] == 0 and '' or ' ' .. a['en'])) wr(a['eid'] and ('{{etymid|ja|' .. a['eid'] .. '}}') or nil) hl(1) wr(tab and tab or nil) wr(a['w'] and ('{{wikipedia|lang=ja' .. (a['w'] ==  and  or '|' .. a['w']) .. '}}') or nil) wr(a['wen'] and ('{{wikipedia|lang=en|' .. a['wen'] .. '}}') or nil) wr(a['file'] == 'rfi' and '{{rfi|ja}}' or nil) if a['file'] == 'rfi' then a['file'] = nil end wr(a['file'] and ('') or nil) wr(a['e']) if a['l'] then wrh('Alternative forms') wr(table.concat(generate_links_list(a['l']), '\n')) end else wrh('Japanese') hl(1) wr(tab and tab or nil) wr(a['w'] and ('{{wikipedia|lang=ja' .. (a['w'] ==  and  or '|' .. a['w']) .. '}}') or nil) wr(a['wen'] and ('{{wikipedia|lang=en|' .. a['wen'] .. '}}') or nil) wr(a['file'] == 'rfi' and '{{rfi|ja}}' or nil) if a['file'] == 'rfi' then a['file'] = nil end wr(a['file'] and ('') or nil) if a['l'] then wrh('Alternative forms') wr(table.concat(generate_links_list(a['l']), '\n')) end if a['e'] then wrh('Etymology') wr(a['e']) end end

if sc_Hani:countCharacters(pagename) > 0 and not a[2][1] and not a[2][2] then -- if title has kanji and not given pos or def -- write t:ja-see by combining title and reading if a['e'] then wrh('Definitions') end pagename_to_kana = pagename for char in itermatch(replace(replace(a[1], '[：:]', ), '[ぁ-ン]+[；;]', ), '[ぁ-ン]+') do			pagename_to_kana = replace(pagename_to_kana, '[㐀-龥]', char, 1) end wr('{{ja-see|' .. pagename_to_kana .. '}}') elseif a[2][1] and a[2][1] ~= '' and not find(a[2][1], '[a-z]') then -- if pos is non-latin -- write t:ja-see using that text if a['e'] then wrh('Definitions') end wr('{{ja-see|' .. table.concat(a[2], '|') .. '}}') else -- write t:ja-pron if not a['nop'] then -- 1DJR,2,3- -- 0DJR NHK

local pron_params = {}

-- parse |a= accent spec: digits = accent, letters = reference work code
if kana_no_hyphens ~= '' or needs_reading(pagename, kana) then table.insert(pron_params, kana_no_hyphens) end if a['a'] then for i, acc_item in ipairs(split(a['a'], '[,、]')) do					local acc, acc_ref = match(acc_item, '^(%d+)(.*)$') acc_ref = replace(acc_ref, ' ', ',') acc_ref = string.upper(acc_ref) if acc_ref == '' then acc_ref = 'DJR' elseif acc_ref == '-' then acc_ref = nil end

if acc_ref and not a['reflist'] then a['reflist'] = true end

table.insert(pron_params, 'acc' .. (i > 1 and i or '') .. '=' .. acc) table.insert(pron_params, acc_ref and 'acc' .. (i > 1 and i or '') .. '_ref=' .. acc_ref or nil) table.insert(pron_params, a['dev'] and 'dev=' .. a['dev'] or nil) end end

wrh('Pronunciation') wr('{{ja-pron' .. (#pron_params > 0 and '|' .. table.concat(pron_params, '|') or '') .. '}}') end

-- if no pos or def parameters -- then generate a default Noun and t:rfdef if a[2]['maxindex'] == 0 then a[2] = {''} a[2]['maxindex'] = 1 end

-- iterate over pos/def pairs; see examples in the comment below
i = 1 while i <= a[2]['maxindex'] do			-- 犬 -- 犬 n			-- 赤い a			-- 赤い a,i -- 明らか a,na -- 画然 a,tari -- 異常 an			-- 食べる v,2

local pos_code local defs local type, infl

pos_code = a[2][i] or '' defs = { a[2][i + 1] or '{{rfdef|ja}}', a[2][i + 2] or '{{rfdef|ja}}', }

-- shortcut for {{lb}} -- [slang,dated] defs[1] = replace(defs[1], '^%[([^%[%]]+)%]', function(labels) return '{{lb|ja|' .. replace(labels, ',', '|') .. '}}' end) defs[1] = replace(defs[1], '# %[([^%[%]]+)%]', function(labels) return '# {{lb|ja|' .. replace(labels, ',', '|') .. '}}' end) defs[2] = replace(defs[2], '^%[([^%[%]]+)%]', function(labels) return '{{lb|ja|' .. replace(labels, ',', '|') .. '}}' end) defs[2] = replace(defs[2], '# %[([^%[%]]+)%]', function(labels) return '# {{lb|ja|' .. replace(labels, ',', '|') .. '}}' end)

-- 'pos,type' → separate code and conjugation/inflection type
local match_a, match_b = match(pos_code, '^(.+),(.+)$') if match_a then pos_code, type = match_a, match_b end

pos_code = pos_aliases[pos_code] or pos_code

if pos_code == 'v' and verb_types_corresp[type] then type = verb_types_corresp[type] end

-- default type if not type then if pos_code == 'an' then type = 'na' elseif pos_code == 'v' then type = '1' elseif pos_code == 'a' then type = 'i'				end end

-- adjectives use infl if pos_code == 'an' or pos_code == 'a' then infl = type type = nil end

-- suffixes are uhhh. uh. fuck it it's all 活用 anyway if pos_code == 'suffix' then if type == 'i' then infl = type type = nil end end

-- get data local pos_data = pos_datas[pos_code] -- create fallback data pos_data = pos_data or { {					['name'] = replace(pos_code, '^.', mw.ustring.upper), ['template'] = 'pos|' .. pos_code, },			}

-- write header, etc for ii, name_and_template in ipairs(pos_data) do				local is_a_or_v = false

name = name_and_template['name'] template = name_and_template['template'] is_a_or_v = match(template, '^adj') or match(template, '^verb') or match(template, '^pos|suffix') -- prevent inappropriate addition of parameters (`noun|infl=na`)

-- build the headword template arguments; NOTE(review): the `kana ~=  and`
-- comparison below lost its '' literals in the corrupted source
if (sc_Hani:countCharacters(pagename) > 0) then template = template .. (a['head'] and '|head=' .. a['head'] or '') template = template .. (kana ~=  and '|' .. kana or ) else template = template .. (a['head'] and '|' .. a['head'] or (kana ~= '' and '|' .. kana or '')) end template = template .. (a['ak'] and '|' .. replace(waapuro_to_kana(a['ak']), ',', '|') or '') if is_a_or_v then template = template .. (a['tr'] and '|tr=' .. a['tr'] or '') template = template .. (type and '|type=' .. type or '') template = template .. (infl and '|infl=' .. infl or '') end template = template .. (a['mw'] and '|count=' .. a['mw'] or '') template = template .. (a['kyu'] and '|kyu=' .. a['kyu'] or '') template = template .. (a['hh'] and '|hhira=' .. waapuro_to_kana(a['hh']) or '')

wrh(name) wr('{{ja-' .. template .. '}}') wr('') wr('# ' .. defs[ii])

-- conjugation/inflection sections for verbs and adjectives
if is_a_or_v then local kana_stem = sub(kana ~= '' and kana or pagename, 0, -2) kana_stem = (kana == '' and kana or kana_stem) -- the templates will be smart if you do not give it a reading local kana_last = sub(kana ~= '' and kana or pagename, -1) kana_last = lang_ja:transliterate(kana_last, sc_Hrkt) if type or pos_code == 's' or pos_code == 'suru' then hl(1) wrh('Conjugation') if type == '2' then wr('{{ja-ichi' .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}') elseif pos_code == 's'or pos_code == 'suru' then wr('{{ja-suru' .. (kana ~= '' and '|' .. kana or '') .. '}}') elseif type == '1' then wr('{{ja-go-' .. kana_last .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}') else wr('{{rfinfl|ja}}') end hl(-1) end if infl then hl(1) wrh('Inflection') if infl == 'na' then wr('{{ja-' .. infl .. (kana ~= '' and '|' .. kana or '') .. '}}') elseif infl == 'shiku' or infl == 'ku' then -- ja-conj-bungo will not be smart wr('{{ja-conj-bungo|' .. sub(kana ~= '' and kana or pagename, 0, -2) .. '|adj-' .. infl .. '}}') elseif infl == 'i' then wr('{{ja-' .. infl .. (kana_stem ~= '' and '|' .. kana_stem or '') .. '}}') else wr('{{rfinfl|ja}}') end hl(-1) end end

-- nym/related-term sections are written only once, after the first header
if i == 1 and ii == 1 then hl(1) if a['un'] then wrh('Usage notes') wr('* ' .. a['un']) end if a['sy'] then wrh('Synonyms') wr(table.concat(generate_links_list(a['sy']), '\n')) end if a['an'] then wrh('Antonyms') wr(table.concat(generate_links_list(a['an']), '\n')) end if a['hsup'] then wrh('Hypernyms') wr(table.concat(generate_links_list(a['hsup']), '\n')) end if a['hsub'] then wrh('Hyponyms') wr(table.concat(generate_links_list(a['hsub']), '\n')) end if a['co'] then wrh('Coordinate terms') wr('*' .. replace(table.concat(generate_links_list(a['co']), ','), '%*', '')) end if a['de'] then wrh('Derived terms') wr(table.concat(generate_links_list(a['de']), '\n')) end if a['re'] then wrh('Related terms') wr(table.concat(generate_links_list(a['re']), '\n')) end if a['desc'] then wrh('Descendants') wr('* ' .. a['desc']) end if a['al'] then wrh('See also') wr(table.concat(generate_links_list(a['al']), '\n')) end hl(-1) end end

-- advance i by the number of PoS headers produced i = i + 1 + (#pos_data) end end

if (a['reflist'] or (a['e'] and find(a['e'], 'UNIQ.-QINU'))) and (a['reflist'] ~= 'n') and (not a['en']) then if a['en'] then hl(-1) end wrh('References') wr(' ') end

-- trailing category templates
if a['c'] or a['cln'] then wr('') if a['c'] then a['c'] = replace(a['c'], '^.', mw.ustring.upper) a['c'] = replace(a['c'], '(.)', function(a, b) return a .. mw.ustring.upper(b) end) wr('{{C|ja|' .. replace(a['c'], '[,]', '|') .. '}}') end if a['cln'] then wr('{{cln|ja|' .. replace(a['cln'], '[,]', '|') .. '}}') end end

output = table.concat(output, '\n')

-- html comments -- real html comments are discarded before the module ever knows output = replace(output, '<%-%-', ' 0 and hl + 1 or hl - 1) end -- header_{in|de}crement
-- NOTE(review): the line above is corrupted source — the tail of export.newja
-- (an HTML-comment unescaping gsub, its return, and closing `end`) and the
-- head of the next function (including its hcr header-increment helper) were
-- fused into one comment line, with an unknown amount of text lost between.
-- Recover both functions from the page history before using this module.

-- Interior of the hanzi entry generator (presumably export.newhz); its
-- header was destroyed by the corruption above, so locals like `character`,
-- `a`, `en`, `head`, `hcr`, `hl`, `m_zh` and `m_zh_new` are defined in the
-- lost text. Code below is left byte-identical.
-- NOTE(review): `:getContent` here is missing its call parens in the
-- mangled source.
local content = mw.title.new(character):getContent or ''

-- simplified/alternative forms: explicit args override values scraped from
-- the existing entry text
local simp_form = a['s'] or match(content, '|sim=(.)') or match(content, '|s=(.)') or false --local trad_form = a['t'] or match(content, '|tra=(.)') or match(content, '|t=(.)') or false local alt_form = a['alt'] or a['v'] or match(content, '|alt=([^|}]+)') or false

local zhwp_data = require('Module:User:Suzukaze-c/02/zhwp')

--

-- whether to emit {{Han etym}} in the Glyph origin section
local add_han_etym = false

local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz

if en <= 1 then if find(candidates['hanetym'], character) or require("module:zh-glyph/phonetic/list")[character] then add_han_etym = true end end

-- when the character is a simplified form, point it at its traditional form
-- NOTE(review): `trad_form` is only defined inside a commented-out line
-- above — confirm against the page history
local zh_see

if not a['ignore'] then if trad_form or m_zh.ts_determ(character) == 'simp' then zh_see = (trad_form or m_zh.st(character)) end end

if a[1] and match(a[1], '^[㐀-鿕𠀀-𬺡]$') then zh_see = a[1] .. (a[2] and '|' .. a[2] or '') end

-- assemble {{zh-forms}} arguments
local zh_forms = ''

if simp_form or m_zh.ts_determ(character) == 'trad' then zh_forms = zh_forms .. '|s=' .. (simp_form or m_zh.ts(character)) end if alt_form then zh_forms = zh_forms .. '|alt=' .. alt_form end

-- {{zh-wp}} arguments: |wp=y → bare, |wp= → omit, else pipe-separated list
local zh_wp

if a['wp'] then if a['wp'] == 'y' then zh_wp = '' elseif a['wp'] == '' then zh_wp = false else zh_wp = '|' .. replace(a['wp'], ',', '|') end end

if zhwp_data.single_char_title[character] or zhwp_data.contains_astral[character] or zhwp_data.single_char_title[m_zh.ts(character)] then zh_wp = '' end

--

local output = {}

local function write(text) table.insert(output, text) end

--

-- language header and per-etymology scaffolding
if en <= 1 then write('==Chinese==') end

if en == 0 then if not zh_see then write('{{zh-forms' .. zh_forms .. '}}') end

if zh_wp then write('{{zh-wp' .. zh_wp .. '}}') end end

if a['ge'] or add_han_etym then write(head('Glyph origin')) if add_han_etym then write('{{Han etym}}') end if a['ge'] then write(a['ge']) end

if zh_see and en == 0 then write(head('Definitions')) end end

if en > 0 then write(head('Etymology ' .. tostring(en))) if not zh_see then write('{{zh-forms' .. zh_forms .. '}}') end

if zh_wp then write('{{zh-wp' .. zh_wp .. '}}') end end

if en > 0 then hl = hcr(1) end

-- pronunciation section: normalize each lect's input or auto-look it up
if zh_see then write('{{zh-see|' .. zh_see .. '}}') else write(head('Pronunciation'))

local m, c, h, mn, w = a['m'] or false, a['c'] or false, a['h'] or false, a['mn'] or false, a['w'] or false local mc, oc = a['mc'] or false, a['oc'] or false

-- m: zhuyin → pinyin, tone digits → combining marks, else derive via zh-new;
-- c/h/mn: fall back to check_pron lookups; mc/oc: flag if data modules exist
if m and find(m, '[ㄅ-ㄩ]') then m = replace(m, '[^．ˊˇˋㄅ-ㄩ]', '@') m = replace(m, '@+', '@') m = replace(m, '^@+', '') m = replace(m, '@+$', '') m = replace(m, '@', ',') m = require("module:cmn-pron").zhuyin_py(m) end if m and find(m, '[0-9]') then m = replace(m, '1', '̄') m = replace(m, '2', '́') m = replace(m, '3', '̌') m = replace(m, '4', '̀') m = replace(m, 'v', 'ü') end if not m then if require("module:zh/data/cmn-tag").MT[character] then -- if there is cmn-tag data m = character else m = mw.ustring.gsub(m_zh_new.pytemp(character,,,''), '，', ', ') or false -- based on line from zh-new if m == character or m == simp_form or m == m_zh.ts(character) then -- pinyin conversion failed m = false end end end if not c then c = m_zh.check_pron(character, 'yue', 1) or false if c and find(c, ',') then a['rhk'] = 'yes' end end if h and find(h, '[0-9]') then h = replace(h, 'w', 'ṳ') h = replace(h, '24', '̂') h = replace(h, '11', '̀') h = replace(h, '31', '́') h = replace(h, '55', '') h = replace(h, '2', '') h = replace(h, '5', '̍') end if not h then h = m_zh.check_pron(character, 'hak', 1) or false end if not mn then mn = m_zh.check_pron(character, 'nan-hbl', 1) or false end if w and find(w, '[PSQR]') then w = export.test_3(w) end if (not mc) and (mw.title.new('Module:zh/data/ltc-pron/' .. character).exists) then mc = 'y'		end if (not oc) and (mw.title.new('Module:zh/data/och-pron-BS/' .. character).exists or mw.title.new('Module:zh/data/och-pron-ZS/' .. character).exists) then oc = 'y'		end

if not m then write('') end if not m then write('{{rfp|cmn|Mandarin}}') end

write(head('Definitions')) write('{{head|zh|Han character}}') write('') if a['rcns'] then write('# {{lb|zh|Taiwan}} {{n-g|Only used in personal names.}}') else write('# ' .. (a[1] or '{{rfdef|zh}}')) end

-- compounds section via subst of zh-der/new; astral characters also pull
-- container titles from the zhwp data module
if not a['noder'] then -- Lua error: not enough memory local der = '' local der_add = ''

if a['der'] then der_add = a['der'] der_add = replace(der_add, '[^㐀-鿕𠀀-𬺡]+', '|') -- any non-hanzi text becomes separator der_add = replace(der_add, '|+', '|') der_add = replace(der_add, '^|', '') der_add = replace(der_add, '|$', '') der_add = '|' .. der_add end if match(character, '[𠀀-𬺡]') then for title, _ in pairs(zhwp_data.contains_astral) do					if len(title) > 1 and match(title, character) then der_add = der_add .. '|' .. title end end end der = frame:preprocess('{{subst:zh-der/new' .. (der_add or '') .. ((m and not match(m, ',')) and '|p=' .. mw.ustring.toNFC(m) or '') .. '}}')

-- NOTE(review): the replace() below lost its '' literals in the mangled source
if match(der, 'memory') then write(head('Compounds')) write('{{su#bst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', ) or ) .. '}}') -- let you add zh-der in a separate edit in case Lua returns "out of memory" elseif match(der, '[㐀-鿕𠀀-𬺡]') then write(head('Compounds')) write(der) end end

if a['also'] then write(head('See also')) write('* {{zh-l|' .. a['also'] .. '}}') end end

if en > 0 then hl = hcr(-1) end

-- references section driven by the r* flags
if a['rtw'] or a['rhk'] or a['ryt'] or a['riso'] or a['rcns'] or a['rnan'] then write(head('References')) if a['rtw'] then write('* {{R:twedu|' .. a['rtw'] .. '}}') end if a['rhk'] then write('* {{R:yue:mfccd}}') end if a['ryt'] then write('* {{R:yue:jyut.net}}') end if a['riso'] then write('* {{R:yue:Jyutping Database}}') end if a['rcns'] then write('* {{R:zh:CNS|' .. replace(a['rcns'], '%-', '|') .. '}}') end if a['rnan'] then if find(a['rnan'], '%d') then write('* {{R:nan:thcwd|' .. a['rnan'] .. '}}') else write('* {{R:nan:thcwdq}}') end end end

return trim(table.concat(output, '\n')) end

function export.newhzmul(frame) local text = frame.args[3] local char = mw.title.getCurrentTitle.text local x = mw.title.new(char):getContent or '' local model = trim([==[ {{character info}}

Han character

 * 1) $Definition