-- Module:Jpan-headword

local m_ja = require("Module:ja") local m_ja_ruby = require("Module:ja-ruby") local m_str_utils = require("Module:string utilities")

local byteoffset = mw.ustring.byteoffset local concat = table.concat local insert = table.insert local kana_to_romaji = require("Module:Hrkt-translit").tr local maxn = table.maxn local moraify = m_ja.moraify local remove = table.remove local ufind = m_str_utils.find local ugmatch = m_str_utils.gmatch local ugsub = m_str_utils.gsub local ulen = m_str_utils.len local umatch = mw.ustring.match local usub = m_str_utils.sub

-- Table of public entry points; returned at the end of the module.
local export = {}
-- Optional per-part-of-speech hooks, looked up by POS category name.
local pos_functions = {}

local range = mw.loadData('Module:ja/data/range') local Jpan = require("Module:scripts").getByCode("Jpan")

-- Strip wikilink markup from `text`, keeping only the displayed text:
-- "[[target|label]]" becomes "label" and "[[target]]" becomes "target".
-- @param text  string that may contain wikilinks
-- @return      the string without link brackets or piped link targets
local function remove_links(text)
	-- Remove the "[[target|" prefix of piped links first, then any brackets
	-- that are left. The outer parentheses drop gsub's second result (the
	-- substitution count) so that callers receive exactly one value.
	return (text
		:gsub("%[%[[^|%]]-|", "")
		:gsub("%[%[", "")
		:gsub("%]%]", ""))
end

-- Split a headword and its kana reading into aligned segments, using the
-- readings given in a kanjitab-style template found in the page's wikitext.
--
-- @param head           headword text; may contain [[...]] links
-- @param kana           kana reading for the whole headword
-- @param pagename       title of the page whose wikitext is searched
-- @param template_name  Lua pattern matching the template name
-- @return head, kana    both joined with '%' between segments when a template
--                       whose readings fit `kana` is found; otherwise the two
--                       arguments are returned unchanged
-- Raises an error when a matching template is ill-formed or when the number
-- of head segments and kana segments disagree.
local function assign_kana_to_kanji(head, kana, pagename, template_name)
	local m_tu = require'Module:template utilities'

	-- kanji_pos[i] = { first_byte, last_byte } of the i-th kanji within `head`.
	-- Index 0 is a sentinel so that "end of the previous kanji" is 0 for i = 1.
	local kanji_pos = {[0] = { nil, 0}}
	local head_nolink = {}
	-- Byte offset in `head` at which the substring currently being scanned starts.
	local link_border = 0
	local function insert_kanji_pos(substr)
		insert(head_nolink, substr)
		-- The leading "()" yields the character position of each kanji, which
		-- is then converted to a byte offset relative to `head`.
		for p1, w1 in ugmatch(substr, '()([々' .. range.kanji .. '])') do
			p1 = byteoffset(substr, p1) + link_border
			insert(kanji_pos, { p1, p1 + w1:len() - 1 })
		end
	end
	for p1, p2, w1 in m_tu.gfind_bracket(head, {['%[%['] = ']]'}) do
		-- Text before the link, then the link's display text (after any pipe).
		insert_kanji_pos(head:sub(link_border + 1, p1 - 1))
		local p_pipe = w1:find'|' or 2
		link_border = p1 + p_pipe - 1
		insert_kanji_pos(w1:sub(p_pipe + 1, -3))
		link_border = p2
	end
	insert_kanji_pos(head:sub(link_border + 1))
	head_nolink = concat(head_nolink)

	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; in both cases there is nothing to align against.
	local title = mw.title.new(pagename)
	local pagetext = title and title:getContent()
	if not pagetext then
		return head, kana
	end

	-- non_kanji[i] is the text between the (i-1)-th and the i-th kanji.
	local non_kanji = {}
	local last_kanji = 1
	-- The trailing "()" yields the character position just after each kanji.
	for p1 in ugmatch(head_nolink, '[々' .. range.kanji .. ']()') do
		insert(non_kanji, usub(head_nolink, last_kanji, p1 - 2))
		last_kanji = p1
	end
	insert(non_kanji, usub(head_nolink, last_kanji))

	-- NOTE(review): find_bracket is assumed to take a start offset as its third
	-- argument, hence the "()" position capture here - confirm against
	-- Module:template utilities.
	for kanjitab in pagetext:gmatch('(){{%s*' .. template_name) do
		kanjitab = select(3, m_tu.find_bracket(pagetext, m_tu.brackets_temp, kanjitab))
		if not kanjitab then
			error('ill-formed t:' .. template_name:gsub('%%', '') .. ' syntax')
		end
		kanjitab = m_tu.parse_temp(kanjitab)
		-- readings[i]: kana of the i-th reading (with its "o<i>" suffix appended).
		-- readings_len[i]: number of kanji covered by that reading (default 1).
		local readings = {}
		local readings_len = {}
		for i = 1, maxn(kanjitab.args) do
			local r_i = kanjitab.args[i] or ''
			local r_o = kanjitab.args['o' .. i] or ''
			if kanjitab.args['k' .. i] then
				-- "k<i>" overrides the kana; only the digits of parameter i are used.
				readings[i] = kanjitab.args['k' .. i] .. r_o
				readings_len[i] = tonumber(r_i:match'^%s*%D*(%d*)%s*$') or 1
			else
				local r_kana, r_len = r_i:match'^%s*(%D*)(%d*)%s*$'
				readings[i] = r_kana .. r_o
				readings_len[i] = tonumber(r_len) or 1
			end
		end

		-- Interleave non-kanji text and readings: odd entries are the non-kanji
		-- stretches, even entries are the readings.
		local kana_decom = {}
		local reading_id = 1
		local reading_len = 1
		for i = 1, #non_kanji - 1 do
			if reading_len <= 1 then
				reading_len = readings_len[reading_id] or 1

				insert(kana_decom, non_kanji[i])
				insert(kana_decom, readings[reading_id])

				reading_id = reading_id + 1
			else
				-- This kanji is still covered by the previous multi-kanji reading.
				reading_len = reading_len - 1
			end
		end
		insert(kana_decom, non_kanji[#non_kanji])
		local function strip_nonkana(str, repl)
			return ugsub(str, '[^' .. range.kana .. ']+', repl) or nil
		end
		-- Every non-kana stretch of the decomposition becomes a lazy capture, so
		-- a successful match yields the kana that corresponds to each stretch.
		local xeno_reading = {strip_nonkana(kana, ''):match('^' .. strip_nonkana(concat(kana_decom), '(.-)') .. '$')}
		if #xeno_reading > 0 then
			local head_decom = {}
			reading_id = 1
			reading_len = 1
			for i = 1, #non_kanji - 1 do
				if reading_len <= 1 then
					reading_len = readings_len[reading_id] or 1

					insert(head_decom, head:sub(kanji_pos[i - 1][2] + 1, kanji_pos[i][1] - 1))
					insert(head_decom, head:sub(kanji_pos[i][1], kanji_pos[i + reading_len - 1][2]))

					reading_id = reading_id + 1
				else
					reading_len = reading_len - 1
				end
			end
			insert(head_decom, head:sub(kanji_pos[#non_kanji - 1][2] + 1))
			if #head_decom ~= #kana_decom then
				error('number of parameters in t:' .. template_name:gsub('%%', '') .. ' is incorrect')
			end
			-- Replace each non-kana stretch in the odd entries with the kana
			-- captured for it; an empty capture leaves the stretch untouched.
			local n_xeno_reading = 0
			for i = 1, #kana_decom, 2 do
				kana_decom[i] = ugsub(kana_decom[i], '[^' .. range.kana .. ']+', function()
					n_xeno_reading = n_xeno_reading + 1
					if xeno_reading[n_xeno_reading] == '' then
						return nil
					else
						return xeno_reading[n_xeno_reading]
					end
				end)
			end
			return concat(head_decom, '%'), concat(kana_decom, '%')
		end
	end

	return head, kana
end

-- English spellings of the integers 1..15, indexed by the number itself.
local en_numerals = {
	"one",
	"two",
	"three",
	"four",
	"five",
	"six",
	"seven",
	"eight",
	"nine",
	"ten",
	"eleven",
	"twelve",
	"thirteen",
	"fourteen",
	"fifteen",
}

-- Names used in kanji-grade categories, indexed by the grade number that
-- m_ja.kanji_grade returns.
local en_grades = {
	"first grade",
	"second grade",
	"third grade",
	"fourth grade",
	"fifth grade",
	"sixth grade",
	"secondary school",
	"jinmeiyō",
	"hyōgai",
}

-- Long spellings of the transitivity and inflection parameters, mapped to the
-- short codes that the rest of the module compares against.
local aliases = {
	-- transitivity
	transitive = 'tr',
	trans = 'tr',
	intransitive = 'in',
	intrans = 'in',
	intr = 'in',
	-- inflection type
	godan = '1',
	ichidan = '2',
	irregular = 'irr',
}

-- Romanise `kana` and adjust the result for the entry's part of speech.
--
-- @param kana  kana string to romanise
-- @param data  headword data; reads data.headword.pos_category and data.lang_code
-- @param args  template arguments; reads args.infl
-- @return      the romanisation, or "-" for POS categories that get none
local function kana_to_romaji_with_pos_format(kana, data, args)
	local pos = data.headword.pos_category
	if pos == "combining forms" or pos == "punctuation marks" or pos == "iteration marks" or pos == "symbols" then
		return "-"
	end
	local rom = remove_links(kana_to_romaji(kana, data.lang_code))
	local infl = args['infl']
	-- make adjustments for -u verbs and -i adjectives
	if infl == '1' or infl == '1s' or infl == 'godan' then
		rom = rom:gsub('ō$', 'ou'):gsub('ū$', 'uu')
	elseif infl == 'i' or infl == 'is' or infl == 'い' then
		rom = rom:gsub('ī$', 'ii')
	end
	-- hyphens for prefixes, suffixes, and counters (classifiers)
	if pos == "prefixes" then
		-- ensure exactly one trailing hyphen
		rom = rom:gsub('%-?$', '-')
	elseif pos == "suffixes" or pos == "suffix forms" or pos == "counters" or pos == "classifiers" then
		-- ensure exactly one leading hyphen
		rom = rom:gsub('^%-?', '-')
	elseif pos == "proper nouns" then
		-- automatic caps for proper nouns, if not already specified
		if not kana:match'%^' then
			rom = ugsub(rom, '%f[^%s%c%p]%l', string.uupper)
			rom = ugsub(rom, "%w'%u", string.ulower) -- no caps after medial apostrophes
		end
	end
	return rom
end

-- Return an iterator over the "rare" characters of `text`: each character that
-- matches the kana / kana-graph / punctuation set below (together with any
-- combining voicing marks that follow it), skipping single characters that fall
-- in the common-kana ranges of the `until` pattern.
-- @param text  string to scan
-- @return      iterator function yielding one character per call, then nil
local function iterate_rare_chars(text)
	-- Built once; the trailing "()" capture returns the position just after
	-- the match, which becomes the start offset of the next search.
	local pattern = "([" .. range.kana .. range.kana_graph .. "!-/:-@%[\\-`○々〻〆〜〽・·゠＝～][゙゚]*)()"
	local ch, i
	return function()
		repeat
			ch, i = umatch(text, pattern, i)
		until not (ch and umatch(ch, "^[ぁ-ちっつて-ろんァ-チッツテ-ロンｦ-ﾟ]$"))
		return ch
	end
end

-- Record a historical kana spelling: appends a formatted link to data.info_hist
-- and adds "terms historically spelled with ..." categories for rare characters
-- and multi-character morae that do not also occur in the modern spelling.
--
-- @param data         headword data; reads headword.pos_category, headword.lang,
--                     lang_name; appends to headword.categories and info_hist
-- @param hist_kana    historical kana spelling, possibly with formatting marks
-- @param modern_kana  modern kana spelling to compare against (may be nil)
-- Raises an error when the entry is a syllable.
local function historical_kana(data, hist_kana, modern_kana)
	-- Disallow historical kana for syllables, as there's no one-to-one correspondence.
	if data.headword.pos_category == "syllables" then
		error("Cannot specify historical kana for syllables.")
	end
	local hist_kana_no_formatting = hist_kana:gsub("[%^%-%. %%]+", "")
	local rare_chars, lang_name, hc = {}, data.lang_name, data.headword.categories
	for ch in iterate_rare_chars(hist_kana_no_formatting) do
		-- Plain find: `ch` may be punctuation such as "[" or "(", which would
		-- otherwise be read as a (malformed) pattern and raise an error.
		if not (modern_kana and modern_kana:find(ch, 1, true)) then
			rare_chars[ch] = true
		end
	end
	for _, mora in ipairs(moraify((ugsub(hist_kana_no_formatting, "[^" .. range.kana .. "]+", " ")))) do
		-- Skip morae that are at most one character once spaces are removed.
		if not (mora:gsub(" +", ""):match("^.?[\128-\191]*$") or (modern_kana and modern_kana:find(mora, 1, true))) then
			rare_chars[mora] = true
		end
	end
	for ch in pairs(rare_chars) do
		insert(hc, lang_name .. " terms historically spelled with " .. ch)
	end
	insert(data.info_hist, require("Module:ja-link").link({
		lang = data.headword.lang,
		lemma = hist_kana,
	}, {
		hist = true,
		face = 'head',
		disableSelfLink = true,
	}))
end

-- Classify data.pagename by the kana scripts it is written in.
--
-- @param data      table with a `pagename` field
-- @param digraphs  when truthy, the digraphs ゟ and ヿ also count as
--                  hiragana and katakana respectively
-- @return 'hira' if only hiragana is left after ignoring punctuation, spaces
--         and control characters; 'kata' for katakana; 'both' for a mixture of
--         kana; nil otherwise
local function detect_pagename_kana(data, digraphs)
	local pagename = data.pagename
	-- Exclude "&" and "@", which are part of %p (e.g. リズム&ブルース).
	local function remove_kana(m)
		return m:match("[&@]") or ""
	end
	-- Each test deletes the allowed characters; an empty remainder means the
	-- title consists solely of that script.
	if ugsub(pagename, '[%p%s%c' .. range.hiragana .. (digraphs and "ゟ" or "") .. ']', remove_kana) == "" then
		return 'hira'
	elseif ugsub(pagename, '[%p%s%c' .. range.katakana .. (digraphs and "ヿ" or "") .. ']', remove_kana) == "" then
		return 'kata'
	elseif ugsub(pagename, '[%p%s%c' .. range.kana .. (digraphs and "ゟヿ" or "") .. ']', remove_kana) == "" then
		return 'both'
	end
end

-- go through args and build inflections by finding whatever kanas were given to us
--
-- Fills data.headword.heads, data.kanas and data.inflection_base, and appends
-- categories to data.headword.categories.
-- @param args  processed template arguments; reads args[1] (kana list), args.head,
--              args.rom, args.label and args.hist. args[1] may be modified.
-- @param data  headword data table, modified in place
-- Raises "Kana form is required." when a non-kana title has no kana argument
-- (except for punctuation marks, iteration marks and symbols).
local function format_headword(args, data)
	local pagename = data.pagename
	data.pagename_kana = detect_pagename_kana(data)
	if args[1][1] and not args[1][1]:match'[\128-\255]' then
		-- filter out POS designations
		remove(args[1], 1)
	end

	local suru_ending = data.headword.pos_category == "suru verbs" and 'する' or ''
	local rom_suru_ending = data.headword.pos_category == "suru verbs" and ' suru' or ''
	if data.pagename_kana then
		-- pure-kana-title entry
		if #args.head > 0 or args.head.default then
			insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
		end
		-- {{ja-xxx}} vs {{ja-xxx|こ.うし}} vs {{ja-xxx|コウシ}} in こうし
		if not args[1][1] then
			args[1][1] = pagename
		elseif remove_links(args[1][1]:gsub("[%^%-%. %%]+", "")) ~= pagename then
			insert(args[1], 1, pagename)
		end
		for i, k in ipairs(args[1]) do
			insert(data.headword.heads, {
				term = k:gsub("[%^%-%. %%]+", "") .. suru_ending,
				tr = '-',
				q = args.label[i],
			})
		end
		for i = 1, math.max(args.rom.maxindex, 1) do
			local rom = args.rom[i] or args.rom.default or kana_to_romaji_with_pos_format(args[1][1], data, args)
			if not data.headword.heads[i] then
				-- More romanisations than heads: repeat the previous head's term.
				data.headword.heads[i] = {term = data.headword.heads[i-1].term}
			end
			if rom == "-" then
				data.headword.heads[i].tr = "-"
			elseif data.headword.lang:link_tr(Jpan) then
				-- NOTE(review): the empty-string operands make this branch equal to
				-- the `else` branch; they look like remnants of link brackets lost
				-- in extraction - confirm against the upstream module.
				data.headword.heads[i].tr = "" .. rom .. "" .. (rom_suru_ending == "" and "" or "" .. rom_suru_ending .. "")
			else
				data.headword.heads[i].tr = rom .. rom_suru_ending
			end
		end
		data.kanas[1] = pagename
		data.inflection_base.form = remove_links(args[1][1]:gsub("[%^%-%. %%]+", "")) .. suru_ending
		data.inflection_base.romaji = data.headword.heads[1].tr .. rom_suru_ending
		if args.hist[1] then
			historical_kana(data, args.hist[1], args[1][1])
		end
	else
		-- non-pure-kana-title entry
		if #args[1] == 0 and not (data.headword.pos_category == "punctuation marks" or data.headword.pos_category == "iteration marks" or data.headword.pos_category == "symbols") then
			error("Kana form is required.")
		end
		if args.head.default == pagename then
			insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
		end
		-- Romanisations already shown; a repeated one is suppressed with "-".
		local rom_repetition_final = {}
		for i, k in ipairs(args[1]) do
			local rom_auto = kana_to_romaji_with_pos_format(k, data, args)
			local head = args.head[i] or args.head.default or pagename
			if args.head[i] == pagename then
				insert(data.headword.categories, data.lang_name .. " terms with redundant head parameter")
			end
			local head_for_ruby, kana_for_ruby
			-- Only attempt automatic alignment when neither side already carries
			-- manual '%' separators.
			if ulen(head) > 1 and head:match'%%' == nil and k:match'%%' == nil then
				head_for_ruby, kana_for_ruby = assign_kana_to_kanji(head, k, pagename, data.lang_code .. '%-kanjitab')
			else
				head_for_ruby, kana_for_ruby = head, k
			end
			local format_table = m_ja_ruby.parse_text(head_for_ruby, kana_for_ruby, {
				try = 'force',
				try_force_limit = 10000
			})
			local kana_bare = remove_links(k:gsub("[%^%-%. %%]+", ""))
			local rom = args.rom[i] or args.rom.default or rom_auto
			head = {
				-- NOTE(review): '(..-)' -> "%1" replaces text with itself; the
				-- pattern looks truncated by extraction - confirm upstream.
				term = m_ja_ruby.to_wiki(format_table, {
					break_link = true,
				}):gsub('(..-)', "%1") .. suru_ending,
				q = args.label[i],
			}
			if rom == "-" or rom_repetition_final[rom] then
				head.tr = "-"
			elseif data.headword.lang:link_tr(Jpan) then
				-- NOTE(review): same suspected loss of link brackets as above.
				head.tr = "" .. rom .. "" .. (rom_suru_ending == "" and "" or "" .. rom_suru_ending .. "")
			else
				head.tr = rom .. rom_suru_ending
			end
			insert(data.headword.heads, head)
			rom_repetition_final[rom] = true
			insert(data.kanas, kana_bare)
			if args.hist[i] then
				historical_kana(data, args.hist[i], k)
			end
			if not data.inflection_base.form then
				-- The first reading is the base for inflected forms.
				data.inflection_base.form = remove_links(m_ja_ruby.to_markup(format_table)) .. suru_ending
				data.inflection_base.romaji = rom .. rom_suru_ending
			end
		end
		if #data.kanas > 1 then
			insert(data.headword.categories, data.lang_name .. " terms with multiple readings")
		else
			-- Only one reading here: look through the other headword templates in
			-- this language's section of the page for a different reading.
			local lang_code = data.lang_code
			local lang_name = data.lang_name
			-- getContent returns nil when the page does not exist yet.
			local content = mw.title.getCurrentTitle():getContent()
			local loc1, loc2
			if content then
				loc1, loc2 = content:find("%f[^%z%s]==%s*" .. lang_name:gsub("%-", "%%%-") .. "%s*==")
			end
			if loc1 then
				-- End of the section = start of the next level-2 header (or the
				-- end of the page when there is none).
				loc2 = content:find("%f[^%z%s]==[^\n=]+==", loc2)
				content = content:sub(loc1, loc2)
				local reading, first_reading
				for template, targs in require("Module:template parser").findTemplates(content) do
					if (
						template == lang_code .. "-head" or
						template == lang_code .. "-pos"
					) and targs[2] then
						reading = remove_links(targs[2]):gsub("[ %-%.^%%]", "")
					elseif (
						template == lang_code .. "-noun" or
						template == lang_code .. "-verb" or
						template == lang_code .. "-adj" or
						template == lang_code .. "-phrase" or
						template == lang_code .. "-verb form" or
						template == lang_code .. "-verb-suru"
					) and targs[1] then
						reading = remove_links(targs[1]):gsub("[ %-%.^%%]", "")
					elseif template == lang_code .. "-see" and targs[1] then
						--	if ufind(targs[1], "[^" .. range.kana .. "]") then
						-- TODO: check linked page
						--	else
						reading = remove_links(targs[1]):gsub("[ %-%.^%%]", "")
						--	end
					end
					reading = reading and kana_to_romaji(reading, lang_code)
					first_reading = first_reading or reading
					if reading ~= first_reading then
						insert(data.headword.categories, lang_name .. " terms with multiple readings")
						break
					end
				end
			end
		end
	end
end

-- Add the transitivity label and verb categories for a headword.
-- @param data  headword data; appends to data.info_mid and data.headword.categories
-- @param tr    "tr", "in", "both" or an alias of these; any other value (including
--              nil) files the entry under "verbs without transitivity"
local function add_transitivity(data, tr)
	local kind = aliases[tr] or tr

	if kind ~= "tr" and kind ~= "in" and kind ~= "both" then
		insert(data.headword.categories, data.lang_name .. " verbs without transitivity")
		return
	end

	local label
	if kind == "both" then
		label = 'transitive or intransitive'
	elseif kind == "tr" then
		label = 'transitive'
	else
		label = 'intransitive'
	end
	insert(data.info_mid, label)

	-- "both" receives the two categories, transitive first.
	if kind ~= "in" then
		insert(data.headword.categories, data.lang_name .. " transitive verbs")
	end
	if kind ~= "tr" then
		insert(data.headword.categories, data.lang_name .. " intransitive verbs")
	end
end

-- Add the inflection-class label, its categories and the principal inflected
-- forms of a headword.
--
-- @param data             headword data; reads data.inflection_base.form/.romaji,
--                         data.lang_code and data.lang_name; appends to data.info_mid,
--                         data.headword.categories and data.headword.inflections
-- @param inflection_type  inflection class code or alias (e.g. '1', '2', 'suru', 'i', 'na')
-- @param cat_suffix       category suffix such as 'verbs'; no categories when nil
local function add_inflections(data, inflection_type, cat_suffix)
	local lemma = data.inflection_base.form
	local romaji = data.inflection_base.romaji
	inflection_type = aliases[inflection_type] or inflection_type

	-- Build inflected forms by swapping the kana ending `lemma_from` of the lemma
	-- for each entry of `lemma_to`, and the romaji ending `romaji_from` for the
	-- matching entry of `romaji_to`. Returns a list of {lemma=, romaji=} tables,
	-- or nil as soon as either substitution fails to match.
	local function replace_suffix(lemma_from, lemma_to, romaji_from, romaji_to)
		-- e.g. 持って来る, lemma = "[持](も)って来(く)る"
		-- lemma_from = "くる", lemma_to = {"き","きた"}
		local result = {}
		-- Each kana of the ending may be hiragana or katakana and may be followed
		-- by non-kana markup, which is captured so it can be put back.
		local pattern_from, n_from = lemma_from:gsub('.[\128-\191]*', function(c)
			return '[' .. c .. m_ja.hira_to_kata(c) .. ']([^' .. range.kana .. ']*)'
		end)
		pattern_from = pattern_from .. '$'
		-- "[くク]([^kana range]*)[るル]([^kana range]*)$"
		for i_lemma_to, s_lemma_to in ipairs(lemma_to) do
			local n_to = 0
			local pattern_to = s_lemma_to:gsub('.[\128-\191]*', function(c)
				if n_to < n_from then
					n_to = n_to + 1
					return c .. '%' .. n_to
				else
					return c
				end
			end)
			-- Re-emit any captures not yet consumed by the new ending.
			for i = n_to + 1, n_from do
				pattern_to = pattern_to .. '%' .. i
			end
			-- "き%1%2", "き%1た%2"
			local lemma_inflected, success = ugsub(lemma, pattern_from, pattern_to)
			if success == 0 then
				return nil
			end
			local romaji_inflected
			-- The optional "]]" keeps a trailing link closer after the new ending.
			romaji_inflected, success = romaji:gsub(romaji_from .. '(%]?%]?)$', romaji_to[i_lemma_to] .. "%1")
			if success == 0 then
				return nil
			end
			insert(result, { lemma = lemma_inflected, romaji = romaji_inflected })
		end
		-- {{lemma="[持](も)って来(き)",romaji="motteki"},{lemma="[持](も)って来(き)た",romaji="mottekita"}}
		return result
	end

	-- Append one labelled group of forms to data.headword.inflections.
	local function insert_form(label, ...)
		-- label = "stem" or "past" etc.
		-- ... = {lemma=...,romaji=...},{lemma=...,romaji=...}
		local labeled_forms = {label = label}
		for _, v in ipairs{...} do
			local table_form = m_ja_ruby.parse_markup(v.lemma)
			local form_term = m_ja_ruby.to_wiki(table_form)
			if not form_term:find'%[%[.+%]%]' then
				-- No link in the term yet: link the whole form.
				form_term = '[[' .. form_term .. ']]'
			end
			insert(labeled_forms, {
				term = form_term,
				translit = v.romaji,
			})
		end
		insert(data.headword.inflections, labeled_forms)
	end

	-- Record that no suffix rule matched the lemma.
	local function track_failure()
		require'Module:debug'.track'Jpan-headword/inflection failed/ja'
	end

	local inflected_forms
	if data.lang_code == 'ja' then
		if inflection_type == '1' or inflection_type == '1s' then
			insert(data.info_mid, 'godan ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " type 1 " .. cat_suffix)
				if cat_suffix == 'verbs' and ufind(data.inflection_base.romaji, '[ieIEīēĪĒ]ru$') then
					insert(data.headword.categories, data.lang_name .. " type 1 verbs that end in -iru or -eru")
				end
			end
			if inflection_type == '1' then
				inflected_forms = replace_suffix('く', {'き', 'いた'}, 'ku', {'ki', 'ita'}) or
					replace_suffix('ぐ', {'ぎ', 'いだ'}, 'gu', {'gi', 'ida'}) or
					replace_suffix('す', {'し', 'した'}, 'su', {'shi', 'shita'}) or
					replace_suffix('つ', {'ち', 'った'}, 'tsu', {'chi', 'tta'}) or
					replace_suffix('ぬ', {'に', 'んだ'}, 'nu', {'ni', 'nda'}) or
					replace_suffix('ぶ', {'び', 'んだ'}, 'bu', {'bi', 'nda'}) or
					replace_suffix('む', {'み', 'んだ'}, 'mu', {'mi', 'nda'}) or
					replace_suffix('る', {'り', 'った'}, 'ru', {'ri', 'tta'}) or
					replace_suffix('う', {'い', 'った'}, 'u', {'i', 'tta'})
				if inflected_forms then
					insert_form('stem', inflected_forms[1])
					insert_form('past', inflected_forms[2])
				else
					track_failure()
				end
			else
				inflected_forms = replace_suffix('る', {'り', 'った', 'い'}, 'ru', {'ri', 'tta', 'i'}) or --くださる
					replace_suffix('いく', {'いき', 'いった'}, 'iku', {'iki', 'itta'}) or --行く
					replace_suffix('う', {'い', 'うた'}, 'ou', {'oi', 'ōta'}) --問う
				if inflected_forms then
					insert_form('stem', inflected_forms[1], inflected_forms[3])
					insert_form('past', inflected_forms[2])
				else
					track_failure()
				end
			end
		elseif inflection_type == '2' then
			insert(data.info_mid, 'ichidan ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " type 2 " .. cat_suffix)
			end
			-- The stem is the lemma with its final る removed, hence the empty ending.
			inflected_forms = replace_suffix('る', {'', 'た'}, 'ru', {'', 'ta'})
			if inflected_forms then
				insert_form('stem', inflected_forms[1])
				insert_form('past', inflected_forms[2])
			else
				track_failure()
			end
		elseif inflection_type == 'suru' then
			insert(data.info_mid, 'suru ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " suru " .. cat_suffix)
			end
			inflected_forms = replace_suffix('する', {'し', 'した'}, 'suru', {'shi', 'shita'}) or
				replace_suffix('ずる', {'じ', 'じた'}, 'zuru', {'ji', 'jita'})
			if inflected_forms then
				insert_form('stem', inflected_forms[1])
				insert_form('past', inflected_forms[2])
			else
				track_failure()
			end
		elseif inflection_type == 'kuru' then
			insert(data.info_mid, 'kuru ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " kuru " .. cat_suffix)
			end
			inflected_forms = replace_suffix('くる', {'き', 'きた'}, 'kuru', {'ki', 'kita'})
			if inflected_forms then
				insert_form('stem', inflected_forms[1])
				insert_form('past', inflected_forms[2])
			else
				track_failure()
			end
		elseif inflection_type == 'i' or inflection_type == 'い' then
			insert(data.info_mid, '-i ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " い-i " .. cat_suffix)
			end
			inflected_forms = replace_suffix('い', {'く'}, 'i', {'ku'})
			if inflected_forms then
				insert_form('adverbial', inflected_forms[1])
			else
				track_failure()
			end
		elseif inflection_type == 'is' then
			insert(data.info_mid, '-i ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " い-i " .. cat_suffix)
			end
			inflected_forms = replace_suffix('いい', {'よく'}, 'ii', {'yoku'})
			if inflected_forms then
				insert_form('adverbial', inflected_forms[1])
			else
				track_failure()
			end
		elseif inflection_type == 'na' or inflection_type == 'な' then
			insert(data.info_mid, '-na ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " な-na " .. cat_suffix)
			end
			-- Empty endings: the particles are appended to the unchanged lemma.
			inflected_forms = replace_suffix('', {'な', 'に'}, '', {' na', ' ni'})
			insert_form('adnominal', inflected_forms[1])
			insert_form('adverbial', inflected_forms[2])

		elseif inflection_type == "yo" then
			insert(data.info_mid, '{{sup| † }}yodan ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " yodan " .. cat_suffix)
			end
		elseif inflection_type == "kami ni" then
			insert(data.info_mid, '{{sup| † }}nidan ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " kami nidan " .. cat_suffix)
			end
		elseif inflection_type == "shimo ni" then
			insert(data.info_mid, '{{sup| † }}nidan ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " shimo nidan " .. cat_suffix)
			end
		elseif inflection_type == "rahen" then
			insert(data.info_mid, '{{sup| † }}-ri ')
		elseif inflection_type == "sahen" then
			insert(data.info_mid, '{{sup| † }}-se ')
		elseif inflection_type == "kahen" then
			insert(data.info_mid, '<abbr title="k-special conjugation (classical)">{{sup| † }}-ko ')
		elseif inflection_type == "nahen" then
			insert(data.info_mid, '<abbr title="n-special conjugation (classical)">{{sup| † }}-n ')
		elseif inflection_type == "nari" or inflection_type == "なり" then
			insert(data.info_mid, '<abbr title="-nari inflection (classical)">{{sup| † }}-nari ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " なり-nari " .. cat_suffix)
			end
		elseif inflection_type == 'tari' or inflection_type == 'たり' then
			insert(data.info_mid, '<abbr title="-tari inflection (classical)">{{sup| † }}-tari ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " たり-tari " .. cat_suffix)
			end
			inflected_forms = replace_suffix('', {'とした', 'たる', 'と', 'として'}, '', {' to shita', ' taru', ' to', ' to shite'})
			insert_form('adnominal', inflected_forms[1], inflected_forms[2])
			insert_form('adverbial', inflected_forms[3], inflected_forms[4])
		elseif inflection_type == "ku" or inflection_type == "く" then
			insert(data.info_mid, '<abbr title="-ku inflection (classical)">{{sup| † }}-ku ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " く-ku " .. cat_suffix)
			end
		elseif inflection_type == "shiku" or inflection_type == "しく" then
			insert(data.info_mid, '<abbr title="-shiku inflection (classical)">{{sup| † }}-shiku ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " しく-shiku " .. cat_suffix)
			end
		elseif inflection_type == "ka" or inflection_type == "か" then
			insert(data.info_mid, '<abbr title="-ka inflection (dialectal)">{{sup| † }}-ka ')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " か-ka " .. cat_suffix)
			end

		elseif inflection_type == 'irr' then
			insert(data.info_mid, 'irregular')
			if cat_suffix then
				insert(data.headword.categories, data.lang_name .. " irregular " .. cat_suffix)
			end
		elseif inflection_type == '-' or inflection_type == 'un' then
			insert(data.info_mid, 'uninflectable')
		end
	--elseif data.lang_code == 'ryu' then ...
	end
end

-- Add categories that depend only on how the page title is written: kanji
-- grades, number of kanji, kana script, mixed scripts and rare characters.
-- @param data  reads data.lang_name, data.pagename and data.headword.pos_category;
--              appends to data.headword.categories and data.katakana_category
local function add_categories(data)
	local lang_name = data.lang_name
	local pagename = data.pagename
	local tc = data.headword.categories
	-- adds category [langname] terms spelled with jōyō kanji or [langname] terms spelled with non-jōyō kanji
	-- (if it contains any kanji)
	local number_of_kanji = 0
	for c in ugmatch(pagename, '[々' .. range.kanji .. ']') do
		number_of_kanji = number_of_kanji + 1
		if c ~= "々" then
			-- Not a kanji for the purposes of categorisation.
			insert(tc, (lang_name .. " terms spelled with %s kanji"):format(en_grades[m_ja.kanji_grade(c)]))
		end
	end

	-- categorize by number of kanji
	if number_of_kanji == 1 then
		insert(tc, lang_name .. " terms written with one Han script character")
		-- single-kanji terms
		if ulen(pagename) == 1 then
			insert(tc, lang_name .. " terms spelled with " .. pagename)
			insert(tc, lang_name .. " single-kanji terms")
		end
	elseif en_numerals[number_of_kanji] then
		insert(tc, (lang_name .. " terms written with %s Han script characters"):format(en_numerals[number_of_kanji]))
	end
	-- categorize by the script of the pagename or specific characters contained in it
	-- if pagename is hiragana or katakana
	local script = detect_pagename_kana(data, true)
	if script == 'hira' then
		insert(tc, lang_name .. " hiragana")
	end
	if script == 'kata' then
		-- Katakana goes to a separate list, not to the headword categories.
		insert(data.katakana_category, lang_name .. " katakana")
	end
	-- p: what is left of the title after removing Japanese-script characters;
	-- n: number of runs removed. Both non-empty means the scripts are mixed.
	local p, n = ugsub(pagename, '[' .. range.kana .. range.kanji .. range.ideograph .. range.kana_graph .. range.punctuation .. ']+', '')
	if p ~= '' and n > 0 then
		insert(tc, lang_name .. " terms written in multiple scripts")
	end
	local pos = data.headword.pos_category
	local rare_chars = {}
	for ch in iterate_rare_chars(pagename) do
		rare_chars[ch] = true
	end
	-- Categorise yōon, but exclude syllable entries since syllables aren't spelled with themselves.
	if pos ~= "syllables" then
		for _, mora in ipairs(moraify((ugsub(pagename, "[^" .. range.kana .. "]+", " ")))) do
			-- Keep only morae longer than one character once spaces are removed.
			if not mora:gsub(" +", ""):match("^.?[\128-\191]*$") then
				rare_chars[mora] = true
			end
		end
	end
	for ch in pairs(rare_chars) do
		insert(tc, lang_name .. " terms spelled with " .. ch)
	end
	if (
		pos ~= "proverbs" and
		pos ~= "phrases" and
		ufind(ugsub(pagename, "[" .. range.katakana .. "]+", ""), "[" .. range.hiragana .. "]") and
		ufind(ugsub(pagename, "[" .. range.hiragana .. "]+", ""), "[" .. range.katakana .. "]")
	) then
		insert(tc, lang_name .. " terms spelled with mixed kana")
	end
end

-- Verbs: transitivity label first, then inflection class and forms.
pos_functions["verbs"] = function(args, data)
	local transitivity, inflection = args.tr, args.infl
	add_transitivity(data, transitivity)
	add_inflections(data, inflection, 'verbs')
end

-- Suffixes: inflection label and forms only; no category suffix is passed, so
-- add_inflections adds no inflection categories.
pos_functions["suffixes"] = function(args, data)
	local inflection = args.infl
	add_inflections(data, inflection)
end

-- Auxiliary verbs: categorised explicitly as auxiliary verbs, inflected without
-- a category suffix, then reported to the headword module as plain "verbs".
pos_functions["auxiliary verbs"] = function(args, data)
	local headword = data.headword
	insert(headword.categories, data.lang_name .. " auxiliary verbs")
	add_inflections(data, args.infl)
	headword.pos_category = "verbs"
end

-- Suru verbs: always inflected as 'suru' regardless of args.infl, then reported
-- to the headword module as plain "verbs".
pos_functions["suru verbs"] = function(args, data)
	local transitivity = args.tr
	add_transitivity(data, transitivity)
	add_inflections(data, 'suru', 'verbs')
	data.headword.pos_category = "verbs"
end

-- Adjectives: inflection class, categories and forms.
pos_functions["adjectives"] = function(args, data)
	local inflection = args.infl
	add_inflections(data, inflection, 'adjectives')
end

-- Nouns: show the counter given in args.count, or "uncountable" when it is "-".
-- Nothing is added when the parameter is absent or empty.
pos_functions["nouns"] = function(args, data)
	-- the counter (classifier) parameter, only relevant for nouns
	local counter = args["count"] or ""

	if counter == "-" then
		insert(data.headword.inflections, {label = "uncountable"})
	elseif counter ~= "" then
		insert(data.headword.inflections, {label = "counter", counter})
	end
end

--[==[
Generate categories by pagename, also optionally by POS.
Also for use in soft redirect pages (Module:ja-see).
Sortkey is not provided.

data = {
	pagename = ..., -- (required)
	lang = ..., -- (required) language object
	categories = {}, -- (required) receive categories
	katakana_category = {}, -- (required) receive katakana-sorted categories
	pos = ..., -- "noun", "verb", etc.; no POS categories if not given
}

The function also sets data.lang_name, data.pagename_kana and data.headword.
]==]
function export.cat(data)
	data.lang_name = data.lang:getCanonicalName()
	data.pagename_kana = detect_pagename_kana(data)
	if data.pos then
		-- Pluralise the POS name ("noun" -> "nouns", "...x" -> "...xes").
		local pos = data.pos:gsub('x$', 'xe') .. 's'
		insert(data.categories, data.lang_name .. ' ' .. pos)
		insert(data.categories, data.lang_name .. ' ' .. require'Module:headword'.pos_lemma_or_nonlemma(pos, true) .. 's')
	end
	-- add_categories writes to data.headword.categories, so point it at the
	-- caller's category list.
	data.headword = {
		categories = data.categories
	}
	add_categories(data)
end

--[==[
The main entry point.
This is the only function that can be invoked from a template.

frame.args[1] is the language code; the part of speech is frame.args[2],
falling back to frame.args[1]. Template parameters are read from the parent
frame. Returns the formatted headword line as a string.
]==]
function export.show(frame)
	local poscat = frame.args[2] or frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local params = {
		[1] = {list = true},
		['rom'] = {list = true, allow_holes = true, separate_no_index = true},
		['head'] = {list = true, allow_holes = true, separate_no_index = true},
		['label'] = {list = true, allow_holes = true},
		['hist'] = {list = true},
		['hhira'] = {alias_of = 'hist'},
		['hkata'] = {alias_of = 'hist'},
		['tr'] = {},
		['infl'] = {},
		['type'] = {alias_of = 'infl'},
		['decl'] = {alias_of = 'infl'},
		['count'] = {},
		['sort'] = {},
		['pagename'] = {},
	}
	-- For backwards compatibility with uses of {{ja-syllable}} with the script parameter.
	if poscat == "syllables" then
		params["sc"] = {}
	end
	local args = require('Module:parameters').process(frame:getParent().args, params)

	local data = {
		headword = {
			pos_category = poscat,
			categories = {},
			heads = {},
			no_redundant_head_cat = true,
			inflections = {},
			genders = {'m'}, -- placeholder
			nogendercat = true
		},
		--custom info
		pagename = args.pagename or mw.loadData("Module:headword/data").pagename,
		pagename_kana = nil, -- "hira" "kata" "both", nil
		lang_code = frame.args[1],
		lang_name = nil, -- "Japanese", "Okinawan" ...
		katakana_category = {},
		info_mid = {}, -- "godan", "intransitive" ...
		info_hist = {}, -- historical kana
		inflection_base = {}, -- base of inflections
		kanas = {}, -- kana id
	}
	data.headword.lang = require("Module:languages").getByCode(data.lang_code)
	data.lang_name = data.headword.lang:getCanonicalName()
	-- sort out all the kanas and do the romanization business
	format_headword(args, data)

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, data)
	end
	-- categories
	add_categories(data)
	local sort_base = args.sort or data.kanas[1] or data.pagename
	data.headword.sort_key = data.headword.lang:makeSortKey(sort_base)
	local katakana_category = #data.katakana_category > 0 and require("Module:utilities").format_categories(
			data.katakana_category,
			data.headword.lang,
			nil,
			sort_base,
			nil,
			require("Module:scripts").getByCode("Kana")
		) or ""
	-- output
	local i_kanas = 0
	-- NOTE(review): the string literals in the gsub chain below look damaged by
	-- markup stripping (the ' .- ' pattern, the '{{sup|' text, and two operands
	-- that were missing entirely and are filled in here with ''). The code is
	-- syntactically valid again, but the literals must be confirmed against the
	-- upstream module before relying on the rendered output.
	return katakana_category .. require('Module:headword').full_headword(data.headword):gsub(' .- ', function()
		-- Historical-kana note (if any) followed by the mid-line labels.
		return (#data.info_hist > 0 and '{{sup|←' .. concat(data.info_hist, ' or ') .. ' ?}} ' or '') .. ('' .. concat(data.info_mid, ' ') .. '')
	end):gsub('<strong .->.- ', function(m0)
		-- Wrap the n-th headword in an anchor named after the n-th kana reading.
		i_kanas = i_kanas + 1
		if data.kanas[i_kanas] then
			return '<span id="' .. data.lang_name .. ':_' .. data.kanas[i_kanas] .. '" class="senseid">' .. m0 .. ' '
		end
	end):gsub('  ', ' '):gsub('  ', ' ')
end

return export