-- Module:kanjitab

local export = {}

local m_str_utils = require("Module:string utilities")
local m_utilities = require("Module:utilities")
local m_ja = require("Module:ja")
local show_labels = require("Module:labels").show_labels
--[=[
	Other modules used:
		Module:parameters
]=]

-- Local aliases for library functions used throughout this module.
local concat = table.concat
local convert_iteration_marks = require("Module:Hani").convert_iteration_marks
local find = string.find
local gsplit = m_str_utils.gsplit
local gsub = string.gsub
local kata_to_hira = m_ja.kata_to_hira
local insert = table.insert
local match = string.match
local remove = table.remove
local split = m_str_utils.split
local sub = string.sub
local ugsub = mw.ustring.gsub
local ulen = m_str_utils.len
local umatch = mw.ustring.match
local usub = m_str_utils.sub

-- Page name as provided by Module:headword/data.
local PAGENAME = mw.loadData("Module:headword/data").pagename
-- Namespace text of the current title; export.show compares it with "" when
-- deciding whether to track use of the pagename parameter.
-- getCurrentTitle is a function and has to be called: indexing the function
-- value itself raises "attempt to index a function value".
local NAMESPACE = mw.title.getCurrentTitle().nsText

-- Read-only data tables.
local d_range = mw.loadData("Module:ja/data/range")
local yomi_data = mw.loadData("Module:kanjitab/data")

-- Display text for each kanji grade. export.show indexes this table with the
-- value returned by m_ja.kanji_grade, so the position of each entry matters.
-- Each entry sits on its own line so that the trailing "-- n" index comments
-- cannot swallow later entries or the closing brace.
local kanji_grade_links = {
	"Grade: 1",
	"Grade: 2",
	"Grade: 3",
	"Grade: 4",
	"Grade: 5",
	"Grade: 6",
	"Grade: S",		-- 7
	"Jinmeiyō",	-- 8
	"Hyōgai"		-- 9
}

-- Records a tracking key (or a list of keys) through Module:debug/track.
-- The module is required lazily, only when something is actually tracked.
local function track(key)
	require("Module:debug/track")(key)
end

--- Builds the kanji table, the alternative-form tables and the categories for
-- a term. This is the function that is called from templates.
--
-- Template parameters are read from the parent frame:
--   1= (list)   readings, each optionally followed by the number of kanji it covers
--   k= (list)   actual readings, parallel to 1=
--   o= (list)   okurigana, parallel to 1=
--   r=          if set, adds the rendaku category
--   sort=       explicit sort key
--   yomi= / y=  comma-separated yomi types, each optionally followed by a kanji count
--   ateji=      "y", or ";"-separated kanji indices / "from,to" index ranges
--   alt=, alt2= comma-separated alternative / variant forms
--   kyu= (list) kyūjitai forms; "-" as the first value suppresses them
--   clearright= boolean, adds "clear:right" to the table style
--   pagename=   overrides the page name
--
-- @param frame  Scribunto frame; frame.args[1] is the language code.
-- @return string of wikitext: kanji table, forms tables, formatted categories.
-- NOTE(review): several string literals below (e.g. `"" .. text .. " "`,
-- `"" .. display .. ""`, `"" .. form .. " "`) look as if inline markup was
-- stripped from them before this source was captured. They are reproduced
-- exactly as found; confirm against the deployed module.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { list = true, allow_holes = true },
		k = { list = true, allow_holes = true },
		o = { list = true, allow_holes = true },
		r = {},
		sort = {},
		yomi = {},
		ateji = {},
		alt = {},
		alt2 = {},
		kyu = { list = true },
		y = {alias_of = "yomi"},
		clearright = {type = "boolean"},
		pagename = {},
	})
	local lang_code = frame.args[1]
	local lang = require("Module:languages").getByCode(lang_code)
	local lang_name = lang:getCanonicalName()

	if args.pagename and NAMESPACE == "" then
		track("kanjitab/pagename param in mainspace")
	end
	local pagename = args.pagename or PAGENAME

	local categories = {}
	local cells = {}

	-- extract kanji and non-kanji
	local kanji = {}
	local non_kanji = {}
	-- 々 and 〻
	pagename = convert_iteration_marks(pagename)
	local kanji_border = 1
	-- The two empty "()" captures yield the character positions before and
	-- after each kanji; the callback uses them to slice out the non-kanji text
	-- in between. non_kanji[i] is the text before kanji[i], and the final
	-- entry is the text after the last kanji.
	ugsub(pagename, "()([" .. d_range.kanji .. "々〻])()", function(p1, w1, p2)
		insert(non_kanji, usub(pagename, kanji_border, p1 - 1))
		kanji_border = p2
		insert(kanji, w1)
	end)
	insert(non_kanji, usub(pagename, kanji_border))

	-- kyujitai
	local kyu = args.kyu
	if kyu[1] == "-" then
		kyu = {}
	elseif kyu[1] == nil then
		-- No form given: try to derive one from the kyūjitai data string.
		local form_kyu = {non_kanji[1]}
		local kyu_data = mw.loadData("Module:ja/data/kyu")
		local has_kyu, has_kyu_nonsupple, has_shin = false, false, false
		for i, v in ipairs(kanji) do
			-- Capture the run of non-space characters that follows this kanji
			-- in the data string.
			local v_kyu = match(kyu_data[1], v .. "(%S*)%s")
			if v_kyu == nil then
				insert(form_kyu, v)
			elseif v_kyu == "" then
				has_shin = true
				break
			elseif v_kyu:sub(1, 1) == "&" then
				has_kyu = true
				insert(form_kyu, v_kyu)
			else
				has_kyu, has_kyu_nonsupple = true, true
				insert(form_kyu, v_kyu)
			end
			insert(form_kyu, non_kanji[i + 1])
		end

		if not has_shin and has_kyu then
			-- When every replacement starts with "&", the entry is written as
			-- "pagename|form" so that it links to the current page name.
			kyu[1] = (has_kyu_nonsupple and "" or pagename .. "|") .. concat(form_kyu)
		end

		if find(pagename, "弁") then
			track("kanjitab/ambiguous kyujitai for 弁")
			-- Sentinel value, replaced by a request for the kyu parameter below.
			kyu[1] = "which 弁?"
		end
	end

	-- yomi: all_yomi[j] is a wrapper table {data = <yomi data>} for kanji j.
	local all_yomi, missing_yomi
	if args.yomi then
		all_yomi = {}
		local keys = split(args.yomi, ",")
		for i, key in ipairs(keys) do
			local yomi_key, len = match(key, "^(%l*)(%d*)$")
			local yomi = yomi_key and yomi_data[yomi_key]
			if not yomi then
				-- Fall back to the raw key when it does not even match the
				-- pattern, so that building the message cannot fail on nil.
				error("The yomi type \"" .. (yomi_key or key) .. "\" in the input \"" .. args.yomi .. "\" is not recognized.")
			end
			if len ~= "" then
				-- Disallow length 0 or leading zeroes, as a sanity check.
				if not match(len, "^[1-9]%d*$") then
					error("Cannot specify a length of " .. len .. " kanji.")
				end
				len = tonumber(len)
			-- Only one yomi with no length given: apply to all kanji.
			elseif i == 1 and #keys == 1 then
				len = #kanji
			else
				len = 1
			end
			local yomi_type = yomi.type
			-- If the on'yomi is not specified as goon/kanon/toon/soon, only "on".
			if yomi_type == "on'yomi" then
				track("kanjitab/unspecified on")
			elseif yomi_type == "jūbakoyomi" then
				track("kanjitab/jubakoyomi")
			elseif yomi_type == "yutōyomi" then
				track("kanjitab/yutoyomi")
			end
			-- If the yomi requires a specific number of kanji (e.g. jūbakoyomi, yutōyomi).
			local req_kanji = yomi.required_kanji
			if req_kanji and #kanji ~= req_kanji then
				error("The yomi type \"" .. yomi_type .. "\" is only applicable to terms with " .. req_kanji .. " kanji.")
			elseif yomi_type == "none" then
				missing_yomi = true
			end
			-- Insert yomi data for each applicable kanji. Wrap in a table first,
			-- as the range for this input yomi is determined by its identity, so
			-- that (e.g.) "kun,kun" is still treated as two separate inputs.
			local wrapped = {data = yomi}
			for _ = 1, len do
				insert(all_yomi, wrapped)
			end
		end
		-- If there are any yomi slots left, handle them as empty.
		if #all_yomi < #kanji then
			missing_yomi = true
			for _ = #all_yomi + 1, #kanji do
				insert(all_yomi, {data = yomi_data.none})
			end
		end
	elseif #kanji > 0 then
		missing_yomi = true
	end
	if missing_yomi then
		insert(categories, lang_name .. " terms with missing yomi")
	end

	-- process readings
	-- readings[i] = {kana or nil, number of kanji covered}
	local readings = {}
	local readings_actual = {}
	local reading_length_total = 0
	for i = 1, args[1].maxindex do
		local reading_kana, reading_length = match(args[1][i] or "", "^(%D*)(%d*)$")
		reading_kana = reading_kana ~= "" and reading_kana or nil
		-- A reading without an explicit count covers one kanji.
		reading_length = reading_kana and tonumber(reading_length) or 1

		insert(readings, {reading_kana, reading_length})
		reading_length_total = reading_length_total + reading_length
	end
	if reading_length_total > #kanji then
		error("Readings for " .. reading_length_total .. " kanji are given, but this word has only " .. #kanji .. " kanji.")
	else
		-- Pad with empty readings so that every kanji has an entry.
		for _ = reading_length_total + 1, #kanji do
			insert(readings, {nil, 1})
		end
	end

	-- MediaWiki table markup: "{|", "!" and "|-" each have to start a line.
	-- NOTE(review): the line breaks and the "|-" row marker were damaged in
	-- the captured source and are restored here; confirm against the deployed
	-- module.
	local table_head = [=[
{| class="wikitable kanji-table floatright" style="text-align: center; ]=] .. (args.clearright and " clear:right;" or "") .. [=["
! ]=] .. (#kanji > 1 and "colspan=\"" .. #kanji .. "\" " or "") .. [=[style="font-weight: normal;" | Kanji in this term
|- lang="]=] .. lang_code .. [=[" class="Jpan" style="font-size: 2em; background: white; line-height: 1em;"
]=]

	if args.k.maxindex and args.k.maxindex > args[1].maxindex then
		error("kanjitab/too many k")
	end
	if args.o.maxindex and args.o.maxindex > args[1].maxindex then
		error("kanjitab/too many o")
	end

	-- ateji: is_ateji[j] is true when kanji j is marked as ateji.
	local is_ateji = {}
	if args.ateji then
		local ateji = args.ateji
		local cat_ateji = false
		if ateji == "y" then
			for i = 1, #kanji do
				is_ateji[i] = true
			end
			cat_ateji = true
		else
			for item in gsplit(ateji, ";") do
				-- gsub is used only for its callback: a single index ...
				gsub(item, "^(%d+)$", function(a)
					is_ateji[tonumber(a)] = true
					cat_ateji = true
				end)
				-- ... or an inclusive "from,to" range of indices.
				gsub(item, "^(%d+),(%d+)$", function(a, b)
					for j = tonumber(a), tonumber(b) do
						is_ateji[j] = true
					end
					cat_ateji = true
				end)
			end
		end
		if cat_ateji then
			insert(categories, lang_name .. " terms spelled with ateji")
		end
	end

	-- if hiragana readings were passed,
	-- make the "spelled with ..." categories, the readings cells on the lower level and build the sort key
	-- otherwise rely on the pagename to make the original kanjitab and categories
	local cells_above = {}
	local cells_below = {}
	local kanji_pos = 1
	for i, reading in ipairs(readings) do
		local reading_kana, reading_length = reading[1], reading[2]
		local cell = {}

		if reading_length <= 1 then
			insert(cell, "| rowspan=\"2\" | ")
		else
			insert(cell, "| colspan =\"" .. reading_length .. "\" | ")
		end

		-- display reading, actual reading and okurigana
		if reading_kana then
			if reading_kana ~= "" and reading_kana ~= "-" and umatch(reading_kana, "[^" .. d_range.kana .. "]") then
				error("Please remove any non-kana characters from the reading input " .. reading_kana .. ".")
			end

			local actual_reading = args.k[i]
			local okurigana = args.o[i]

			local okurigana_text = okurigana and "(" .. okurigana .. ")" or ""
			local actual_reading_text = actual_reading and " > " .. actual_reading .. okurigana_text or ""
			local text = reading_kana .. okurigana_text .. actual_reading_text

			readings_actual[i] = {(actual_reading or reading_kana) .. (okurigana or ""), reading_length}

			insert(cell, "" .. text .. " ")
			if reading_length <= 1 then
				insert(cell, " ")
			end
		else
			readings_actual[i] = {nil, 1}
		end

		-- display kanji grade, categorize
		for j = kanji_pos, kanji_pos + reading_length - 1 do
			local single_kanji = kanji[j]
			local kanji_grade = m_ja.kanji_grade(single_kanji)
			local ateji_text = is_ateji[j] and " (ateji) " or ""
			local yomi_type, compound
			if all_yomi then
				local yomi = all_yomi[j].data
				yomi_type, compound = yomi.type, yomi.compound_reading
			end
			if not reading_kana then
				if yomi_type ~= "irregular" then
					track("kanjitab/no reading")
				end
				insert(categories, lang_name .. " terms spelled with " .. single_kanji)
			elseif reading_length ~= 1 or yomi_type == "irregular" then
				insert(categories, lang_name .. " terms spelled with " .. single_kanji)
			elseif compound then
				-- Re-enable once all bad jukujikun calls are fixed.
				-- error("The yomi type \"" .. type .. "\" is only applicable to compound character readings, so cannot apply to " .. single_kanji .. " read as " .. reading_kana .. ". If this is intended as part of a " .. type .. " reading, please enter the whole reading as one, followed by the number of kanji it applies to.")
				track("kanjitab/single kanji with jukujikun")
			else
				-- Subcategorize by reading.
				insert(categories, lang_name .. " terms spelled with " .. single_kanji .. " read as " .. kata_to_hira(reading_kana))
			end

			if reading_length <= 1 then
				insert(cell, " " .. kanji_grade_links[kanji_grade] .. " " .. ateji_text)
			else
				-- Multi-kanji readings put the per-kanji grade in a second row.
				insert(cells_below, "| " .. kanji_grade_links[kanji_grade] .. " " .. ateji_text)
			end
		end
		insert(cells_above, concat(cell))
		kanji_pos = kanji_pos + reading_length
	end

	insert(cells, "|- style=\"background: white;\"")
	if #cells_below > 0 then
		insert(cells, concat(cells_above, "\n"))
		insert(cells, "|- style=\"background: white;\"")
		insert(cells, concat(cells_below, "\n"))
	else
		-- No second row is needed, so drop the rowspan from every cell.
		for i, v in ipairs(cells_above) do
			cells_above[i] = gsub(v, "| rowspan=\"2\" | ", "| ")
		end
		insert(cells, concat(cells_above, "\n"))
	end

	local rendaku = args.r
	if rendaku then
		insert(categories, lang_name .. " terms with rendaku")
	end

	if all_yomi then
		insert(cells, "|-")
		-- yomi_cat starts as nil, becomes a yomi type string while all ranges
		-- agree, and false once two ranges disagree.
		local len, all_on, yomi_cat = 1, true
		for i, yomi in ipairs(all_yomi) do
			-- If the next kanji has the same yomi table, it's part of the same range.
			if yomi == all_yomi[i + 1] then
				len = len + 1
			else
				yomi = yomi.data
				local yomi_type = yomi.type
				local display = yomi.display or yomi_type
				local appendix = yomi.appendix
				insert(cells, "| colspan=\"" .. len .. "\" |" .. (
					appendix == false and display or
					"" .. display .. ""
				))
				-- Categorise as irregular if any irregular yomi are found; otherwise, categorise if all yomi are of the same type. If yomi are of different types but are all on, on'yomi is used as a fallback.
				if yomi_cat ~= "irregular" then
					local cat_type = yomi_type
					if cat_type == "irregular" or yomi_cat == nil then
						yomi_cat = cat_type
					elseif yomi_cat ~= cat_type then
						yomi_cat = false
					end
					if not yomi.onyomi then
						all_on = false
					end
				end
				len = 1
			end
		end
		if yomi_cat then
			-- Check yomi_data first, in case cat_type is "irregular"; if no match, must be some other type, so get it from the first yomi in all_yomi, since not all yomi types are yomi_data keys.
			yomi_cat = yomi_data[yomi_cat] or all_yomi[1].data
		elseif all_on then
			yomi_cat = yomi_data.on
		elseif #all_yomi == 2 then
			local y1, y2 = all_yomi[1].data, all_yomi[2].data
			if ulen(pagename) == 2 then
				if y1.onyomi and y2.type == "kun'yomi" then
					yomi_cat = yomi_data.j -- jūbakoyomi
				elseif y1.type == "kun'yomi" and y2.onyomi then
					yomi_cat = yomi_data.y -- yutōyomi
				end
			end
		end
		if yomi_cat then
			local category = yomi_cat.reading_category
			if category ~= false then
				insert(categories, lang_name .. " " .. "terms read with " .. (category or yomi_cat.type))
			end
		end
	end

	local kanji_table
	if #kanji > 0 then
		local rows = {table_head}
		for _, v in ipairs(kanji) do
			insert(rows, "| style=\"padding: 0.5em;\" | " .. v .. "\n")
		end
		kanji_table = concat(rows) .. concat(cells, "\n") .. "\n|}"
	else
		kanji_table = ""
	end

	local forms_table = ""
	if args.alt == "" or args.alt == "-" then
		args.alt = nil
	end
	if kyu[1] or args.alt then
		local forms = {}

		-- |kyu=
		if kyu[1] == "which 弁?" then
			insert(forms, "Please specify the correct kyujitai for 弁 with the parameter \"kyu\". ")
			remove(kyu, 1)
		end

		for _, form in ipairs(kyu) do
			-- A form may be written "link target|display text".
			local form_linkto, form_display = match(form, "^(.+)|(.+)$")
			if not form_linkto then
				form_linkto, form_display = form, form
			end
			insert(forms, concat{
				-- NOTE(review): this expression always evaluates to "" and
				-- form_linkto is never used; link markup was presumably lost
				-- from the source. Reproduced as found.
				"" or "#" .. lang_name .. "|",
				form_display,
				" ",
				show_labels {labels = {"kyūjitai"}, lang = lang, nocat = true },
				" ",
			})
		end

		-- |alt=
		if args.alt then
			for form in gsplit(args.alt, ",") do
				-- "form:label1 label2" attaches space-separated labels to a form.
				local colon_pos = find(form, ":")
				if colon_pos then
					local altform = sub(form, 1, colon_pos - 1)
					local altlabels = split(sub(form, colon_pos + 1), " ")
					insert(forms, concat{
						"",
						altform,
						" ",
						show_labels { labels = altlabels, lang = lang, nocat = true },
						" ",
					})
				else
					insert(forms, concat{
						"",
						form,
						" "
					})
				end
			end
		end

		-- NOTE(review): only the caption text, the data-cell style and the
		-- closing "\n|}" survive in the captured source. The table-opening
		-- line and the header-cell attributes are reconstructed by analogy
		-- with table_head; confirm against the deployed module.
		forms_table = "\n" .. [[
{| class="wikitable floatright"
! style="font-weight:normal" | ]] .. "Alternative spelling" .. (#forms == 1 and "" or "s") .. "\n" .. [[
|-
| style="text-align:center;font-size:108%" | ]] .. concat(forms, " ") .. "\n|}"
	end

	local forms_table2 = ""
	if args.alt2 and args.alt2 ~= "" and args.alt2 ~= "-" then
		local forms2 = {}
		for form in gsplit(args.alt2, ",") do
			insert(forms2, "" .. form .. " ")
		end
		-- NOTE(review): table-opening markup reconstructed, as for forms_table.
		forms_table2 = "\n" .. [[
{| class="wikitable floatright"
! style="font-weight:normal" | ]] .. "Variant form" .. (#forms2 == 1 and "" or "s") .. "\n" .. [[
|-
| style="text-align:center;font-size:140%" | ]] .. concat(forms2, " ") .. "\n|}"
	end

	-- use user-provided sortkey if we got one, otherwise
	-- use the sortkey we've already made by combining the
	-- readings if provided, if we have neither then
	-- default to empty string and don't sort
	local sortkey
	if args.sort then
		sortkey = args.sort
	else
		-- Interleave the readings with the non-kanji text between them.
		local parts = {non_kanji[1]}
		local id = 1
		for _, v in ipairs(readings_actual) do
			id = id + v[2]
			insert(parts, (v[1] or "") .. (non_kanji[id] or ""))
		end
		sortkey = concat(parts)
	end
	if sortkey == "" then
		sortkey = nil
	else
		sortkey = lang:makeSortKey(sortkey)
	end
	if sortkey ~= lang:makeSortKey(PAGENAME) then
		track{"kanjitab/nonstandard sortkey", "kanjitab/nonstandard sortkey/" .. lang_code}
	end

	return kanji_table .. forms_table .. forms_table2 .. m_utilities.format_categories(categories, lang, sortkey)
end

return export