-- Module:category tree/poscatboiler/data/lang-specific/jpx

-- `labels` collects the static label definitions added below (keyed by label text);
-- `handlers` collects the pattern-matching handler functions appended below.
-- Both are exported at the end of the module.
local labels, handlers = {}, {}

local m_str_utils = require("Module:string utilities")

local concat = table.concat local full_link = require("Module:links").full_link local insert = table.insert local Hani_sort = require("Module:Hani-sortkey").makeSortKey local match = m_str_utils.match local sort = table.sort local tag_text = require("Module:script_utilities").tag_text local ucfirst = m_str_utils.ucfirst

local Hira = require("Module:scripts").getByCode("Hira") local Jpan = require("Module:scripts").getByCode("Jpan") local kana_to_romaji = require("Module:Hrkt-translit").tr local m_numeric = require("Module:ConvertNumeric")

local kana_capture = "([-" .. require("Module:ja/data/range").kana .. "・]+)" local yomi_data = require("Module:kanjitab/data")

-- Static label definitions. Each entry is a table with a `description` and
-- `parents`, plus optional fields such as `additional`, `toc_template`,
-- `topright` and `umbrella`.
-- NOTE(review): every description starts with a leading space and several contain
-- gaps (e.g. "adnominals, or, which"); presumably a language-name placeholder and
-- wiki links/templates were stripped from this copy -- confirm against the
-- upstream module.
-- NOTE(review): `parents = ,` below has no value and is a syntax error; the
-- original value appears to have been lost and must be restored.
labels["adnominals"] = { description = " adnominals, or, which modify nouns, and do not conjugate or predicate.", parents = , }

-- Kana-script labels. The `{name = "", raw = true}` parent has an empty name;
-- presumably the raw category name was stripped -- TODO confirm.
labels["hiragana"] = { description = " terms with hiragana — — forms, sorted by conventional hiragana sequence. The hiragana form is a phonetic representation of that word. " .. "Wiktionary represents -language segments in three ways: in normal form (with kanji, if appropriate), in hiragana " .. "form (this differs from kanji form only when the segment contains kanji), and in romaji form.", additional = "See also Category: katakana", toc_template = "categoryTOC-hiragana", parents = { {name = "", raw = true}, "Category:Hiragana script characters", } }

labels["historical hiragana"] = { description = " historical hiragana.", additional = "See also Category: historical katakana.", toc_template = "categoryTOC-hiragana", parents = { "hiragana", {name = "", raw = true}, "Category:Hiragana script characters", } }

labels["katakana"] = { description = " terms with katakana — — forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in shinjitai.", additional = "See also Category: hiragana", toc_template = "categoryTOC-katakana", parents = { {name = "", raw = true}, "Category:Katakana script characters", } }

labels["historical katakana"] = { description = " historical katakana.", additional = "See also Category: historical hiragana.", toc_template = "categoryTOC-katakana", parents = { "katakana", {name = "", raw = true}, "Category:Katakana script characters", } }

labels["terms spelled with mixed kana"] = { description = " terms which combine hiragana and katakana characters, potentially with kanji too.", parents = { {name = "", raw = true}, "hiragana", "katakana", }, }

-- NOTE(review): `parents = ,` has no value (syntax error); `topright = ""` is
-- empty, presumably a stripped template -- restore both from upstream.
labels["honorifics"] = { topright = "", description = " honorifics.", parents = , }

-- Here `parents` is a plain string rather than a list.
labels["humble language"] = { description = " humble terms, or, which is a type of honorific speech that lowers the speaker in relation to the listener.", parents = "honorifics", }

labels["respectful language"] = { description = " respectful terms, or, which is a type of honorific speech that elevates the listener in relation to the speaker.", parents = "honorifics", }

labels["kanji"] = { topright = "", description = " symbols of the Han logographic script, which can represent sounds or convey meanings directly.", toc_template = "Hani-categoryTOC", umbrella = "Han characters", parents = "logograms", }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["kanji by reading"] = { description = " kanji categorized by reading.", parents = , }

labels["makurakotoba"] = { topright = "", description = " idioms used in poetry to introduce specific words.", parents = {"idioms"}, }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["terms by kanji readings"] = { description = " categories grouped with regard to the readings of the kanji with which they are spelled.", parents = , }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["terms by reading pattern"] = { description = " categories with terms grouped by their reading patterns.", parents = , }

--- Build a sorted list of category-link strings for the on'yomi subtypes.
-- Walks every entry of `yomi_data` once (entries reachable under several keys
-- are only processed the first time) and, for entries flagged `onyomi`,
-- emits a link for each subtype other than plain "on'yomi" and other than
-- `cat_yomi_type`.
-- @param category       category name template passed through `gsub`
-- @param category_type  field of the yomi entry that holds its category name
-- @param cat_yomi_type  yomi type of the category being described (excluded)
-- @return sorted array of link strings
local function handle_onyomi_list(category, category_type, cat_yomi_type)
	local links, visited = {}, {}
	for _, entry in pairs(yomi_data) do
		local first_visit = not visited[entry]
		visited[entry] = true
		if first_visit and entry.onyomi then
			local catname = entry[category_type]
			local kind = entry.type
			-- `false` explicitly marks "no category of this kind" for the entry.
			if catname ~= false and kind ~= "on'yomi" and kind ~= cat_yomi_type then
				insert(links, "Category: " .. category:gsub("", catname) .. "")
			end
		end
	end
	sort(links)
	return links
end

--- Register one label per entry of `yomi_data` for a family of reading categories.
-- For every yomi entry whose `[category_type]` field is not `false`, a label is
-- built from `description` plus the entry's `link` (or category name) and optional
-- `description`, with the yomi type as breadcrumb. Entries flagged `onyomi` also
-- get an `additional` paragraph listing the categories returned by
-- `handle_onyomi_list`, and (for subtypes other than plain "on'yomi") an extra
-- first parent derived from `yomi_data.on`. The label is stored in `labels` under
-- the key `category:gsub("", yomi_catname)`.
-- NOTE(review): `parents = ,` has no value and is a syntax error; the `parent`
-- argument is not referenced anywhere in the visible body, so it was presumably
-- used in the lost `parents` value -- restore from upstream.
-- NOTE(review): the `gsub` patterns are empty strings; presumably a placeholder
-- token was stripped from this copy -- confirm.
local function add_yomi_category(category, category_type, parent, description) for _, yomi in pairs(yomi_data) do		local yomi_catname = yomi[category_type] if yomi_catname ~= false then local yomi_type = yomi.type local yomi_desc = yomi.link or yomi_catname if yomi.description then yomi_desc = yomi_desc .. "; " .. yomi.description end local label = { description = description .. " " .. yomi_desc .. ".",				breadcrumb = yomi_type, parents = , }			if yomi.onyomi then local onyomi = handle_onyomi_list(category, category_type, yomi_type) label.additional = "Categories of terms with " .. (yomi_type == "on'yomi" and "more" or "other") .. " specific types of on'yomi readings can be found in the following categories:\n* " .. concat(onyomi, "\n* ") if yomi_type ~= "on'yomi" then insert(label.parents, 1, {						name = (category:gsub("", yomi_data.on[category_type])),						sort = yomi_catname					}) end end labels[category:gsub("", yomi_catname)] = label end end end

-- Register the two yomi-based label families. Each row holds the arguments
-- (category, category_type, parent, description) for one `add_yomi_category` call;
-- rows are processed in the order written.
do
	local yomi_registrations = {
		{
			"terms read with ",
			"reading_category",
			"terms by reading pattern",
			" terms read with",
		},
		{
			"terms spelled with kanji with readings",
			"kanji_category",
			"terms by kanji reading type",
			" categories with terms that are spelled with one or more kanji read with",
		},
	}
	for _, reg in ipairs(yomi_registrations) do
		add_yomi_category(reg[1], reg[2], reg[3], reg[4])
	end
end

-- Further static labels: maintenance and kanji-spelling categories.
-- `hidden` / `can_be_empty` are set on the maintenance category below.
labels["terms with missing yomi"] = { description = " terms where at least one yomi is missing from -kanjitab.", hidden = true, can_be_empty = true, parents = {"entry maintenance"}, }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
-- The description has gaps ("origin, readings", "origin,  readings"); presumably
-- linked terms were stripped -- confirm.
labels["terms by kanji reading type"] = { description = " categories with terms grouped with regard to the types of readings of the kanji with which " .. "they are spelled; broadly, those of Chinese origin, readings, and those of non-Chinese origin,  readings.", parents = , }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["terms spelled with ateji"] = { topright = "", description = " terms containing one or more ateji — — which are kanji used to represent sounds rather than meanings (though meaning may have some influence on which kanji are chosen).", parents = , }

-- Unlike the other entries, this description names the language ("Japanese") literally.
labels["terms spelled with daiyōji"] = { description = "Japanese terms spelled using daiyōji, categorized using .", parents = {"terms by etymology"}, }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["terms spelled with jukujikun"] = { description = " terms containing one or more jukujikun — — which are kanji used to represent meanings rather than sounds.", parents = , }

--- Register the two labels for one kanji grade/list: "<grade> kanji" and
-- "terms spelled with <grade> kanji".
-- @param grade     grade or list name, e.g. "kyōiku"; " kanji" is appended to it
-- @param desc      text appended to both descriptions
-- @param wp        when truthy, a `topright` value is produced from a format string
-- @param only_one  when truthy, the terms description says "at least one kanji"
-- @param parent    not referenced in the visible body (see note)
-- @param sort      not referenced in the visible body (see note); note that this
--                  parameter name shadows the file-level `sort` alias inside the function
-- NOTE(review): both `parents = ,` fields have no value and are syntax errors;
-- `parent` and `sort` were presumably used in the lost values. The format string
-- `("")` is empty, presumably a stripped template -- restore all from upstream.
local function add_grade_categories(grade, desc, wp, only_one, parent, sort) local grade_kanji = grade .. " kanji" local topright = wp and (""):format(ucfirst(grade_kanji)) or nil labels[grade_kanji] = { topright = topright, description = " kanji " .. desc, toc_template = "Hani-categoryTOC", parents = , }	labels["terms spelled with " .. grade_kanji] = { topright = topright, description = " terms spelled with " .. (only_one and "at least one " or "") .. "kanji " .. desc, parents = , } end

-- Register the kanji grade/list labels. Arguments are
-- (grade, desc, wp, only_one, parent, sort); see add_grade_categories.
-- NOTE(review): several descriptions end in "list of ." -- presumably the linked
-- list name was stripped from this copy; confirm against the upstream module.

-- First through sixth grade, each a child of "kyōiku" and sorted by grade number.
for i = 1, 6 do
	local ord = m_numeric.ones_position_ord[i]
	add_grade_categories(
		ord .. " grade",
		-- Fixed the duplicated word "the the" in this user-facing description.
		"taught in the " .. ord .. " grade of elementary school, as designated by the official list of .",
		false,
		false,
		"kyōiku",
		i
	)
end

add_grade_categories(
	"kyōiku",
	"on the official list of .",
	true,
	false,
	"jōyō"
)

add_grade_categories(
	"secondary school",
	"on the official list of that are generally taught in secondary school.",
	false,
	false,
	"jōyō"
)

add_grade_categories(
	"jōyō",
	"on the official list of .",
	true,
	false
)

add_grade_categories(
	"tōyō",
	"on the official list of, which was used from 1946–1981 until the publication of the list of .",
	true,
	false
)

add_grade_categories(
	"jinmeiyō",
	"on the official list of .",
	true,
	true
)

add_grade_categories(
	"hyōgai",
	"not included on the official list of or, known as  or .",
	true,
	true
)

-- Further static labels: reading counts, maintenance categories and yojijukugo.
-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["terms with multiple readings"] = { description = " terms with multiple pronunciations (hence multiple kana spellings).", parents = , }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
labels["kanji readings by number of morae"] = { description = " categories grouped with regard to the number of morae in their kanji readings.", parents = , }

-- The second parent uses a single-space sort key.
labels["single-kanji terms"] = { description = " terms written as a single kanji.", parents = {"terms by orthographic property", {name = "character counts", sort = " "}}, }

-- Hidden maintenance category that is allowed to be empty.
labels["kanji with kun readings missing okurigana designation"] = { breadcrumb = "Kanji missing okurigana designation", description = " kanji entries in which one or more kun readings entered into -readings is missing a hyphen denoting okurigana.", toc_template = "Hani-categoryTOC", hidden = true, can_be_empty = true, parents = {"entry maintenance"}, }

-- NOTE(review): `parents = ,` has no value (syntax error) -- restore from upstream.
-- The description has a gap ("spellings in the included"); presumably a link was stripped.
labels["terms by the individual characters in their historical spellings"] = { breadcrumb = "Historical", description = " terms categorized by whether their spellings in the included certain individual characters.", parents = , }

-- Hidden maintenance category that is allowed to be empty.
labels["verbs without transitivity"] = { description = " verbs missing the =tr= parameter from their headword templates.", hidden = true, can_be_empty = true, parents = {"entry maintenance"}, }

labels["yojijukugo"] = { topright = "", description = " four-kanji compound terms,, with idiomatic meanings; typically derived from Classical Chinese, Buddhist scripture or traditional Japanese proverbs.", additional = "Compare Chinese and Korean .", umbrella = "four-character idioms", parents = {"idioms"}, }

-- Reverse lookup from a spelled-out number word to its numeric value, built by
-- inverting `m_numeric.ones_position`. Used by the count-based handlers below.
-- (The comment and the code were collapsed onto one line, which commented the
-- whole table out; they are separated again here.)
-- FIXME: Only works for 0 through 19.
local word_to_number = {}
for k, v in pairs(m_numeric.ones_position) do
	word_to_number[v] = k
end

-- Period qualifiers that may precede a reading type in a category label.
local periods = {
	historical = true,
	ancient = true,
}

--- Validate an optional period and produce the text pieces used in descriptions.
-- @param period        period qualifier or nil
-- @param reading_type  reading type taken from the label
-- @return period_text       `period .. " "`, or nil when there is no period
-- @return reading_type_link display text for the reading type; nil when `period`
--                           is not a recognised period, so callers can bail out
local function get_period_text_and_reading_type_link(period, reading_type)
	if period and not periods[period] then
		return nil
	end
	local period_text = period and period .. " " or nil

	-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
	-- (This guard had been swallowed by the comment above it when the lines were
	-- collapsed; it is restored as executable code.)
	if not period and periods[reading_type] then
		return nil, reading_type
	end

	-- NOTE(review): the empty strings around `reading_type` presumably held link
	-- markup that was stripped from this copy -- confirm against upstream.
	local reading_type_link = "" .. reading_type .. ""
	return period_text, reading_type_link
end

--- Pick the script object for a reading.
-- Whitespace and ASCII punctuation are removed first; if anything outside the
-- character set of `Hira` remains, `Jpan` is returned, otherwise `Hira`.
-- @param str reading text
-- @return script object (`Jpan` or `Hira`)
local function get_sc(str)
	-- `getCharacters` must be called; the bare method reference was a syntax error.
	-- The extra parentheses keep only the first result of `gsub`.
	local stripped = (str:gsub("[%s%p]+", ""))
	return match(stripped, "[^" .. Hira:getCharacters() .. "]") and Jpan or Hira
end

--- Wrap a reading in script tagging for `lang`, using the script from `get_sc`.
-- @param reading reading text
-- @param lang    language object
-- @return result of `tag_text`
local function get_tagged_reading(reading, lang)
	return tag_text(reading, lang, get_sc(reading))
end

--- Build a formatted link for a kana reading.
-- Dots, hyphens and whitespace are removed from `reading` first. A "・" in the
-- reading separates the furigana from the okurigana.
-- @param reading reading text, optionally containing "・"
-- @param lang    language object
-- @param period  optional key into `periods`; when it is a known period the
--                transliteration is requested with `hist` set
-- @param link    optional link target overriding the reading itself
-- @return result of `full_link(..., "term")`
local function get_reading_link(reading, lang, period, link)
	local hist = periods[period]
	reading = reading:gsub("[%.%-%s]+", "")
	return full_link({
		lang = lang,
		sc = get_sc(reading),
		term = link or reading:gsub("・", ""),
		-- If we have okurigana, demarcate furigana.
		-- (This comment had swallowed the `alt`, `tr` and `pos` fields when the
		-- lines were collapsed; they are restored as code.)
		-- NOTE(review): the replacement strings below presumably contained markup
		-- that was stripped from this copy -- confirm against upstream.
		alt = reading:gsub("^(.-)・", "%1 "),
		-- `getCode` must be called; the bare method reference was a syntax error.
		tr = kana_to_romaji((reading:gsub("・", ".")), lang:getCode(), nil, {keep_dot = true, hist = hist})
			:gsub("^(.-)%.", " %1 "),
		pos = reading:find("・", 1, true) and get_tagged_reading((reading:gsub("^.-・", "～")), lang) or nil
	}, "term")
end

--- Test whether a reading type is an on'yomi subtype, i.e. ends in "on"
-- preceded by at least one more character (plain "on" does not qualify).
-- @param reading_type reading type string
-- @return start and end index of the match (truthy), or nil when there is none
local function is_on_subtype(reading_type)
	local first, last = string.find(reading_type, ".on$")
	if first == nil then
		return nil
	end
	return first, last
end

-- Handler for labels of the form "terms written with <count> Han script character(s)".
-- Rejects the label unless "one" pairs with the singular and every other count
-- with the plural, then maps the count word to a number via `word_to_number`
-- (returning nil when the word is unknown) and uses that number as breadcrumb.
-- NOTE(review): `parents = ,` has no value and is a syntax error -- restore from
-- upstream. Also, with the statements collapsed onto one line, the inline `--`
-- comment swallows everything after it; the line breaks need restoring as well.
insert(handlers, function(data)	local count, plural = data.label:match("^terms written with (.+) Han script character(s?)$")	-- Make sure 'one' goes with singular and other numbers with plural.	if not count or (count == "one") ~= (plural == "") then		return	end	local num = word_to_number[count]	if not num then		return nil	end	return {		description = " terms written with " .. count .. " kanji.",		breadcrumb = num,		parents = ,	} end)

-- Handler for labels of the form "terms historically spelled with <kana>".
-- Returns nil (no match) for other labels; otherwise a label table whose parents
-- are "terms spelled with <kana>" and the umbrella category for historical
-- spellings (sorted by the kana's sort key).
insert(handlers, function(data)
	-- Use the Unicode-aware `match` (as every other `kana_capture` handler does):
	-- `kana_capture` is a character class of multi-byte characters, which the
	-- byte-based `string.match` would interpret byte by byte.
	local label_pref, kana = match(data.label, "^(terms historically spelled with )" .. kana_capture .. "$")
	if not kana then
		return
	end
	local lang = data.lang
	return {
		-- NOTE(review): "in the ." presumably lost a linked term when markup was
		-- stripped from this copy -- confirm against upstream.
		description = " terms spelled with " .. get_reading_link(kana, lang, "historical") .. " in the .",
		displaytitle = " " .. label_pref .. get_tagged_reading(kana, lang),
		breadcrumb = "historical",
		parents = {
			{name = "terms spelled with " .. kana, sort = " "},
			{name = "terms by the individual characters in their historical spellings", sort = lang:makeSortKey(kana)}
		},
	}
end)

-- Handler for labels of the form "kanji readings with <count> mora(e)".
-- Rejects the label unless "one" pairs with the singular "mora" and every other
-- count with the plural "morae", then maps the count word to a number via
-- `word_to_number` (returning nil when the word is unknown).
-- NOTE(review): `parents = ,` has no value and is a syntax error -- restore from
-- upstream. Also, with the statements collapsed onto one line, the inline `--`
-- comment swallows everything after it; the line breaks need restoring as well.
insert(handlers, function(data)	local count, plural = data.label:match("^kanji readings with (.+) mora(e?)$")	-- Make sure 'one' goes with singular and other numbers with plural.	if not count or (count == "one") ~= (plural == "") then		return	end	local num = word_to_number[count]	if not num then		return nil	end	return {		description = " kanji readings containing " .. count .. " mora" .. plural .. ".",		breadcrumb = num,		parents = ,	} end)

-- Handler for labels of the form "kanji with [<period> ]<type> reading <kana>".
-- Returns nil for non-matching labels or unrecognised periods. The line structure
-- is restored here: in the collapsed form the inline comments swallowed all the
-- code that followed them.
insert(handlers, function(data)
	local label_pref, period, reading_type, reading = match(data.label, "^(kanji with ([a-z]-) ?([%a']+) reading )" .. kana_capture .. "$")
	if not period then
		return
	end
	-- The lazy `([a-z]-)` capture yields "" when the label has no period word.
	period = period ~= "" and period or nil
	local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
	if not reading_type_link then
		return
	end
	local lang = data.lang
	-- Compute parents.
	local parents, breadcrumb = {}
	if reading:find("・", 1, true) then
		-- Reading with okurigana: the parent is the category for the bare furigana.
		local okurigana = reading:match("・(.*)")
		insert(parents, {
			name = "kanji with " .. (period_text or "") .. reading_type .. " reading " .. reading:match("(.-)・"),
			-- Sort by okurigana, since all coordinate categories will have the same furigana.
			sort = (lang:makeSortKey(okurigana))
		})
		breadcrumb = "～" .. okurigana
	else
		insert(parents, {
			name = "kanji by " .. (period_text or "") .. reading_type .. " reading",
			sort = (lang:makeSortKey(reading))
		})
		breadcrumb = reading
	end
	if is_on_subtype(reading_type) then
		insert(parents, {name = "kanji with " .. (period_text or "") .. "on reading " .. reading, sort = reading_type})
	elseif period_text then
		insert(parents, {name = "kanji with " .. period_text .. "reading " .. reading, sort = reading_type})
	end
	if not period_text then
		insert(parents, {name = "kanji read as " .. reading, sort = reading_type})
	end
	return {
		description = " kanji with the " .. (period_text or "") .. reading_type_link .. " reading " ..
			get_reading_link(reading, lang, period or reading_type) .. ".",
		displaytitle = " " .. label_pref .. get_tagged_reading(reading, lang),
		breadcrumb = get_tagged_reading(breadcrumb, lang),
		parents = parents,
	}
end)

-- Handler for labels of the form "kanji by [<period> ]<type> reading".
-- Returns nil for non-matching labels or unrecognised periods. The line structure
-- is restored here: in the collapsed form the "Compute parents" and "Compute
-- description" comments turned the rest of the function, including its closing
-- `end)`, into comment text.
insert(handlers, function(data)
	local period, reading_type = match(data.label, "^kanji by ([a-z]-) ?([%a']+) reading$")
	if not period then
		return
	end
	-- The lazy `([a-z]-)` capture yields "" when the label has no period word.
	period = period ~= "" and period or nil
	local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
	if not reading_type_link then
		return nil
	end

	-- Compute parents.
	-- First parent: the generic "on" category for on'yomi subtypes; otherwise the
	-- period-less category when a period is given; otherwise "kanji by reading".
	local parents = {
		is_on_subtype(reading_type) and {name = "kanji by " .. (period_text or "") .. "on reading", sort = reading_type} or
		period_text and {name = "kanji by " .. reading_type .. " reading", sort = period} or
		{name = "kanji by reading", sort = reading_type}
	}
	if period_text then
		insert(parents, {name = "kanji by " .. period_text .. "reading", sort = reading_type})
	end

	-- Compute description.
	local description = " kanji categorized by " .. (period_text or "") .. reading_type_link .. " reading."
	return {
		description = description,
		breadcrumb = (period_text or "") .. reading_type,
		parents = parents,
	}
end)

-- Handler for labels of the form "kanji read as <kana>".
-- Accepts an optional `histconsol` category argument naming the modern reading a
-- historical reading has been consolidated with. Returns the label table plus
-- `true` as a second value. The line structure is restored here: in the collapsed
-- form the inline comment swallowed all the code that followed it.
insert(handlers, function(data)
	local label_pref, reading = match(data.label, "^(kanji read as )" .. kana_capture .. "$")
	if not reading then
		return
	end
	local params = {
		["histconsol"] = {},
	}
	local args = require("Module:parameters").process(data.args, params)
	local lang = data.lang
	local parents, breadcrumb = {}
	if reading:find("・", 1, true) then
		-- Reading with okurigana: the parent is the category for the bare furigana.
		local okurigana = reading:match("・(.*)")
		insert(parents, {
			name = "kanji read as " .. reading:match("(.-)・"),
			-- Sort by okurigana, since all coordinate categories will have the same furigana.
			sort = (lang:makeSortKey(okurigana))
		})
		breadcrumb = "～" .. okurigana
	else
		insert(parents, {
			name = "kanji by reading",
			sort = (lang:makeSortKey(reading))
		})
		breadcrumb = reading
	end
	local addl
	local period_text
	if args.histconsol then
		period_text = "historical"
		addl = ("This is a historical reading, now " ..
			"consolidated with the modern reading of " ..
			get_reading_link(args.histconsol, lang, nil, ("Category:Japanese kanji read as %s"):format(args.histconsol)) .. ".")
	end
	return {
		description = " kanji read as " .. get_reading_link(reading, lang, period_text) .. ".",
		additional = addl,
		displaytitle = " " .. label_pref .. get_tagged_reading(reading, lang),
		breadcrumb = get_tagged_reading(breadcrumb, lang),
		parents = parents,
	}, true
end)

-- Handler for labels of the form "terms spelled with kanji read as <kana>".
-- Parents are the umbrella "terms by kanji readings", the mora-count category for
-- the reading, and the corresponding "kanji read as <kana>" category. The line
-- structure is restored here: in the collapsed form the "Compute parents" comment
-- swallowed all the code that followed it.
insert(handlers, function(data)
	local label_pref, reading = match(data.label, "^(terms spelled with kanji read as )" .. kana_capture .. "$")
	if not reading then
		return
	end
	-- Compute parents.
	local lang = data.lang
	local sort_key = (lang:makeSortKey(reading))
	local mora_count = require("Module:ja").count_morae(reading)
	local mora_count_words = m_numeric.spell_number(tostring(mora_count))
	local parents = {
		{name = "terms by kanji readings", sort = sort_key},
		-- Only a count of exactly one takes the singular "mora"; this matches the
		-- rule enforced by the "kanji readings with N mora(e)" handler.
		{name = "kanji readings with " .. mora_count_words .. " mora" .. (mora_count ~= 1 and "e" or ""), sort = sort_key},
		{name = "kanji read as " .. reading, sort = " "},
	}

	local tagged_reading = get_tagged_reading(reading, lang)
	return {
		description = " terms that contain kanji that exhibit a reading of " .. get_reading_link(reading, lang) .. " in those terms prior to any sound changes.",
		displaytitle = " " .. label_pref .. tagged_reading,
		breadcrumb = tagged_reading,
		parents = parents,
	}
end)

-- Handler for labels of the form "terms spelled with <kanji> read as <kana>".
-- Requires at least one yomi type as a positional category argument; each must be
-- a key of `yomi_data` with a `kanji_category`. Raises an error when no yomi type
-- is given, or when one is unknown or not valid for this kind of category.
-- Returns the label table plus `true` as a second value.
insert(handlers, function(data)
	local kanji, reading = match(data.label, "^terms spelled with (.) read as " .. kana_capture .. "$")
	if not kanji then
		return nil
	end
	local params = {
		[1] = {list = true},
	}
	local args = require("Module:parameters").process(data.args, params)
	local lang = data.lang
	if #args[1] == 0 then
		-- `getCanonicalName` must be called; the bare method reference was a syntax error.
		-- NOTE(review): the gaps in this message presumably held template examples
		-- that were stripped from this copy -- confirm against upstream.
		error("For categories of the form \"" .. lang:getCanonicalName() ..
			" terms spelled with KANJI read as READING\", at least one reading type (e.g.  or  ) must be specified using ,  ,  , etc.")
	end
	local yomi_types, parents = {}, {}
	for _, yomi in ipairs(args[1]) do
		local yomi_info = yomi_data[yomi]
		if not yomi_info then
			error("The yomi type \"" .. yomi .. "\" is not recognized.")
		end
		local category = yomi_info.kanji_category
		if not category then
			error("The yomi type \"" .. yomi .. "\" is not valid for this type of category.")
		end
		insert(yomi_types, yomi_info.link)
		insert(parents, {
			name = "terms spelled with kanji with " .. category .. " readings",
			sort = (lang:makeSortKey(reading))
		})
	end
	-- The two most specific parents go first, ahead of the per-yomi-type ones.
	insert(parents, 1, {name = "terms spelled with " .. kanji, sort = (lang:makeSortKey(reading))})
	insert(parents, 2, {name = "terms spelled with kanji read as " .. reading, sort = Hani_sort(kanji)})
	-- The whole right-hand side is evaluated before the assignment, so both
	-- `#yomi_types` checks still see the list rather than the resulting string.
	yomi_types = (#yomi_types > 1 and "one of " or "") .. "its " ..
		require("Module:table").serialCommaJoin(yomi_types, {conj = "or"}) ..
		" reading" .. (#yomi_types > 1 and "s" or "")
	local tagged_kanji = get_tagged_reading(kanji, lang)
	local tagged_reading = get_tagged_reading(reading, lang)
	return {
		-- NOTE(review): "spelled with with" presumably lost a linked kanji between
		-- the two words when markup was stripped -- confirm against upstream.
		description = " terms spelled with with " ..
			yomi_types .. " of " .. get_reading_link(reading, lang) .. ".",
		displaytitle = " terms spelled with " .. tagged_kanji .. " read as " .. tagged_reading,
		breadcrumb = "read as " .. tagged_reading,
		parents = parents,
	}, true
end)

-- Handler for labels of the form "terms with <kanji> replaced by daiyōji <daiyoji>".
-- Requires a `sort` category argument and raises an error when it is missing.
-- Returns the label table plus `true` as a second value.
-- NOTE(review): `lang:getCanonicalName` below lacks its call parentheses, which
-- is a syntax error in Lua; presumably `()` was stripped from this copy.
insert(handlers, function(data)	local kanji, daiyoji = match(data.label, "^terms with (.) replaced by daiyōji (.)$")	if not kanji then		return nil	end	local params = {		["sort"] = {},	}	local args = require("Module:parameters").process(data.args, params)	local lang = data.lang	if not args.sort then		error("For categories of the form \"" .. lang:getCanonicalName ..			" terms with KANJI replaced by daiyōji DAIYOJI\", the sort key must be specified using sort=")	end

-- NOTE(review): `parents = ,` has no value and is a syntax error; `args.sort` is
-- validated above but not used in the visible code, so it was presumably part of
-- the lost `parents` value. The description ("terms with replaced by daiyōji .")
-- also appears to have lost its linked characters. Restore from upstream.
local tagged_kanji = get_tagged_reading(kanji, lang) local tagged_daiyoji = get_tagged_reading(daiyoji, lang) return { description = " terms with replaced by daiyōji .", displaytitle = " terms with " .. tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji, breadcrumb = tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji, parents = , }, true end)

-- Module result: the static label table and the list of handler functions.
local export = {}
export.LABELS = labels
export.HANDLERS = handlers
return export