-- Module:category tree/poscatboiler/data/characters

-- Registries filled in by the rest of this module and returned at the end:
-- one for raw category definitions, one for label handlers, one for raw handlers.
local raw_categories, handlers, raw_handlers = {}, {}, {}

local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint local insert = table.insert local ulen = m_str_utils.len local ulower = m_str_utils.lower local umatch = m_str_utils.match local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD

-- Records a tracking hit under the "poscatboiler-characters/" prefix.
-- `page` is the suffix appended to that prefix. Always returns true.
local function track(page)
	-- The tracking module is loaded lazily, only when a hit is recorded.
	local record = require("Module:debug/track")
	record("poscatboiler-characters/" .. page)
	return true
end

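-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------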

-- Registers the umbrella raw category for the "terms spelled with ..." family.
-- Its parents are the "Umbrella metacategories" category and the
-- "terms by their individual characters" label (sorted under a single space).
do
	local umbrella_name = "Terms by their individual characters subcategories by language"
	local label_parent = {
		name = "terms by their individual characters",
		is_label = true,
		sort = " ",
	}
	raw_categories[umbrella_name] = {
		description = "Umbrella categories covering terms categorized by unusual characters contained in them.",
		additional = "",
		parents = {"Umbrella metacategories", label_parent},
	}
end

-- Raw category "Letters": the parent named by the raw letter handler in this
-- module. The assignment must sit on its own line; when it shares a line with
-- the FIXME comment it is commented out and the category is never registered.
-- FIXME! This should probably be deleted.
raw_categories["Letters"] = {
	description = "Categories specifying individual letters, containing the languages that use those letters.",
	additional = "",
	parents = {
		"Fundamental",
	},
}

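-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------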

-- If `combining` is truthy, returns `char` with a dotted circle (◌) placed
-- before it, so that a combining character has a visible base; otherwise
-- returns `char` unchanged.
local function add_dotted_circle(char, combining)
	if combining then
		return "◌" .. char
	end
	return char
end

-- Handler for labels of the form "terms spelled with CHAR".
--
-- `data.label` is matched against "^terms spelled with (.+)$"; a label that
-- does not match makes the handler return nil. `data.args` is parsed for the
-- optional parameters char=, sort=, context= and context2=. `data.lang`
-- (falling back to the language with code "mul") supplies the standard
-- characters and the sort key.
--
-- On a match the handler returns two values: a table describing the category
-- (description, parents, breadcrumb, umbrella, ...) and `true`.
-- Raises an error when a single-character title is not in uppercase form and
-- the language declares standard characters that do not cover the uppercase
-- form.
insert(handlers, function(data)
	-- NOTE: The "character" in the title may actually be a description such as
	-- "gershayim". In that case, char= is specified as a parameter indicating the
	-- actual character.
	local titlechar = data.label:match("^terms spelled with (.+)$")
	if not titlechar then
		return nil
	end
	local params = {
		["char"] = {},
		["sort"] = {},
		-- Not sure what used to be done with the following parameters.
		["context"] = {},
		["context2"] = {},
	}
	local args = require("Module:parameters").process(data.args, params)
	if args.context or args.context2 then
		track("terms-spelled-with-context")
	end

	-- Titles that name a class of characters rather than a single character.
	local special_cases = {
		numbers = {
			sort = "#",
			desc = "numeric digits",
		},
		emoji = {
			sort = "⌚", -- the first emoji in our list in Module:headword/data
		},
		parentheses = {
			sort = "(",
		},
		["square brackets"] = {
			sort = "[",
		},
		["angle brackets"] = {
			sort = "<",
		},
		braces = {
			sort = "{",
		},
	}
	local special = special_cases[titlechar]
	if special then
		local sortkey = args.sort or special.sort
		-- NOTE(review): the `parents` values were missing from the source as
		-- received (the fields were empty and did not parse). They are
		-- reconstructed here from the otherwise-unused `sortkey` and from the
		-- category names used elsewhere in this module; confirm against the
		-- canonical module.
		-- NOTE(review): the leading space in the description suggests that a
		-- language-name placeholder was lost as well; the string is kept as
		-- received.
		return {
			description = " terms spelled with one or more " .. (special.desc or titlechar) .. ".",
			parents = {{name = "terms by their individual characters", sort = sortkey}},
			breadcrumb = titlechar,
			umbrella = {
				breadcrumb = titlechar,
				parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. sortkey}},
			},
		}, true
	end

	local char = args.char or titlechar
	local titlechar_is_desc = args.char and args.char ~= titlechar
	if titlechar_is_desc then
		track("titlechar_is_desc")
	end
	local lang = data.lang or require("Module:languages").getByCode("mul")
	local combining = ulen(char) == 1 and require("Module:Unicode_data").is_combining(codepoint(char))

	-- Uppercase the decomposed form one UTF-8 sequence at a time, so that
	-- entries in `specials` can override the default uppercasing.
	local specials = {["ß"] = "ẞ", ["ͅ"] = "ͅ"}
	local upper = toNFD(char)
		:gsub("[%z\1-\127\194-\244][\128-\191]*", function(m) return specials[m] or m:uupper() end)
	upper = toNFC(upper)
	local standard_chars = lang:getStandardCharacters()
	-- FIXME: This should be able to handle non-atomic single characters (e.g. "Q̓").
	if char ~= upper and ulen(char) == 1 then
		-- We want uppercase characters; but unless we're careful, we run into an
		-- issue with dotless "ı", which has standard "I" as its uppercase
		-- equivalent. Hence the error is raised only when the language's standard
		-- characters do not cover the uppercase form.
		if standard_chars then
			local function err()
				error("Category titles should use uppercase characters: '" .. data.label .. "'", 2)
			end
			local code = lang:getCode()
			if code ~= "hi" and code ~= "lo" then
				if not umatch(standard_chars, upper) then
					err()
				end
			elseif not umatch(upper, "[" .. standard_chars .. "]") then
				err()
			end
		end
	end

	-- Compute description.
	local character = require("Module:links").full_link(
		{
			term = char,
			alt = combining and add_dotted_circle(char, true) or nil,
			lang = lang,
			tr = combining and "-" or nil,
		},
		"term"
	)
	-- If the letter has a lowercase form that's also not in the standard characters,
	-- show it. This time, it's "İ" that causes issues, because the lowercase
	-- equivalent is standard "i".
	-- Note that ulower("İ") has a bug where it outputs "i" with a combining dot, instead
	-- of plain "i", so this has to be accounted for.
	-- The extra parentheses drop gsub's second return value (the replacement
	-- count) so that only the string reaches ulower.
	local lower = ulower((char:gsub("İ", "I")))
	if lower ~= char and not (standard_chars and umatch(lower, "[" .. standard_chars .. "]")) then
		character = "upper case " .. character .. " or lower case " ..
			require("Module:links").full_link(
				{
					term = lower,
					lang = lang,
				},
				"term"
			)
	end
	if titlechar_is_desc then
		character = character .. " (" .. titlechar .. ")"
	end
	-- NOTE(review): as above, the leading space suggests a lost language-name
	-- placeholder; the string is kept as received.
	local description = " terms spelled with " .. character .. "."

	-- Set tagged character for displaytitle and breadcrumb.
	local tagged_titlechar = not titlechar_is_desc and
		require("Module:script utilities").tag_text(titlechar, lang, nil, "term") or nil
	local tagged_char = titlechar_is_desc and titlechar or
		require("Module:script utilities").tag_text(add_dotted_circle(char, combining), lang, nil, "term")
	local han = umatch(char, "^[" .. require("Module:scripts").getByCode("Hani").characters .. "]+$")
	-- Make the sortkey. Always use Hani-sortkey for Han characters, as this
	-- circumvents any reading-based sortkey methods.
	local sortkey = args.sort or han and require("Module:Hani-sortkey").makeSortKey(char) or lang:makeSortKey(char)
	-- Use the char as a fallback.
	if sortkey == "" then
		sortkey = char
	end

	-- NOTE(review): `parents` reconstructed as in the special-case branch above;
	-- confirm against the canonical module.
	return {
		description = description,
		additional = "Note that categories of the form LANG terms spelled with CHAR are intended for characters not " ..
			"part of the standard repertoire of a language (e.g. Cyrillic characters in English or Latin characters in Russian).",
		displaytitle = not titlechar_is_desc and " terms spelled with " .. tagged_titlechar or nil,
		parents = {{name = "terms by their individual characters", sort = sortkey}},
		breadcrumb = tagged_char,
		umbrella = {
			displaytitle = not titlechar_is_desc and "Terms spelled with " .. tagged_titlechar .. " by language" or nil,
			breadcrumb = tagged_char,
			parents = {{name = "Terms by their individual characters subcategories by language", sort = " " .. sortkey}},
		},
	}, true
end)

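-----------------------------------------------------------------------------
--                                                                         --
--                               RAW HANDLERS                              --
--                                                                         --
-----------------------------------------------------------------------------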

-- Special-cased categories that we allow, for Turkish letters.
-- The list is converted to a set keyed by full category name, so membership
-- can be tested with a plain table lookup.
local letter_cat_allow_list = require("Module:table/listToSet") {
	"İi",
}

-- Raw handler for letter categories named with an uppercase letter followed
-- by its lowercase counterpart (optionally followed by a colon and more text).
--
-- Returns nil when `data.category` does not fit that profile; otherwise
-- returns a table with a description and the parent category "Letters".
insert(raw_handlers, function(data)
	-- Only recognize cases consisting of an uppercase letter followed by the
	-- corresponding lowercase letter, either as the entire category name or
	-- followed by a colon. Cases that don't fit this profile (e.g. for Turkish)
	-- need to be handled separately; the allow list above covers the ones we
	-- accept here. Formerly this handler was much less restrictive and would
	-- fire on unrelated two-letter category names.
	-- (NOTE(review): the concrete example category names were missing from
	-- this comment in the source as received.)
	--
	-- The frontier pattern %f[:%z] requires the letter pair to be followed by
	-- a colon or by the end of the name.
	local upper, lower = umatch(data.category, "^(%u)(%l)%f[:%z]")
	if not upper or (not letter_cat_allow_list[data.category] and lower:uupper() ~= upper) then
		return nil
	end
	return {
		description = ('Languages that use the uppercase letter "%s" (lowercase equivalent "%s").'):format(upper, lower),
		parents = {"Letters"},
	}
end)

-- Module export: the three registries, under the keys RAW_CATEGORIES,
-- HANDLERS and RAW_HANDLERS.
return {
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
	RAW_HANDLERS = raw_handlers,
}