-- Module:User:Benwing2/category tree/poscatboiler/data/language varieties

-- Registries populated by the rest of this module and handed back in the table returned at the end of the file.
local raw_categories, raw_handlers = {}, {}

local m_languages = require("Module:languages") local m_table = require("Module:table") local parse_utilities_module = "Module:parse utilities" local pattern_utilities_module = "Module:pattern utilities" local labels_module = "Module:labels" local labels_utilities_module = "Module:labels/utilities" local rsplit = mw.text.split

-- Record a tracking hit for `page` under the "poscatboiler/language-varieties/" prefix; see
-- Special:WhatLinksHere/Wiktionary:Tracking/poscatboiler/language-varieties/PAGE.
-- Returns whatever the tracking module returns.
local function track(page)
	return require("Module:debug/track")("poscatboiler/language-varieties/" .. page)
end

-- Thin wrapper around `pattern_escape` in the pattern-utilities module (presumably escapes Lua-pattern magic
-- characters so that `pattern` can be embedded literally in a larger pattern). The module is loaded on first use.
local function pattern_escape(pattern)
	local m_pattern_utilities = require(pattern_utilities_module)
	return m_pattern_utilities.pattern_escape(pattern)
end

-- This module handles lect/variety categories of all sorts, e.g. regional lect categories such as
-- Category:American English and Category:Provençal; temporal lect categories such as
-- Category:Early Modern English; sociolect categories such as Category:Polari; and umbrella categories of the
-- form e.g. Category:Varieties of English and Category:Regional French.

-- FIXME: Eliminate the word "dialect" here and in the parameter in favor of "lect" or "variety".

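-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------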

-- Umbrella category sitting above every kind of variety category (regional, temporal, sociolectal, ...).
raw_categories["Language varieties"] = {
	description = "Categories that group terms in varieties of various languages (regional, temporal, sociolectal, etc.).",
	additional = "",
	parents = {"Fundamental"},
}

-- Umbrella category for regional varieties only; it is itself filed under "Language varieties".
raw_categories["Regionalisms"] = {
	description = "Categories that group terms in regional varieties of various languages.",
	additional = "",
	parents = {"Fundamental", "Language varieties"},
}

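-----------------------------------------------------------------------------
--                                                                         --
--                                RAW HANDLERS                             --
--                                                                         --
-----------------------------------------------------------------------------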

-- Split `term` into a list on commas. When no comma is followed by whitespace a plain split on "," is enough;
-- otherwise the work is handed to `split_on_comma` in the parse-utilities module (loaded only in that case).
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(parse_utilities_module).split_on_comma(term)
end

-- Return `text` with its first character uppercased, using the `ucfirst` method of the wiki's content-language
-- object. `mw.getContentLanguage` is a function and must be called to obtain that object.
local function ucfirst(text)
	return mw.getContentLanguage():ucfirst(text)
end

-- Return `text` with its first character lowercased, using the `lcfirst` method of the wiki's content-language
-- object. `mw.getContentLanguage` is a function and must be called to obtain that object.
local function lcfirst(text)
	return mw.getContentLanguage():lcfirst(text)
end

-- Handle categories such as Category:Varieties of French and Category:Varieties of Ancient Greek.
-- Returns a settings table when the text after "Varieties of " is the canonical name of a known language;
-- otherwise returns nothing so that other handlers can try.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Varieties of (.*)$")
	if langname then
		local lang = require("Module:languages").getByCanonicalName(langname)
		if lang then
			return {
				lang = lang:getCode(),
				description = "Categories containing terms in varieties of " .. lang:makeCategoryLink() ..
					" (regional, temporal, sociolectal, etc.).",
				parents = {
					-- First parent is the language's own category. An empty string here would name no category.
					lang:getCategoryName(),
					{name = "Language varieties", sort = langname},
				},
				breadcrumb = "Varieties",
			}
		end
	end
end)

-- Handle categories such as Category:Regional French and Category:Regional Ancient Greek.
-- Returns a settings table when the text after "Regional " is the canonical name of a known language;
-- otherwise returns nothing so that other handlers can try.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Regional (.*)$")
	if langname then
		local lang = require("Module:languages").getByCanonicalName(langname)
		if lang then
			return {
				lang = lang:getCode(),
				description = "Categories containing terms in regional varieties of " .. lang:makeCategoryLink() .. ".",
				additional = "This category sometimes also directly contains terms that are uncategorized regionalisms: such terms should be recategorized by the particular regional variety they belong to, or categorized as dialectal.",
				parents = {
					-- The matching "Varieties of LANG" category. A bare "Varieties of " would not name any
					-- category, so the language name is appended explicitly.
					"Varieties of " .. langname,
					{name = "Regionalisms", sort = langname},
				},
				breadcrumb = "Regional",
			}
		end
	end
end)

-- Fancy version of ine (if-not-empty). Converts empty string to nil, but also strips leading/trailing space.
-- A nil or false `arg` yields nil; otherwise the trimmed string is returned unless it is empty.
local function ine(arg)
	if not arg then
		return nil
	end
	arg = mw.text.trim(arg)
	if arg == "" then
		return nil
	end
	return arg
end

-- Get the full language to return in the settings: "und" when `lang` is a family, otherwise the code returned by
-- the object's `getFullCode` method.
local function get_returnable_lang(lang)
	if lang:hasType("family") then
		return "und"
	end
	return lang:getFullCode()
end

-- Try to figure out the region (used as the default breadcrumb and region description) from the language. If the
-- language name is an etymology-only language, try to derive a region based on a parent etymology-only or full
-- language. For example, if the pagename is 'Category:British English', the language is 'en-GB' (British English)
-- and the same as the pagename, but we'd like to return a region 'British'. This is also called in cases where the
-- language is explicitly given but we need to infer the region from the parent language; e.g.
-- Category:Lucerne Alemannic German is a type of High Alemannic German but we want to infer 'Lucerne' based on
-- the parent 'Alemannic German'. If this doesn't work and the language name has a space in it, we try using
-- progressively smaller suffixes of the language. For example, for Category:Walser German, the language is
-- 'wae' (Walser German), but the parent is 'Highest Alemannic German', whose parent is 'Alemannic German' (a full
-- language), and just "German" is nowhere in the parent-child relationships but found as a suffix in the parent
-- language. Another such case is with Category:Ionic Greek, whose parent is 'Ancient Greek'.
--
-- Returns the inferred region string, or nil if nothing matched.
local function infer_region_from_lang(pagename, lang)
	local langname = lang:getCanonicalName()
	local lang_to_check = lang
	-- If the language name is the same as the page name, we need to start with the parent; otherwise we would
	-- always match against a suffix, which is not what we want.
	if ucfirst(langname) == pagename then
		lang_to_check = lang_to_check:getParent()
	end
	-- First check against the language name and progressively smaller suffixes; then repeat for any parents (of
	-- etymology languages).
	while lang_to_check do
		local suffix = lang_to_check:getCanonicalName()
		while suffix do
			-- `region` is deliberately local so that nothing leaks into the global environment.
			local region = pagename:match("^(.*) " .. pattern_escape(suffix) .. "$")
			if region then
				return region
			end
			-- Drop the leftmost word; yields nil (ending the loop) once only one word is left.
			suffix = suffix:match("^.- (.*)$")
		end
		lang_to_check = lang_to_check:getParent()
	end

	return nil
end

-- Modeled after splitLabelLang in Module:auto cat. Try to split off a maximally long language (full or
-- etymology-only) on the right, and return the resulting language object and the region preceding it. We need to
-- check the maximally long language because of cases like 'English' vs 'Middle English' and 'Chinese Pidgin English';
-- Category:Late Middle English should split as 'Late' and 'Middle English', not as 'Late Middle' and 'English'.
--
-- Returns `lang, region`; `lang` is nil if no suffix of `pagename` names a language, and `region` is nil if no
-- region could be split off or inferred.
local function split_region_lang(pagename)
	local getByCanonicalName = require("Module:languages").getByCanonicalName
	local lang
	local region

	-- Try the entire title as a language; if not, chop off a word on the left and repeat.
	local words = mw.text.split(pagename, " ")
	for i = 1, #words do
		local canonical_name = table.concat(words, " ", i, #words)
		lang = getByCanonicalName(canonical_name, nil, "allow etym", "allow family")
		if not lang then
			-- Some languages have lowercase-initial names e.g. 'the BMAC substrate', but the category begins with an
			-- uppercase letter.
			lang = getByCanonicalName(lcfirst(canonical_name), nil, "allow etym", "allow family")
		end
		if lang then
			if i > 1 then
				region = table.concat(words, " ", 1, i - 1)
			end
			break
		end
	end

	if not region and lang then
		-- The pagename is the same as a language name. Try to infer the region from the parent language(s).
		region = infer_region_from_lang(pagename, lang)
	end

	return lang, region
end

-- Return the default parent cat for the given language and category. If the language and category are the same, we're
-- dealing with the overall cat for an etymology-only language, so use the category of the parent language; otherwise
-- we're dealing with a subcategory of a regular or etymology-only language (e.g. Category:Issime Walser, a
-- subcategory of Category:Walser German), so use the language's category itself. If the resulting language is an
-- etymology-only language or a family, the parent category is that language or family's category, which for
-- etymology-only languages is named the same as the etymology-only language, and for families is named
-- "FAMILY languages"; otherwise, use "Regional LANG" as the category unless `noreg` is given, in which case we use
-- "Varieties of LANG".
--
-- Languages whose code starts with "qsb-" (substrates) always get "Substrate languages". Throws an error if the
-- category has the same name as a language that has no parent.
local function get_default_parent_cat_from_category(category, lang, noreg)
	if lang:getCode():find("^qsb%-") then
		-- substrate
		return "Substrate languages"
	end

	local lang_for_cat = lang
	if ucfirst(lang:getCanonicalName()) == category then
		lang_for_cat = lang:getParent()
		if not lang_for_cat then
			error(("Category '%s' has a name the same as a full language; you probably need to explicitly specify a different language using |lang="):format(category))
		end
	end

	if lang_for_cat:hasType("etymology-only") or lang_for_cat:hasType("family") then
		return lang_for_cat:getCategoryName()
	elseif noreg then
		return "Varieties of " .. lang_for_cat:getCanonicalName()
	else
		return "Regional " .. lang_for_cat:getCanonicalName()
	end
end

-- Given a category (without the "Category:" prefix), look up the page defining the category, find the call to the
-- "auto cat" template (if any), and return a table of its arguments. If the category page doesn't exist or doesn't
-- have such an invocation, return nil.
local function scrape_category_for_auto_cat_args(cat)
	local cat_page = mw.title.new("Category:" .. cat)
	if not cat_page then
		return nil
	end
	local contents = cat_page:getContent()
	if not contents then
		return nil
	end
	for name, args in require("Module:template parser").findTemplates(contents) do
		-- Per the original author's note, the template parser handles redirects and canonicalizes them, so
		-- invocations through a redirect to "auto cat" are found as well.
		if name == "auto cat" then
			return args
		end
	end
	return nil
end

-- Find the labels that categorize into `category`. Only categories specified using the `regional_categories` and
-- `plain_categories` fields will be returned. `lang` is the language object to use when looking up categories specified
-- using the `regional_categories` field, which append the language onto the specified category prefix. If `lang` is a
-- family or is omitted, no categories specified using `regional_categories` will be returned. Lang-specific modules for
-- all languages will be checked for matching labels that specify `category` as their category using `plain_categories`;
-- this helps e.g. with varieties of Chinese, whose labels are found in Module:labels/data/lang/zh. The return value
-- is a table in the same format as returned by `find_labels_for_category` in Module:labels/utilities.
local function find_labels_for_category(category, lang)
	local m_labels_utilities = require(labels_utilities_module)
	local regional_cat_labels
	local full_lang
	if lang and lang:hasType("language") then
		full_lang = lang:getFull()
		-- Regional lookups need the part of the category that precedes the full language's name.
		local regional_component = category:match("^(.-) " .. pattern_escape(full_lang:getCanonicalName()) .. "$")
		if regional_component then
			regional_cat_labels = m_labels_utilities.find_labels_for_category(regional_component,
				"regional", full_lang)
		end
	end
	local plain_cat_labels = m_labels_utilities.find_labels_for_category(category, "plain", full_lang,
		"check all langs")

	if regional_cat_labels and plain_cat_labels then
		-- Merge into the regional table; on a key clash the plain-category entry wins.
		for k, v in pairs(plain_cat_labels) do
			regional_cat_labels[k] = v
		end
		return regional_cat_labels
	end

	return regional_cat_labels or plain_cat_labels
end

-- Find the labels for category `category` and language object `lang`. Then filter them down to those that are specified
-- using a lang-specific module and sort them for use in checking properties such as parent and description. We filter
-- down to only lang-specific labels because those specified in a general module (especially
-- Module:labels/data/regional) won't be able to have proper descriptions and especially parents, which tend to be
-- language-specific. The sort order prioritizes labels that match the category exactly (either through the canonical
-- version or any alias); this is followed by labels that are a prefix of the category (again, either through the
-- canonical version or any alias), so that labels whose categories are specified using `regional_categories` are
-- prioritized. Any other labels are sorted last, so that e.g. if both the label "Alberta" and "Canada" (with alias
-- "Canadian") for lang=en categorize into Category:Canadian English, we prefer the label "Canada". For cases where
-- e.g. both labels match the category as prefixes, ties are broken by prioritizing the labels found in the
-- lang-specific module whose language matches `lang`.
--
-- Returns two items. The first is a table of all labels categorizing into `category` (subject to the provisos described
-- in `find_labels_for_category`), in the same format as returned by `find_labels_for_category` in
-- Module:labels/utilities. (Specifically, the values are objects containing all relevant information on a given
-- label, and the keys are less important.) The second is a list of label objects after filtering and sorting, in the
-- same format as the values in the `all_labels` table. The first return value will be nil if no labels could be found
-- categorizing into `category`, and the second return value will be nil if no labels remain after filtering.
local function get_sorted_labels(category, lang)
	local all_labels = find_labels_for_category(category, lang)
	if not all_labels then
		return nil
	end

	local m_labels = require(labels_module)
	local lang_specific_pattern = "^" .. pattern_escape(m_labels.lang_specific_data_modules_prefix)
	local sorted_labels = {}
	for _, labelobj in pairs(all_labels) do
		if labelobj.module:find(lang_specific_pattern) then
			table.insert(sorted_labels, labelobj)
		end
	end

	-- True if the label's canonical form or one of its aliases is exactly `category`.
	local function matches_exactly(labelobj)
		if labelobj.canonical == category then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if alias == category then
				return true
			end
		end
		return false
	end

	-- True if `category` begins with the label's canonical form or one of its aliases, followed by a space.
	local function matches_as_prefix(labelobj)
		if category:find("^" .. pattern_escape(labelobj.canonical) .. " ") then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if category:find("^" .. pattern_escape(alias) .. " ") then
				return true
			end
		end
		return false
	end

	-- True if the label comes from the same full language as `lang`; always a real boolean.
	local function matches_lang(labelobj)
		return lang and labelobj.lang:getFullCode() == lang:getFullCode() or false
	end

	-- Comparator for table.sort. It must be a strict ordering: when two labels tie on every criterion it has to
	-- return false for both argument orders, otherwise table.sort may raise "invalid order function for sorting".
	local function sort_labelobj(a, b)
		local a_matches_exactly = matches_exactly(a)
		local b_matches_exactly = matches_exactly(b)
		if a_matches_exactly ~= b_matches_exactly then
			return a_matches_exactly
		end

		if not a_matches_exactly then
			-- Neither matches exactly; fall back to prefix matching.
			local a_matches_as_prefix = matches_as_prefix(a)
			local b_matches_as_prefix = matches_as_prefix(b)
			if a_matches_as_prefix ~= b_matches_as_prefix then
				return a_matches_as_prefix
			end
		end

		-- Same rank so far: prefer the label whose language matches `lang`, but only when the other one doesn't.
		return matches_lang(a) and not matches_lang(b)
	end

	table.sort(sorted_labels, sort_labelobj)
	if #sorted_labels > 0 then
		return all_labels, sorted_labels
	else
		return all_labels, nil
	end
end

-- Find the categories (only of type `regional_categories` and `plain_categories`) that label `label` categorizes into.
-- Return value is nil if the label couldn't be located at all, otherwise a list of categories (which may be empty).
-- Plain categories come first in the returned list, followed by regional categories.
local function get_categories_for_label(label, lang)
	local m_labels = require(labels_module)
	local labret = m_labels.get_label_info { label = label, lang = lang }
	if not labret then
		return nil
	end
	local canonical = labret.canonical or label
	local categories = m_labels.fetch_categories(canonical, labret.data, lang, nil, nil,
		{["plain_categories"] = true})
	local reg_cats = m_labels.fetch_categories(canonical, labret.data, lang, nil, nil,
		{["regional_categories"] = true})
	for _, cat in ipairs(reg_cats) do
		table.insert(categories, cat)
	end
	return categories
end

-- Walk `sorted_labels` in order and use the first label whose data has a `parent` field to determine the parent
-- category. Returns two values, `parent_cat, labobj`:
--   * `nil, nil` if no label specifies a parent;
--   * `nil, labobj` if the label says `parent = true` (use the default parent) or if the parent label categorizes
--     into nothing;
--   * otherwise the first category of the parent label, plus the label object that named it.
-- Throws an error if the named parent label cannot be located. `category` is used only in that error message.
local function get_default_parent_cat_from_sorted_labels(sorted_labels, category)
	for _, labobj in ipairs(sorted_labels) do
		local parent = labobj.labdata.parent
		if parent then
			if parent == true then
				-- use default parent
				return nil, labobj
			end
			local cats = get_categories_for_label(parent, labobj.lang)
			if not cats then
				error(("Label '%s' for category '%s' (defined in module %s) specified parent label '%s' but that parent label couldn't be located"):format(
					labobj.canonical, category, labobj.module, parent))
			end
			if #cats > 0 then
				return cats[1], labobj
			end
			-- FIXME: If the parent doesn't specify any categories, should we try the next parent or fall back
			-- to the parent determined through get_default_parent_cat_from_category (which is what we currently
			-- do)?
			return nil, labobj
		end
	end
	return nil, nil
end

-- To avoid the need to scrape every category, we keep a list of those categories that satisfy the following:
-- (a) They are a dialect category;
-- (b) They occur as the parent category of some other dialect category;
-- (c) They are not the name of a known language (including etymology-only languages) or contain a known language as a
--     suffix.
-- Condition (c) is necessary because we automatically scrape categories that have a language suffix, since they're
-- likely to be dialect categories.
local dialect_parent_cats_to_scrape = m_table.listToSet {
	"Assyrian",
	"Babylonian",
	"Limburgan-Ripuarian transitional dialects",
	"North Sea Germanic",
	"Ripuarian Franconian",
}

-- Handle dialect categories such as Category:New Zealand English, Category:Late Middle English,
-- Category:Arbëresh Albanian, Category:Provençal or arbitrarily-named categories like
-- Category:Issime Walser. We currently require that dialect=1 is specified to the "auto cat" call to avoid
-- overfiring. However, if called from inside, we are processing the breadcrumb for the parent (or conceivably the
-- child) of a dialect category, and won't have any params set, so we can't rely on dialect=1. In that case, only fire
-- if the category is or ends in the name of a full or etymology-only language, and scrape the category's "auto cat"
-- call to get the appropriate params. This means that nonstandardly-named categories like
-- Category:Issime Walser can't be parents of other dialect categories. To work around this, either we have to
-- relax the code below to operate on all raw categories (not necessarily a good idea), or we rename the
-- nonstandardly-named categories (e.g. in the case above, to Category:Issime Walser German, since Walser German
-- is a recognized etymology-only language).
--
-- NOTE: We are able to handle categories for etymology-only families (currently only Category:Middle Iranian and
-- Category:Old Iranian) and for etymology-only substrate languages (e.g. Category:The BMAC substrate).
-- There is some special "family" code for the former.
--
-- Parameters:
--   category: category name without the "Category:" prefix.
--   raw_args: unprocessed template arguments; ignored (and replaced by scraped arguments) when
--     `called_from_inside` is set.
--   called_from_inside: true when invoked while computing a breadcrumb trail rather than for the page itself.
-- Returns `settings, lect_type`, or nil if this handler does not apply to `category`.
local function dialect_handler(category, raw_args, called_from_inside)
	-- Try to figure out if this variety is extinct or reconstructed, if type= not given.
	local function determine_lect_type(lang, default_parent_cat)
		if category:find("^Proto%-") or lang:getCanonicalName():find("^Proto%-") or lang:hasType("reconstructed") then
			-- Is it reconstructed?
			return "reconstructed"
		end
		if lang:getCode():find("^qsb%-") then
			-- Substrate.
			return "unattested"
		end
		if lang:hasType("full") then
			-- If a full language, scrape the "auto cat" call and check for extinct=1.
			local parent_args = scrape_category_for_auto_cat_args(lang:getCategoryName())
			if parent_args and ine(parent_args.extinct) and require("Module:yesno")(parent_args.extinct, false) then
				return "extinct"
			end
		end
		-- Otherwise, call the dialect handler recursively for the parent category. This is correct e.g. for
		-- things like subvarieties of Classical Persian, where the lang itself (Persian) isn't extinct but the
		-- parent category refers to an extinct variety. If the dialect handler fails to return a type, it's because
		-- the parent category doesn't exist or isn't defined using "auto cat", and doesn't have a language as a
		-- suffix. In that case, if we're dealing with an etymology-only language, check the parent language. Finally,
		-- fall back to returning "extant" if all else fails.
		if default_parent_cat then
			-- Both results are locals so that the discarded first value does not become a global `_`.
			local _, parent_type = dialect_handler(default_parent_cat, nil, true)
			if parent_type then
				return parent_type
			end
		end
		local parent_lang = lang:getParent()
		if parent_lang then
			return determine_lect_type(parent_lang, nil)
		end
		return "extant"
	end

	if called_from_inside then
		-- Avoid infinite loops from wrongly processing non-lect categories. Further down we reject categories whose
		-- scraped "auto cat" call doesn't say dialect=1, but we still need the following in case of non-existent
		-- categories we're being asked to process (e.g. Category:User bcc -> Category:Southern Balochi
		-- (nonexistent) -> Category:Regional Baluchi (nonexistent), which causes an infinite loop without the
		-- check below).
		if category:find("^Regional ") or category:find("^Varieties of ") or category:find("^Rhymes:") then
			return nil
		end

		-- If called from inside we won't have any params available. See comment above about this. We scrape the
		-- category page's "auto cat" call to get the appropriate params, and if that fails, we currently fall back
		-- to defaults based on the name of the category. Since the call from inside is only to get the parent category
		-- and breadcrumb, these defaults actually work in most cases but not all; e.g. in the chain
		-- Category:Regional Yoruba -> Category:Central Yoruba -> Category:Ekiti Yoruba ->
		-- Category:Akurẹ Yoruba, if we are forced to use default values, we will produce the right parent for
		-- Category:Central Yoruba but not for Category:Ekiti Yoruba, where the default parent would be
		-- Category:Regional Yoruba instead of the correct Category:Central Yoruba.
		local lang, breadcrumb = split_region_lang(category)
		if not lang and not dialect_parent_cats_to_scrape[category] then
			return nil
		end
		raw_args = scrape_category_for_auto_cat_args(category)
		if raw_args and not ine(raw_args.dialect) then
			-- We are scraping something like Category:American Sign Language that ends in a valid language but is
			-- not a dialect.
			return nil
		end
		if not raw_args then
			if not lang then
				-- We were instructed to scrape by virtue of `dialect_parent_cats_to_scrape`, but couldn't scrape
				-- anything.
				return nil
			end
			-- If we can't parse the scraped spec, return default values. This helps e.g. in converting
			-- from the old template and generally when adding new varieties.
			track("dialect")
			local default_parent_cat
			local _, sorted_labels = get_sorted_labels(category, lang)
			if sorted_labels then
				-- Only the first return value (the parent category) is needed here.
				default_parent_cat = get_default_parent_cat_from_sorted_labels(sorted_labels, category)
			end
			if not default_parent_cat then
				default_parent_cat = get_default_parent_cat_from_category(category, lang)
			end
			-- NOTE: When called from inside, the description doesn't matter; nor do any parents other than the
			-- first. This is because called_from_inside is only set when computing the breadcrumb trail, which
			-- only needs the language, first parent and breadcrumb.
			return {
				-- FIXME, allow etymological codes here
				lang = get_returnable_lang(lang),
				description = "Foo",
				parents = {default_parent_cat},
				breadcrumb = breadcrumb or lang:getCanonicalName(),
				umbrella = false,
				can_be_empty = true,
			}, determine_lect_type(lang, default_parent_cat)
		end
	end

	if not called_from_inside and not ine(raw_args.dialect) then
		return nil
	end

	-------------------- 1. Process parameters. --------------------

	local params = {
		[1] = {},
		dialect = {type = "boolean"},
		lang = {},
		verb = {},
		prep = {},
		def = {},
		fulldef = {},
		addl = {},
		nolink = {type = "boolean"},
		noreg = {type = "boolean"}, -- don't make the default parent be "Regional LANG"; instead, "Varieties of LANG"
		type = {}, -- "extinct", "extant", "reconstructed", "unattested", "constructed"
		cat = {},
		othercat = {}, -- comma-separated
		country = {}, -- comma-separated
		wp = {},
		wikidata = {},
		breadcrumb = {},
		pagename = {}, -- for testing or demonstration
	}

	local args = require("Module:parameters").process(raw_args, params)

	local allowed_type_values = {"extinct", "extant", "reconstructed", "unattested", "constructed"}
	if args.type and not m_table.contains(allowed_type_values, args.type) then
		error(("Unrecognized value '%s' for type=; should be one of %s"):format(
			args.type, table.concat(allowed_type_values, ", ")))
	end

	-------------------- 2. Determine the breadcrumb. --------------------

	-- Also initialize regiondesc from the category name. It may be overridden later.

	local lang, breadcrumb, regiondesc, langname
	category = args.pagename or category
	if not args.lang then
		lang, breadcrumb = split_region_lang(category)
		if not lang then
			error(("lang= not given and unable to parse language from category '%s'"):format(category))
		end
		langname = lang:getCanonicalName()
		regiondesc = breadcrumb
	else
		lang = m_languages.getByCode(args.lang, "lang", "allow etym")
		langname = lang:getCanonicalName()
		-- Compare against the language name. Comparing `category` with its own capitalized form would be true for
		-- every capitalized category and would make the region inference below unreachable.
		if category == ucfirst(langname) then
			-- breadcrumb and regiondesc should stay nil; breadcrumb will get `category` as a default, and the lack of
			-- regiondesc will cause an error to be thrown unless the user gave it explicitly or specified def=.
		else
			breadcrumb = category:match("^(.*) " .. pattern_escape(langname) .. "$")
			if not breadcrumb then
				-- Try to infer the region from the parent language(s).
				breadcrumb = infer_region_from_lang(category, lang)
			end
			regiondesc = breadcrumb
		end
	end

	-- If no breadcrumb, this often happens when the langname and category are the same (happens only with etym-only
	-- languages), and the parent category is set below to the full parent, so the breadcrumb should show the
	-- language name (or equivalently, the category). If the langname and category are different, we should fall back to
	-- the category. E.g. for Singlish, lang=en is specified and we can't infer a breadcrumb because the dialect name
	-- doesn't end in "English"; in this case we want the breadcrumb to show "Singlish".
	breadcrumb = args.breadcrumb or breadcrumb or category

	-------------------- 3. Initialize `additional` with user-specified additional text. --------------------

	local additional = args.addl

	-- Append a paragraph to `additional`; a nil `addl_text` is ignored.
	local function append_addl(addl_text)
		if not addl_text then
			return
		end
		if additional then
			additional = additional .. "\n\n" .. addl_text
		else
			additional = addl_text
		end
	end

	-------------------- 4. Augment `additional` with information about etymology-only codes. --------------------

	local parents = {}
	local langname_for_desc
	local etymcodes = {}
	-- The format string must contain %s, otherwise the code never appears in the output.
	-- NOTE(review): the <code> wrapper is a reconstruction of markup that was lost from this file; confirm the
	-- intended formatting.
	local function make_code(code)
		return ("<code>%s</code>"):format(code)
	end
	if lang:hasType("etymology-only") and ucfirst(langname) == category then
		langname_for_desc = lang:getParentName()
		local langcode = lang:getCode()
		table.insert(etymcodes, make_code(langcode))
		-- Find all alias codes for the etymology-only language.
		-- FIXME: There should be a better/easier way of doing this.
		local ety_code_to_name = mw.loadData("Module:etymology languages/code to canonical name")
		for code, canon_name in pairs(ety_code_to_name) do
			if canon_name == langname and code ~= langcode then
				table.insert(etymcodes, make_code(code))
			end
		end
		local addl_etym_codes = ("Etymology-only language code: %s."):format(
			m_table.serialCommaJoin(etymcodes, {conj = "or"}))
		append_addl(addl_etym_codes)
	else
		langname_for_desc = langname
	end

	-------------------- 5. Determine labels categorizing into this category. --------------------

	-- In the process we also add text to `additional` about these labels.

	local all_labels, sorted_labels = get_sorted_labels(category, lang)

	if all_labels then
		-- The utilities module and the full language are not otherwise in scope in this function, so resolve them
		-- here. The full language is derived the same way find_labels_for_category derives it.
		-- NOTE(review): confirm that format_labels_categorizing expects the full language as its third argument.
		local full_lang = lang:hasType("language") and lang:getFull() or nil
		append_addl(require(labels_utilities_module).format_labels_categorizing(all_labels, nil, full_lang))
	end

	-------------------- 6. Determine parent categories. --------------------

	local default_parent_cat = args.cat
	local label_with_parent

	-- Look up a property, preferring the template argument over the data of the label that supplied the parent.
	local function getprop(prop)
		return args[prop] or label_with_parent and label_with_parent.labdata[prop]
	end

	if not default_parent_cat and sorted_labels then
		default_parent_cat, label_with_parent = get_default_parent_cat_from_sorted_labels(sorted_labels, category)
	end
	if not default_parent_cat then
		default_parent_cat = get_default_parent_cat_from_category(category, lang, getprop("noreg"))
	end

	table.insert(parents, default_parent_cat)

	local othercat = getprop("othercat")
	if othercat and type(othercat) == "string" then
		othercat = split_on_comma(othercat)
	end
	if othercat then
		for _, cat in ipairs(othercat) do
			if not cat:find("^Category:") then
				cat = "Category:" .. cat
			end
			table.insert(parents, cat)
		end
	end

	local countries = getprop("country")
	if countries and type(countries) == "string" then
		countries = split_on_comma(countries)
	end
	-- Remember whether countries were really specified, before the region description is substituted as a
	-- default below; the <country> placeholder check in step 7 depends on this.
	local countries_given = countries ~= nil

	if args[1] then
		regiondesc = args[1]
	else
		local regionprop = getprop("region")
		if regionprop then
			regiondesc = regionprop
		end
	end

	countries = countries or {regiondesc}
	for _, country in ipairs(countries) do
		-- Skip anything containing HTML or an = sign.
		if not country:find("[<=]") then
			country = require("Module:links").remove_links(country)
			local cat = "Category:Languages of " .. country
			local cat_page = mw.title.new(cat)
			if cat_page and cat_page.exists then
				table.insert(parents, cat)
			end
		end
	end

	-- Try to figure out if this variety is extinct or reconstructed, if type= not given.
	local lect_type = getprop("type")
	if not lect_type then
		lect_type = determine_lect_type(lang, default_parent_cat)
	end
	-- Put a paragraph in front of whatever `additional` already holds.
	local function prefix_addl(addl_text)
		if additional then
			additional = addl_text .. "\n\n" .. additional
		else
			additional = addl_text
		end
	end
	if lect_type == "extinct" then
		prefix_addl("This language variety is extinct.")
		table.insert(parents, "Category:All extinct languages")
	elseif lect_type == "reconstructed" then
		prefix_addl("This language variety is reconstructed.")
		table.insert(parents, "Category:Reconstructed languages")
	elseif lect_type == "unattested" then
		prefix_addl("This language variety is unattested.")
		table.insert(parents, "Category:Unattested languages")
	elseif lect_type == "constructed" then
		prefix_addl("This language variety is constructed.")
		table.insert(parents, "Category:Constructed languages")
	end

	-------------------- 7. Compute `description`. --------------------

	local description

	local fulldef = getprop("fulldef")
	if fulldef then
		description = fulldef .. "."
	end

	if not description then
		local def = getprop("def")
		if def then
			description = ("Terms or senses in %s."):format(def)
		end
	end

	if not description then
		if not regiondesc then
			-- We need regiondesc for the description unless def= or fulldef= is given, which overrides the part that
			-- needs it.
			error(("1= (region) not given and unable to infer region from category '%s' given language name '%s'"):
				format(category, langname))
		end

		local lang_en = m_languages.getByCode("en", true)

		local linked_regiondesc = regiondesc
		if linked_regiondesc:find("<country>") then
			-- The region description carries a <country> placeholder that is replaced by the linked countries.
			if not countries_given then
				error(("Can't specify <country> in region description '%s' when country= not given"):format(
					linked_regiondesc))
			end
			-- Link the countries individually before calling serialCommaJoin, which inserts HTML.
			local linked_countries = {}
			for _, country in ipairs(countries) do
				-- don't try to link if HTML or = sign found in country
				if not country:find("[<=]") then
					country = require("Module:links").full_link { lang = lang_en, term = country }
				end
				table.insert(linked_countries, country)
			end
			linked_countries = m_table.serialCommaJoin(linked_countries)
			linked_regiondesc = linked_regiondesc:gsub("<country>",
				require(pattern_utilities_module).replacement_escape(linked_countries))
		elseif not getprop("nolink") and not linked_regiondesc:find("[<=]") then
			-- Even if nolink not given, don't try to link if HTML or = sign found in linked_regiondesc, otherwise
			-- we're likely to get an error.
			linked_regiondesc = require("Module:links").full_link { lang = lang_en, term = linked_regiondesc }
		end
		local verb = getprop("verb") or "spoken"
		local prep = getprop("prep")

		-- prep=- suppresses the preposition entirely; otherwise it defaults to "in".
		description = ("Terms or senses in %s as %s%s %s."):format(
			langname_for_desc, verb, prep == "-" and "" or " " .. (prep or "in"), linked_regiondesc)
	end

	-------------------- 8. Compute the Wikipedia articles that go into `topright`. --------------------

	local topright_parts = {}
	-- Insert Wikipedia article `article` for Wikimedia language `wmcode` into `topright_parts`, avoiding duplication.
	-- The two pieces are spliced into a wikipedia-box template call; the language piece is dropped for English and
	-- the article piece is dropped when the article has the same name as the category.
	-- NOTE(review): the "{{wikipedia%s%s}}" wrapper is a reconstruction of markup that was lost from this file.
	local function insert_wikipedia_article(wmcode, article)
		m_table.insertIfNot(topright_parts, ("{{wikipedia%s%s}}"):format(
			wmcode == "en" and "" or "|lang=" .. wmcode,
			article == category and "" or "|" .. article
		))
	end

	-- Process a list of Wikipedia specs. Each spec is `true` or "+" (use `default`), "-" (none), a yes/no-like
	-- string, or an article name optionally prefixed by a Wikimedia language code and a colon.
	local function insert_wikipedia_articles_for_wikipedia_specs(specs, default)
		for _, article in ipairs(specs) do
			local foreign_wiki
			if article == true then
				article = default
			else
				if article:find(":[^ ]") then
					local actual_article
					foreign_wiki, actual_article = article:match("^([a-z][a-z][a-z-]*):([^ ].*)$")
					if actual_article then
						article = actual_article
					end
				end
				if article == "+" then
					article = default
				elseif article == "-" then
					article = nil
				else
					article = require("Module:yesno")(article, article)
					if article == true then
						article = default
					end
				end
			end
			if article then
				insert_wikipedia_article(foreign_wiki or "en", article)
			end
		end
	end

	-- Process a list of Wikidata IDs, each optionally prefixed by a Wikimedia language code and a colon. IDs without
	-- a prefix are looked up in every Wikipedia deemed relevant for `wikidata_lang`.
	local function insert_wikipedia_articles_for_wikidata_specs(specs, wikidata_lang)
		if not mw.wikibase then
			-- Report the specs themselves; args.wikidata may be nil when the IDs come from label data.
			error(("Unable to retrieve data from Wikidata ID's '%s'; `mw.wikibase` not defined"):format(
				table.concat(specs, ",")))
		end
		local wikipedia_langs =
			require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(wikidata_lang)
		local ids_without_wmcodes = {}
		local ids_with_wmcodes = {}
		for _, id in ipairs(specs) do
			if id:find(":") then
				table.insert(ids_with_wmcodes, id)
			else
				table.insert(ids_without_wmcodes, id)
			end
		end
		for _, wmcode in ipairs(wikipedia_langs) do
			for _, id in ipairs(ids_without_wmcodes) do
				local article = mw.wikibase.sitelink(id, wmcode .. "wiki")
				if article then
					insert_wikipedia_article(wmcode, article)
				end
			end
		end
		for _, id in ipairs(ids_with_wmcodes) do
			local wmcode, wikidata_id = id:match("^(.-):(.*)$")
			local article = mw.wikibase.sitelink(wikidata_id, wmcode .. "wiki")
			if article then
				insert_wikipedia_article(wmcode, article)
			end
		end
	end

	if args.wp or args.wikidata then
		if args.wp then
			insert_wikipedia_articles_for_wikipedia_specs(split_on_comma(args.wp), category)
		end
		if args.wikidata then
			insert_wikipedia_articles_for_wikidata_specs(rsplit(args.wikidata, "%s*,%s*"), lang)
		end
	elseif category == ucfirst(langname) then
		-- The category is the language's own category, so ask the language object for its Wikipedia articles.
		-- No `pagename` variable exists in this function; `category` already reflects any pagename= override.
		local wikipedia_langs = require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(lang)
		for _, wmcode in ipairs(wikipedia_langs) do
			local article = lang:getWikipediaArticle("no category fallback", wmcode .. "wiki")
			if article then
				insert_wikipedia_article(wmcode, article)
			end
		end
	end
	if #topright_parts == 0 and sorted_labels then
		-- Nothing found so far; fall back to Wikipedia/Wikidata specs attached to the labels.
		for _, labobj in pairs(all_labels) do
			local wp_specs = labobj.labdata.Wikipedia
			if wp_specs then
				if type(wp_specs) ~= "table" then
					wp_specs = {wp_specs}
				end
				insert_wikipedia_articles_for_wikipedia_specs(wp_specs, labobj.canonical)
			end
			local wikidata_specs = labobj.labdata.Wikidata
			if wikidata_specs then
				if type(wikidata_specs) ~= "table" then
					wikidata_specs = {wikidata_specs}
				end
				insert_wikipedia_articles_for_wikidata_specs(wikidata_specs, labobj.lang)
			end
		end
	end

	local topright
	if #topright_parts > 0 then
		topright = table.concat(topright_parts)
	end

	-------------------- 9. Return the combined structure of all information. --------------------

	track("dialect")
	return {
		-- FIXME, allow etymological codes here
		lang = get_returnable_lang(lang),
		topright = topright,
		description = description,
		additional = additional,
		parents = parents,
		breadcrumb = {name = breadcrumb, nocap = true},
		umbrella = false,
		can_be_empty = true,
	}, lect_type
end

-- Actual handler for dialect categories; all the work is done by dialect_handler. Only the settings table (the
-- first result of dialect_handler) is used here. The second value returned is a real boolean that is true exactly
-- when settings were produced.
table.insert(raw_handlers, function(data)
	local settings = dialect_handler(data.category, data.args, data.called_from_inside)
	return settings, not not settings
end)

-- Export the raw category definitions and the raw handlers registered in this module.
return {RAW_CATEGORIES = raw_categories, RAW_HANDLERS = raw_handlers}