Module:User:Wpstatus/languages

-- Module table. Code further down calls `export.getByCode` and passes `export` itself to
-- Module:languages/findBestScript. Presumably this is the table the module returns; the
-- return statement is not visible in this part of the file -- TODO confirm.
local export = {}

-- Record a tracking entry through Module:debug/track.
--   page: suffix appended to "languages/" to form the tracking key.
--   code: optional; when given, the more specific key "languages/<page>/<code>" is
--         recorded alongside the base key in a single call.
-- Always returns true.
local function track(page, code)
	local base_key = "languages/" .. page
	local debug_track = require("Module:debug/track")
	if not code then
		-- No code supplied: record only the base key, passed as a plain string.
		debug_track(base_key)
		return true
	end
	-- Code supplied: pass both keys together as one list.
	debug_track{base_key, base_key .. "/" .. code}
	return true
end

-- Local alias for Module:utilities' check_object. It is called below with "script" as its
-- first argument, presumably to vet script objects supplied by callers (the actual checks
-- live in Module:utilities and are not visible here).
local checkObject = require("Module:utilities").check_object

local function make_language(code, data, useRequire) local function conditionalRequire(modulename) if useRequire then return require(modulename) else return mw.loadData(modulename) end end -- Temporarily convert various formatting characters to PUA to prevent them from being disrupted by the substitution process. local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim) -- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned. local patterns = require("Module:table").shallowcopy(require("Module:languages/data/patterns")) if keepCarets then table.insert(patterns, "((\\+)%^)") table.insert(patterns, "((%^))") end -- Ensure any whitespace at the beginning and end is temp substituted, to prevent it from being accidentally trimmed. We only want to trim any final spaces added during the substitution process (e.g. by a module), which means we only do this during the first round of temp substitutions. if not noTrim then table.insert(patterns, "^([\128-\191\244]*(%s+))") table.insert(patterns, "((%s+)[\128-\191\244]*)$") end -- Pre-substitution, of "" and "", which makes pattern matching more accurate. text = text :gsub("%f[%[]%[%[", "\1") :gsub("%f[%]]%]%]", "\2") local i, pe = #subbedChars, require("Module:utilities").pattern_escape for j, pattern in ipairs(patterns) do -- Patterns ending in \0 stand are for things like "" or ""), so the inserted PUA are treated as breaks between terms by modules that scrape info from pages.			local term_divider			pattern = pattern:gsub("%z$", function(divider) term_divider = divider == "\0" return "" end)			text = text:gsub(pattern, function(...) 
local m = {...} local m1New = m[1] for k = 2, #m do					local n = i + k - 1 subbedChars[n] = m[k] local byte2 = math.floor(n / 4096) % 64 + (term_divider and 128 or 136) local byte3 = math.floor(n / 64) % 64 + 128 local byte4 = n % 64 + 128 m1New = m1New:gsub(pe(m[k]), "\244" .. string.char(byte2) .. string.char(byte3) .. string.char(byte4), 1) end i = i + #m - 1 return m1New end)		end		text = text			:gsub("\1", "%[%[")			:gsub("\2", "%]%]")		return text, subbedChars	end	-- Reinsert any formatting that was temporarily substituted.	local function undoTempSubstitutions(text, subbedChars)		local pe = require("Module:utilities").pattern_escape		for i = 1, #subbedChars do			local byte2 = math.floor(i / 4096) % 64 + 128			local byte3 = math.floor(i / 64) % 64 + 128			local byte4 = i % 64 + 128			text = text:gsub("\244[" .. string.char(byte2) .. string.char(byte2+8) .. "]" .. string.char(byte3) .. string.char(byte4), pe(subbedChars[i]))		end		text = text			:gsub("\1", "%[%[")			:gsub("\2", "%]%]")		return text	end	-- Convert any HTML entities.	local function noEntities(text)		if text:match("&[^;]+;") then			return require("Module:utilities").get_entities(text)		else			return text		end	end	-- Check if the raw text is an unsupported title, and if so return that. Otherwise, remove HTML entities. We do the pre-conversion to avoid loading the unsupported title list unnecessarily.	local function checkNoEntities(text)		local textNoEnc = noEntities(text)		if textNoEnc ~= text and conditionalRequire("Module:links/data").unsupported_titles[text] then			return text		else			return textNoEnc		end	end	-- If no script object is provided (or if it's invalid or None), get one.	
local function checkScript(text, self, sc)		if not checkObject("script", true, sc) or sc:getCode == "None" then			return self:findBestScript(text) else return sc		end end local function normalize(text, sc) text = sc:fixDiscouragedSequences(text) return sc:toFixedNFD(text) end -- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them. local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name) local pe = require("Module:utilities").pattern_escape local fail, cats, sections = nil, {} -- See Module:languages/data. if not text:match("\244") or conditionalRequire("Module:languages/data").contiguous_substitution[self:getCode] then sections = {text} else sections = mw.text.split(text, "[􀀀-􏿽]") end --error(dump(sections)) for i, section in ipairs(sections) do -- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules). if section:gsub("%s", "") ~= "" then --error(section) local sub, sub_fail, sub_cats = require("Module:User:Wpstatus/languages/sub")(section, self, sc, substitution_data, function_name) error(sub) -- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text). if sub and subbedChars then local noSub for _, pattern in ipairs(require("Module:languages/data/patterns")) do if section:match(pattern .. "%z?") then noSub = true end end if not noSub then sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets, true) end end if (not sub) or sub_fail then text = sub fail = sub_fail cats = sub_cats or {} break end error(sub .. " !! 
" .. text:gsub(pe(section), pe(sub), 1) .. " ?? " .. text) text = sub and text:gsub(pe(section), pe(sub), 1) or text if type(sub_cats) == "table" then for _, cat in ipairs(sub_cats) do						table.insert(cats, cat) end end end end -- Trim, unless there are only spacing characters, while ignoring any final formatting characters. text = text and text :gsub("^([\128-\191\244]*)%s+(%S)", "%1%2") :gsub("(%S)%s+([\128-\191\244]*)$", "%1%2") -- Remove duplicate categories. if #cats > 1 then cats = require("Module:table").removeDuplicates(cats) end return text, fail, cats, subbedChars end -- Process carets (and any escapes). Default to simple removal, if no pattern/replacement is given. local function processCarets(text, pattern, repl) local rep repeat text, rep = text:gsub("\\\\(\\*^)", "\3%1") until rep == 0 return text :gsub("\\^", "\4") :gsub(pattern or "%^", repl or "") :gsub("\3", "\\") :gsub("\4", "^") end -- Remove carets if they are used to capitalize parts of transliterations (unless they have been escaped). local function removeCarets(text, sc) if not sc:hasCapitalization and sc:isTransliterated and text:match("%^") then return processCarets(text) else return text end end local Language = {} --[==[Returns the language code of the language. Example: lua for French.]==] function Language:getCode return self._code end --[==[Returns the canonical name of the language. This is the name used to represent that language on Wiktionary, and is guaranteed to be unique to that language alone. Example: lua for French.]==] function Language:getCanonicalName return self._rawData[1] end --[==[Returns the display form of the language. The display form of a language, family or script is the form it takes when appearing as the SOURCE in categories such as  or , and is also the displayed text in   links. For regular and etymology languages, this is the same as the canonical name, but for families, it reads "NAME languages" (e.g. lua), and for scripts, it reads "NAME script" (e.g. 
lua).]==] function Language:getDisplayForm if not self._displayForm then local form = self:getCanonicalName -- Add article and " substrate" if a substrate that lacks them. if self:getFamilyCode == "qfa-sub" then if not (form:find("^[Tt]he ") or form:find("^[Aa] ")) then form = "a " .. form end if not form:find("[Ss]ubstrate") then form = form .. " substrate" end end self._displayForm = form end return self._displayForm end --[==[Returns a table of the "other names" that the language is known by, excluding the canonical name. The names are not guaranteed to be unique, in that sometimes more than one language is known by the same name. Example: lua for Manx. If  is given and is non-lua, only names explicitly listed in the   field are returned; otherwise, names listed under ,   and   are combined together and returned. For example, for Manx, Manx Gaelic is listed as an alias, while Northern Manx and Southern Manx are listed as varieties. It should be noted that the  field itself is deprecated, and entries listed there should eventually be moved to either   or  .]==] function Language:getOtherNames(onlyOtherNames) if #self._stack == 1 then self:loadInExtraData end return require("Module:language-like").getOtherNames(self, onlyOtherNames) end --[==[Returns a table of the aliases that the language is known by, excluding the canonical name. Aliases are synonyms for the language in question. The names are not guaranteed to be unique, in that sometimes more than one language is known by the same name. Example: lua for German.]==] function Language:getAliases if #self._stack == 1 then self:loadInExtraData end return self._rawData.aliases or (self._extraData and self._extraData.aliases) or {} end --[==[Returns a table of the known subvarieties of a given language, excluding subvarieties that have been given explicit etymology language codes. The names are not guaranteed to be unique, in that sometimes a given name refers to a subvariety of more than one language. 
Example: lua for Aymara. Note that the returned value can have nested tables in it, when a subvariety goes by more than one name. Example: lua for Azerbaijani. Here, for example, Afshar, Afshari, Afshar Azerbaijani and Afchar all refer to the same subvariety, whose preferred name is Afshar (the one listed first). To avoid a return value with nested tables in it, specify a non-lua value for the  parameter; in that case, the return value would be lua.]==] function Language:getVarieties(flatten) if #self._stack == 1 then self:loadInExtraData end return require("Module:language-like").getVarieties(self, flatten) end --[==[Given a list of types as strings, returns true if the language has all of them. Possible types are explained in Module:languages/data/2 and Module:etymology languages/data.]==] function Language:hasType(...) if not self._type then self._type = {language = true} if self:getFullCode == self:getCode then self._type.full = true else self._type["etymology-only"] = true end for _, type in ipairs(mw.text.split(self._rawData.type, "%s*,%s*")) do				self._type[type] = true end end for _, type in ipairs{...} do			if not self._type[type] then return false end end return true end --[==[Returns a table containing  objects (see Module:wikimedia languages), which represent languages and their codes as they are used in Wikimedia projects for interwiki linking and such. More than one object may be returned, as a single Wiktionary language may correspond to multiple Wikimedia languages. For example, Wiktionary's single code  (Serbo-Croatian) maps to four Wikimedia codes:   (Serbo-Croatian),   (Bosnian),   (Croatian) and   (Serbian). The code for the Wikimedia language is retrieved from the  property in the data modules. If that property is not present, the code of the current language is used. 
If none of the available codes is actually a valid Wikimedia code, an empty table is returned.]==] function Language:getWikimediaLanguages if not self._wikimediaLanguageObjects then local m_wikimedia_languages = require("Module:wikimedia languages") self._wikimediaLanguageObjects = {} local wikimedia_codes = self:getWikimediaLanguageCodes for _, wlangcode in ipairs(wikimedia_codes) do				table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode)) end end return self._wikimediaLanguageObjects end function Language:getWikimediaLanguageCodes if not self._wikimediaLanguageCodes then self._wikimediaLanguageCodes = self._rawData.wikimedia_codes or {self:getCode} end return self._wikimediaLanguageCodes end --[==[Returns the name of the Wikipedia article for the language. If the property  is present in the data module it will be used first, otherwise a sitelink will be generated from   (if set). Otherwise  is used as fallback.]==] function Language:getWikipediaArticle if not self._wikipedia_article then if self._rawData.wikipedia_article then self._wikipedia_article = self._rawData.wikipedia_article elseif self:getWikidataItem and mw.wikibase then self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem, 'enwiki') end if not self._wikipedia_article then self._wikipedia_article = self:getCategoryName:gsub("Creole language", "Creole") end end return self._wikipedia_article end function Language:makeWikipediaLink return "" .. self:getCanonicalName .. "" end --[==[Returns the Wikidata item id for the language or. This corresponds to the the second field in the data modules.]==] function Language:getWikidataItem if not self._WikidataItem then local item = self._rawData[2] if type(item) == "number" then self._WikidataItem = "Q" .. item else self._WikidataItem = item end end return self._WikidataItem end --[==[Returns a table of  objects for all scripts that the language is written in. 
See Module:scripts.]==] function Language:getScripts if not self._scriptObjects then self._scriptObjects = {} if self:getScriptCodes[1] == "All" then self._scriptObjects = conditionalRequire("Module:scripts/data") else for _, sc in ipairs(self:getScriptCodes) do					table.insert(self._scriptObjects, require("Module:scripts").getByCode(sc, nil, nil, useRequire)) end end end return self._scriptObjects end --[==[Returns the table of script codes in the language's data file.]==] function Language:getScriptCodes if not self._scriptCodes then self._scriptCodes = self._rawData[4] or {"None"} end return self._scriptCodes end --[==[Given some text, this function iterates through the scripts of a given language and tries to find the script that best matches the text. It returns a lua object representing the script. If no match is found at all, it returns the lua script object.]==] function Language:findBestScript(text, forceDetect) if (not text) or text == "" or text == "-" then return require("Module:scripts").getByCode("None", nil, nil, useRequire) end if table.concat(self:getScriptCodes) == "All" then return require("Module:scripts").findBestScriptWithoutLang(text) end local scripts = self:getScripts if not scripts[2] and not forceDetect then -- Necessary, because Hani covers the entire Han range (while the Hant & Hans lists don't list shared characters). if scripts[1]:getCode:match("^Han") and require("Module:scripts").getByCode("Hani", nil, nil, useRequire):countCharacters(text) > 0 then return scripts[1] elseif scripts[1]:countCharacters(text) > 0 then return scripts[1] else return require("Module:scripts").getByCode("None", nil, nil, useRequire) end end return require("Module:languages/findBestScript")(export, self, text, scripts, forceDetect, useRequire) end --[==[Returns a  object for the language family that the language belongs to. 
See Module:families.]==] function Language:getFamily if self._familyObject == nil then local familyCode = self:getFamilyCode if familyCode then self._familyObject = require("Module:families").getByCode(familyCode, useRequire) -- Still memoize a nil result. else self._familyObject = false end end return self._familyObject or nil end --[==[Returns the family code in the language's data file.]==] function Language:getFamilyCode if not self._familyCode then self._familyCode = self._rawData[3] end return self._familyCode end function Language:getFamilyName if self._familyName == nil then local family = self:getFamily if family then self._familyName = family:getCanonicalName else self._familyName = false end end return self._familyName or nil end --[==[Check whether the language belongs to `family` (which can be a family code or object). A list of objects can be given in place of `family`; in that case, return true if the language belongs to any of the specified families. Note that some languages (in particular, certain creoles) can have multiple immediate ancestors potentially belonging to different families; in that case, return true if the language belongs to any of the specified families.]==] function Language:inFamily(...) --checkObject("family", nil, ...) 
for _, family in ipairs{...} do			if type(family) == "table" then family = family:getCode end if not self:getFamilyCode then return false elseif self:getFamilyCode == family or self:getFamily:inFamily(family) then return true else local ancestors = self:getAncestors for _, ancestor in ipairs(ancestors) do					if ancestor:inFamily(family) then return true end end end end return false end function Language:getParent if self._parentObject == nil then local parentCode = self:getParentCode if parentCode then self._parentObject = export.getByCode(parentCode, nil, true, true, useRequire) else self._parentObject = false end end return self._parentObject or nil end function Language:getParentCode if not self._parentCode then self._parentCode = self._rawData[5] end return self._parentCode end function Language:getParentName if self._parentName == nil then local parent = self:getParent if parent then self._parentName = parent:getCanonicalName else self._parentName = false end end return self._parentName or nil end function Language:getParentChain if not self._parentChain then self._parentChain = {} local parent = self:getParent while parent do				table.insert(self._parentChain, parent) parent = parent:getParent end end return self._parentChain end function Language:hasParent(...) --checkObject("language", nil, ...) for _, otherlang in ipairs{...} do			for _, parent in ipairs(self:getParentChain) do				if type(otherlang) == "string" then if otherlang == parent:getCode then return true end else if otherlang:getCode == parent:getCode then return true end end end end return false end --[==[If the language is an etymology language, this iterates through parents until a regular language or family is found, and the corresponding object is returned. 
If the language is a regular language, then it simply returns the language.]==] function Language:getFull if not self._nonEtymologicalObject then local nonEtymologicalCode = self:getFullCode if nonEtymologicalCode ~= self:getCode then self._nonEtymologicalObject = export.getByCode(nonEtymologicalCode, nil, nil, nil, useRequire) else self._nonEtymologicalObject = self end end return self._nonEtymologicalObject end function Language:getFullCode return self._nonEtymologicalCode or self:getCode end function Language:getFullName if self._nonEtymologicalName == nil then local nonEtymological = self:getFull if nonEtymological then self._nonEtymologicalName = nonEtymological:getCanonicalName else self._nonEtymologicalName = false end end return self._nonEtymologicalName or nil end --[==[Returns a table of  objects for all languages that this language is directly descended from. Generally this is only a single language, but creoles, pidgins and mixed languages can have multiple ancestors.]==] function Language:getAncestors if not self._ancestorObjects then self._ancestorObjects = {} local ancestors = require("Module:table").shallowcopy(self:getAncestorCodes) if #ancestors > 0 then for _, ancestor in ipairs(ancestors) do					table.insert(self._ancestorObjects, export.getByCode(ancestor, nil, true, nil, useRequire)) end else local fam = self:getFamily local protoLang = fam and fam:getProtoLanguage or nil -- For the cases where the current language is the proto-language -- of its family, or an etymology language that is ancestral to that -- proto-language, we need to step up a level higher right from the -- start. 
if protoLang and (					protoLang:getCode == self:getCode or					(self:hasType("etymology-only") and protoLang:hasAncestor(self))				) then fam = fam:getFamily protoLang = fam and fam:getProtoLanguage or nil end while not protoLang and not (not fam or fam:getCode == "qfa-not") do					fam = fam:getFamily protoLang = fam and fam:getProtoLanguage or nil end table.insert(self._ancestorObjects, protoLang) end end return self._ancestorObjects end function Language:getAncestorCodes if not self._ancestorCodes then local function get_codes(lang) return lang._rawData.ancestors or {} end local codes = get_codes(self) -- Avoid a language being its own ancestor via class inheritance. We only need to check for this if the language has inherited an ancestor table from its parent, because we never want to drop ancestors that have been explicitly set in the data. -- Recursively iterate over ancestors until we find a loop/run out. If a loop is found that involves the language, drop that ancestor. if #codes > 0 and #self._stack > 1 and not self._stack[#self._stack].ancestors then local function check_ancestor(i, code, seen) if seen[code] then if code == self:getCode then table.remove(codes, i)						end else seen[code] = true local ancestor = export.getByCode(code, nil, true, nil, useRequire) for _, ancestorCode in ipairs(get_codes(ancestor)) do							check_ancestor(i, ancestorCode, seen) end end end for i, ancestorCode in ipairs(codes) do					local seen = {[self:getCode] = true} check_ancestor(i, ancestorCode, seen) end end self._ancestorCodes = codes end return self._ancestorCodes end --[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). 
However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==] function Language:hasAncestor(...) --checkObject("language", nil, ...) local function iterateOverAncestorTree(node, func, parent_check) local ancestors = node:getAncestors local ancestorsParents = {} for _, ancestor in ipairs(ancestors) do				local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func, parent_check) if ret then return ret end end -- Check the parents of any ancestors. We don't do this if checking the parents of the other language, so that we exclude any etymology-only children of those parents that are not directly related (e.g. if the ancestor is Vulgar Latin and we are checking New Latin, we want it to return false because they are on different ancestral branches. As such, if we're already checking the parent of New Latin (Latin) we don't want to compare it to the parent of the ancestor (Latin), as this would be a false positive; it should be one or the other). 
if not parent_check then return nil end for _, ancestor in ipairs(ancestors) do				local ancestorParents = ancestor:getParentChain for _, ancestorParent in ipairs(ancestorParents) do					if ancestorParent:getCode == self:getCode or ancestorParent:hasAncestor(ancestor) then break else table.insert(ancestorsParents, ancestorParent) end end end for _, ancestorParent in ipairs(ancestorsParents) do				local ret = func(ancestorParent) if ret then return ret end end end local parent_check = true for _, otherlang in ipairs{...} do			repeat if iterateOverAncestorTree(					self,					function(ancestor)						if type(otherlang) == "string" then							return ancestor:getCode == otherlang						else							return ancestor:getCode == otherlang:getCode						end					end,					parent_check				) then return true elseif type(otherlang) == "string" then otherlang = export.getByCode(otherlang, nil, true, nil, useRequire) end otherlang = otherlang:getParent parent_check = false until not otherlang end return false end function Language:getAncestorChain if not self._ancestorChain then self._ancestorChain = {} local step = self while true do				local ancestors = step:getAncestors step = #ancestors == 1 and ancestors[1] or nil if not step then break end table.insert(self._ancestorChain, 1, step) end end return self._ancestorChain end local function fetch_descendants(self, format) local languages = require("Module:languages/code to canonical name") local etymology_languages = require("Module:etymology languages/code to canonical name") local families = require("Module:families/code to canonical name") local descendants = {} local family = self:getFamily -- Iterate over all three datasets. for _, data in ipairs{languages, etymology_languages, families} do			for code in pairs(data) do				local lang = export.getByCode(code, nil, true, true, useRequire) -- Test for a descendant. Earlier tests weed out most candidates, while the more intensive tests are only used sparingly. 
if (					( -- Not an alias code. (not lang._rawData.main_code) or						lang._rawData.main_code == code ) and					code ~= self:getCode and -- Not self.					lang:inFamily(family) and -- In the same family.					( family:getProtoLanguageCode == self:getCode or -- Self is the protolanguage. self:hasDescendant(lang) or -- Full hasDescendant check. (lang:getFullCode == self:getCode and not self:hasAncestor(lang)) -- Etymology-only child which isn't an ancestor. )				) then if format == "object" then table.insert(descendants, lang) elseif format == "code" then table.insert(descendants, code) elseif format == "name" then table.insert(descendants, lang:getCanonicalName) end end end end return descendants end function Language:getDescendants if not self._descendantObjects then self._descendantObjects = fetch_descendants(self, "object") end return self._descendantObjects end function Language:getDescendantCodes if not self._descendantCodes then self._descendantCodes = fetch_descendants(self, "code") end return self._descendantCodes end function Language:getDescendantNames if not self._descendantNames then self._descendantNames = fetch_descendants(self, "name") end return self._descendantNames end function Language:hasDescendant(...) for _, lang in ipairs{...} do			if type(lang) == "string" then lang = export.getByCode(lang, nil, true, nil, useRequire) end if lang:hasAncestor(self) then return true end end return false end --[==[Returns the name of the main category of that language. Example: lua for French, whose category is at Category:French language. Unless optional argument  is given, the language name at the beginning of the returned value will be capitalized. 
This capitalization is correct for category names, but not if the language name is lowercase and the returned value of this function is used in the middle of a sentence.]==] function Language:getCategoryName(nocap) if not self._categoryName then local name = self:getCanonicalName -- Only add " language" if a regular language. if #self._stack == 1 then -- If the name already has "language" in it, don't add it. if not name:match("[Ll]anguage$") then name = name .. " language" end end self._categoryName = name end if nocap then return self._categoryName else return mw.getContentLanguage:ucfirst(self._categoryName) end end --[==[Creates a link to the category; the link text is the canonical name.]==] function Language:makeCategoryLink return "" .. self:getDisplayForm .. "" end function Language:getStandardCharacters(sc) if type(self._rawData.standardChars) ~= "table" then return self._rawData.standardChars else if sc and type(sc) ~= "string" then checkObject("script", nil, sc) sc = sc:getCode end if (not sc) or sc == "None" then local scripts = {} for _, script in pairs(self._rawData.standardChars) do					table.insert(scripts, script) end return table.concat(scripts) end if self._rawData.standardChars[sc] then return self._rawData.standardChars[sc] .. (self._rawData.standardChars[1] or "") end end end --[==[Make the entry name (i.e. the correct page name).]==] function Language:makeEntryName(text, sc) if (not text) or text == "" then return text, nil, {} end -- Set `unsupported` as true if certain conditions are met. local unsupported -- If there's an underscore. if text:find("_") then track("underscore") unsupported = true -- If it looks like an interwiki link. elseif text:find(":") and text ~= ":" then local m_utildata = conditionalRequire("Module:utilities/data") local prefix = text:gsub("^:*(.-):.*", string.ulower) if m_utildata.interwikis[prefix] or m_utildata.namespaces[prefix] then unsupported = true end end -- Check if the text is a listed unsupported title. 
local unsupportedTitles = conditionalRequire("Module:links/data").unsupported_titles if unsupportedTitles[text] then return "Unsupported titles/" .. unsupportedTitles[text], nil, {} end sc = checkScript(text, self, sc) local fail, cats text = normalize(text, sc) text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.entry_name, "makeEntryName") error(text) text = mw.ustring.gsub(text, "^[¿¡]?([^%s%p]+)%s*[؟?!;՛՜ ՞ ՟？！︖︕।॥။၊་།]?$", "%1") or text text = unsupported and "Unsupported titles/" .. text or text return text, fail, cats end --[==[Generates alternative forms using a specified method, and returns them as a table. If no method is specified, returns a table containing only the input term.]==] function Language:generateForms(text, sc) if self._rawData.generate_forms then sc = checkScript(text, self, sc) return require("Module:" .. self._rawData.generate_forms).generateForms(text, self:getCode, sc:getCode) else return {text} end end --[==[Creates a sort key for the given entry name, following the rules appropriate for the language. This removes diacritical marks from the entry name if they are not considered significant for sorting, and may perform some other changes. Any initial hyphen is also removed, and anything parentheses is removed as well. The  setting for each language in the data modules defines the replacements made by this function, or it gives the name of the module that takes the entry name and returns a sortkey.]==] function Language:makeSortKey(text, sc) if (not text) or text == "" then return text, nil, {} end if text:find("<[^<>]+>") then track("track HTML tag") end -- Remove soft hyphens, strip markers and HTML tags. text = text:gsub("­", "") text = mw.text.unstrip(text) :gsub("<[^<>]+>", "") text = mw.uri.decode(text, "PATH") text = checkNoEntities(text) -- Remove initial hyphens and * unless the term only consists of spacing + punctuation characters. 
text = mw.ustring.gsub(text, "^([􀀀-􏿽]*)[-־ـ᠊*]+([􀀀-􏿽]*)(.*[^%s%p].*)", "%1%2%3") sc = checkScript(text, self, sc) text = normalize(text, sc) text = removeCarets(text, sc) -- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı". if self:hasDottedDotlessI then text = text :gsub(mw.ustring.toNFD("İ"), "i") :gsub("I", "ı") text = sc:toFixedNFD(text) end -- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I". -- An exception is made for scripts that (sometimes) sort by scraping page content, as that means they are sensitive to changes in capitalization (as it changes the target page). local fail, cats if not sc:sortByScraping then text = text:ulower end text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.sort_key, "makeSortKey") if not sc:sortByScraping then if self:hasDottedDotlessI and not self._rawData.sort_key then text = text :gsub("ı", "I") :gsub("i", "İ") text = sc:toFixedNFC(text) end text = text:uupper end -- Remove parentheses, as long as they are either preceded or followed by something. 
text = text :gsub("(.)[]+", "%1") :gsub("[]+(.)", "%1") text = require("Module:string utilities").escape_risky_characters(text) return text, fail, cats end --[==[Create the form used as as a basis for display text and transliteration.]==] local function processDisplayText(text, self, sc, keepCarets, keepPrefixes) local subbedChars = {} text, subbedChars = doTempSubstitutions(text, subbedChars, keepCarets) text = mw.uri.decode(text, "PATH") text = checkNoEntities(text) sc = checkScript(text, self, sc) local fail, cats text = normalize(text, sc) text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, self._rawData.display_text, "makeDisplayText") text = removeCarets(text, sc) -- Remove any interwiki link prefixes (unless they have been escaped or this has been disabled). if text:match(":") and not keepPrefixes then local m_utildata, rep = conditionalRequire("Module:utilities/data") repeat text, rep = text:gsub("\\\\(\\*:)", "\3%1") until rep == 0 text = text :gsub("\\:", "\4") while true do				local prefix = text:gsub("^(.-):.+", function(m1)					return m1:gsub("\244[\128-\191]*", "")				end) if not prefix or prefix == text then break end local lower_prefix = prefix:ulower if not (m_utildata.interwikis[lower_prefix] or prefix == "") then break end text = text:gsub("^(.-):(.*)", function(m1, m2)					local ret = {}					for subbedChar in m1:gmatch("\244[\128-\191]*") do						table.insert(ret, subbedChar)					end					return table.concat(ret) .. m2				end) end text = text :gsub("\3", "\\") :gsub("\4", ":") end return text, fail, cats, subbedChars end --[==[Make the display text (i.e. 
what is displayed on the page).]==] function Language:makeDisplayText(text, sc, keepPrefixes) if (not text) or text == "" then return text, nil, {} end local fail, cats, subbedChars text, fail, cats, subbedChars = processDisplayText(text, self, sc, nil, keepPrefixes) text = require("Module:string utilities").escape_risky_characters(text) return undoTempSubstitutions(text, subbedChars), fail, cats end --[==[Transliterates the text from the given script into the Latin script (see Transliteration and romanization). The language must have the  property for this to work; if it is not present, lua is returned. Returns three values: # The transliteration. # A boolean which indicates whether the transliteration failed for an unexpected reason. If lua, then the transliteration either succeeded, or the module is returning nothing in a controlled way (e.g. the input was lua). Generally, this means that no maintenance action is required. If lua, then the transliteration is lua because either the input or output was defective in some way (e.g. Module:ar-translit will not transliterate non-vocalised inputs, and this module will fail partially-completed transliterations in all languages). Note that this value can be manually set by the transliteration module, so make sure to cross-check to ensure it is accurate. # A table of categories selected by the transliteration module, which should be in the format expected by lua in Module:utilities. The  parameter is handled by the transliteration module, and how it is handled is specific to that module. Some transliteration modules may tolerate lua as the script, others require it to be one of the possible scripts that the module can transliterate, and will show an error if it's not one of them. For this reason, the  parameter should always be provided when writing non-language-specific code. The  parameter is used to override the default module that is used to provide the transliteration. 
This is useful in cases where you need to demonstrate a particular module in use, but there is no default module yet, or you want to demonstrate an alternative version of a transliteration module before making it official. It should not be used in real modules or templates, only for testing. All uses of this parameter are tracked by Tracking/module_override. Known bugs: * This function assumes {tr(s1) .. tr(s2) == tr(s1 .. s2)}. When this assertion fails, wikitext markups like ''' can cause wrong transliterations. * HTML entities like, often used to escape wikitext markups, do not work.]==] function Language:transliterate(text, sc, module_override) -- If there is no text, or the language doesn't have transliteration data and there's no override, return nil. if not (self._rawData.translit or module_override) then return nil, false, {} elseif (not text) or text == "" or text == "-" then return text, false, {} end -- If the script is not transliteratable (and no override is given), return nil. sc = checkScript(text, self, sc) if not (sc:isTransliterated or module_override) then return nil, true, {} end -- Remove any strip markers. text = mw.text.unstrip(text) -- Get the display text with the keepCarets flag set. local fail, cats, subbedChars text, fail, cats, subbedChars = processDisplayText(text, self, sc, true) -- Transliterate (using the module override if applicable). text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._rawData.translit, "tr") -- Incomplete transliterations return nil. if text then if sc:countCharacters(text) > 0 then -- Remove any characters in (extended) Latin, which includes Latin characters also included in other scripts (as these are false positives). Anything remaining should only be script code "None" (e.g. numerals). local check_text = mw.ustring.gsub(text, "[" .. require("Module:scripts").getByCode("Latnx"):getCharacters .. 
"]", "") if require("Module:scripts").findBestScriptWithoutLang(check_text) ~= "None" then return nil, true, cats end end else return nil, true, cats end text = require("Module:string utilities").escape_risky_characters(text) text = undoTempSubstitutions(text, subbedChars) -- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret). if text and not sc:hasCapitalization and text:match("%^") then text = processCarets(text, "%^([\128-\191\244]*%*?)([^\128-\191\244][\128-\191]*)", function(m1, m2)				return m1 .. m2:uupper			end) end -- Track module overrides. if module_override ~= nil then track("module_override") end fail = text == nil and (not not fail) or false return text, fail, cats end function Language:overrideManualTranslit return not not self._rawData.override_translit end --[==[Returns lua if the language has a transliteration module, or lua if it doesn't.]==] function Language:hasTranslit return not not self._rawData.translit end function Language:link_tr return not not self._rawData.link_tr end --[==[Returns lua if the language uses the letters I/ı and İ/i, or lua if it doesn't.]==] function Language:hasDottedDotlessI return not not self._rawData.dotted_dotless_i end function Language:toJSON(returnTable) local entryNamePatterns = nil local entryNameRemoveDiacritics = nil if self._rawData.entry_name then entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics if self._rawData.entry_name.from then entryNamePatterns = {} for i, from in ipairs(self._rawData.entry_name.from) do					table.insert(entryNamePatterns, {from = from, to = self._rawData.entry_name.to[i] or ""}) end end end if not self._type then self:hasType end local types = {} for type in pairs(self._type) do			table.insert(types, type) end local ret = { ancestors = self:getAncestorCodes, canonicalName = self:getCanonicalName, categoryName = self:getCategoryName("nocap"), code = 
self:getCode, entryNamePatterns = entryNamePatterns, entryNameRemoveDiacritics = entryNameRemoveDiacritics, family = self:getFamilyCode, otherNames = self:getOtherNames(true), aliases = self:getAliases, varieties = self:getVarieties, scripts = self:getScriptCodes, parent = self._parentCode or nil, nonEtymological = self._nonEtymologicalCode or nil, type = types, wikimediaLanguages = self:getWikimediaLanguageCodes, wikidataItem = self:getWikidataItem, }		ret = require("Module:table").deepcopy(ret) if returnTable then return ret else return require("Module:JSON").toJSON(ret) end end --[==[	 This function is not for use in entries or other content pages. Returns a blob of data about the language. The format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes. -- Do NOT use these methods! -- All uses should be pre-approved on the talk page! ]==]	function Language:getRawData local rawData = {} for _, element in ipairs(self._stack) do			for k, v in pairs(element) do				rawData[k] = v			end end return rawData end --[==[ This function is not for use in entries or other content pages. Returns a blob of data about the language that contains the "extra data". Much like with getRawData, the format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes.]==] function Language:getRawExtraData if #self._stack == 1 then self:loadInExtraData end return self._extraData end local function getRawExtraLanguageData(code) local modulename = export.getExtraDataModuleName(code) return modulename and conditionalRequire("Module:" .. modulename)[code] or nil end

-- Lazily populates `self._extraData`.
-- If the field is already set this is a no-op. Otherwise it stores whatever
-- getRawExtraLanguageData returns for this language's code, falling back to
-- an empty table when that result is nil/false.
function Language:loadInExtraData()
	if not self._extraData then
		self._extraData = getRawExtraLanguageData(self:getCode()) or {}
	end
end

-- Hand the finished Language table back to the caller of make_language.
return Language
end

-- Builds the language object for `code` on top of `parent`.
--
-- `parent` is made the metatable of the new object (after setting
-- `parent.__index = parent`), so any field or method missing on the object is
-- looked up on `parent`.
--
-- Two cases, distinguished by whether `parent` already carries a `_stack`:
-- * No `_stack` ("full language"): a new stack of raw-data layers is created
--   (`{parent._rawData, data}` if the parent has `_rawData`, else `{data}`) and
--   `_rawData` becomes a read-only proxy over that stack.
-- * Has `_stack` ("etymology-only"): the parent's `_rawData` proxy and `_stack`
--   table are shared with the new object, and `data` is appended to that shared
--   stack as its newest layer. NOTE: this mutates the parent's stack table.
--
-- Lookup rules of the `_rawData` proxy for a key `k`:
-- * "aliases", "varieties", "otherNames": read from the newest layer only.
-- * "type": the values of every layer joined with ", " (when the first collected
--   value is a string); otherwise falls back to the inheritance walk below.
-- * anything else: the value from the newest layer that has a truthy value,
--   walking towards the oldest layer.
-- * 4, "ancestors", "wikimedia_codes": the result is returned as a list; a table
--   is returned as-is, a string is split on commas, anything else yields nil.
--
-- `useRequire` is accepted for signature compatibility; it is not used here.
local function make_stack(code, data, parent, useRequire)
	parent.__index = parent

	local lang = {_code = code}

	if not parent._stack then
		-- Full language: create the stack, accessed through the rawData metamethod.
		lang._stack = parent._rawData and {parent._rawData, data} or {data}
		lang._rawData = setmetatable({}, {
			__index = function(_, k)
				-- Read the stack at access time: etymology-only children append
				-- layers to this same table after the proxy has been created.
				local stack = lang._stack
				local top = #stack

				-- Data that isn't inherited from the parent. Return straight
				-- away so a missing value is NOT filled in from an older layer.
				if k == "aliases" or k == "varieties" or k == "otherNames" then
					return stack[top][k]
				end

				local ret

				-- Data that is appended by each generation.
				if k == "type" then
					local parts = {}
					for i = 1, top do
						table.insert(parts, stack[i][k])
					end
					if type(parts[1]) == "string" then
						ret = table.concat(parts, ", ")
					end
				end

				-- Otherwise, iterate down the stack, looking for a match.
				if ret == nil then
					local i = top
					while not stack[i][k] and i > 1 do
						i = i - 1
					end
					ret = stack[i][k]
				end

				-- List-valued keys may be stored as a comma-separated string.
				if k == 4 or k == "ancestors" or k == "wikimedia_codes" then
					if type(ret) == "table" then
						return ret
					elseif type(ret) == "string" then
						return mw.text.split(ret, "%s*,%s*")
					end
					return nil
				end

				return ret
			end,
			-- Retain immutability (as writing to rawData will break functionality).
			__newindex = function()
				error("not allowed to edit rawData")
			end
		})
		-- Full code is the parent code.
		lang._nonEtymologicalCode = parent._code or code
	else
		-- Etymology-only: share rawData and stack with the parent, and add a new
		-- layer to the (shared) stack.
		lang._rawData = parent._rawData
		lang._stack = parent._stack
		table.insert(lang._stack, data)
		-- Copy full code.
		lang._nonEtymologicalCode = parent._nonEtymologicalCode
	end

	return setmetatable(lang, parent)
end

-- Maps a language code to the name (without the "Module:" prefix) of the data
-- module that holds it:
-- * two lowercase letters   -> "languages/data/2"
-- * three lowercase letters -> "languages/data/3/" followed by the first letter
-- * lowercase letters and hyphens only -> "languages/data/exceptional"
-- Returns nil for any other code.
function export.getDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/data/2"
	end
	if code:find("^%l%l%l$") then
		return "languages/data/3/" .. code:sub(1, 1)
	end
	if code:find("^[%l-]+$") then
		return "languages/data/exceptional"
	end
	return nil
end

-- Returns the name of the "extra data" module for a language code: the name
-- given by export.getDataModuleName with "/extra" appended, or nil when the
-- code has no data module.
function export.getExtraDataModuleName(code)
	local base = export.getDataModuleName(code)
	if base then
		return base .. "/extra"
	end
	return nil
end

-- Turns a raw data entry into an object.
--
-- Returns nil when `data` is missing. `data.main_code`, when present, replaces
-- `code`. An entry whose `type` mentions "family" and which has no parent code
-- in slot 5 is delegated to Module:families. Otherwise the object is built by
-- make_stack on top of either the parent looked up from slot 5 (etymology
-- languages and families allowed) or a fresh result of make_language.
function export.makeObject(code, data, useRequire)
	if not data then
		return nil
	end

	code = data.main_code or code

	local is_family = data.type:find("family")
	local parent_code = data[5]

	if is_family and not parent_code then
		return require("Module:families").makeObject(code, data, useRequire)
	end

	local parent
	if parent_code then
		parent = export.getByCode(parent_code, nil, true, true, useRequire)
	else
		parent = make_language(code, data, useRequire)
	end
	return make_stack(code, data, parent, useRequire)
end

--[==[
Finds the language whose code matches the one provided. If data for the code is found, the object built from it by {export.makeObject} is returned. Otherwise a falsy value is returned, unless `paramForError` is given, in which case the bad code is passed to [[Module:languages/errorGetBy]] (together with `paramForError`, `allowEtymLang` and `allowFamily`), which is expected to raise the error.

`code` must be a string; anything else raises an error that names what was received instead.

If `allowEtymLang` is specified, etymology language codes are allowed and looked up along with normal language codes. If `allowFamily` is specified, language family codes are allowed and looked up along with normal language codes. If both are specified, [[Module:families/data/etymology]] is consulted as well.

If `useRequire` is specified, data modules are loaded with {require} rather than {mw.loadData}.]==]
function export.getByCode(code, paramForError, allowEtymLang, allowFamily, useRequire)
	-- Reject non-string codes up front, describing the offending value.
	if type(code) ~= "string" then
		local received
		if not code then
			received = "nil"
		elseif checkObject("language", true, code) then
			received = "a language object"
		elseif checkObject("family", true, code) then
			received = "a family object"
		else
			received = "a " .. type(code)
		end
		error("The function getByCode expects a string as its first argument, but received " .. received .. ".")
	end

	-- Loads a data module with whichever loader the caller asked for.
	local function load_data(module_name)
		local loader = useRequire and require or mw.loadData
		return loader(module_name)
	end

	-- Try each data source in turn; the first truthy entry wins.
	local modulename = export.getDataModuleName(code)
	local data = modulename and load_data("Module:" .. modulename)[code]
	if not data then
		data = allowEtymLang
			and require("Module:etymology languages/track-bad-etym-code")(code)
			and load_data("Module:etymology languages/data")[code]
	end
	if not data then
		data = allowFamily and load_data("Module:families/data")[code]
	end
	if not data then
		data = allowEtymLang and allowFamily and load_data("Module:families/data/etymology")[code]
	end

	local lang = data and export.makeObject(code, data, useRequire)

	if paramForError and not lang then
		require("Module:languages/errorGetBy").code(code, paramForError, allowEtymLang, allowFamily)
	end

	return lang
end

--[==[
Finds a language by name using the mapping in [[Module:languages/by name]]. The name is looked up in that module's `all` sub-table when it has one, and in the top level of the module otherwise (or when `all` has no entry for the name).

If the name is not found, {nil} is returned, unless `errorIfInvalid` is given, in which case an error is generated. A name that is found is resolved with {export.getByCode}; no options are passed on, so this function does not (currently) look up etymology languages or families.]==]
function export.getByName(name, errorIfInvalid)
	local names = mw.loadData("Module:languages/by name")
	local all = names.all
	local code = all and all[name] or names[name]

	if code then
		return export.getByCode(code)
	end

	if errorIfInvalid then
		error("The language name \"" .. name .. "\" is not valid. See List of languages.")
	end
	return nil
end

--[==[
Finds the language whose canonical name (the name used to represent that language on Wiktionary) matches the one provided. If it exists, the object returned by {export.getByCode} for the corresponding code is returned. Otherwise a falsy value is returned, unless `errorIfInvalid` is given, in which case the name is passed to [[Module:languages/errorGetBy]], which is expected to raise the error.

The name is first looked up in [[Module:languages/canonical names]]. If `allowEtymLang` is specified and nothing was found, [[Module:etymology languages/canonical names]] is tried with the name as given, with a trailing " substrate"/" Substrate" removed, with a leading "a " removed, with both removed, and finally with a trailing " languages" removed. If `allowFamily` is specified and still nothing was found, [[Module:families/canonical names]] is tried with the name as given and with a trailing " languages" removed.

`errorIfInvalid`, `allowEtymLang`, `allowFamily` and `useRequire` are also passed on to {export.getByCode}. If `useRequire` is specified, the name modules are loaded with {require} rather than {mw.loadData}.]==]
function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily, useRequire)
	-- Loads a data module with whichever loader the caller asked for.
	local function load_data(module_name)
		local loader = useRequire and require or mw.loadData
		return loader(module_name)
	end

	local names = load_data("Module:languages/canonical names")
	local code = names and names[name]

	if allowEtymLang and not code then
		names = load_data("Module:etymology languages/canonical names")
		code = names and names[name]
		if not code then
			code = names[(name:gsub(" [Ss]ubstrate$", ""))]
		end
		if not code then
			code = names[(name:gsub("^a ", ""))]
		end
		if not code then
			code = names[(name:gsub("^a ", ""):gsub(" [Ss]ubstrate$", ""))]
		end
		if not code then
			-- For etymology families like "ira-pro".
			-- FIXME: This is not ideal, as it allows " languages" to be appended to any etymology-only language, too.
			code = names[name:match("^(.*) languages$")]
		end
	end

	if allowFamily and not code then
		names = load_data("Module:families/canonical names")
		code = names and names[name]
		if not code then
			code = names[name:match("^(.*) languages$")]
		end
	end

	local lang = code and export.getByCode(code, errorIfInvalid, allowEtymLang, allowFamily, useRequire)

	if errorIfInvalid and not lang then
		require("Module:languages/errorGetBy").canonicalName(name, allowEtymLang, allowFamily)
	end

	return lang
end

--[==[
Used by [[Module:languages/data/2]] (et al.) to add default types to the entities returned.

For every entity in `data`: if `regular` is truthy and the entity has no `type` yet, its type becomes "regular"; then, if any extra types were passed as varargs, they are joined with ", " and prepended to the entity's type (separated from an existing type by ", ").

The entities are modified in place and `data` itself is returned.]==]
function export.addDefaultTypes(data, regular, ...)
	-- The extra types are the same for every entity, so join them once up
	-- front instead of rebuilding the table and string on each iteration.
	local extra_types = table.concat({...}, ", ")
	local has_extra_types = #extra_types > 0

	for _, entity in pairs(data) do
		-- "regular" encompasses everything that doesn't have another type already assigned.
		if regular then
			entity.type = entity.type or "regular"
		end
		if has_extra_types then
			entity.type = extra_types .. (entity.type and (", " .. entity.type) or "")
		end
	end

	return data
end

--[==[
Used by [[Module:etymology languages/data]] and [[Module:families/data/etymology]] to finalize the data into the format that is actually returned.

For every entity, the value in slot 3 (the parent) is moved to slot 5 and the `family` field is moved to slot 3. Every code listed in an entity's `alias_codes` is then added to `data` as another key for that same entity, whose `main_code` is set to its original code; `alias_codes` itself is removed.

`data` is modified in place and returned.]==]
function export.finalizeEtymologyData(data)
	-- Alias entries are collected first and added afterwards, so that no new
	-- keys are inserted into `data` while it is being traversed.
	local pending_aliases = {}

	for code, entity in pairs(data) do
		-- Move parent to 5 and family to 3.
		entity[5], entity[3], entity.family = entity[3], entity.family, nil

		-- The main_code field makes sure objects always identify themselves by
		-- their main code, so aliases are fungible with their counterparts.
		local alias_codes = entity.alias_codes
		if alias_codes then
			entity.main_code = code
			for _, alias_code in ipairs(alias_codes) do
				pending_aliases[alias_code] = entity
			end
			entity.alias_codes = nil
		end
	end

	for alias_code, entity in pairs(pending_aliases) do
		data[alias_code] = entity
	end

	return data
end

--[==[
For backwards compatibility only; modules should require [[Module:languages/error]] themselves.

Forwards all five arguments unchanged to the function returned by that module and returns whatever it returns.]==]
function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
	local raise_error = require("Module:languages/error")
	return raise_error(lang_code, param, code_desc, template_tag, not_real_lang)
end

return export