-- Module:data consistency check

-- TODO:
-- ietf_subtag field used with a 2/3-letter language/family code except qaa-qtz, or a 4-letter script code.
-- Check against files containing up-to-date ISO data, to cross-check validity.

local m_languages = require("Module:languages") local m_language_data = require("Module:languages/data/all") local m_language_codes = require("Module:languages/code to canonical name") local m_language_canonical_names = require("Module:languages/canonical names") local m_etym_language_data = require("Module:etymology languages/data") local m_etym_language_codes = require("Module:etymology languages/code to canonical name") local m_etym_language_canonical_names = require("Module:etymology languages/canonical names") local m_family_data = require("Module:families/data") local m_family_codes = require("Module:families/code to canonical name") local m_family_canonical_names = require("Module:families/canonical names") local m_scripts = require("Module:scripts") local m_script_data = require("Module:scripts/data") local m_links = require("Module:links")

local m_script_utils = require("Module:script utilities") local m_str_utils = require("Module:string utilities") local m_table = require("Module:table") local Array = require("Module:array")

local codepoint = m_str_utils.codepoint local concat = table.concat local dump = mw.dumpObject local gcodepoint = m_str_utils.gcodepoint local insert = table.insert local split = m_str_utils.split local ugmatch = m_str_utils.gmatch local umatch = m_str_utils.match

local export = {}
local messages

-- Formats a message with string.format and appends it to the message list
-- registered for `modname`. Any failure while doing so (for instance a bad
-- format argument) is caught and written to the debug log together with the
-- raw arguments, so one bad report does not abort the whole run.
local function discrepancy(modname, ...)
	local function record(...)
		messages[modname]:insert(string.format(...))
	end

	local succeeded, err = pcall(record, ...)
	if not succeeded then
		mw.log(err, ...)
	end
end

-- Maps each code seen so far to the name of the data module that defined it;
-- used by the checks below to detect codes defined more than once.
local all_codes = {}

-- Canonical name -> code lookups, one per kind of data, filled in while the
-- corresponding data module is being checked.
local language_names = {} local etym_language_names = {} local family_names = {} local script_names = {}

-- Families that have at least one child language/family, families that are
-- allowed to be empty, and scripts that are used by at least one language.
local nonempty_families = {}
local allowed_empty_families = {tbq = true}
local nonempty_scripts = {}

do
	-- Display text for a regular language name.
	local function link_lang(name)
		if name:find("[Ll]anguage$") then
			return "" .. name .. ""
		else
			return "" .. name .. " language"
		end
	end

	-- Display text for an etymology-only language name.
	local function link_etym_lang(name)
		if name:find("[Ll]anguage$") then
			return name
		else
			return name .. " language"
		end
	end

	-- Display text for a family name. Both branches currently yield the same
	-- text; the distinction is kept so the two cases stay easy to adjust.
	local function link_family(name)
		if name:find("[Ll]anguages$") then
			return "" .. name .. " family"
		else
			return "" .. name .. " family"
		end
	end

	-- Returns the display text for a raw data table, choosing the helper from
	-- `data.type`. Returns "???" when the table has no canonical name.
	-- A missing `type` is treated as a regular language instead of raising.
	function export.link(data)
		if not data[1] then
			return "???"
		end
		-- Not named `type`, so the builtin stays usable inside this function.
		local data_type = data.type or ""
		if data_type:match("etymology%-only") then
			return link_etym_lang(data[1])
		elseif data_type:match("family") then
			return link_family(data[1])
		end
		return link_lang(data[1])
	end
end

local link = export.link

-- Display text for a script name; "???" when no name is given.
local function link_script(name)
	if not name then
		return "???"
	elseif name:find("[Cc]ode$") or name:find("[Ss]emaphore$") then
		return "" .. name .. ""
	else
		return "" .. name .. " script"
	end
end

-- Reports the list of unrecognised keys found in one data table.
--   modname      module the data came from
--   code         code of the entry being checked
--   data         raw data table of the entry
--   invalid_keys Array of the offending keys
--   is_script    true when `data` is script data (changes how the name is shown)
local function invalid_keys_message(modname, code, data, invalid_keys, is_script)
	local plural = #invalid_keys ~= 1

	-- link() expects the whole data table, link_script() only the name.
	local name_text
	if is_script then
		name_text = link_script(data[1])
	else
		name_text = link(data)
	end

	discrepancy(modname, "The data key%s %s for %s (%s) %s invalid.",
		plural and "s" or "",
		invalid_keys
			-- Keys may be numbers, so convert before joining.
			:map(function(key) return tostring(key) end)
			:concat(", "),
		name_text,
		code,
		plural and "are" or "is")
end

-- Builds a checker for the keys of a data table.
--   valid_keys list of the keys that are allowed
--   is_script  passed through to invalid_keys_message
-- Returns a function (modname, code, data) that reports every key of `data`
-- that is not in `valid_keys`.
local function check_data_keys(valid_keys, is_script)
	valid_keys = Array(valid_keys):to_set()
	return function (modname, code, data)
		local invalid_keys
		for k in pairs(data) do
			if not valid_keys[k] then
				-- Only allocate the array once something is actually wrong.
				invalid_keys = invalid_keys or Array()
				invalid_keys:insert(k)
			end
		end
		if invalid_keys then
			invalid_keys_message(modname, code, data, invalid_keys, is_script)
		end
	end
end

-- Modification of isArray in Module:table.
-- This assumes all keys are either integers or non-numbers.
-- If there are fractional numbers, the results might be incorrect.
-- For instance, find_gap{"a", "b", [0.5] = true} evaluates to 3, but there
-- isn't a gap at 3 in the sense of there being an integer key greater than 3.
--
-- Returns the first index i (counting the keys visited) for which t[i] is
-- nil, or nothing when no such index is found. When
-- `can_contain_non_number_keys` is true, non-number keys are not counted.
local function find_gap(t, can_contain_non_number_keys)
	local i = 0
	for k in pairs(t) do
		if not (can_contain_non_number_keys and type(k) ~= "number") then
			i = i + 1
			if t[i] == nil then
				return i
			end
		end
	end
end

-- Reports `data[field_name]` unless it is nil, true or a string.
local function check_true_or_string_or_nil(modname, code, data, field_name)
	local field = data[field_name]
	if field == nil or field == true or type(field) == "string" then
		return
	end
	-- One placeholder per argument, so the code, field name and value all
	-- appear in the report.
	discrepancy(modname,
		"%s (%s) has a %s value that is not nil, true or a string: %s",
		link(data), code, field_name,
		dump(field)
	)
end

-- Checks that `array_name` holds a gap-free array, either directly in `data`
-- or, when `subarray_name` is given, in `data[subarray_name]` (which must
-- exist). Returns true when the array is fine; otherwise reports the problem
-- and returns nothing.
local function check_array(modname, code, canonical_name, data, array_name, subarray_name, can_contain_non_number_keys)
	local subtable = data
	if subarray_name then
		subtable = assert(data[subarray_name], subarray_name)
	end

	local location = subarray_name and "the " .. subarray_name .. " field in " or ""
	local array_type = type(subtable[array_name])

	if array_type ~= "table" then
		discrepancy(modname, "The %s field in %sthe data table for %s (%s) should be an array (table) but is %s.",
			array_name,
			location,
			tostring(canonical_name),
			code,
			array_type == "nil" and "nil" or "a " .. array_type)
		return
	end

	local gap = find_gap(subtable[array_name], can_contain_non_number_keys)
	if gap then
		-- The code needs its own %s; otherwise it would be fed to %d and
		-- string.format would raise instead of producing the message.
		discrepancy(modname, "The %s array in %sthe data table for %s (%s) has a gap at index %d.",
			array_name,
			location,
			tostring(canonical_name),
			code, gap)
		return
	end

	return true
end

-- Reports groups of codes in `mod_data` that point at the very same data
-- table (alias codes).
local function check_no_alias_codes(modname, mod_data)
	-- lookup: data table -> first code seen for it.
	-- discrepancies: first code -> list of all codes sharing its table.
	local lookup, discrepancies = {}, {}
	for k, v in pairs(mod_data) do
		local check = lookup[v]
		if check then
			discrepancies[check] = discrepancies[check] or {tostring(check)}
			insert(discrepancies[check], tostring(k))
		else
			lookup[v] = k
		end
	end
	for _, v in pairs(discrepancies) do
		-- The list is passed as an argument rather than spliced into the
		-- format string, so a "%" in a code cannot break the formatting.
		discrepancy(modname,
			"The codes %s are currently alias codes. Only one code should be used in the data.",
			mw.text.listToText(v, ", ", " and "))
	end
end

-- Checks the Wikidata item stored at `data[key]`: it may be absent, a
-- positive integer, or a string of the form "Q" followed by digits.
-- Anything else is reported.
local function check_wikidata_item(modname, code, data, key)
	local data_item = data[key]
	if data_item == nil then
		return
	end

	local item_type = type(data_item)
	local name = tostring(data[1])

	if item_type == "number" then
		if not m_table.isPositiveInteger(data_item) then
			discrepancy(modname, "%g, the Wikidata item id for %s (%s), is not a positive integer or a string in the correct format.",
				data_item, name, code)
		end
	elseif item_type == "string" then
		if not data_item:find("^Q%d+$") then
			discrepancy(modname, "%s, the Wikidata item id for %s (%s), is not a string in the correct format or a positive integer.",
				data_item, name, code)
		end
	else
		-- Booleans, tables etc. used to slip through unreported.
		discrepancy(modname, "%s, the Wikidata item id for %s (%s), is neither a positive integer nor a string.",
			dump(data_item), name, code)
	end
end

-- Checks one list of alternative names (`data[data_key]`): it must be a
-- gap-free array, must not repeat the canonical name, and must not contain
-- the same name twice. Nested tables of names are only accepted when
-- `allow_nested` is true.
local function check_other_names_or_aliases(modname, code, canonical_name, data, data_key, allow_nested)
	local array = data[data_key]
	if not array then
		return
	end
	check_array(modname, code, canonical_name, data, data_key, nil, true)
	if type(array) ~= "table" then
		-- check_array has already reported the wrong type; ipairs below would
		-- raise on a non-table.
		return
	end

	local names = {}
	local function check_other_name(other_name)
		if other_name == canonical_name then
			discrepancy(modname,
				"%s, the canonical name for %s, is repeated in the table of %s.",
				tostring(canonical_name), code, data_key)
		end
		if names[other_name] then
			discrepancy(modname,
				"The name %s is found twice or more in the list of %s for %s (%s).",
				tostring(other_name), data_key, tostring(canonical_name), code)
		end
		names[other_name] = true
	end

	for _, other_name in ipairs(array) do
		if type(other_name) ~= "table" then
			check_other_name(other_name)
		elseif allow_nested then
			for _, on in ipairs(other_name) do
				check_other_name(on)
			end
		else
			discrepancy(modname,
				"A nested table is found in the list of %s for %s (%s), but isn't allowed.",
				data_key, tostring(canonical_name), code)
		end
	end
end

-- Runs check_other_names_or_aliases on each of the three name lists that are
-- present in `data`. Only "varieties" may contain nested tables.
local function check_other_names_aliases_varieties(modname, code, canonical_name, data)
	local name_lists = {
		{ key = "otherNames" },
		{ key = "aliases" },
		{ key = "varieties", nested = true },
	}
	for _, list in ipairs(name_lists) do
		if data[list.key] then
			check_other_names_or_aliases(modname, code, canonical_name, data, list.key, list.nested)
		end
	end
end

-- Validates a character-class pattern used for character detection.
--   pattern       body of a character class (it is wrapped in [...] below)
--   standardChars true when the pattern is a standard-characters pattern;
--                 only affects the wording of the messages
-- Reports a non-string pattern, ranges whose first codepoint is not lower
-- than the second, and patterns the matcher rejects.
local function validate_pattern(pattern, modname, code, data, standardChars)
	if type(pattern) ~= "string" then
		discrepancy(modname, '%s, the %spattern for %s (%s), is not a string.',
			dump(pattern), standardChars and 'standard character ' or '', code, tostring(data[1]))
		-- Nothing below can work on a non-string.
		return
	end

	local kind = standardChars and 'standard ' or ''

	local ranges
	for lower, higher in ugmatch(pattern, "(.)%-%%?(.)") do
		if codepoint(lower) >= codepoint(higher) then
			ranges = ranges or Array()
			insert(ranges, { lower, higher })
		end
	end

	if ranges and ranges[1] then
		local plural = #ranges ~= 1 and "s" or ""
		discrepancy(modname, '%s (%s) specifies an invalid pattern ' ..
			'for %scharacter detection: %s. The first codepoint%s ' ..
			'in the range%s %s %s must be less than the second.',
			link(data), code, kind, pattern, plural, plural,
			ranges
				:map(
					function(range)
						return range[1] .. "-" .. range[2] ..
							(" (U+%X, U+%X)"):format(codepoint(range[1]), codepoint(range[2]))
					end)
				:concat(", "),
			#ranges ~= 1 and "are" or "is")
	end

	-- Let the matcher itself decide whether the character class is well-formed.
	if not pcall(umatch, "", "[" .. pattern .. "]") then
		discrepancy(modname, '%s (%s) specifies an invalid pattern for %scharacter detection: %s',
			link(data), code, kind, pattern)
	end
end

-- Code points in remove_exceptions must stay at or below the maximum code
-- point minus this offset.
local remove_exceptions_addition = 0xF0000
local maximum_code_point = 0x10FFFF
local remove_exceptions_maximum_code_point = maximum_code_point - remove_exceptions_addition

-- Checks the replacement specification stored in `data[replacements_name]`.
-- A string is accepted for "sort_key" and "entry_name" only. A table is
-- checked for: `from`/`to` both present or both absent and both gap-free
-- arrays, `to` not longer than `from`, `remove_diacritics` being a string,
-- and `remove_exceptions` being an array of strings whose code points do not
-- exceed remove_exceptions_maximum_code_point.
local function check_entry_name_or_sortkey(modname, code, data, replacements_name)
	local canonical_name = data[1]
	local shown_name = tostring(canonical_name)
	local replacements = data[replacements_name]

	if type(replacements) ~= "table" then
		local string_allowed = replacements_name == "sort_key" or replacements_name == "entry_name"
		if not (string_allowed and type(replacements) == "string") then
			discrepancy(modname, "The %s field in the data table for %s (%s) must be a %s.",
				replacements_name, shown_name, code,
				string_allowed and "string or table" or "table")
		end
		-- Indexing a non-table below would raise.
		return
	end

	if (replacements.from ~= nil) ~= (replacements.to ~= nil) then
		discrepancy(modname,
			"The from and to arrays in the %s table for %s (%s) are not both defined or both undefined.",
			replacements_name, shown_name, code)
	elseif replacements.from then
		for _, key in ipairs { "from", "to" } do
			check_array(modname, code, canonical_name, data, key, replacements_name)
		end
	end

	if replacements.remove_diacritics and type(replacements.remove_diacritics) ~= "string" then
		discrepancy(modname,
			"The remove_diacritics field in the %s table for %s (%s) must be a string.",
			replacements_name, shown_name, code)
	end

	if replacements.remove_exceptions
			and check_array(modname, code, canonical_name, data, "remove_exceptions", replacements_name) then
		for sequence_i, sequence in ipairs(replacements.remove_exceptions) do
			local code_point_i = 0
			for code_point in gcodepoint(sequence) do
				code_point_i = code_point_i + 1
				if code_point > remove_exceptions_maximum_code_point then
					-- The language code gets its own %s so that the limit, not
					-- the code, is formatted with %04X.
					discrepancy(modname,
						"Code point #%d (0x%04X) in field #%d of the remove_exceptions array for %s (%s) is over U+%04X.",
						code_point_i, code_point, sequence_i, shown_name, code,
						remove_exceptions_maximum_code_point)
				end
			end
		end
	end

	if replacements.from and replacements.to
			and m_table.length(replacements.to) > m_table.length(replacements.from) then
		discrepancy(modname,
			"The to array in the %s table for %s (%s) must be shorter or the same length as the from array.",
			replacements_name, shown_name, code)
	end
end

do
	-- True when `code` is among the ancestors of `lang`, directly or
	-- transitively.
	local function has_ancestor(lang, code)
		for _, anc in ipairs(lang:getAncestors()) do
			if code == anc:getCode() or has_ancestor(anc, code) then
				return true
			end
		end
	end

	-- Returns the list of ancestor codes that `lang` gets without listing
	-- them explicitly, or nothing when there are none.
	local function get_default_ancestors(lang)
		if lang:hasType("etymology-only") then
			local parent = lang:getParent()
			if not has_ancestor(parent, lang:getCode()) then
				return parent:getAncestorCodes()
			end
		end
		-- Walk up the family tree until a family with a proto-language
		-- (explicit, or a "-pro" code in the language data) is found.
		local fam_code, def_anc = lang:getFamilyCode()
		while fam_code and fam_code ~= "qfa-not" do
			local fam = m_family_data[fam_code]
			def_anc = fam.protoLanguage or
				m_language_data[fam_code .. "-pro"] and fam_code .. "-pro" or
				m_etym_language_data[fam_code .. "-pro"] and fam_code .. "-pro"
			if def_anc and def_anc ~= lang:getCode() then
				return {def_anc}
			end
			fam_code = fam[3]
		end
	end

	-- Checks one explicitly listed ancestor `anc_code` of `lang`.
	local function iterate_ancestor(code, data, modname, anc_code, lang)
		local anc = m_languages.getByCode(anc_code, nil, true)
		if not anc then
			discrepancy(modname,
				"%s (%s) lists an invalid language code %s as ancestor.",
				link(data), code, anc_code)
			return
		end

		local anc_fam = anc:getFamily()
		local anc_fam_code = anc_fam:getCode()

		-- An explicit ancestor that is already implied is redundant.
		local def_ancs = get_default_ancestors(lang)
		if def_ancs then
			for _, def_anc in ipairs(def_ancs) do
				def_anc = m_languages.getByCode(def_anc, nil, true)
				if def_anc and (
					anc_code == def_anc:getCode() or
					has_ancestor(def_anc, anc_code) or
					def_anc:hasParent(anc_code) and not has_ancestor(anc, def_anc:getCode())
				) then
					discrepancy(modname,
						"%s (%s) has the %s (%s) listed in its ancestor field, which is redundant, since it is calculated to be ancestral automatically.",
						link(data), code,
						link(anc:getRawData()), anc_code)
				end
			end
		end

		if not lang:inFamily(anc_fam_code) then
			discrepancy(modname,
				"%s (%s) has %s (%s) set as an ancestor, but is not in the %s (%s).",
				link(data), code,
				link(anc:getRawData()), anc_code,
				link(anc_fam:getRawData()), anc_fam_code)
		end

		-- Find the nearest enclosing family that has a proto-language.
		local fam, proto = lang
		repeat
			fam = fam:getFamily()
			proto = fam and fam:getProtoLanguage()
		until proto or not fam or fam:getCode() == "qfa-not"

		if proto and not (
			proto:getCode() == anc:getCode() or
			proto:hasAncestor(anc:getCode()) or
			anc:hasAncestor(proto:getCode())
		) then
			local lang_fam = lang:getFamily()
			discrepancy(modname,
				"%s (%s) is in the %s (%s) and has %s (%s) set as an ancestor, but it is not possible to form an ancestral chain between them.",
				link(data), code,
				link(lang_fam:getRawData()), lang_fam:getCode(),
				link(anc:getRawData()), anc_code)
		end
	end

	-- Checks every ancestor listed in `data.ancestors` (a table, or a
	-- comma-separated string) for the language `code`.
	function export.check_ancestors(code, data, modname)
		local ancestors = data.ancestors
		if not ancestors then
			return
		elseif type(ancestors) == "string" then
			ancestors = split(ancestors, "%s*,%s*", true)
		end
		local lang = m_languages.getByCode(code, nil, true)
		for _, anc in ipairs(ancestors) do
			iterate_ancestor(code, data, modname, anc, lang)
		end
	end
end

-- Cross-checks the "code to canonical name" and "canonical name to code"
-- lookup modules against what was collected from the data modules.
--   source_module_type        prefix the defining module name must start with
--   source_module_description text used in messages to name the data source
--   code_to_module_map        code -> name of the module that defines it
--   name_to_code_map          canonical name -> code, as collected
--   code_to_name_modname/module, name_to_code_modname/module
--                             names and contents of the two lookup modules
local function check_code_to_name_and_name_to_code_maps(
		source_module_type,
		source_module_description,
		code_to_module_map, name_to_code_map,
		code_to_name_modname, code_to_name_module,
		name_to_code_modname, name_to_code_module)
	local aliases = require("Module:languages/data").aliases

	local function check_code_and_name(modname, code, canonical_name)
		-- Check the code is in code_to_module_map and that it didn't originate from the wrong data module.
		local check_mod = code_to_module_map[code] or code_to_module_map[aliases[code]]
		if not (check_mod and check_mod:match("^" .. source_module_type .. "/data")) then
			if not name_to_code_map[canonical_name] then
				discrepancy(modname,
					"The code %s and the canonical name %s should be removed; they are not found in %s.",
					code, canonical_name, source_module_description)
			else
				discrepancy(modname,
					"%s, the code for the canonical name %s, is wrong; it should be %s.",
					code, canonical_name, name_to_code_map[canonical_name])
			end
		elseif not name_to_code_map[canonical_name] then
			local data_table = require("Module:" .. code_to_module_map[code])[code]
			discrepancy(modname,
				"%s, the canonical name for the code %s, is wrong; it should be %s.",
				canonical_name, code, data_table[1])
		end
	end

	for code, canonical_name in pairs(code_to_name_module) do
		check_code_and_name(code_to_name_modname, code, canonical_name)
	end
	for canonical_name, code in pairs(name_to_code_module) do
		check_code_and_name(name_to_code_modname, code, canonical_name)
	end
end

-- Reports every code that has an entry in the extra-data module but none in
-- the corresponding main data module.
local function check_extraneous_extra_data(
		data_modname, data_module, extra_data_modname, extra_data_module)
	for code in pairs(extra_data_module) do
		if not data_module[code] then
			discrepancy(extra_data_modname,
				"Language code %s is not found in Module:%s, and should be removed from Module:%s.",
				code, data_modname, extra_data_modname
			)
		end
	end
end

-- Just trying to not have a module error when someone puts a script code
-- in the position of a language code.
-- Returns the code itself when it is a string, otherwise a highlighted dump
-- of whatever value was found.
local function show_family_code(code)
	if type(code) == "string" then
		return code
	else
		return require("Module:debug").highlight_dump(code)
	end
end

-- Checks every language data module (two-letter, three-letter and
-- exceptional codes), then the language lookup modules, then
-- Template:langname-lite. Returns "OK" once the template has been read to
-- the end.
local function check_languages()
	local check_language_data_keys = check_data_keys{
		1, 2, 3, 4, -- canonical name, wikidata item, family, scripts
		"display_text", "generate_forms", "entry_name", "sort_key",
		"otherNames", "aliases", "varieties", "ietf_subtag",
		"type", "ancestors",
		"wikimedia_codes", "wikipedia_article", "standardChars",
		"translit", "override_translit", "link_tr",
		"dotted_dotless_i"
	}

	-- Checks a single language entry. `mainData` is the entry in the main
	-- data module, `extraData` the matching entry of the /extra module (may
	-- be nil).
	local function check_language(modname, code, data, mainData, extraData)
		local canonical_name, lang_type = data[1], data.type

		check_language_data_keys(modname, code, data)

		if all_codes[code] then
			discrepancy(modname, "Code %s is not unique; it is also defined in Module:%s.", code, all_codes[code])
		else
			if not m_language_codes[code] then
				discrepancy("languages/code to canonical name", "The code %s (%s) is missing.", code, tostring(canonical_name))
			end
			all_codes[code] = modname
		end

		if not canonical_name then
			discrepancy(modname, "Code %s has no canonical name specified.", code)
		elseif language_names[canonical_name] then
			discrepancy(modname,
				"%s (%s) has a canonical name that is not unique; it is also used by the code %s.",
				link(data), code, language_names[canonical_name])
		else
			if not m_language_canonical_names[canonical_name] then
				discrepancy("languages/canonical names", "The canonical name %s (%s) is missing.", canonical_name, code)
			end
			language_names[canonical_name] = code
		end

		check_wikidata_item(modname, code, data, 2)

		if extraData then
			check_other_names_aliases_varieties(modname, code, canonical_name, extraData)
		end

		if lang_type and not (lang_type == "regular" or lang_type == "reconstructed" or lang_type == "appendix-constructed") then
			discrepancy(modname, "%s (%s) is of an invalid type %s.", link(data), code, tostring(data.type))
		end

		-- Name lists belong in the /extra module, not in the main data.
		for _, key in ipairs{"aliases", "varieties", "otherNames"} do
			if mainData[key] then
				discrepancy(modname, "%s (%s) has the %s key. This must be moved to Module:%s/extra.",
					link(data), code, key, modname)
			end
		end

		if not extraData then
			discrepancy(modname .. "/extra",
				"%s (%s) has data in Module:%s, but does not have corresponding data in Module:%s/extra.",
				link(data), code, modname, modname)
		--elseif extraData.otherNames then
		--	discrepancy(modname .. "/extra", "%s has   key, but these should be changed to either   or  .", link(data), code)
		end

		local sc = data[4]
		if sc then
			if type(sc) == "string" then
				sc = split(sc, "%s*,%s*", true)
			end
			if type(sc) == "table" then
				if not sc[1] then
					discrepancy(modname, "%s (%s) has no scripts listed.", link(data), code)
				else
					for _, sccode in ipairs(sc) do
						local cur_sc = m_script_data[sccode]
						if not (cur_sc or sccode == "All" or sccode == "Hants") then
							discrepancy(modname,
								"%s (%s) lists an invalid script code %s.",
								link(data), code, tostring(sccode))
						-- elseif not cur_sc.characters then
						-- 	discrepancy(modname,
						-- 		"%s lists a script without characters   (%s).",
						-- 		link(data), code, sccode, cur_sc[1])
						end
						nonempty_scripts[sccode] = true
					end
				end
			else
				discrepancy(modname,
					"The %s field for %s (%s) must be a table or string.",
					4, link(data), code)
			end
		end

		if data.ancestors then
			export.check_ancestors(code, data, modname)
		end

		if data[3] then
			local family = data[3]
			if not m_family_data[family] then
				discrepancy(modname,
					"%s (%s) has an invalid family code %s.",
					link(data), code, show_family_code(family))
			end
			nonempty_families[family] = true
		end

		if data.sort_key then
			check_entry_name_or_sortkey(modname, code, data, "sort_key")
		end
		if data.entry_name then
			check_entry_name_or_sortkey(modname, code, data, "entry_name")
		end
		if data.display then
			check_entry_name_or_sortkey(modname, code, data, "display")
		end

		if data.standardChars then
			if type(data.standardChars) == "table" then
				local sccodes = {}
				-- `sc` is nil (or not a table) when the language lists no
				-- valid scripts; ipairs would raise on it.
				if type(sc) == "table" then
					for _, sccode in ipairs(sc) do
						sccodes[sccode] = true
					end
				end
				for sccode in pairs(data.standardChars) do
					if not (sccodes[sccode] or sccode == 1) then
						discrepancy(modname, "The field %s in the standardChars table for %s (%s) does not match any script for that language.",
							tostring(sccode), link(data), code)
					end
				end
			elseif type(data.standardChars) ~= "string" then
				discrepancy(modname, "The standardChars field in the data table for %s (%s) must be a string or table.",
					link(data), code)
			end
		end

		check_true_or_string_or_nil(modname, code, data, "override_translit")
		check_true_or_string_or_nil(modname, code, data, "link_tr")

		if data.override_translit and not data.translit then
			discrepancy(modname,
				"%s (%s) has override_translit set, but no transliteration module",
				link(data), code)
		end
	end

	-- Loads one data module together with its /extra module and checks every
	-- entry; `test` is an additional per-entry check on the shape of the code.
	local function check_module(modname, test)
		local mod_data = mw.loadData("Module:" .. modname)
		local extra_modname = modname .. "/extra"
		local extra_mod_data = mw.loadData("Module:" .. extra_modname)
		for code, data in pairs(mod_data) do
			test(modname, code, data)
			check_language(modname, code, data, mod_data[code], extra_mod_data[code])
		end
		check_no_alias_codes(modname, mod_data)
		check_no_alias_codes(extra_modname, extra_mod_data)
		check_extraneous_extra_data(modname, mod_data, extra_modname, extra_mod_data)
	end

	-- Check two-letter codes
	check_module(
		"languages/data/2",
		function(modname, code, data)
			if not code:find("^[a-z][a-z]$") then
				discrepancy(modname, "%s (%s) does not have a two-letter code.", link(data), code)
			end
		end
	)

	-- Check three-letter codes
	for i = 0x61, 0x7A do -- a to z
		local letter = string.char(i)
		check_module(
			"languages/data/3/" .. letter,
			function(modname, code, data)
				if not code:find("^" .. letter .. "[a-z][a-z]$") then
					discrepancy(modname, '%s (%s) does not have a three-letter code starting with "%s".', link(data), code, letter)
				end
			end
		)
	end

	-- Check exceptional codes
	check_module(
		"languages/data/exceptional",
		function(modname, code, data)
			if code:find("^[a-z][a-z][a-z]?$") then
				discrepancy(modname, '%s (%s) has a two- or three-letter code.', link(data), code)
			end
		end
	)

	-- These checks must be done while all_codes only contains language codes:
	-- that is, after language data modules have been processed, but before
	-- etymology languages, families, and scripts have.
	check_code_to_name_and_name_to_code_maps(
		"languages",
		"a submodule of Module:languages",
		all_codes, language_names,
		"languages/code to canonical name", m_language_codes,
		"languages/canonical names", m_language_canonical_names
	)

	-- Check Template:langname-lite
	local frame = mw.getCurrentFrame()
	local content = mw.title.new("Template:langname-lite"):getContent()
	content = content:gsub("%<%!%-%-.-%-%-%>", "") -- remove comments
	local match = ugmatch(content, "\n\t*|#*([^\n]+)=([^\n]*)")
	while true do
		local code, name = match()
		if not code then
			return "OK"
		end
		if code:len() > 1 and code ~= "default" then
			for _, single_code in pairs(split(code, "|", true)) do
				local lang = m_languages.getByCode(single_code, nil, true, true)
				if name:match("etymcode") then
					-- Only compare full names when the language exists; the
					-- missing language is reported further down.
					if lang then
						local nonEtym_name = frame:preprocess(name)
						local nonEtym_real_name = lang:getFullName()
						if nonEtym_name ~= nonEtym_real_name then
							discrepancy("Template:langname-lite", "Code: %s. Saw name: %s. Expected name: %s.",
								single_code, tostring(nonEtym_name), tostring(nonEtym_real_name))
						end
					end
					-- NOTE(review): the empty search pattern looks like text
					-- that was lost from this call - confirm what should be
					-- replaced by "1" here.
					name = frame:preprocess(name:gsub("", "1"))
				elseif name:match("familycode") then
					name = name:match("familycode|(.-)|")
				end
				if not lang then
					discrepancy("Template:langname-lite", "Code: %s. Saw name: %s. Language not present in data.",
						single_code, tostring(name))
				else
					local real_name = lang:getCanonicalName()
					if name ~= real_name then
						discrepancy("Template:langname-lite", "Code: %s. Saw name: %s. Expected name: %s.",
							single_code, tostring(name), tostring(real_name))
					end
				end
			end
		end
	end
end

-- Checks every entry of the etymology-only language data, looks for cycles
-- in the parent chain, and cross-checks the lookup modules.
local function check_etym_languages()
	local modname = "etymology languages/data"

	local check_etymology_language_data_keys = check_data_keys{
		1, 2, 3, 4, 5, -- canonical name, wikidata item, family, scripts, parent
		"display_text", "generate_forms", "entry_name", "sort_key",
		"otherNames", "aliases", "varieties", "ietf_subtag",
		"type", "main_code", "ancestors",
		"wikimedia_codes", "wikipedia_article", "standardChars",
		"translit", "override_translit", "link_tr",
		"dotted_dotless_i"
	}

	for code, data in pairs(m_etym_language_data) do
		local canonical_name, parent = data[1], data[5]

		check_etymology_language_data_keys(modname, code, data)

		if all_codes[code] then
			discrepancy(modname, "Code %s is not unique; it is also defined in Module:%s.", code, all_codes[code])
		else
			if not m_etym_language_codes[code] then
				discrepancy("etymology languages/code to canonical name", "The code %s (%s) is missing.", code, tostring(canonical_name))
			end
			all_codes[code] = modname
		end

		if not canonical_name then
			discrepancy(modname, "Code %s has no canonical name specified.", code)
		elseif language_names[canonical_name] then
			-- Sharing a name with a full language is only accepted when that
			-- language is this entry's main_code.
			local m_canonical_lang = m_languages.getByCanonicalName(canonical_name, nil, true)
			if not m_canonical_lang then
				discrepancy(modname, "%s (%s) has a canonical name that cannot be looked up.",
					link(data), code)
			elseif data.main_code ~= m_canonical_lang:getCode() then
				discrepancy(modname,
					"%s (%s) has a canonical name that is not unique; it is also used by the code %s.",
					link(data), code, language_names[canonical_name])
			end
		else
			if not m_etym_language_canonical_names[canonical_name] then
				discrepancy("etymology languages/canonical names", "The canonical name %s (%s) is missing.", canonical_name, code)
			end
			etym_language_names[canonical_name] = code
		end

		check_other_names_aliases_varieties(modname, code, canonical_name, data)

		if parent then
			if type(parent) ~= "string" then
				discrepancy(modname,
					"Etymology-only %s (%s) has a parent language or family code that is %s rather than a string.",
					link(data), code, "a " .. type(parent))
			elseif not (m_language_data[parent] or m_family_data[parent] or m_etym_language_data[parent]) then
				discrepancy(modname,
					"Etymology-only %s (%s) has invalid parent language or family code %s.",
					link(data), code, parent)
			end
			nonempty_families[parent] = true
		else
			discrepancy(modname,
				"Etymology-only %s (%s) has no parent language or family code.",
				link(data), code)
		end

		if data.ancestors then
			export.check_ancestors(code, data, modname)
		end

		if data[3] then
			local family = data[3]
			if not m_family_data[family] then
				discrepancy(modname,
					"%s (%s) has an invalid family code %s.",
					link(data), code, show_family_code(family))
			end
			nonempty_families[family] = true
		end

		check_wikidata_item(modname, code, data, 2)
	end

	-- Follow each parent chain; meeting a data table already on the current
	-- chain means the chain is cyclic. Tables proven fine are remembered in
	-- `checked` so they are not walked again.
	local checked = {}
	for code, data in pairs(m_etym_language_data) do
		local stack = {}

		while data do
			if checked[data] then
				break
			end
			if stack[data] then
				discrepancy(modname, "%s (%s) has a cyclic parental relationship to %s (%s)",
					link(data), tostring(code),
					link(m_etym_language_data[data[5]]), tostring(data.parent or data[5])
				)
				break
			end
			stack[data] = true
			code, data = data[5], data[5] and m_etym_language_data[data[5]]
		end

		for visited in pairs(stack) do
			checked[visited] = true
		end
	end

	check_no_alias_codes(modname, m_etym_language_data)
	check_code_to_name_and_name_to_code_maps(
		"etymology languages",
		"Module:etymology languages/data",
		all_codes, etym_language_names,
		"etymology languages/code to canonical name", m_etym_language_codes,
		"etymology languages/canonical names", m_etym_language_canonical_names)
end

-- Checks every entry of the family data, reports families without children,
-- looks for cycles in the parent-family chain, and cross-checks the lookup
-- modules.
local function check_families()
	local modname = "families/data"

	local check_family_data_keys = check_data_keys{
		1, 2, 3, -- canonical name, wikidata item, (parent) family
		"type", "ietf_subtag",
		"protoLanguage", "otherNames", "aliases", "varieties",
	}

	for code, data in pairs(m_family_data) do
		check_family_data_keys(modname, code, data)

		local canonical_name, family = data[1], data[3]

		if all_codes[code] then
			discrepancy(modname, "Code %s is not unique; it is also defined in Module:%s.", code, all_codes[code])
		else
			if not m_family_codes[code] then
				discrepancy("families/code to canonical name", "The code %s (%s) is missing.", code, tostring(canonical_name))
			end
			all_codes[code] = modname
		end

		if not canonical_name then
			discrepancy(modname, "Code %s has no canonical name specified.", code)
		elseif family_names[canonical_name] then
			discrepancy(modname,
				"%s (%s) has a canonical name that is not unique; it is also used by the code %s.",
				link(data), code, family_names[canonical_name])
		else
			if not m_family_canonical_names[canonical_name] then
				discrepancy("families/canonical names", "The canonical name %s (%s) is missing.", canonical_name, code)
			end
			family_names[canonical_name] = code
		end

		if data[2] and type(data[2]) ~= "number" then
			discrepancy(modname, "%s (%s) has a wikidata item value that is not a number or nil: %s", link(data), code, dump(data[2]))
		end

		check_other_names_aliases_varieties(modname, code, canonical_name, data)

		if family then
			if family == code and code ~= "qfa-not" then
				discrepancy(modname,
					"%s (%s) has itself as its family.",
					link(data), code)
			elseif not m_family_data[family] then
				discrepancy(modname,
					"%s (%s) has an invalid parent family code %s.",
					link(data), code, show_family_code(family))
			end
			nonempty_families[family] = true
		end

		check_wikidata_item(modname, code, data, 2)
	end

	-- nonempty_families has been filled by the language, etymology-language
	-- and family checks; whatever is left has no children.
	for code, data in pairs(m_family_data) do
		if not (nonempty_families[code] or allowed_empty_families[code]) then
			discrepancy(modname, "%s (%s) has no child families or languages.", link(data), code)
		end
	end

	-- Follow each parent chain by code; meeting a code already on the current
	-- chain means the chain is cyclic.
	local checked = { ['qfa-not'] = true }
	for code, data in pairs(m_family_data) do
		local stack = {}

		while data do
			if checked[code] then
				break
			end
			if stack[code] then
				discrepancy(modname, "%s (%s) has a cyclic parental relationship to %s (%s)",
					link(data), code,
					link(m_family_data[data[3]]), tostring(data[3])
				)
				break
			end
			stack[code] = true
			code, data = data[3], m_family_data[data[3]]
		end

		for visited in pairs(stack) do
			checked[visited] = true
		end
	end

	check_no_alias_codes(modname, m_family_data)
	check_code_to_name_and_name_to_code_maps(
		"families",
		"Module:families/data",
		all_codes, family_names,
		"families/code to canonical name", m_family_codes,
		"families/canonical names", m_family_canonical_names)
end

-- Checks every entry of the script data and cross-checks the script lookup
-- modules. Relies on nonempty_scripts having been filled by the language
-- checks to tell which scripts are unused.
local function check_scripts()
	local modname = "scripts/data"

	local check_script_data_keys = check_data_keys({
		1, 2, -- canonical name, writing systems
		"canonicalName", "otherNames", "aliases", "varieties", "parent", "ietf_subtag",
		"wikipedia_article", "ranges", "characters", "spaces", "capitalized", "translit", "direction",
		"character_category", "normalizationFixes"
	}, true)

	local m_script_codes = require('Module:scripts/code to canonical name')
	local m_script_canonical_names = require('Module:scripts/by name')

	-- Just to satisfy requirements of check_code_to_name_and_name_to_code_maps.
	local script_code_to_module_map = {}

	for code, data in pairs(m_script_data) do
		local canonical_name = data[1]

		-- Only four-letter codes are expected in the lookup modules.
		if not m_script_codes[code] and #code == 4 then
			discrepancy('scripts/code to canonical name', '%s (%s) is missing', code, tostring(canonical_name))
		end

		check_script_data_keys(modname, code, data)

		if not canonical_name then
			discrepancy(modname, "Code %s has no canonical name specified.", code)
		elseif script_names[canonical_name] then
			--[=[
			discrepancy(modname,
				"%s has a canonical name that is not unique; it is also used by the code  .",
				link_script(data.names[1]), code, script_names[data.names[1]])
			--]=]
		else
			if not m_script_canonical_names[canonical_name] and #code == 4 then
				discrepancy('scripts/by name', '%s (%s) is missing', canonical_name, code)
			end
			script_names[canonical_name] = code
		end

		check_other_names_aliases_varieties(modname, code, canonical_name, data)

		if not nonempty_scripts[code] then
			discrepancy(modname,
				"%s (%s) is not used by any language%s.",
				link_script(canonical_name), code, data.characters and ""
					or " and has no characters listed for auto-detection")
		--[==[
		elseif not data.characters then
			discrepancy(modname, "%s has no characters listed for auto-detection.", link_script(canonical_name), code)
		--]==]
		end

		if data.characters then
			validate_pattern(data.characters, modname, code, data, false)
		end

		script_code_to_module_map[code] = modname
	end

	check_no_alias_codes(modname, m_script_data)
	check_code_to_name_and_name_to_code_maps(
		"scripts",
		"a submodule of Module:scripts",
		script_code_to_module_map, script_names,
		"scripts/code to canonical name", m_script_codes,
		"scripts/by name", m_script_canonical_names)
end

--- Validate the label data modules.
--
-- Checks the three general label modules ("Module:labels/data" plus its
-- "/regional" and "/topical" submodules) and then, for every language code in
-- `m_language_codes`, the language-specific module
-- "Module:labels/data/lang/<code>" if it can be loaded.
-- Each label's data must be either a string or a table containing only the
-- keys listed below.
local function check_labels()
	local check_label_data_keys = check_data_keys{
		"display", "Wikipedia", "glossary",
		"plain_categories", "topical_categories", "pos_categories", "regional_categories", "sense_categories",
		"omit_preComma", "omit_postComma", "omit_preSpace",
		"deprecated", "track"
	}

	-- Check a single label entry.
	local function check_label(modname, code, data)
		local typ = type(data)
		if typ == "table" then
			check_label_data_keys(modname, code, data)
		elseif typ ~= "string" then
			discrepancy(modname,
				"The data for label <code>%s</code> is a %s; only tables and strings are allowed.",
				code, typ)
		end
	end

	-- Check every label in one loaded data module.
	local function check_label_module(modname, label_data)
		for label, data in pairs(label_data) do
			check_label(modname, label, data)
		end
	end

	for _, suffix in ipairs{"", "/regional", "/topical"} do
		local modname = "Module:labels/data" .. suffix
		check_label_module(modname, require(modname))
	end

	for code in pairs(m_language_codes) do
		local modname = "Module:labels/data/lang/" .. code
		-- Any failure to load is ignored (presumably because most languages
		-- have no label module of their own).
		local ok, label_data = pcall(require, modname)
		if ok then
			check_label_module(modname, label_data)
		end
	end
end

--- Cross-check the two Chinese character conversion tables against each other.
--
-- For every pair `char -> other` in one table, the opposite table is expected
-- to map `other` back to `char`. When it does not, the chain of conversions is
-- followed and reported as a discrepancy against "zh/data/st" or "zh/data/ts".
local function check_zh_trad_simp()
	local m_ts = require("Module:zh/data/ts")
	local m_st = require("Module:zh/data/st")
	local ruby = require("Module:ja-ruby").ruby_auto
	local lang = m_languages.getByCode("zh")
	local Hant = m_scripts.getByCode("Hant")
	local Hans = m_scripts.getByCode("Hans")

	-- Parallel tables indexed by side (0 or 1); the keys of data[side] are
	-- displayed with label var[side] and script sc[side].
	local data = {[0] = m_st, m_ts}
	local mod = {[0] = "st", "ts"}
	local var = {[0] = "Simp.", "Trad."}
	local sc = {[0] = Hans, Hant}

	-- Build the linked display form of `char`, annotated with the label of `side`.
	-- Always returns exactly one value so it is safe inside insert(...).
	local function linked(char, side)
		local display = ruby({["markup"] = "[" .. char .. "](" .. var[side] .. ")"})
		local link = m_links.language_link{term = char, alt = display, lang = lang, sc = sc[side], tr = "-"}
		return link
	end

	-- Follow the conversion chain starting from `other`, appending each step
	-- to `chars`. Returns `chars` and an issue number:
	--   1: a character converts to itself
	--   2: a character is missing from the table it is looked up in
	--   3: the chain ends in a pair of characters that convert to each other
	local function find_stable_loop(chars, other, j)
		local this, that = j % 2, (j + 1) % 2
		insert(chars, linked(other, that))
		local counterpart = data[that][other]
		if counterpart == other then
			insert(chars, other)
			return chars, 1
		elseif not counterpart then
			insert(chars, "not found")
			return chars, 2
		elseif data[this][counterpart] ~= other then
			return find_stable_loop(chars, counterpart, j + 1)
		else
			insert(chars, linked(counterpart, this) .. " (")
			insert(chars, linked(data[this][counterpart], that) .. " etc.)")
			return chars, 3
		end
	end

	for i = 0, 1 do
		local opposite = (i + 1) % 2
		for char, other in pairs(data[i]) do
			if data[opposite][other] ~= char then
				local chars, issue = {}
				insert(chars, linked(char, i))
				chars, issue = find_stable_loop(chars, other, i)
				if issue == 1 or issue == 2 then
					-- Plain-text search: the labels end in ".", which would
					-- otherwise act as a pattern wildcard.
					local j = chars[#chars - 1]:find(var[opposite], 1, true) and 1 or 0
					local mod_this = mod[(i + j) % 2]
					local sc_this = {[0] = sc[(i + j) % 2], sc[(i + j + 1) % 2]}
					for k, item in ipairs(chars) do
						chars[k] = m_script_utils.tag_text(item, lang, sc_this[k % 2], "term")
					end
					-- The chain is passed as an argument, never as the format
					-- string, so a "%" in the generated markup cannot break
					-- formatting.
					if issue == 1 then
						discrepancy("zh/data/" .. mod_this, "character references itself: %s", concat(chars, " → "))
					else
						discrepancy("zh/data/" .. mod_this, "missing character: %s", concat(chars, " → "))
					end
				elseif issue == 3 then
					for j, item in ipairs(chars) do
						chars[j] = m_script_utils.tag_text(item, lang, sc[(i + j) % 2], "term")
					end
					discrepancy("zh/data/" .. mod[i], "possible mismatched character: %s", concat(chars, " → "))
				end
			end
		end
	end
end

--- Report a discrepancy when a stored serialized-data module differs from
-- what its serializer currently produces.
-- Modules with no registered serializer are ignored.
-- @param modname  module name without the "Module:" prefix
local function check_serialization(modname)
	-- serialized data module -> module whose main(true) regenerates it
	local serializer_for = {
		["Hani-sortkey/data/serialized"] = "Hani-sortkey/serializer",
	}

	local serializer = serializer_for[modname]
	if not serializer then
		return nil
	end

	local regenerated = require("Module:" .. serializer).main(true)
	local stored = require("Module:" .. modname)

	-- NOTE(review): presumably both values are strings; tables would be
	-- compared by reference here — confirm.
	if regenerated == stored then
		return
	end

	discrepancy(modname, " Important! Serialized data is out of sync. Use Module: ".. serializer .. " to update it. If you have made any changes to the underlying data, the serialized data must be updated before these changes will take effect. ")
end

--- Run the consistency checks and collect their messages.
--
-- Warning: cannot be called twice in the same module invocation because
-- some module-global variables are not reset between calls.
--
-- @param modules  set of module names (name -> true) selecting the optional
--                 checks: "zh/data/ts" / "zh/data/st" enable the Chinese
--                 conversion check, "labels/data" enables the label check,
--                 and every key is offered to check_serialization.
-- @return table mapping module name -> sorted list of message strings
function export.do_checks(modules)
	-- Message lists are created on first access, one fresh Array per module.
	messages = setmetatable({}, {
		__index = function (self, k)
			local val = Array()
			self[k] = val
			return val
		end
	})

	if modules["zh/data/ts"] or modules["zh/data/st"] then
		check_zh_trad_simp()
	end
	check_languages()
	check_etym_languages()

	-- families and scripts must be checked AFTER languages; languages checks fill out
	-- the nonempty_families and nonempty_scripts tables, used for testing if a family/script
	-- is ever used in the data
	check_families()
	check_scripts()

	if modules["labels/data"] then
		check_labels()
	end

	for module in pairs(modules) do
		check_serialization(module)
	end

	-- From here on, looking up an unknown module must yield nil, not a new list.
	setmetatable(messages, nil)

	-- Extract the first code mentioned in a message.
	-- NOTE(review): assumes messages wrap codes in <code>...</code> tags — confirm
	-- against the format strings used by the individual checks.
	local function find_code(message)
		return string.match(message, "<code>([^<]+)</code>")
	end
	find_code = require("Module:fun").memoize(find_code)

	-- Strict ordering: messages that mention a code come first, ordered by
	-- code; everything else (and ties) is ordered by the message text. Keeping
	-- the two groups separate makes the comparison consistent for sorting.
	local function comp(message1, message2)
		local code1, code2 = find_code(message1), find_code(message2)
		if code1 and code2 then
			if code1 ~= code2 then
				return code1 < code2
			end
		elseif code1 or code2 then
			return not not code1
		end
		return message1 < message2
	end

	for _, msglist in pairs(messages) do
		msglist:sort(comp)
	end

	-- Hand the result over and drop the module-level reference.
	local ret = messages
	messages = nil
	return ret
end

--- Format the messages of one module as a level-3 heading followed by a
-- bulleted list.
-- @param modname  module name; "Module:" is prepended unless the name already
--                 starts with "Module:" or "Template:"
-- @param msglist  list of message strings providing `map` and `concat` methods
-- @return string of wikitext
function export.format_message(modname, msglist)
	local title = modname
	if not (modname:match("^Module:") or modname:match("^Template:")) then
		title = "Module:" .. modname
	end
	local header = "===" .. title .. "==="

	local bullets = msglist:map(function(msg)
		return "\n* " .. msg
	end)
	return header .. bullets:concat()
end

--- Run the checks and return the formatted messages for the listed modules only.
-- @param args  sequence of module names; they are also passed to do_checks as
--              the set of modules selecting the optional checks
-- @return string with one formatted section per listed module that has
--         messages, in the order given, joined by newlines
function export.check_modules(args)
	local modules = {}
	for _, arg in ipairs(args) do
		modules[arg] = true
	end

	local ret = Array()
	local results = export.do_checks(modules)
	for _, modname in ipairs(args) do
		local msglist = results[modname]
		if msglist then
			ret:insert(export.format_message(modname, msglist))
		end
	end
	return ret:concat("\n")
end

--- Frame-based wrapper around export.check_modules.
-- @param frame  object whose `args` table lists the module names to check;
--               a shallow copy of it is handed to export.check_modules
-- @return whatever export.check_modules returns
function export.check_modules_t(frame)
	return export.check_modules(m_table.shallowcopy(frame.args))
end

--- Run the checks with no optional modules selected and format all messages.
-- @param frame  unused
-- @return the success string when no module produced messages; otherwise a
--         warning line followed by one formatted section per module
function export.perform(frame)
	local messages = export.do_checks({})

	-- Format the messages
	local ret = Array()
	local section_count = 0
	for modname, msglist in m_table.sortedPairs(messages) do
		ret:insert(export.format_message(modname, msglist))
		section_count = section_count + 1
	end

	-- Are there any messages?
	if section_count == 0 then
		return ' Glory to Arstotzka.'
	end
	ret:insert(1, ' Discrepancies detected:')
	return ret:concat('\n')
end

-- Hand the table of exported functions to whoever requires this module.
return export