-- Module:User:Erutuon/lang stuff

-- Table of functions exposed by this module; it is returned at the end of
-- the file.
local export = {}

-- Index at which a language's data table holds its script list; the report
-- functions below read it as data[script_key].
local script_key = 4

-- Contents of Module:scripts/data, loaded on the first call to _link_script
-- and reused afterwards.
local all_scripts

-- Returns the text shown for a script code.
--
-- script_code: key into Module:scripts/data.
-- Raises an error when the data module has no entry for script_code.
local function _link_script(script_code)
	all_scripts = all_scripts or require "Module:scripts/data"
	local script_data = all_scripts[script_code]
	if not script_data then
		error("No script with code " .. tostring(script_code) .. ".")
	end
	local name = script_data[1]
	-- Final run of letters in the script name, lower-cased. Falls back to ""
	-- when the name does not end in a letter, so :lower() is never called
	-- on nil.
	local last_word = (name:match("%a+$") or ""):lower()
	-- NOTE(review): both branches return the same text. The branch on
	-- last_word suggests the two cases once built different link targets
	-- from `name` and that markup was lost; restore it from the page
	-- history if so.
	if last_word == "scripts" or last_word == "code" or last_word == "semaphore" then
		return "" .. script_code .. ""
	else
		return "" .. script_code .. ""
	end
end

-- Results of _link_script, keyed by script code.
local cache = {}

-- Cached wrapper around _link_script: each script code is resolved once and
-- the stored result is returned on later calls.
local function link_script(script_code)
	local cached = cache[script_code]
	if cached then
		return cached
	end
	local fresh = _link_script(script_code)
	cache[script_code] = fresh
	return fresh
end

-- Converts a script list into a Module:array object.
--
-- list: either a table, which is wrapped with the Array constructor, or a
--       string of names separated by commas and/or whitespace.
-- Returns an array of the names.
--
-- The function is wrapped in Module:fun's memoize.
-- NOTE(review): presumably repeated calls with the same argument return the
-- same array object, so callers should treat the result as read-only --
-- confirm against Module:fun.
local array_from_comma_list_or_array = require "Module:fun".memoize(function (list)
	local Array = require "Module:array"
	if type(list) == "table" then
		return Array(list)
	end
	-- Build a fresh array. Inserting into the module table itself would
	-- accumulate names from every call in the shared constructor.
	local array = Array()
	for name in string.gmatch(list, "[^%s,]+") do
		array:insert(name)
	end
	return array
end)

-- Wraps `title` in "==" heading markers on a line of its own. The result
-- starts with a space and a newline and ends with a newline and a space.
local function ToC_item(title)
	local heading = '==' .. title .. '=='
	return ' \n' .. heading .. '\n '
end

-- Builds a sortable wikitable of language families whose canonical name is
-- also the canonical name of a language.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.lang_and_fam_name(frame)
	local language_name_to_code = require "Module:languages/canonical names"
	local family_data = require "Module:families/data"
	local Map = require "Module:User:Erutuon/lang_stuff/map"
	local is_combining = require "Module:Unicode data".is_combining
	local fun = require "Module:fun"

	-- NOTE(review): this returns plain text. `catname` looks like a link
	-- target whose surrounding link markup was lost -- confirm against the
	-- page history.
	local function link_name(name, family)
		if family then
			local catname = name:find "[Ll]anguages$" and name or name .. " languages"
			return "" .. catname .. ""
		else
			return "" .. name .. ""
		end
	end

	-- Sort key: the NFD form of the name with combining characters removed,
	-- so that accented names sort next to their unaccented counterparts.
	local get_sort_value = fun.memoize(function (canonical_name)
		return mw.ustring.toNFD(canonical_name):gsub(
			"[\194-\244][\128-\191]+",
			function (nonASCII_char)
				if is_combining(mw.ustring.codepoint(nonASCII_char)) then
					return ""
				end
			end)
	end)

	-- Number of families that share their name with a language. It is
	-- incremented only for matches, because the caption reports it as the
	-- number of pairs.
	local count = 0

	local families_that_share_name_with_language = Map:new(family_data)
		:filter(
			function (data)
				local shares_name = language_name_to_code[data[1]] ~= nil
				if shares_name then
					count = count + 1
				end
				return shares_name
			end)

	return ToC_item("Languages and language families with the same name")
		.. '\n{| class="wikitable sortable"\n|+ ' .. count
		.. ' pairs of languages and language families have the same canonical name\n! language !! code !! family !! code\n'
		.. families_that_share_name_with_language
			-- Convert to array and add language family code as "code" field in
			-- data table.
			:to_array("code")
			:sort(
				function (family1, family2)
					return get_sort_value(family1[1]) < get_sort_value(family2[1])
				end)
			:map(
				function (data)
					local canonicalName = data[1]
					-- One placeholder per column of the header row:
					-- language, language code, family, family code.
					return ("|-\n| %s || %s || %s || %s\n")
						:format(
							link_name(canonicalName),
							language_name_to_code[canonicalName],
							link_name(canonicalName, true),
							data.code)
				end)
			:concat()
		.. '|}'
end

-- Builds a sortable wikitable of the languages that use at least a given
-- number of scripts.
--
-- frame:  the invoking frame; frame.args[1] supplies the minimum when
--         `number` is not given.
-- number: optional minimum number of scripts.
-- Raises "Supply a number in parameter 1." when neither source gives a number.
-- Returns the section heading followed by the table, as wikitext.
function export.number_of_scripts(frame, number)
	local Map = require "Module:User:Erutuon/lang_stuff/map"

	local minimum_number_of_scripts = number
		or tonumber(frame.args[1])
		or error("Supply a number in parameter 1.")

	local languages = Map:new(require "Module:languages/data/all")
		:filter(
			function (data)
				return data[script_key]
					and #array_from_comma_list_or_array(data[script_key]) >= minimum_number_of_scripts
			end)

	local count = languages:size()

	return ToC_item("Number of scripts")
		.. '\n{| class="wikitable sortable"\n|+ ' .. count .. ' languages use '
		.. minimum_number_of_scripts
		.. ' or more scripts\n! canonical name !! code !! script count !! style="width: 8em;" | scripts\n'
		.. languages
			-- "und" and "mul" are left out of the rows.
			:filter(function (data, code) return not (code == "und" or code == "mul") end)
			:map(
				function (data, code)
					local canonical_name = data[1]
					local scripts = array_from_comma_list_or_array(data[script_key])
					local category_name = canonical_name
						.. (canonical_name:find("language") and "" or " language")
					-- NOTE(review): the category link markup is reconstructed
					-- from the argument list (category name + display name),
					-- matching make_category_link elsewhere in this file --
					-- confirm against the page history.
					return ('|-\n| [[:Category:%s|%s]] || %s || %d || %s\n')
						:format(
							category_name,
							canonical_name,
							code,
							#scripts,
							scripts:map(link_script):concat(", "))
				end)
			:sorted_concat()
		.. "|}"
end

-- Builds a sortable wikitable giving the number of language codes stored in
-- each language data module. A separate row gives the total for all
-- three-letter codes.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.census(frame)
	local alldata = require "Module:languages/data/all"
	local Map = require "Module:User:Erutuon/lang_stuff/map"
	-- NOTE(review): assumes Module:count yields 0 for keys not yet set, since
	-- the loop increments keys without initialising them -- confirm.
	local count = require "Module:count":new()

	for code in pairs(alldata) do
		local module_key
		if #code == 2 then
			module_key = 2
		elseif #code == 3 then
			-- Key 3 holds the total over all three-letter codes; the
			-- per-module key is "3" followed by the first letter.
			count[3] = count[3] + 1
			module_key = 3 .. code:sub(1, 1)
		else
			module_key = "exceptional"
		end
		count[module_key] = count[module_key] + 1
	end

	return ToC_item('Languages in each module')
		.. '\n{| class="wikitable sortable"\n|+ '
		.. 'Total number of codes in each language data module\n! module !! count\n'
		.. Map:new(count)
			:map(
				function (number_of_codes, module_key)
					local module_name
					if module_key == 2 then
						module_name = "data/2"
					elseif module_key == 3 then
						return ('|-\n| data-sort-value="%d" | three-letter codes || %d\n')
							:format(module_key, number_of_codes)
					elseif module_key:sub(1, 1) == "3" then
						module_name = "data/3/" .. module_key:sub(2, 2)
					else
						module_name = "data/exceptional"
					end
					return ('|-\n| data-sort-value="%s" | Module:languages/%s || %d\n')
						:format(tostring(module_key), module_name, number_of_codes)
				end)
			:sorted_concat()
		.. '|}'
end

-- Builds a sortable wikitable of the "shapes" of the codes in
-- Module:languages/data/exceptional. A code's shape is the code with every
-- character other than "-" replaced by "a".
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.exceptional_code_formats(frame)
	local Map = require "Module:User:Erutuon/lang_stuff/map"
	local language_data = Map:new(require "Module:languages/data/exceptional")

	-- Appends v to the list stored at t[k], creating the list on first use.
	local function add(t, k, v)
		local subtable = t[k]
		if not subtable then
			subtable = {}
			t[k] = subtable
		end
		table.insert(subtable, v)
	end

	local codes_by_format = setmetatable({}, { __index = Map:new{ add = add } })

	for code in language_data:sorted_pairs() do
		local code_repr = code:gsub("[^-]", "a")
		codes_by_format:add(code_repr, code)
	end

	-- Replaces each hyphen-free run with its length, e.g. "aaa-aa" -> "3-2".
	-- The extra parentheses keep only the first result of gsub.
	local function get_sort_value(code_repr)
		return (code_repr:gsub("[^-]+", string.len))
	end

	local function compare(code_repr1, code_repr2)
		return get_sort_value(code_repr1) < get_sort_value(code_repr2)
	end

	return ToC_item('Exceptional code formats')
		.. '\n{| class="wikitable sortable"\n|+ '
		.. 'Code formats in Module:languages/data/exceptional\n! format !! count\n'
		.. codes_by_format
			:map(
				function (codes, code_repr)
					codes = Map:new(codes)
					-- Format cell, then a count cell whose tooltip lists the
					-- codes. One placeholder per argument; with the first one
					-- missing, %d received the code list and raised an error.
					return ('|-\n| %s || title="%s" | %d\n')
						:format(code_repr, codes:sorted_concat(", "), #codes)
				end)
			:sorted_concat("", compare)
		.. '|}'
end

-- Builds a sortable wikitable of every distinct combination of scripts used
-- by a language, with the number of languages that use it. The language
-- names are put in a tooltip, cut off after a fixed number of entries.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.script_combinations(frame)
	local Array = require "Module:array"
	local Map = require "Module:User:Erutuon/lang_stuff/map"
	local language_data = require "Module:languages/data/all"

	-- Appends lang_code to the list stored at t[k], creating it on first use.
	local function add(t, k, lang_code)
		local subtable = t[k]
		if not subtable then
			subtable = {}
			t[k] = subtable
		end
		table.insert(subtable, lang_code)
	end

	local script_combinations = setmetatable({}, { __index = Map:new{ add = add } })

	for code, data in pairs(language_data) do
		if not (code == "und" or code == "mul") then
			local script_list = data[script_key]
			if script_list == nil then
				script_combinations:add("None", code)
			else
				script_combinations:add(
					array_from_comma_list_or_array(script_list):concat ", ",
					code)
			end
		end
	end

	local count = script_combinations:size()

	local number_of_languages_in_tooltip = 80

	local function display_language(language_code)
		return language_data[language_code][1] .. " (" .. language_code .. ")"
	end

	return ToC_item('Script combinations') .. [[

{| class="wikitable sortable"
|+ ]] .. count .. [[ script combinations (sorted alphabetically) and the number of languages that use them
! style="width: 8em;" | script list !! script count !! languages
]]
		.. script_combinations
			:map(
				function (languages, script_list)
					-- gsub both links each script code and reports how many
					-- codes the list contains.
					local script_count
					script_list, script_count = script_list:gsub("[^, ]+", link_script)
					local language_count = #languages
					local language_list = languages[2]
						and Array(languages)
							:sort()
							:slice(1, number_of_languages_in_tooltip)
							:map(display_language)
							:concat ", "
						or display_language(languages[1])
					if languages[number_of_languages_in_tooltip + 1] then
						language_list = language_list .. ", ..."
					end
					return ('|-\n| %s || %d || title="%s" | %d\n')
						:format(script_list, script_count, language_list, language_count)
				end)
			:sorted_concat(
				"",
				function (script_list1, script_list2)
					return script_list1:lower() < script_list2:lower()
				end)
		.. '|}'
end

-- Builds a sortable wikitable giving, for every key found in a language data
-- table, the number of languages whose table has that key. The total number
-- of languages is shown first.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.count_data_items(frame)
	-- NOTE(review): assumes Module:count yields 0 for keys not yet set, since
	-- the loop increments keys without initialising them -- confirm.
	local counts = require "Module:count":new()
	local Map = require "Module:User:Erutuon/lang_stuff/map"

	for _, data in pairs(require "Module:languages/data/all") do
		for k in pairs(data) do
			counts[k] = counts[k] + 1
		end
		counts.total = counts.total + 1
	end

	-- Descriptions of the positional fields 1, 2 and 3.
	local info = { "canonical name", "Wikidata item", "family" }

	return ToC_item('Data item census') .. [[

{| class="wikitable sortable"
|+ Number of languages with each data item in their table
! data item !! count
|-
]]
		.. Map:new(counts)
			:map(
				function (count, data_key)
					if data_key == "total" then
						return ("| total languages || %d"):format(count)
					elseif info[data_key] then
						return ("| %s (%s) || %d"):format(data_key, info[data_key], count)
					else
						return ("| %s || %d"):format(data_key, count)
					end
				end)
			:sorted_concat(
				"\n|-\n",
				function (data_key1, data_key2)
					if data_key1 == "total" then
						-- Ensure "total languages" shows at the top. Returning
						-- false when both keys are "total" keeps the
						-- comparator a strict ordering, as sorting requires.
						return data_key2 ~= "total"
					else
						return counts[data_key1] > counts[data_key2]
					end
				end)
		.. "\n|}"
end

-- Builds a sortable wikitable of the languages whose data table has no
-- script list, with the data module each one is stored in.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.no_scripts(frame)
	local Map = require "Module:User:Erutuon/lang_stuff/map"

	return ToC_item('Languages with no scripts') .. [[

{| class="wikitable sortable"
|+ Languages with no scripts
! code !! name !! module
|-
]]
		.. Map:new(require "Module:languages/data/all")
			:filter(
				function (data)
					return data[script_key] == nil
				end)
			:map(
				function (data, code)
					local name = data[1]
					-- Wikipedia article title: the explicit field, else the
					-- enwiki sitelink of the Wikidata item, else the name with
					-- " language" appended when it is not already there.
					local article = data.wikipedia_article
						or data.wikidata_item and mw.wikibase.sitelink(data.wikidata_item, 'enwiki')
						or name:find("[Ll]anguage") and name
						or name .. " language"
					local data_module =
						#code == 3 and "data/3/" .. code:sub(1, 1)
						or #code == 2 and "data/2"
						or "data/exceptional"
					-- NOTE(review): the link markup is reconstructed from the
					-- argument list (article + name, module + module); with
					-- only three bare placeholders the module column showed
					-- the language name. Confirm the prefixes against the
					-- page history.
					return ('| %s || [[w:%s|%s]] || [[Module:languages/%s|%s]]')
						:format(code, article, name, data_module, data_module)
				end)
			:sorted_concat("\n|-\n")
		.. "\n|}"
end

-- Builds a sortable wikitable of the languages whose `entry_name` field is a
-- table, showing the characters replaced and what replaces them.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.entry_name_replacements(frame)
	local Array = require "Module:array"
	local Map = require "Module:User:Erutuon/lang_stuff/map"
	local add_dotted_circle = require "Module:Unicode data".add_dotted_circle

	-- NOTE(review): script_code is unused here; this looks like it once
	-- wrapped str in script-specific markup that was lost -- confirm.
	local function script_tag(script_code, str)
		return '' .. str .. ' '
	end

	-- Renders one side (`from` or `to`) of a replacement list. A list with
	-- several items is joined with ", " and passed through add_dotted_circle.
	local function show_from_or_to(from_or_to, script_code)
		if not (from_or_to and from_or_to[1]) then
			return ""
		end
		return script_tag(script_code, from_or_to[2]
			and add_dotted_circle(Array(from_or_to):concat ", ")
			or from_or_to[1])
	end

	local header = 'Languages with entry name replacements'

	-- The caption placeholder "header" is replaced by the heading text. The
	-- assignment keeps only the first result of gsub.
	local table_start = ([[

{| class="wikitable sortable"
|+ header
! language !! script !! replacements
]]):gsub('header', header)

	return ToC_item(header)
		.. table_start
		.. Map:new(require "Module:languages/data/all")
			:filter(
				function (data)
					return type(data.entry_name) == "table"
				end)
			:map(
				function (data, code)
					local output = Array()
					for _, replacements in pairs(data.entry_name) do
						if replacements.from then
							local best_script = require "Module:languages".getByCode(code)
								:findBestScript(Array(replacements.from):concat())
							local script_code = best_script:getCode()
							-- NOTE(review): the language cell takes two
							-- arguments (name, code); "name (code)" is a
							-- reconstruction of the lost layout.
							output:insert(('|-\n| %s (%s) || %s || %s &darr; %s')
								:format(
									data[1],
									code,
									link_script(script_code),
									show_from_or_to(replacements.from, script_code),
									show_from_or_to(replacements.to, script_code)))
						end
					end
					return output:concat("\n")
				end)
			:sorted_concat "\n|-\n"
		.. "\n|}"
end

-- Builds a sortable wikitable mapping each Wikimedia language code to the
-- Wiktionary languages that list it in their `wikimedia_codes` field.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.wikimedia_languages(frame)
	local fun = require "Module:fun"

	-- Reading a missing key creates and stores an empty list for it.
	local languages_with_Wikimedia_code = setmetatable({},
		{
			__index = function (self, key)
				local val = {}
				self[key] = val
				return val
			end,
		})

	local language_data = require "Module:languages/data/all"

	for code, data in pairs(language_data) do
		if data.wikimedia_codes then
			for wikimedia_code in data.wikimedia_codes:gmatch "[^%s,]+" do
				table.insert(languages_with_Wikimedia_code[wikimedia_code], code)
			end
		end
	end

	for _, codes in pairs(languages_with_Wikimedia_code) do
		if codes[2] then
			table.sort(codes)
		end
	end

	return ToC_item("Wiktionary languages by Wikimedia language") .. [[

{| class="wikitable sortable"
|+ Languages by their Wikimedia language
! Wikimedia language !! Wiktionary language
]]
		.. table.concat(
			fun.mapIter(
				function (Wiktionary_codes, Wikimedia_code)
					-- NOTE(review): the original call passed Wikimedia_code
					-- twice, which suggests a link (target + display) whose
					-- markup was lost. The second placeholder now receives
					-- the language list so that the "Wiktionary language"
					-- column is filled.
					return ("|-\n| %s || %s"):format(
						Wikimedia_code,
						table.concat(
							fun.map(
								function (code)
									return ("%s (%s)"):format(
										code,
										language_data[code][1]) -- canonical name
								end,
								Wiktionary_codes),
							", "))
				end,
				require "Module:table".sortedPairs(languages_with_Wikimedia_code)),
			"\n")
		.. "\n|}"
end

-- Builds a sortable wikitable of names (canonical names or entries of
-- `otherNames`) that are recorded for more than one language code.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.ambiguous_names(frame)
	local Map = require "Module:User:Erutuon/lang stuff/map"
	local Array = require "Module:array"
	local language_data = require "Module:languages/data/all"
	local language_objects = require "Module:languages/cache"

	-- Reading a missing name creates and stores a new, empty array for it.
	-- Each name needs its own array; sharing the Array module itself would
	-- merge every name's codes into one list.
	local name_to_object = {}
	setmetatable(name_to_object, {
		__index = function (self, key)
			local val = Array()
			self[key] = val
			return val
		end
	})

	for code, data in pairs(language_data) do
		local canonical_name = data[1]
		name_to_object[canonical_name]:insert(code)
		if data.otherNames then
			for _, name in ipairs(data.otherNames) do
				name_to_object[name]:insert(code)
			end
		end
	end

	return ToC_item("Languages with ambiguous canonical or non-canonical names") .. [[

{| class="wikitable sortable"
|+ Canonical or non-canonical names that correspond to more than one language
! name !! languages]]
		.. Map:new(name_to_object)
			:filter(function (languages) return #languages > 1 end)
			:map(
				function (lang_codes, name)
					local languages = Array(lang_codes)
						:map(function (lang_code) return language_objects[lang_code] end)
						:sort(
							function (lang1, lang2)
								return lang1:getCanonicalName() < lang2:getCanonicalName()
							end)
						:map(
							function (lang)
								-- NOTE(review): three values are passed but
								-- each format string has one placeholder, so
								-- only the category name is shown. The two
								-- strings presumably carried different
								-- markup (link, emphasis, code) that was
								-- lost -- restore from the page history.
								return (lang:getCanonicalName() == name
									and " %s "
									or "%s ")
									:format(
										lang:getCategoryName(),
										lang:getCanonicalName(),
										lang:getCode())
							end)
						:concat(", ")
					return ("\n|-\n| %s || %s"):format(name, languages)
				end)
			-- Spelled sorted_concat, as in every other report in this file.
			:sorted_concat()
		.. "\n|}"
end

-- Builds a bulleted list of the languages whose canonical name begins with
-- the complete canonical name of another language followed by more words
-- (word-wise prefixes, longest first).
--
-- frame: the invoking frame (not read by this function).
-- Returns the list as wikitext, one "* ..." line per language.
function export.languages_with_prefix_of_another_language(frame)
	local Array = require "Module:array"
	local all_languages = require "Module:languages/data/all"
	local language_name_to_code = require "Module:languages/canonical names"

	local function get_category_name(canonical_name)
		return canonical_name:find("[Ll]anguage$")
			and canonical_name
			or canonical_name .. " language"
	end

	-- NOTE(review): the link markup is reconstructed from the argument list
	-- (category name + display name) and the function's name -- confirm
	-- against the page history.
	local function make_category_link(canonical_name)
		return ("[[:Category:%s|%s]]")
			:format(get_category_name(canonical_name), canonical_name)
	end

	return require "Module:User:Erutuon/lang stuff/map":new(all_languages)
		:map(
			function (data, code)
				local name = data[1]
				local words = mw.text.split(name, " ", true)
				-- Collected in a separate record, so that the language data
				-- tables returned by require are not modified.
				local prefixes
				for i = #words - 1, 1, -1 do
					local prefix = table.concat(words, " ", 1, i)
					if language_name_to_code[prefix] then
						prefixes = prefixes or {}
						table.insert(prefixes, prefix)
					end
				end
				return { name = name, prefixes = prefixes }
			end)
		:filter(
			function (record)
				return record.prefixes ~= nil
			end)
		:map(
			function (record, code)
				-- NOTE(review): one placeholder per argument; any markup that
				-- once surrounded the code was lost.
				return ("* %s %s: %s")
					:format(
						make_category_link(record.name),
						code,
						Array(record.prefixes)
							:map(make_category_link)
							:concat(", "))
			end)
		:sorted_concat("\n")
end

-- Builds a sortable wikitable of the languages whose data table has an
-- `otherNames` field, listing those names and the language's data module.
--
-- frame: the invoking frame (not read by this function).
-- Returns the table as wikitext.
function export.languages_with_otherNames_field(frame)
	local Array = require "Module:array"
	local all_languages = require "Module:languages/data/all"

	local function get_category_name(canonical_name)
		return canonical_name:find("[Ll]anguage$")
			and canonical_name
			or canonical_name .. " language"
	end

	-- NOTE(review): the link markup is reconstructed from the argument list
	-- (category name + display name) and the function's name -- confirm
	-- against the page history.
	local function make_category_link(canonical_name)
		return ("[[:Category:%s|%s]]")
			:format(get_category_name(canonical_name), canonical_name)
	end

	local get_data_module = require "Module:languages".getDataModuleName

	-- Data module name with "languages/" removed. The parentheses keep only
	-- the first result of gsub.
	-- NOTE(review): this was presumably the display text of a link to the
	-- module; the link markup was lost -- confirm.
	local function module_link(code)
		local module_name = get_data_module(code)
		return (module_name:gsub("languages/", ""))
	end

	-- NOTE(review): the opening of the table (string delimiters, "{|" and
	-- the field name in the caption) is reconstructed; the copy this was
	-- restored from had lost them.
	return [[
{| class="wikitable sortable"
|+ Languages with otherNames field in their language data
! name !! code !! otherNames !! module
]]
		.. require "Module:User:Erutuon/lang stuff/map":new(all_languages)
			:filter(
				function (data)
					return data.otherNames ~= nil
				end)
			:map(
				function (data, code)
					local name = data[1]
					-- One placeholder per column of the header row.
					return ("|-\n| %s || %s || %s || %s\n")
						:format(
							make_category_link(name),
							code,
							Array(data.otherNames):concat(", "),
							module_link(code))
				end)
			:sorted_concat()
		.. "|}"
end

-- Builds a sortable wikitable of transliteration modules whose name is
-- neither "<language code>-translit", nor "translit-redirect", nor a name
-- ending in "-translit" that contains one of the language's script codes.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.languages_with_odd_translit_modules(frame)
	local Array = require "Module:array"
	local all_languages = require "Module:languages/data/all"

	local function get_category_name(canonical_name)
		return canonical_name:find("[Ll]anguage$")
			and canonical_name
			or canonical_name .. " language"
	end

	-- NOTE(review): the link markup is reconstructed from the argument list
	-- (category name + display name) and the function's name -- confirm
	-- against the page history.
	local function make_category_link(canonical_name)
		return ("[[:Category:%s|%s]]")
			:format(get_category_name(canonical_name), canonical_name)
	end

	local caption = "Languages with odd transliteration modules (not beginning with language or script code)"

	return ToC_item(caption) .. [[

{| class="wikitable sortable"
|+ ]] .. caption .. [[

! name !! code !! script !! transliteration module
]]
		.. require "Module:User:Erutuon/lang stuff/map":new(all_languages)
			:filter(
				function (data, code)
					return data.translit ~= nil
				end)
			:map(
				function (data, code)
					local output
					-- `translit` is either one module name or a table of
					-- them; a bare name is wrapped so both are iterated
					-- alike. In the table form, string keys are treated as
					-- script codes.
					local translit_modules = type(data.translit) == "table"
						and data.translit
						or { data.translit }
					for script_code, translit in pairs(translit_modules) do
						if type(translit) == "string"
							and not (
								translit == code .. "-translit"
								or translit == "translit-redirect"
								or (data[script_key]
									and array_from_comma_list_or_array(data[script_key])
										:some(
											function (language_script)
												-- Hyphens are escaped so the
												-- code is matched literally.
												return translit:find(
													"^.*"
													.. language_script:gsub("%-", "%%-")
													.. ".*%-translit$")
											end))
							)
						then
							local name = data[1]
							output = output or Array()
							-- One placeholder per column of the header row.
							output:insert(("|-\n| %s || %s || %s || Module:%s\n")
								:format(
									make_category_link(name),
									code,
									type(script_code) == "string"
										and require "Module:scripts".getByCode(script_code):makeCategoryLink()
										or "",
									translit))
						end
					end
					return output and output:concat "\n" or ""
				end)
			:sorted_concat()
		.. "|}"
end

-- Builds a sortable wikitable of every distinct combination of keys found in
-- a language data table, with the number of languages that have exactly
-- that combination, most frequent first.
--
-- frame: the invoking frame (not read by this function).
-- Returns the section heading followed by the table, as wikitext.
function export.data_item_combinations(frame)
	local Array = require "Module:array"
	local Map = require "Module:User:Erutuon/lang stuff/map"

	-- Language data gathered from the individual data modules: the
	-- two-letter module, the 26 three-letter modules, and the exceptional
	-- module.
	local all_languages = {}
	local function transfer(module_subpage)
		for code, data in pairs(require("Module:languages/" .. module_subpage)) do
			all_languages[code] = data
		end
	end

	transfer "data/2"
	for b = ("a"):byte(), ("z"):byte() do
		transfer("data/3/" .. string.char(b))
	end
	transfer "data/exceptional"

	-- Maps each data key to its position in the list of all keys seen.
	-- NOTE(review): relies on Array.keys and :invert() from Module:array
	-- behaving as their names suggest -- confirm.
	local data_key_order = Array.keys(
		Map:new(all_languages)
			:values()
			:fold(
				function (set, data)
					for k in pairs(data) do
						set[k] = true
					end
					return set
				end,
				{})
	):invert()

	-- Sort key for a key list: one letter per key, "a" for the key with
	-- order 1, "b" for order 2, and so on.
	local function make_sortkey(data_key_list)
		local sortkey = Array()
		for _, key in pairs(data_key_list) do
			local order = assert(data_key_order[key])
			local char = string.char(("a"):byte() + order - 1)
			sortkey:insert(char)
		end
		return sortkey:concat()
	end

	local data_keys = Map:new(all_languages)
		:map(
			function (data)
				return Array.keys(data)
			end)
		:values()

	-- One record per distinct key list: { count = ..., items = ... }.
	local data_key_counts = data_keys
		:fold(
			function (counts, data_items)
				local key = data_items:concat ", "
				counts[key] = (counts[key] or {})
				counts[key].count = (counts[key].count or 0) + 1
				counts[key].items = data_items
				return counts
			end,
			Map:new())
		:values()

	data_key_counts = data_key_counts:sort(
		function (counts1, counts2)
			return counts1.count > counts2.count
		end)

	local caption = "Count of each combination of data items"

	return ToC_item(caption) .. [[

{| class="wikitable sortable"
|+ ]] .. caption .. [[

! combination of data keys !! number of languages
]]
		.. data_key_counts
			:map(
				function (count)
					return ('|-\n| data-sort-value="%s" | %s || %d\n')
						:format(make_sortkey(count.items), count.items:concat ", ", count.count)
				end)
			:concat()
		.. "|}"
end

-- Runs the exported report functions named in frame.args, in order, and
-- joins their output with newlines.
--
-- frame: the invoking frame; each positional argument names a function of
--        this module.
-- Raises "No exported function <name>" when a name matches no function.
-- An error inside a report does not abort the run: it is reported through
-- mw.addWarning together with a traceback, and that report is left out.
function export.show(frame)
	local out = {}
	for _, function_name in ipairs(frame.args) do
		local func = export[function_name]
			or error("No exported function " .. function_name)
		xpcall(
			function ()
				-- Appending (rather than writing to a fixed index) keeps
				-- `out` free of holes when a report fails, so table.concat
				-- below sees every successful report.
				if function_name == "number_of_scripts" then
					out[#out + 1] = func(frame, 3)
				else
					out[#out + 1] = func(frame)
				end
			end,
			function (err)
				mw.addWarning("Error running export." .. function_name .. ":\n"
					.. tostring(err) .. "\n" .. debug.traceback())
			end)
	end
	return table.concat(out, "\n")
end

-- Hand the table of report functions to whoever requires or invokes this
-- module.
return export