-- Module:User:Erutuon/split language data modules

-- Table holding this module's public functions; it is returned at the end of
-- the file.
local export = {}

-- Recursively duplicates a value. Unlike the deepcopy in Module:table, this
-- variant keeps no record of tables it has already copied, so two references
-- to the same table come out as two separate copies. For the same reason, a
-- table that contains itself (directly or indirectly) makes this recurse
-- until the stack overflows.
--
-- Both keys and values of a table are copied; metatables are not carried
-- over. `level` is accepted but never read.
local function deepcopy(orig, level)
	if type(orig) ~= 'table' then
		-- Numbers, strings, booleans, etc. are handed back unchanged.
		return orig
	end

	local duplicate = {}
	for key, value in pairs(orig) do
		local new_key = deepcopy(key)
		duplicate[new_key] = deepcopy(value)
	end
	return duplicate
end

-- Decodes a JSON string that holds several language data tables keyed by
-- language code, and returns them with their numeric keys restored.
--
-- mw.text.jsonEncode converts integer keys to strings whenever the table
-- being encoded also contains string keys, so a table that was decoded from a
-- JSON object has "1", "2", ... where the original had 1, 2, ... Key 1 (the
-- canonical name) is always present in the original language data, so the
-- presence of key "1" is taken as the sign that a table came from a JSON
-- object and needs its keys converted back. Tables without key "1" are
-- passed through as they are.
function export.load_json_language_data(json)
	local result = {}
	for lang_code, decoded in pairs(mw.text.jsonDecode(json)) do
		local entry = decoded
		if decoded["1"] then
			entry = {}
			for key, value in pairs(decoded) do
				-- Keys that tonumber accepts become numbers; all others are
				-- kept unchanged.
				entry[tonumber(key) or key] = value
			end
		end
		result[lang_code] = entry
	end
	return result
end

-- Groups every entry of Module:languages/data/all under the key that
-- key_maker(code, data) returns for it, and returns the grouped tables
-- encoded as a JSON string.
--
-- Each language table is deep-copied before encoding because
-- mw.text.jsonEncode refuses to encode the language data directly, citing
-- "circular references". That probably refers to the scripts fields, which
-- point to the same shared tables containing {"Latn"}, {"Cyrl"}, {"Arab"}.
--
-- Sequence tables (only canonical name, Wikidata item, and family) are
-- encoded as JSON arrays and all others as JSON objects, so the
-- number-indexed values end up under either number or string fields.
-- Consumers should therefore normalise keys with `tonumber(key) or key`,
-- or the equivalent on the other side.
function export.split(key_maker)
	local groups = {}
	for code, data in pairs(require "Module:languages/data/all") do
		local group_key = key_maker(code, data)
		if not groups[group_key] then
			groups[group_key] = {}
		end
		groups[group_key][code] = deepcopy(data)
	end
	return mw.text.jsonEncode(groups)
end

-- Splits the language data into groups keyed by the first two bytes of each
-- language code and returns the JSON string produced by export.split.
-- `frame` is accepted but not used.
function export.split_by_two_letter_prefix(frame)
	local function two_letter_prefix(code)
		return code:sub(1, 2)
	end
	return export.split(two_letter_prefix)
end

-- Returns the JSON string produced by export.split_by_two_letter_prefix,
-- preceded by a "length: N" line giving its length in bytes and a blank line.
--
-- frame: passed through to export.split_by_two_letter_prefix.
function export.show(frame)
	-- The splitter has to be called here: the length and concatenation
	-- operators below need its string result, and both raise an error when
	-- applied to a function value.
	local all_languages = export.split_by_two_letter_prefix(frame)
	return "length: " .. #all_languages .. "\n\n" .. all_languages
end

-- Hand the table of public functions to whoever loads this module.
return export