-- Module:User:Erutuon/script recognition/make

-- Table of functions returned by this module.
local export = {}

-- Script codes that get_script_ranges leaves out of its result.
local excluded_scripts = {
	Latf = true,
	Latg = true,
	Hans = true,
	Hant = true,
	Kore = true,
	Jpan = true,
	Zyyy = true,
}

-- Extracts the code point ranges of each script from the source text of a
-- data module.
--
-- The text is scanned for entries of the form
--     m["Code"] = process_ranges{ ..., ranges = { 0x..., 0x..., ... }, ... }
-- Entries whose code contains a hyphen or is listed in excluded_scripts are
-- skipped. Entries without a "ranges = { ... }" literal are reported through
-- mw.log and skipped.
--
-- @param module_text  string containing the Lua source to scan
-- @return table mapping each script code to an array of { first, last }
--         code point pairs (numbers), in the order they appear in the text
local function get_script_ranges(module_text)
	local script_to_ranges = {}

	for index, data_literal in module_text:gmatch("m(%b[])%s*=%s*process_ranges(%b{})") do
		-- index is the bracketed key including its quotes, e.g. ["Latn"];
		-- drop the two characters at each end to get the bare code.
		local script_code = index:sub(3, -3)

		if not (script_code:find("-", 1, true) or excluded_scripts[script_code]) then
			local range_literal = data_literal:match("ranges%s*=%s*(%b{})")

			if range_literal then
				local ranges = {}
				local next_hex = range_literal:gmatch("0x(%x+)")

				-- The hexadecimal literals are consumed two at a time as
				-- (first, last); a trailing unpaired value is ignored.
				while true do
					local first, last = next_hex(), next_hex()
					if not (first and last) then
						break
					end
					ranges[#ranges + 1] = { tonumber(first, 16), tonumber(last, 16) }
				end

				script_to_ranges[script_code] = ranges
			else
				mw.log("no range_literal for " .. script_code)
			end
		end
	end

	return script_to_ranges
end

-- Flattens a script -> ranges map into one sorted list.
--
-- @param script_to_ranges  table mapping a script code to an array of
--                          { first, last } pairs
-- @return array of { first, last, script } entries, ordered by first and,
--         where the firsts are equal, by last
function export.make_sorted_range_script_list(script_to_ranges)
	local entries = {}

	for script, ranges in pairs(script_to_ranges) do
		for _, range in ipairs(ranges) do
			local first, last = unpack(range)
			entries[#entries + 1] = { first, last, script }
		end
	end

	local function comes_before(x, y)
		if x[1] ~= y[1] then
			return x[1] < y[1]
		end
		return x[2] < y[2]
	end

	table.sort(entries, comes_before)
	return entries
end

-- Splits a sorted list of possibly overlapping ranges into disjoint ranges,
-- each labelled with every script whose input range covers it.
--
-- Examples (input -> output):
--   {1, 2, a}, {1, 2, b} -> {1, 2, a, b}
--   {1, 2, a}, {1, 3, b} -> {1, 2, a, b}, {3, 3, b}
--   {1, 3, a}, {2, 2, b} -> {1, 1, a}, {2, 2, a, b}, {3, 3, a}
--   {1, 3, a}, {2, 3, b} -> {1, 1, a}, {2, 3, a, b}
--   {1, 3, a}, {2, 4, b} -> {1, 1, a}, {2, 3, a, b}, {4, 4, b}
--
-- @param orig_ranges  array of { first, last, script, ... } sorted by first
--                     and then by last; it is not modified
-- @return new array of disjoint { first, last, script, ... } entries in
--         ascending order. Code points covered by no input range are left
--         out. Scripts appear in input order, each at most once per entry.
-- Raises an assertion error if orig_ranges is not sorted as described.
function export.merge_sorted_range_script_list(orig_ranges)
	local range_count = #orig_ranges

	-- The sweep below stops scanning at the first range that starts too
	-- late, so the input really has to be sorted.
	for i = 2, range_count do
		local prev, cur = orig_ranges[i - 1], orig_ranges[i]
		assert(prev[1] <= cur[1])
		if prev[1] == cur[1] then
			assert(prev[2] <= cur[2])
		end
	end

	-- Every code point at which the set of covering scripts can change:
	-- the start of a range, and the code point just past its end.
	local boundaries, is_boundary = {}, {}
	local function add_boundary(code_point)
		if not is_boundary[code_point] then
			is_boundary[code_point] = true
			boundaries[#boundaries + 1] = code_point
		end
	end
	for i = 1, range_count do
		local range = orig_ranges[i]
		add_boundary(range[1])
		add_boundary(range[2] + 1)
	end
	table.sort(boundaries)

	local merged = {}
	local first_live = 1
	for k = 1, #boundaries - 1 do
		local low, high = boundaries[k], boundaries[k + 1] - 1

		-- Leading ranges that end before this piece cannot cover it or any
		-- later piece.
		while first_live <= range_count and orig_ranges[first_live][2] < low do
			first_live = first_live + 1
		end

		local piece, seen = { low, high }, {}
		for i = first_live, range_count do
			local range = orig_ranges[i]
			if range[1] > low then
				break
			end
			-- No boundary lies strictly inside the piece, so a range that
			-- starts at or before low and reaches high covers all of it.
			if range[2] >= high then
				for j = 3, #range do
					local script = range[j]
					if not seen[script] then
						seen[script] = true
						piece[#piece + 1] = script
					end
				end
			end
		end

		-- A piece without scripts is a gap between input ranges.
		if #piece > 2 then
			merged[#merged + 1] = piece
		end
	end

	return merged
end

-- Passes one formatted line per range to log_function.
--
-- Each line has the form "U+XXXX-XXXX: script1, script2, ...", with both
-- code points written as upper-case hexadecimal padded to at least four
-- digits.
--
-- @param ranges        array of { first, last, script, ... } entries
-- @param log_function  function called with each formatted line
function export.log_ranges(ranges, log_function)
	for _, range in ipairs(ranges) do
		local first, last = unpack(range)
		local prefix = ("U+%04X-%04X: "):format(first, last)
		-- Everything from the third element onwards is a script code.
		log_function(prefix .. table.concat(range, ", ", 3))
	end
end

-- Builds the list of disjoint code point ranges with their scripts from the
-- current text of Module:scripts/data.
--
-- @return array of { first, last, script, ... } entries as produced by
--         export.merge_sorted_range_script_list
-- Raises an error if the content of Module:scripts/data cannot be read.
function export.make_script_recognition_data()
	local title = mw.title.new("Module:scripts/data")
	local module_text = title and title:getContent()
	if not module_text then
		error("could not read the content of Module:scripts/data")
	end

	local script_to_ranges = get_script_ranges(module_text)
	local range_list = export.make_sorted_range_script_list(script_to_ranges)
	return export.merge_sorted_range_script_list(range_list)
end

-- Writes the script recognition data to the log, one range per mw.log call.
function export.log_script_recognition_data()
	return export.log_ranges(export.make_script_recognition_data(), mw.log)
end

-- Returns the script recognition data as text: one line per range, each
-- prefixed with "*", joined with newlines.
function export.show()
	-- NOTE(review): assumes calling the value returned by Module:array yields
	-- a fresh array object with insert and concat methods; confirm against
	-- that module.
	local output = require("Module:array")()

	export.log_ranges(export.make_script_recognition_data(), function(val)
		output:insert("*" .. val)
	end)

	return output:concat("\n")
end

return export