-- Module:User:Theknightwho/charsets

local concat = table.concat local dec_to_hex = require("Module:utilities").dec_to_hex local format = string.format local insert = table.insert local ipairs = ipairs local pairs = pairs local remove = table.remove local rep = string.rep local sort = table.sort local tonumber = tonumber local tostring = tostring local type = type

local data = require("Module:User:Theknightwho/UnicodeData.txt")

--- Ordering function for `table.sort` over keys of mixed types.
-- Values of different types are ordered by their type name; two strings or
-- two numbers are ordered with `<`; two booleans are ordered by their
-- `tostring` text (so `false` sorts before `true`). Any other same-typed pair
-- is reported as "not less than".
-- @param a  first value
-- @param b  second value
-- @return boolean  true when `a` should sort before `b`
local function sorter(a, b)
	local ta, tb = type(a), type(b)
	if ta ~= tb then
		return ta < tb
	end
	if ta == "string" or ta == "number" then
		return a < b
	end
	if ta == "boolean" then
		return tostring(a) < tostring(b)
	end
	-- Incomparable. The comment sits on its own line so that it cannot
	-- swallow the function's closing `end`.
	return false
end

--- Render a value as indented text, with every number written in hexadecimal.
-- nil and booleans are rendered with `tostring`, numbers as "0x" followed by
-- `dec_to_hex(n)`, and strings with `format("%q", s)`.
-- A table whose `tostring` text is exactly "table" is labelled "table#N" and
-- expanded the first time it is reached; later references print only the
-- label. A table with any other `tostring` text is printed as that text and
-- never expanded. Values of other types are printed as `tostring(v) .. "#N"`.
-- Positional entries (those visited by `ipairs`) are listed first without
-- keys; remaining keys follow in `sorter` order as `[key] = value`.
-- @param object  the value to render
-- @return string
local function dump_charset(object)
	local labels = {}   -- value -> label text already assigned to it
	local expanded = {} -- tables that must not be expanded (again)
	local counters = {} -- type name -> number of labels issued so far
	local pad = " "

	-- Issue the next sequence number for the given type name.
	local function next_index(kind)
		local n = (counters[kind] or 0) + 1
		counters[kind] = n
		return n
	end

	local function render(value, depth, expand)
		local kind = type(value)
		if kind == "nil" or kind == "boolean" then
			return tostring(value)
		end
		if kind == "number" then
			return "0x" .. dec_to_hex(value)
		end
		if kind == "string" then
			return format("%q", value)
		end

		if kind ~= "table" then
			-- Functions, userdata, threads: label only.
			if not labels[value] then
				local n = next_index(kind)
				labels[value] = tostring(value) .. "#" .. n
			end
			return labels[value]
		end

		if not labels[value] then
			local text = tostring(value)
			if text == "table" then
				labels[value] = "table#" .. next_index(kind)
			else
				-- Custom text: use it verbatim and never look inside.
				labels[value] = text
				expanded[value] = true
			end
		end
		if expanded[value] or not expand then
			return labels[value]
		end
		expanded[value] = true

		local inner = rep(pad, depth + 2)
		local out = {labels[value], " {\n"}

		-- Positional part first, without explicit keys.
		local positional = {}
		for idx, item in ipairs(value) do
			positional[idx] = true
			out[#out + 1] = inner .. render(item, depth + 2, true) .. ",\n"
		end

		-- Everything else, in a stable order.
		local keys = {}
		for key in pairs(value) do
			if not positional[key] then
				keys[#keys + 1] = key
			end
		end
		sort(keys, sorter)
		for k = 1, #keys do
			local key = keys[k]
			-- Render the key before the value: label numbering depends on
			-- the order in which values are first reached.
			local key_text = render(key, depth + 3, false)
			local value_text = render(value[key], depth + 2, true)
			out[#out + 1] = inner .. "[" .. key_text .. "] = " .. value_text .. ",\n"
		end

		out[#out + 1] = rep(pad, depth) .. "}"
		return concat(out)
	end

	return render(object, 0, true)
end

-- Threshold used by get_chars: a run of consecutive code points is recorded
-- as a first/last pair in `ranges` only when (last - first) exceeds this.
local cutoff = 96

--- Collect the code points of every `data` line whose third `;`-separated
-- field starts with `str`, collapsing long consecutive runs into ranges.
--
-- Each matching line contributes its first field, parsed as hexadecimal.
-- Lines whose second field ends in ", First>" or ", Last>" are range
-- endpoints and go straight into `ranges`. All other code points are grouped
-- into runs of consecutive values: a run whose (last - first) exceeds
-- `cutoff` is stored as a first/last pair in `ranges` (kept ordered by first
-- value), and every shorter run is stored code point by code point.
-- NOTE(review): assumes `data` lists code points in ascending order, as
-- UnicodeData.txt does -- confirm for other inputs.
--
-- @param str  Lua pattern appended to the line pattern, matched at the start
--             of the third field
-- @return table  `[code_point] = true` for each individually stored code
--                point, plus `ranges`: a flat array of first, last, first,
--                last, ... or nil when there are no ranges
local function get_chars(str)
	local chars, ranges = {}
	for cp, name in data:gmatch("%f[^%z\n]([^;]*);([^;]*);" .. str) do
		if name:match(", First>$") or name:match(", Last>$") then
			ranges = ranges or {}
			insert(ranges, tonumber(cp, 16))
		else
			insert(chars, tonumber(cp, 16))
		end
	end

	local ret = {}
	local i, count = 1, #chars
	-- A bounded `while` also handles zero or one match, and processes the
	-- final element so that a trailing run is collapsed like any other.
	while i <= count do
		-- Advance j to the last member of the consecutive run starting at i.
		local j = i
		while chars[j + 1] == chars[j] + 1 do
			j = j + 1
		end
		local first, last = chars[i], chars[j]
		if last - first > cutoff then
			-- The whole run, including `first`, is represented by the range.
			ranges = ranges or {}
			local pos = 1
			while ranges[pos] and first > ranges[pos] do
				pos = pos + 2
			end
			insert(ranges, pos, first)
			insert(ranges, pos + 1, last)
		else
			for k = i, j do
				ret[chars[k]] = true
			end
		end
		i = j + 1
	end

	ret.ranges = ranges
	return ret
end

-- Module result: the text dump of the charset collected for the pattern "L",
-- i.e. every data line whose third `;`-separated field starts with "L"
-- (presumably the Unicode letter categories -- confirm against the data page).
return dump_charset(get_chars("L"))