-- Module:User:Theknightwho/UCA

local char = string.char local concat = table.concat local floor = math.floor local insert = table.insert local ipairs = ipairs local split = require("Module:string utilities").split local sub = string.sub local tonumber = tonumber

--[==[local skipped_ranges = {} do local hex = require("hex").to_hex local udata = require("Module:User:Theknightwho/UnicodeData.txt") local last = 0 for line in udata:gmatch("[^\n]+") do		local cp = line:match("^%x+") local a = cp		if cp then cp = tonumber(cp, 16) if cp - last > 0x1000 then insert(skipped_ranges, {hex(last), hex(cp - 1)}) end last = cp		end end return skipped_ranges end]==]

local ducet = require("Module:User:Theknightwho/UCA/DUCET")

-- Inclusive code point ranges, each given as {first, last}, with a note on
-- what the range covers. export.weights selects the ranges whose first code
-- point lies in the plane being processed.
local implicit_ranges = {
	{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A
	{0x4E00, 0x9FFF}, -- CJK Unified Ideographs
	{0xAC00, 0xD7AF}, -- Hangul Syllables
	{0xD800, 0xF8FF}, -- Surrogates, Private Use Area
	{0x12550, 0x12F8F}, -- Unassigned
	{0x13460, 0x143FF}, -- Unassigned
	{0x14680, 0x167FF}, -- Unassigned
	{0x17000, 0x1AFEF}, -- Tangut, Tangut Components, Khitan Small Script, Tangut Supplement, Unassigned
	{0x1B170, 0x1BBFF}, -- Nushu, Unassigned
	{0x1BCB0, 0x1CEFF}, -- Unassigned
	{0x1DAB0, 0x1DEFF}, -- Unassigned
	{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B
	{0x2A700, 0x2B73F}, -- CJK Unified Ideographs Extension C
	{0x2B740, 0x2B81F}, -- CJK Unified Ideographs Extension D
	{0x2B820, 0x2CEAF}, -- CJK Unified Ideographs Extension E
	{0x2CEB0, 0x2EBEF}, -- CJK Unified Ideographs Extension F
	{0x2EBF0, 0x2EE5F}, -- CJK Unified Ideographs Extension I
	{0x30000, 0x3134F}, -- CJK Unified Ideographs Extension G
	{0x31350, 0x323AF}, -- CJK Unified Ideographs Extension H
}

-- Table of public functions; populated below and returned as the module's value.
local export = {}

do
	-- Number of output slots reserved per code point: one byte for the
	-- variable/non-variable marker, two bytes for each of the first two
	-- weights, and one byte for the third weight.
	local RECORD_SIZE = 6
	-- Number of code points in a plane (U+X0000 to U+XFFFF).
	local PLANE_SIZE = 0x10000

	-- Bytes that have a short named escape in a Lua string literal, plus the
	-- double quote and the backslash, mapped to the escaped text to emit.
	local escapes = {
		[0x07] = "\\a",
		[0x08] = "\\b",
		[0x09] = "\\t",
		[0x0A] = "\\n",
		[0x0B] = "\\v",
		[0x0C] = "\\f",
		[0x0D] = "\\r",
		[0x22] = "\\\"",
		[0x5C] = "\\\\"
	}

	-- Implicit ranges whose first code point lies in the plane being
	-- processed. Filled in by export.weights; nothing reads it yet.
	local ranges

	-- Converts a hexadecimal weight string (0000 to FFFF) into two characters:
	-- the high byte followed by the low byte (e.g. "FFFD" gives "\255\253").
	local function base_256(w)
		w = tonumber(w, 16)
		-- The quotient is fractional whenever the low byte is non-zero, and
		-- string.char must be given a whole number, so floor it explicitly.
		return char(floor(w / 0x100)) .. char(w % 0x100)
	end

	-- Parses one line of the weights data and, if it describes a single code
	-- point in `plane` with a single weight element, writes that element's
	-- RECORD_SIZE bytes into `output`, one character per slot.
	local function process_line(line, plane, output)
		-- Get the codepoint(s), and return if not found.
		local cp = line:match("^[%x ]+%f[ ]")
		if not cp then
			return
		end
		-- Entries keyed by more than one codepoint are not handled yet (TODO).
		cp = split(cp, " ")
		if #cp > 1 then
			return
		end
		-- Check this is the correct plane, and return if not. Planes range
		-- from 0x0 to 0x10 (17 in total), and each has 0x10000 characters,
		-- from U+(X)0000 to U+(X)FFFF.
		cp = tonumber(cp[1], 16)
		if floor(cp / PLANE_SIZE) ~= plane then
			return
		end
		-- Normalize codepoint by removing the plane.
		cp = cp % PLANE_SIZE
		-- Get the weights and convert each of the first two weights (ranging
		-- from 0x0000 to 0xFFFF) to 2-digit base-256, storing each digit as
		-- the corresponding character. The final weight is stored as 1 digit,
		-- because it only ranges from 0x0000 to 0x001F.
		local weights = {}
		for var, w1, w2, w3 in line:gmatch("%[([*.])(%x+)%.(%x+)%.(%x+)%]") do
			insert(weights, var .. base_256(w1) .. base_256(w2) .. char(tonumber(w3, 16)))
		end
		-- Entries with more than one weight element are not handled yet
		-- (TODO). A line with no recognisable weight element is skipped too,
		-- rather than failing on a nil record below.
		if #weights ~= 1 then
			return
		end
		local record = weights[1]
		for i = 1, #record do
			output[cp * RECORD_SIZE + i] = sub(record, i, i)
		end
	end

	--[==[
	Builds the weight data for one plane as text that is safe to place between
	double quotes in a Lua string literal.

	`plane` is the plane number; it is compared against floor(codepoint / 0x10000).
	Returns a string describing PLANE_SIZE * RECORD_SIZE bytes, with bytes that
	cannot appear literally replaced by escape sequences. Code points with no
	stored weights are filled with zero bytes. Raises an error if `plane` is nil
	or false.
	]==]
	function export.weights(plane)
		if not plane then
			error("Please enter a plane.")
		end
		-- Collate the implicit ranges for this plane (if any).
		ranges = {}
		for _, range in ipairs(implicit_ranges) do
			if floor(range[1] / PLANE_SIZE) == plane then
				insert(ranges, range)
			end
		end
		-- `ducet` is assumed to be the weights data as a single string, since
		-- it is iterated line by line with gmatch.
		local output = {}
		for line in ducet:gmatch("[^\n]+") do
			process_line(line, plane, output)
		end
		-- Fill in any blanks with zeroes.
		for i = 1, PLANE_SIZE * RECORD_SIZE do
			output[i] = output[i] or "\0"
		end
		-- Escape from the end backwards, so that output[i + 1] is already in
		-- its final form when slot i is examined. Digit characters are never
		-- rewritten, so a one-character digit there means the text that
		-- follows slot i starts with a digit.
		for i = #output, 1, -1 do
			local b = output[i]:byte()
			if b > 0x7E then
				-- 127 and above always print as three digits, so the decimal
				-- escape cannot run into a following digit.
				output[i] = "\\" .. b
			elseif b < 0x07 or b > 0x0D and b < 0x20 then
				-- Control bytes with no named escape. A decimal escape takes
				-- up to three digits, so pad to three when a digit follows;
				-- otherwise "\1" then "5" would be read back as "\15".
				local nxt = output[i + 1]
				if nxt and nxt:match("^%d$") then
					b = ("%03d"):format(b)
				end
				output[i] = "\\" .. b
			else
				-- Named escapes, quote and backslash; anything else is kept
				-- as the literal character.
				output[i] = escapes[b] or output[i]
			end
		end
		return concat(output)
	end
end

-- Hand the table of public functions back to whoever loads this module.
return export