-- Module:User:Theknightwho/sort

local export = {}

local append = require("Module:table").append local codepoint = mw.ustring.codepoint local concat = table.concat local explode_utf8 = require("Module:string utilities").explode_utf8 local floor = math.floor local format = string.format local insert = table.insert local pattern_escape = require("Module:utilities").pattern_escape

-- `data` is searched with string patterns by export.sortkey: each entry sits
-- between "\255" bytes, starts with the character it describes, and holds one
-- or more weight groups introduced by a "\253" or "\254" byte.
-- `UTF8_char` is a Lua pattern for one UTF-8 encoded character (a lead byte
-- followed by any continuation bytes); "%z" stands for the NUL byte.
local data = require("Module:User:Theknightwho/sortkey/serialized") local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

--- Computes a collation sort key for a term.
--
-- The term is split into UTF-8 characters, and each character contributes
-- weights at up to three levels (primary, secondary, tertiary). Characters in
-- the hard-coded ranges below, and characters with no entry in `data`, receive
-- computed weights made of two elements: (lead, 0x20, 2) followed by
-- (offset % 0x8000 + 0x8000, 0, 0). The ranges and lead values appear to follow
-- the implicit-weight scheme of the Unicode Collation Algorithm (UTS #10).
-- Every other character takes its weights from the serialized `data` string.
--
-- @param text string to key, or a table whose `term.term` field holds it
-- @return string: all primary weights, then all secondary, then all tertiary,
--   each formatted with "%04x"; weights equal to zero are omitted
function export.sortkey(text)
	if type(text) == "table" then
		text = text.term.term
	end

	local primary, secondary, tertiary = {}, {}, {}

	-- Appends a weight to one level, skipping zero weights.
	local function insert_weight(level, weight)
		if weight ~= 0 then
			insert(level, weight)
		end
	end

	-- Appends one collation element (one weight per level).
	local function insert_weights(w1, w2, w3)
		insert_weight(primary, w1)
		insert_weight(secondary, w2)
		insert_weight(tertiary, w3)
	end

	-- Appends the two-element computed weight for a character without an
	-- explicit entry: a lead element, then the offset folded into 0x8000-0xFFFF.
	local function insert_implicit(lead, offset)
		insert_weights(lead, 0x20, 2)
		insert_weights(offset % 0x8000 + 0x8000, 0, 0)
	end

	-- One weight group inside a data entry: a "\253"/"\254" marker followed by
	-- three UTF-8 characters whose codepoints are the three weights.
	-- Built once here because it does not change between characters.
	local weights_pattern = "[\253\254](" .. UTF8_char .. ")(" .. UTF8_char .. ")(" .. UTF8_char .. ")"

	for _, char in ipairs(explode_utf8(text)) do
		local cp = codepoint(char)
		if (cp >= 0x17000 and cp <= 0x18AFF) or (cp >= 0x18D00 and cp <= 0x18D8F) then
			insert_implicit(0xFB00, cp - 0x17000)
		elseif cp >= 0x1B170 and cp <= 0x1B2FF then
			insert_implicit(0xFB01, cp - 0x1B170)
		elseif cp >= 0x18B00 and cp <= 0x18CFF then
			insert_implicit(0xFB02, cp - 0x18B00)
		elseif (cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0xF900 and cp <= 0xFAFF) then
			insert_implicit(0xFB40 + floor(cp / 0x8000), cp)
		elseif (cp >= 0x3400 and cp <= 0x4DBF)
			or (cp >= 0x20000 and cp <= 0x2A6DF)
			or (cp >= 0x2A700 and cp <= 0x2EBEF)
			or (cp >= 0x30000 and cp <= 0x323AF)
		then
			insert_implicit(0xFB80 + floor(cp / 0x8000), cp)
		else
			-- A NUL byte is written as the pattern class "%z" (the same form
			-- UTF8_char uses). It must not go through pattern_escape: an
			-- escaper that escapes "%" would turn it into "%%z", which matches
			-- the literal text "%z" rather than a NUL byte.
			local escaped = char == "\0" and "%z" or pattern_escape(char)
			local char_data = data:match("\255(" .. escaped .. "[^\255]+)\255")
			if char_data then
				for w1, w2, w3 in char_data:gmatch(weights_pattern) do
					insert_weights(codepoint(w1), codepoint(w2), codepoint(w3))
				end
			else
				-- No entry in the data: fall back to computed weights.
				insert_implicit(0xFBC0 + floor(cp / 0x8000), cp)
			end
		end
	end

	-- Levels are joined in order of significance and rendered as hex so that
	-- plain string comparison can be used on the result.
	local key = append(primary, secondary, tertiary)
	for k, v in ipairs(key) do
		key[k] = format("%04x", v)
	end
	return concat(key)
end

--- Sorts the sequence `t` in place by the sort keys of its elements.
--
-- Each element's key is computed with export.sortkey at most once and cached
-- for the duration of the sort, since the comparator is called many times per
-- element.
--
-- @param t sequence of values accepted by export.sortkey
-- @return the same table `t`, after sorting
function export.sort(t)
	local memo = {}

	-- Returns the cached sort key for `item`, computing it on first use.
	local function key_of(item)
		local key = memo[item]
		if key == nil then
			key = export.sortkey(item)
			memo[item] = key
		end
		return key
	end

	table.sort(t, function(k1, k2)
		return key_of(k1) < key_of(k2)
	end)
	return t
end

return export