-- Module:User:Surjection/unpacker

-- Table of functions exposed by this module.
local export = {}

-- Commented-out sample payload, kept for reference:
--local data = "\001fi\004foo\019bar\001"

--description of format:
--\001TOP_LEVEL_KEY\004 -> used to search a top level key
--value is expected to be a table. table format:
--		a key followed by \001, \003, \016-\020.
--		if key is empty, the key is taken as the next available numeric index
--				if the key is only \002, it is taken as the empty string
--				if it starts with \002, the rest of the key is parsed as a number.
--				if it starts with \005, the character code of the next character is used as a numeric index into common_keys
--				otherwise it is taken as a string key.
--		\001 means "end of all tables, a top-level key is up next"
--		\003 means "end of this table only"
--		\016-\020 are data types.
--			\016 is NIL (followed by nothing),
--			\017 is BOOL (followed by one byte, 0 = false, anything else is true)
--			\018 is NUMBER. anything until a \001 (end of top-level table), \002 (next index), \003 (end of this table), \005 (compressed key), \016-\020 (data type), \031 (end of value, if ambiguous otherwise i.e. if a string key follows) is captured and converted into a number.
--			\019 is STRING. anything until a \001 (end of top-level table), \002 (next index), \003 (end of this table), \005 (compressed key), \016-\020 (data type), \031 (end of value, if ambiguous otherwise i.e. if a string key follows) is captured and stored as a string.
--			\020 is TABLE and starts a nested table. go back to step 1, expecting a key. \003 ends a nested table, \001 ends all tables.
--				no \031 should follow, we know when a table ends and don't need a special terminator.
--\001 -> end of top-level keys
--in theory, we could have an escape code to allow these characters in keys and strings. that is not implemented, because there is currently no need.

-- Keys that the packed data refers to by position: a \005 byte followed by a
-- single byte N stands for common_keys[N].
local common_keys = {
	"from",
	"remove_diacritics",
	"type",
	"ancestors",
	"wikimedia_codes",
	"wikipedia_article",
	"translit",
	"link_tr",
	"display_text",
	"entry_name",
	"sort_key",
	"dotted_dotless_i",
	"standardChars",
}

-- The packed payload searched by export.find_key. Each entry starts with
-- \001KEY\004 and the whole string ends with \001.
local data = "\001fi\004\019Finnish\0181412\019urj-fin\019Latn\005\010\020\005\001\020\019’\003to\020\019'\003\005\002\019ˣ\003\005\011\020\005\001\020\019ø\019æ\019œ\019ß\003to\020\019o\019ae\019oe\019ss\003\005\002\019̧̀́̂̃̋̌':\003\005\003\019regular\005\009\020to\020\019’\003\005\001\020\019'\003\003\005\013\019AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö ',%-–…∅\001"

-- Decodes one packed table from `packed`, starting at byte position `index`.
--
-- Parameters:
--   packed      - the packed data string.
--   index       - 1-based byte position of the first key of the table to decode.
--   common_keys - array of key strings; a \005 byte followed by one byte N
--                 selects common_keys[N] as the key.
--
-- Returns two values: the decoded table, and the position at which decoding
-- stopped. A \003 terminator is consumed (the position is just past it); a \001
-- terminator is NOT consumed, so that every enclosing table sees it as well.
--
-- Raises an error if a compressed key refers to an index missing from common_keys.
local function unpack_row(packed, index, common_keys)
	local result = {}
	local packed_len = packed:len()

	while index <= packed_len do
		-- find key
		local table_key, table_value
		if packed:byte(index) == 5 then
			-- \005 + one lookup byte + the type byte: three bytes in total.
			table_key = common_keys[packed:byte(index + 1)]
			index = index + 3
		else
			-- Literal key: everything up to the next type/terminator byte. The `*`
			-- quantifier always matches (possibly the empty string), so the capture
			-- is never nil here.
			table_key = packed:match("([^\001\003\016-\020]*)", index)
			index = index + table_key:len() + 1
		end

		-- The byte just stepped over is either a data type (\016-\020) or a
		-- terminator (\001 / \003); subtracting 16 makes terminators negative.
		local table_value_type = packed:byte(index - 1) - 16
		if table_value_type < 0 then
			-- end of table
			if table_value_type == -15 then
				-- support nested end of table: \001 is end of top-level value - do not consume it
				index = index - 1
			end
			break
		end

		if table_key == nil then
			error("Compressed key index is not present in common_keys")
		end

		if table_key:len() == 0 then
			-- empty key: next number
			table_key = #result + 1
		elseif table_key:byte() == 2 then
			-- \002 is a numeric key; a bare \002 yields the empty-string key
			table_key = tonumber(table_key:sub(2)) or ""
		end

		if table_value_type == 0 then
			-- \016 NIL
			table_value = nil
		elseif table_value_type == 1 then
			-- \017 BOOL: one byte, 0 = false, anything else = true. Stored in
			-- table_value so the shared assignment below keeps it.
			table_value = packed:byte(index) > 0
			index = index + 1
		elseif table_value_type == 4 then
			-- \020 TABLE: recurse; the callee reports where it stopped.
			table_value, index = unpack_row(packed, index, common_keys)
		else
			-- \018 NUMBER or \019 STRING: raw text up to the next control byte.
			local capture = packed:match("([^\001-\003\005\016-\020\031]*)", index)
			if table_value_type == 2 then
				-- \018 NUMBER
				table_value = tonumber(capture)
			else
				-- \019 STRING
				table_value = capture
			end
			index = index + capture:len()
			if packed:byte(index) == 31 then
				-- skip value separator
				index = index + 1
			end
		end

		result[table_key] = table_value
	end

	return result, index
end

-- Looks up the top-level entry stored under `key` in the packed `data` string
-- and returns it as an unpacked table.
--
-- Parameters:
--   key - the top-level key, stored in the data as \001KEY\004.
--
-- Returns the decoded table for that key.
-- Raises "Key not found" if the data contains no such entry.
function export.find_key(key)
	-- Plain-text search (4th argument true): keys such as "urj-fin" contain
	-- characters that are magic in Lua patterns and would otherwise fail to match.
	local _, end_index = data:find("\001" .. key .. "\004", 1, true)
	if not end_index then
		error("Key not found")
	end
	-- The outer parentheses truncate unpack_row's two results to the table alone.
	return (unpack_row(data, end_index + 1, common_keys))
end

-- Hand the table of exported functions back to whoever loads this module.
return export