-- Module:Unicode data/patterns

local export = {} local Array = require "Module:array"

-- Formats a code point as an HTML hexadecimal numeric character reference,
-- zero-padded to at least four uppercase hex digits (0x41 -> "&#x0041;").
local function numeric_character_reference(code_point)
	return string.format("&#x%04X;", code_point)
end

-- Groups every code point range found in a Unicode data module by its value.
--
-- data_module.singles is traversed as a map from code point to value, and
-- data_module.ranges as a sequence of { low, high, value } triples.
--
-- Returns a table mapping each value to an Array of { low, high } pairs
-- (single code points are stored as { code_point, code_point }). The returned
-- table keeps its auto-creating metatable, so indexing a value that does not
-- occur yields (and stores) a new empty Array rather than nil.
function export.all_ranges_per_value(data_module)
	local value_to_ranges = setmetatable({}, {
		__index = function(self, key)
			-- Construct a fresh Array per value; storing the Array class itself
			-- would make every value share (and pollute) one table.
			local value = Array()
			self[key] = value
			return value
		end,
	})

	for code_point, value in pairs(data_module.singles) do
		value_to_ranges[value]:insert { code_point, code_point }
	end

	for _, range in ipairs(data_module.ranges) do
		local low, high, value = unpack(range)
		value_to_ranges[value]:insert { low, high }
	end

	return value_to_ranges
end

-- Collects the code point ranges of a Unicode data module that carry one
-- particular value.
--
-- data_module.singles is traversed as a map from code point to value, and
-- data_module.ranges as a sequence of { low, high, value } triples.
-- value_to_find is compared with ==, so it must have the same type as the
-- values stored in the data module.
--
-- Returns a new Array of { low, high } pairs; singles come first (in
-- unspecified pairs() order), followed by the ranges in module order.
function export.ranges_per_value(data_module, value_to_find)
	-- Construct a new Array instance; using the Array class itself would
	-- accumulate results inside the shared class table.
	local ranges = Array()

	for code_point, value in pairs(data_module.singles) do
		if value == value_to_find then
			ranges:insert { code_point, code_point }
		end
	end

	for _, range in ipairs(data_module.ranges) do
		local low, high, value = unpack(range)
		if value == value_to_find then
			ranges:insert { low, high }
		end
	end

	return ranges
end

-- Sorts a list of ranges in place, ascending by each range's first element.
local function sort_ranges(ranges)
	local function starts_before(first, second)
		return first[1] < second[1]
	end

	table.sort(ranges, starts_before)
end

-- Makes a pattern suitable to put inside [...] or [^...]
-- in a Lua pattern or regular expression.
--
-- ranges:   sequence of { low, high } code point pairs.
-- char_ref: if truthy, code points are written as hexadecimal numeric
--           character references; otherwise as literal characters.
--
-- Returns the concatenated string. A range whose two ends are equal is
-- written as a single character; otherwise as "low-high".
-- NOTE(review): literal characters are not escaped, so a range end that is
-- itself special inside a character class ("]", "^", "-", "%") would need
-- escaping by the caller -- confirm whether any data value hits this.
local function make_pattern(ranges, char_ref)
	-- Choose the encoder once instead of branching for every range end.
	local encode = char_ref and numeric_character_reference or mw.ustring.char

	local output = Array()
	for _, range in ipairs(ranges) do
		output:insert(encode(range[1]))
		if range[1] ~= range[2] then
			output:insert "-"
			output:insert(encode(range[2]))
		end
	end

	return output:concat()
end

-- Removes the code points U+0000-U+001F from a list of { low, high } ranges,
-- in place. Ranges lying entirely in that block are removed; ranges that
-- start in it are clipped to begin at U+0020.
-- Works on any number of affected ranges and does not require sorted input.
-- Treats all characters U+0000-U+001F as invalid in wikitext, but only some are.
local function sanitize_ranges(ranges)
	-- Walk backwards so table.remove never shifts an entry not yet visited.
	for i = #ranges, 1, -1 do
		local range = ranges[i]
		if 0 <= range[1] and range[1] <= 0x1F then
			if 0 <= range[2] and range[2] <= 0x1F then
				table.remove(ranges, i)
			else
				range[1] = 0x20
			end
		end
	end
end

-- Template entry point: builds the character-class body (literal characters)
-- for one value of a submodule of Module:Unicode data.
--
-- frame.args.module: name of the submodule, appended to "Module:Unicode data/".
-- frame.args.value:  value whose code point ranges are collected.
--
-- Raises an error if either argument is missing.
-- Returns the pattern string with U+0000-U+001F stripped.
function export.make_pattern(frame)
	local module_name = frame.args.module
	if not module_name then
		error("Provide name of submodule of Module:Unicode data in |module= parameter.")
	end

	local value = frame.args.value
	if not value then
		error("Provide value to search for in |value= parameter.")
	end

	local ranges = export.ranges_per_value(require("Module:Unicode data/" .. module_name), value)
	-- ranges_per_value returns singles in unspecified pairs() order; sort so
	-- sanitize_ranges gets ordered input and the output is deterministic.
	sort_ranges(ranges)
	sanitize_ranges(ranges)

	return make_pattern(ranges, false)
end

-- Template entry point: lists, as wikitext bullet items, the character-class
-- pattern for every value in a submodule of Module:Unicode data.
--
-- frame.args.module: name of the submodule, appended to "Module:Unicode data/".
--
-- Raises an error if the argument is missing.
-- Returns one "\n* value: pattern" item per value, visited in sortedPairs
-- order, with each pattern written as numeric character references.
function export.show_all_patterns(frame)
	local module_name = frame.args.module
	if not module_name then
		error("Provide name of submodule of Module:Unicode data in |module=.")
	end

	local value_to_ranges = export.all_ranges_per_value(require("Module:Unicode data/" .. module_name))
	for _, ranges in pairs(value_to_ranges) do
		sort_ranges(ranges)
	end

	local output = Array()
	for value, ranges in require "Module:table".sortedPairs(value_to_ranges) do
		output:insert("\n* " .. value .. ": ")
		-- Emit the pattern itself; previously the sorted ranges were never
		-- used and each list item was left empty.
		sanitize_ranges(ranges)
		output:insert(make_pattern(ranges, true))
	end

	return output:concat()
end

-- Hand the table of public functions back to whoever requires this module.
return export