-- Module:User:Theknightwho/pattern simplifier

local byte = string.byte local concat = table.concat local gmatch = string.gmatch local match = string.match local setmetatable = setmetatable local sub = string.sub

-- Obtain the Parser and Node classes from Module:parser. `new` has to be
-- *called*: assigning the function itself would leave `Parser` holding the
-- factory and `Node` nil, so the `Node:new_class` call below would fail.
local Parser, Node = require("Module:parser").new()

-- Node class used as the root node of a parse (see the `node = {Pattern}`
-- argument passed to Parser:parse at the bottom of this module).
local Pattern = Node:new_class("pattern")

--- Build the value of a pattern node.
-- Rather than producing a node object, this joins the pieces held in `this`
-- into one string and returns that string.
-- @param this sequence of string pieces (presumably the values emitted by the
--   parser while this node was open; confirm against Module:parser)
-- @return the concatenation of every element of `this`
function Pattern:new(this)
	local joined = concat(this)
	return joined
end

--- Read bytes from the text relative to the current head position.
-- @param i offset of the first byte from `self.head` (defaults to 0)
-- @param j offset of the last byte from `self.head` (defaults to `i`)
-- @return the substring of `self.text` covering head + i .. head + j; with no
--   arguments this is the byte at the head, and `read(1)` is the byte after it
function Parser:read(i, j)
	local head = self.head
	local first = i or 0
	local last = j or first
	return sub(self.text, head + first, head + last)
end

--- Move the head forward.
-- @param n number of bytes to advance; when omitted, the `step` stored on the
--   layer at `self[-1]` is used, and if that is unset the head moves by 1
function Parser:advance(n)
	local distance = n
	if not distance then
		distance = self[-1].step or 1
	end
	self.head = self.head + distance
end

--- Consume the next token of the text and dispatch it to the layer's handler.
--
-- The layer's `pattern` is expected to yield two results: a position capture
-- (used here as an index into the text) followed by the matched token.
--
-- * No match: the remainder of the text is emitted verbatim and the layer is
--   popped.
-- * Match further along than the head: the text in between is emitted verbatim
--   and the head is moved to the start of the token.
--
-- The layer's `step` is then preset to the byte length of the token (handlers
-- may overwrite it), and the handler is tail-called with the token.
-- @return whatever `self:pop()` or the layer handler returns
function Parser:consume()
	local text, head, layer = self.text, self.head, self[-1]
	local loc, this = match(text, layer.pattern, head)
	if not loc then
		-- Nothing of interest is left: flush the rest of the text.
		self:emit(sub(text, head))
		return self:pop()
	elseif loc ~= head then
		-- Flush the uninteresting run preceding the token.
		self:emit(sub(text, head, loc - 1))
		self.head = loc
	end
	layer.step = #this
	return layer.handler(self, this)
end

do
	-- Metatable that makes a table of per-character handlers callable. The
	-- table is keyed by the character following the current head; the entry
	-- under `false` is the fallback.
	local EscapeHandler = {}

	-- Called as `handlers(parser, ...)`: looks at the byte after the head and
	-- tail-calls the handler registered for it (or the fallback), passing the
	-- parser and that byte. Any further arguments are ignored.
	function EscapeHandler.__call(handlers, parser)
		local escaped = parser:read(1)
		local handler = handlers[escaped]
		if not handler then
			handler = handlers[false]
		end
		return handler(parser, escaped)
	end

	--- Turn `t` into a callable escape dispatcher.
	-- @param t table mapping a character to its handler (`false` = fallback);
	--   it is modified in place
	-- @param bad_groups string whose individual bytes are each mapped to
	--   `self.fail_route`, overriding any entry already in `t`
	-- @return `t`, now carrying the EscapeHandler metatable
	function Parser:escape_handler(t, bad_groups)
		for index = 1, #bad_groups do
			t[sub(bad_groups, index, index)] = self.fail_route
		end
		return setmetatable(t, EscapeHandler)
	end
end

-- Forward declarations: the two handlers refer to each other.
local main_handler
local capture_group

--- Handler for tokens found outside a character set.
--
-- On a call, the upvalue `main_handler` is replaced by whatever `self:switch`
-- returns for the dispatch table below, and the call is forwarded to it.
-- NOTE(review): presumably `self:switch` (from Module:parser) caches the
-- dispatcher against the function passed as its first argument; confirm.
--
-- Each handler either emits byte-level pattern text for the token, or calls
-- `self:fail_route()` when the construct cannot be rewritten. Handlers that
-- consume more than the token itself set `self[-1].step` accordingly.
function main_handler(self, ...)
	main_handler = self:switch(main_handler, {
		["%"] = self:escape_handler({
			-- %bxy: only usable when both delimiters are ASCII bytes.
			["b"] = function(self)
				local chars = self:read(2, 3)
				if not match(chars, "^[^\128-\255]*$") then
					return self:fail_route()
				end
				self:emit("%b" .. chars)
				self[-1].step = 4
			end,
			-- %Z: kept as-is when quantified by * + -, rejected with ?, and
			-- otherwise expanded to a class matching one non-NUL character
			-- (lead byte followed by any continuation bytes).
			["Z"] = function(self)
				local nxt2 = self:read(2)
				if nxt2 == "?" then
					return self:fail_route()
				end
				if nxt2 == "*" or nxt2 == "+" or nxt2 == "-" then
					self[-1].step = 3
					self:emit("%Z" .. nxt2)
				else
					self[-1].step = 2
					self:emit("[\1-\127\194-\244][\128-\191]*")
				end
			end,
			-- Any other escape is copied through unchanged.
			[false] = function(self, nxt)
				self:emit("%" .. nxt)
				self[-1].step = 2
			end
		}, "acdlpsuwxACDLPSUWX"), -- these character classes always fail
		["("] = function(self)
			-- "()" is a position capture, which is rejected.
			if self:read(1) == ")" then
				return self:fail_route()
			end
			-- Give up once 32 captures have been opened (this equals Lua's
			-- default LUA_MAXCAPTURES; presumably that is the reason).
			local captures = self.captures
			if captures == 32 then
				return self:fail_route()
			end
			self.captures = captures + 1
			self:emit("(")
		end,
		["."] = function(self)
			local nxt = self:read(1)
			if nxt == "?" then
				return self:fail_route()
			end
			if nxt == "*" or nxt == "+" or nxt == "-" then
				-- A quantified "." is copied through together with its
				-- quantifier.
				self[-1].step = 2
				self:emit("." .. nxt)
			else
				-- A bare "." becomes one non-continuation byte plus any
				-- continuation bytes that follow it.
				self:emit("[^\128-\191][\128-\191]*")
			end
		end,
		["["] = function(self)
			-- Negated sets are rejected.
			if self:read(1) == "^" then
				return self:fail_route()
			end
			self:emit("[")
			-- Switch to set mode. The leading "()" position capture is
			-- required because Parser:consume uses the first match result as
			-- an index into the text.
			self[-1].handler = capture_group
			self[-1].pattern = "()([%%%]\194-\244][\128-\191]*)"
		end,
		[""] = Parser.pop,
		-- Anything else: a token starting with a byte in \194-\244.
		[false] = function(self, this)
			local this_len = #this
			local nxt = self:read(this_len)
			if nxt == "*" or nxt == "-" or nxt == "?" then
				return self:fail_route()
			elseif nxt ~= "+" then
				return self:emit(this)
			elseif this_len > 2 then
				-- Previously referenced the undefined global
				-- `self_fail_route`.
				return self:fail_route()
			end
			-- X+ for a two-byte X = b1 b2 is written as b1 [b1b2]* b2.
			self:emit(sub(this, 1, 1) .. "[" .. this .. "]*" .. sub(this, 2, 2))
			-- Skip the token and the "+" (3 bytes for a two-byte token).
			self[-1].step = this_len + 1
		end
	})
	return main_handler(self, ...)
end

--- Handler for tokens found inside a character set ("[...]").
--
-- On a call, the upvalue `capture_group` is replaced by whatever `self:switch`
-- returns for the dispatch table below, and the call is forwarded to it.
--
-- * "%" escapes are copied through, except for the character classes listed
--   in the second argument to `escape_handler`, which fail the route.
-- * "]" closes the set and hands control back to `main_handler`.
-- * Any other token matched by the set-mode pattern (one starting with a byte
--   in \194-\244) fails the route.
function capture_group(self, ...)
	capture_group = self:switch(capture_group, {
		["%"] = self:escape_handler({
			[false] = function(self, nxt)
				self:emit("%" .. nxt)
				self[-1].step = 2
			end
		}, "acdlpsuwxACDLPSUWXZ"),
		["]"] = function(self)
			self:emit("]")
			-- Back to normal mode. The leading "()" position capture is
			-- required because Parser:consume uses the first match result as
			-- an index into the text.
			self[-1].handler = main_handler
			self[-1].pattern = "()([%%(.[\194-\244][\128-\191]*)"
		end,
		[""] = Parser.fail_route,
		[false] = Parser.fail_route,
	})
	return capture_group(self, ...)
end

--- Route entry point (named in the `route` argument given to Parser:parse).
-- Puts the layer at `self[-1]` into normal mode and resets the capture count.
-- The pattern begins with a "()" position capture because Parser:consume
-- treats the first match result as an index into the text; the second capture
-- is the token: one of % ( . [ or a byte in \194-\244, plus any continuation
-- bytes that follow.
function Parser:do_parse()
	local layer = self[-1]
	layer.handler, layer.pattern = main_handler, "()([%%(.[\194-\244][\128-\191]*)"
	self.captures = 0
end

-- Cache of results keyed by input pattern. A failed parse is stored as
-- `false`, so it is also served from the cache (hence the `~= nil` test).
local memo = {}

--- Module entry point.
-- @param pattern the pattern string to process
-- @return the processed pattern string, or `false` when the parse did not
--   succeed
return function(pattern)
	local result = memo[pattern]
	if result ~= nil then
		return result
	elseif match(pattern, "^[^%%()*+%-.?[%]]*$") then
		-- Fast path: none of the characters that the handlers treat
		-- specially are present, so the pattern is returned unchanged.
		-- Parentheses must be excluded here too; otherwise a "()" position
		-- capture would slip through even though the parser rejects it.
		result = pattern
	else
		local success
		success, result = Parser:parse{
			text = pattern,
			node = {Pattern},
			route = {"do_parse"},
			allow_fail = true
		}
		-- Normalise failure to a falsy value.
		result = success and result
	end
	memo[pattern] = result
	return result
end