-- Module:User:Theknightwho/template parser/new

local concat = table.concat local gmatch = string.gmatch local insert = table.insert local lower = string.lower local match = string.match local rawset = rawset local sub = string.sub local tonumber = tonumber local tostring = tostring local type = type local ulower = string.ulower

-- Parser framework this module builds on (supplies Node, Wikitext and Parser).
local m_parser = require("Module:User:Theknightwho/test6")
-- Table indexed by lowercase tag name when deciding whether "<name" opens a tag.
local data = mw.loadData("Module:template parser/data")
-- Current frame object, used below as `frame:preprocess(...)`. It has to be the
-- result of calling mw.getCurrentFrame, not the function itself.
local frame = mw.getCurrentFrame()

-- Table of public functions; it is what this module returns.
local export = {}

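--------------------------------------------------------------------------------
-- Helper functions
--------------------------------------------------------------------------------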

-- Set of the six ASCII spacing characters: space, tab, newline, vertical tab,
-- form feed and carriage return.
local ascii_spaces = {
	[" "] = true,
	["\t"] = true,
	["\n"] = true,
	["\v"] = true,
	["\f"] = true,
	["\r"] = true,
}

-- Returns true if `this` is exactly one ASCII spacing character, and false for
-- anything else (including the empty string and multi-character strings).
local function is_space(this)
	return ascii_spaces[this] == true
end

-- Strips leading and trailing ASCII spacing characters from `str`.
-- Returns "" if `str` is empty or consists only of spacing characters.
-- Note: scanning with sub is much faster than the equivalent string patterns.
local function trim(str)
	local len = #str
	-- Walk forwards to the first non-space character.
	local first = 1
	while first <= len and is_space(sub(str, first, first)) do
		first = first + 1
	end
	if first > len then
		return ""
	end
	-- Walk backwards to the last non-space character. This stops at `first` at
	-- the latest, since that character is known not to be a space.
	local last = len
	while is_space(sub(str, last, last)) do
		last = last - 1
	end
	return sub(str, first, last)
end

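--------------------------------------------------------------------------------
-- Nodes
--------------------------------------------------------------------------------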

local Node = m_parser.Node local Wikitext = m_parser.Wikitext

-- Node type for tags (e.g. <noinclude>...</noinclude>).
local Tag = Node:new("tag")

-- Serialises the tag back into text.
-- * Ignored tags produce the empty string.
-- * Self-closing tags produce `<name attr="value"/>`.
-- * Other tags produce the opening tag, the concatenated children and the
--   closing tag `</name>`.
-- Attributes are written in `pairs` order, which is unspecified.
function Tag:__tostring()
	if self.ignored then
		return ""
	end
	local open_tag = {"<", self.name}
	if self.attributes then
		for attr, value in pairs(self.attributes) do
			insert(open_tag, " " .. attr .. "=\"" .. value .. "\"")
		end
	end
	if self.self_closing then
		insert(open_tag, "/>")
		return concat(open_tag)
	end
	insert(open_tag, ">")
	-- NOTE(review): the closing tag was lost from the extracted source (only an
	-- empty string was left being concatenated); restored here.
	return concat(open_tag) .. concat(self) .. "</" .. self.name .. ">"
end

-- Node type for arguments, i.e. triple-brace constructs such as "{{{a|b}}}".
local Argument = Node:new("argument")

-- Serialises the argument back into text.
-- NOTE(review): the body of this method was garbled in the extracted source
-- (the brace literals were stripped). It has been reconstructed to parallel
-- Template.__tostring: all children are joined with "|" inside "{{{" and "}}}".
-- Confirm against the upstream module.
function Argument:__tostring()
	if self[2] then
		local output = {"{{{", tostring(self[1])}
		for i = 2, #self do
			insert(output, "|")
			insert(output, tostring(self[i]))
		end
		insert(output, "}}}")
		return concat(output)
	elseif self[1] then
		return "{{{" .. tostring(self[1]) .. "}}}"
	else
		return "argument"
	end
end

-- Iterator step: increments self.i and returns the child at that index, but
-- only for the first two children (the name and the default value). Returns
-- nothing once self.i exceeds 2.
-- Assumes self.i has been initialised by the caller - TODO confirm where.
function Argument:next()
	self.i = self.i + 1
	if self.i <= 2 then
		return self[self.i]
	end
end

-- Node type for a single template parameter.
local Parameter = Node:new("parameter")

-- Serialises the parameter: "key=value" if it has an explicit key, otherwise
-- just the value. The value is rendered with Node.__tostring.
function Parameter:__tostring()
	if self.key then
		return tostring(self.key) .. "=" .. Node.__tostring(self)
	end
	return Node.__tostring(self)
end

-- Node type for templates. Child 1 is the name; later children are parameters.
local Template = Node:new("template")

-- Serialises the template back into text: "{{name|param|...}}".
-- If self.colon is set, the second child is joined to the name with ":"
-- instead of "|", and the remaining children follow with "|".
function Template:__tostring()
	if self[2] then
		local output, n = {"{{", tostring(self[1])}, 2
		if self.colon then
			insert(output, ":")
			-- The child after the colon is self[2]; the "|" loop then resumes
			-- at 3. (Using self[3] here would drop self[2] and emit self[3]
			-- twice.)
			insert(output, tostring(self[2]))
			n = 3
		end
		for i = n, #self do
			insert(output, "|")
			insert(output, tostring(self[i]))
		end
		insert(output, "}}")
		return concat(output)
	elseif self[1] then
		return "{{" .. tostring(self[1]) .. "}}"
	else
		return "template"
	end
end

-- Returns a table mapping parameter keys to parameter values.
--
-- Parameters without an explicit key are numbered 1, 2, 3... in order of
-- appearance, and their values are not trimmed. Parameters with an explicit
-- key have both key and value trimmed.
--
-- Explicit parameter keys are converted to numbers if:
-- (a) They are integers, with no decimals (2.0) or leading zeroes (02).
-- (b) They are <= 2^53 and >= -2^53.
-- Note: Lua integers are only accurate to 2^53 - 1, so 2^53 and -2^53 have to
-- be specifically checked for since Lua will evaluate 2^53 as equal to
-- 2^53 + 1.
--
-- If the same key occurs more than once, the last occurrence wins.
function Template:get_params()
	local params, implicit = {}, 0
	for i = 2, self.len do
		local param = self[i]
		local key, value
		if param.key then
			key = trim(tostring(param.key))
			if match(key, "^-?[1-9]%d*$") or key == "0" then
				local num = tonumber(key)
				if (
					num <= 9007199254740991 and num >= -9007199254740991 or
					key == "9007199254740992" or
					key == "-9007199254740992"
				) then
					key = num
				end
			end
			value = trim(Node.__tostring(param))
		else
			implicit = implicit + 1
			key = implicit
			value = tostring(param)
		end
		params[key] = value
	end
	return params
end

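--------------------------------------------------------------------------------
-- Parser
--------------------------------------------------------------------------------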

local Parser = m_parser.Parser

-- Wraps the inherited `new` method so that every new parser also gets a
-- raw_head field, initialised to 1.
do
	local base_new = Parser.new

	function Parser:new(text)
		local instance = base_new(self, text)
		-- rawset writes the field directly on the parser, bypassing any
		-- __newindex metamethod.
		rawset(instance, "raw_head", 1)
		return instance
	end
end

-- Replacement `advance` method which moves head by `n` tokens (default 1,
-- negative to move backwards) and keeps raw_head in step by adding or
-- subtracting the length of each token crossed. Positions with no token count
-- as zero length.
function Parser:advance(n)
	local steps = n or 1
	if steps == 0 then
		return
	end
	if steps > 0 then
		for _ = 1, steps do
			-- Forwards: account for the current token, then step past it.
			self.raw_head = self.raw_head + #(self.text[self.head] or "")
			self.head = self.head + 1
		end
		return
	end
	for _ = 1, -steps do
		-- Backwards: step onto the previous token, then discount it.
		self.head = self.head - 1
		self.raw_head = self.raw_head - #(self.text[self.head] or "")
	end
end

-- Wraps the inherited `get` method so that raw_head is restored to its value
-- from before the call whenever the result is a bad route.
do
	local base_get = Parser.get

	function Parser:get(handler, first, ...)
		local saved_raw_head = self.raw_head
		local layer = base_get(self, handler, first, ...)
		if layer ~= self.n.bad_route then
			return layer
		end
		self.raw_head = saved_raw_head
		return layer
	end
end

-- Argument.
-- First value is the argument name.
-- Second value is the argument's default value.
-- Any additional values are ignored: "{{{a|b|c}}}" is argument "a" with
-- default value "b" (*not* "b|c").
do
	-- Handler used while inside a candidate argument.
	-- * "|" closes the current sublayer, emits it as Wikitext and opens a new
	--   sublayer.
	-- * "}}}" emits the final sublayer and pops the layer.
	-- * "}}" not followed by a third "}", or the end of the text (""), fails
	--   the route.
	-- * Anything else is passed on to the block handler.
	local function handle_argument(self, this)
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			if self:read(2) == "}" then
				self:emit(Wikitext:new(self:pop_sublayer()))
				self:advance(2)
				return self:pop()
			end
			return self:fail_route()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Tries to parse an argument at the current position. If that route fails,
	-- falls back to trying a template. On success, three of the pending
	-- opening braces are consumed and an Argument node is emitted, with `pos`
	-- set to the updated brace_head.
	function Parser:argument()
		local argument = self:get(handle_argument, self.push_sublayer)
		if argument == self.n.bad_route then
			self:template()
		else
			-- If something has already been emitted at emit_pos, remove it and
			-- place it at the start of the argument's first value.
			if self.n.len == self.n.emit_pos then
				local inner = self:remove()
				if type(argument[1]) == "table" then
					insert(argument[1], 1, inner)
				else
					argument[1] = Wikitext:new{inner, argument[1]}
				end
			end
			self.n.braces = self.n.braces - 3
			self.n.brace_head = self.n.brace_head - 3
			argument.pos = self.n.brace_head
			self:emit(Argument:new(argument))
		end
	end
end

-- Template.
do
	-- Forward declarations, since handle_name hands over to handle_parameter.
	local handle_name
	local handle_parameter

	-- Handler used while reading the template name.
	-- * "|" emits the name as Wikitext, switches the layer's handler to
	--   handle_parameter and opens a sublayer for the first parameter.
	-- * "}}" emits the name and pops the layer.
	-- * The end of the text ("") fails the route.
	-- * Anything else is passed on to the block handler.
	function handle_name(self, this)
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self.n.handler = handle_parameter
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Handler used while reading template parameters.
	-- * The first "=" in a parameter ends its key, unless it is followed by
	--   another "=" and sits at the start of a line or of the text.
	-- * "|" emits the current parameter and opens a sublayer for the next.
	-- * "}}" emits the current parameter and pops the layer.
	-- * The end of the text ("") fails the route.
	-- * Anything else is passed on to the block handler.
	function handle_parameter(self, this)
		if this == "=" and not self.n.key and (
			self:read(1) ~= "=" or
			self:read(-1) ~= "\n" and self:read(-1) ~= ""
		) then
			local key = self:pop_sublayer()
			self:push_sublayer()
			self.n.key = Wikitext:new(key)
		elseif this == "|" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Tries to parse a template at the current position. If the route fails,
	-- steps back one token, emits every pending opening brace as a literal "{"
	-- at emit_pos and resets the brace count to 0. On success, two of the
	-- pending braces are consumed and a Template node is emitted, with `pos`
	-- set to the updated brace_head.
	function Parser:template()
		local template = self:get(handle_name, self.push_sublayer)
		if template == self.n.bad_route then
			self:advance(-1)
			for _ = 1, self.n.braces do
				self:emit(self.n.emit_pos, "{")
			end
			self.n.braces = 0
		else
			-- If something has already been emitted at emit_pos, remove it and
			-- place it at the start of the template's name.
			if self.n.len == self.n.emit_pos then
				local inner = self:remove()
				if type(template[1]) == "table" then
					insert(template[1], 1, inner)
				else
					template[1] = Wikitext:new{inner, template[1]}
				end
			end
			self.n.braces = self.n.braces - 2
			self.n.brace_head = self.n.brace_head - 2
			template.pos = self.n.brace_head
			self:emit(Template:new(template))
		end
	end

	-- Entry point for a run of two or more opening braces. Counts the braces,
	-- then repeatedly tries an argument (three or more braces pending) or a
	-- template (exactly two pending) until no braces are left. A single
	-- leftover brace is emitted as a literal "{" at emit_pos.
	function Parser:template_or_argument()
		self:advance(2)
		self.n.braces = 2
		while self:read() == "{" do
			self:advance()
			self.n.braces = self.n.braces + 1
		end
		self.n.emit_pos = self.n.len + 1
		self.n.brace_head = self.raw_head
		repeat
			if self.n.braces == 1 then
				self:emit(self.n.emit_pos, "{")
				break
			elseif self.n.braces == 2 then
				self:template()
			else
				self:argument()
			end
			self:advance()
		until self.n.braces == 0
		self:advance(-1)
	end
end

-- Text not in <onlyinclude></onlyinclude>.
-- Skips tokens, emitting nothing, until the token sequence "<", "onlyinclude",
-- ">" is found or the text ends (""). Then advances two further tokens, which
-- after an opening tag leaves the head on its ">".
function Parser:not_onlyinclude()
	local this, nxt, nxt2 = self:read(), self:read(1), self:read(2)
	while not (
		this == "" or
		this == "<" and nxt == "onlyinclude" and nxt2 == ">"
	) do
		self:advance()
		-- Slide the three-token window along by one.
		this, nxt, nxt2 = nxt, nxt2, self:read(2)
	end
	self:advance(2)
end

-- Tag.
do
	-- Returns a truthy value if the tag named `check` (lowercase) has its tag
	-- markers dropped while its contents are kept: includeonly when the text
	-- is being transcluded, noinclude and onlyinclude when it is not.
	local function is_ignored_tag(self, check)
		return self.transcluded and check == "includeonly" or
			not self.transcluded and (
				check == "noinclude" or
				check == "onlyinclude"
			)
	end

	-- Handlers.
	local handle_start
	local handle_ignored_tag_start
	local handle_ignored_tag
	local handle_after_tag_name
	local handle_before_attribute_name
	local handle_attribute_name
	local handle_before_attribute_value
	local handle_quoted_attribute_value
	local handle_unquoted_attribute_value
	local handle_after_attribute_value
	local handle_tag_block
	local handle_end

	-- First token after "<". Closing tags ("/" then a name) are only accepted
	-- for ignored tags. For opening tags, the name has to be an ignored tag,
	-- the complementary inclusion tag (noinclude when transcluded, includeonly
	-- when not), or a key of `data`. Anything else fails the route.
	function handle_start(self, this)
		if this == "/" then
			local check = lower(self:read(1))
			if is_ignored_tag(self, check) then
				self.n.name = check
				self.n.ignored = true
				self:advance()
				self.n.handler = handle_ignored_tag_start
				return
			end
			return self:fail_route()
		end
		local check = lower(this)
		if is_ignored_tag(self, check) then
			self.n.name = check
			self.n.ignored = true
			self.n.handler = handle_ignored_tag_start
		elseif (
			check == "noinclude" and self.transcluded or
			check == "includeonly" and not self.transcluded
		) then
			-- Parsed as a full tag block, but flagged as ignored.
			self.n.name = check
			self.n.ignored = true
			self.n.handler = handle_after_tag_name
		elseif data[check] then
			self.n.name = check
			self.n.handler = handle_after_tag_name
		else
			return self:fail_route()
		end
	end

	-- Token straight after the name of an ignored tag: ">" or "/>" ends the
	-- tag, a space moves on to handle_ignored_tag, anything else fails.
	function handle_ignored_tag_start(self, this)
		if this == ">" then
			return self:pop()
		elseif this == "/" and self:read(1) == ">" then
			self.n.self_closing = true
			self:advance()
			return self:pop()
		elseif is_space(this) then
			self.n.handler = handle_ignored_tag
		else
			return self:fail_route()
		end
	end

	-- Remainder of an ignored tag: every token is skipped until ">" ends the
	-- tag. Reaching the end of the text first fails the route.
	function handle_ignored_tag(self, this)
		if this == ">" then
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		end
	end

	-- Token straight after the name of a normal tag.
	function handle_after_tag_name(self, this)
		if this == "/" and self:read(1) == ">" then
			self.n.self_closing = true
			self:advance()
			return self:pop()
		elseif this == ">" then
			self.n.handler = handle_tag_block
		elseif is_space(this) then
			self.n.handler = handle_before_attribute_name
		else
			return self:fail_route()
		end
	end

	-- Between attributes: spaces and stray "/" are skipped, "/>" or ">" ends
	-- the opening tag, and any other token starts an attribute name.
	-- NOTE(review): the final `this == ""` branch looks unreachable, since ""
	-- already satisfies the branch before it; the end of the text is then
	-- caught by handle_attribute_name instead. Left as in the original.
	function handle_before_attribute_name(self, this)
		if this == "/" and self:read(1) == ">" then
			self.n.self_closing = true
			self:advance()
			return self:pop()
		elseif this == ">" then
			self.n.handler = handle_tag_block
		elseif this ~= "/" and not is_space(this) then
			self:push_sublayer(handle_attribute_name)
			return self:consume()
		elseif this == "" then
			return self:fail_route()
		end
	end

	-- Inside an attribute name. A name which ends without "=" is discarded;
	-- "=" stores the lowercased name and moves on to the value.
	function handle_attribute_name(self, this)
		if this == "/" or this == ">" or is_space(this) then
			self:pop_sublayer()
			return self:consume()
		elseif this == "=" then
			local attr_name = ulower(concat(self:pop_sublayer()))
			self.n.attr_name = attr_name
			self.n.handler = handle_before_attribute_value
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Straight after "=". "/", ">" or a space gives an empty value; a quote
	-- character starts a quoted value; anything else starts an unquoted one.
	function handle_before_attribute_value(self, this)
		if this == "/" or this == ">" then
			handle_after_attribute_value(self, "")
			return self:consume()
		elseif is_space(this) then
			handle_after_attribute_value(self, "")
		elseif this == "\"" or this == "'" then
			self:push_sublayer(handle_quoted_attribute_value)
			self.n.quoter = this
		elseif this == "" then
			return self:fail_route()
		else
			self:push_sublayer(handle_unquoted_attribute_value)
			return self:consume()
		end
	end

	-- Inside a quoted value: ended by the matching quote character, or by ">"
	-- (which is then processed again as the end of the tag).
	function handle_quoted_attribute_value(self, this)
		if this == ">" then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
			return self:consume()
		elseif this == self.n.quoter then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Inside an unquoted value: ended by a space, or by "/" or ">" (which are
	-- then processed again).
	function handle_unquoted_attribute_value(self, this)
		if this == "/" or this == ">" then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
			return self:consume()
		elseif is_space(this) then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Stores `attr_value` under the pending attribute name (creating the
	-- attributes table on first use), clears the pending name and returns to
	-- looking for the next attribute.
	function handle_after_attribute_value(self, attr_value)
		self.n.attributes = self.n.attributes or {}
		self.n.attributes[self.n.attr_name] = attr_value
		self.n.attr_name = nil
		self.n.handler = handle_before_attribute_name
	end

	-- Contents of the tag. Tokens are emitted as they are until a closing tag
	-- with the same name is found. A candidate closing tag which turns out to
	-- be malformed has its "<" emitted as text.
	function handle_tag_block(self, this)
		if (
			this == "<" and
			self:read(1) == "/" and
			lower(self:read(2)) == self.n.name
		) then
			local tag_end = self:get(handle_end, self.advance, 3)
			if tag_end == self.n.bad_route then
				self:emit("<")
			else
				return self:pop()
			end
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- After the name in a closing tag: only spaces may come before ">".
	function handle_end(self, this)
		if this == ">" then
			return self:pop()
		elseif not is_space(this) then
			return self:fail_route()
		end
	end

	-- Tries to parse a tag starting at "<". Emits a Tag node on success, or a
	-- literal "<" if the route fails.
	function Parser:tag()
		local tag = self:get(handle_start, self.advance)
		if tag == self.n.bad_route then
			self:emit("<")
		else
			self:emit(Tag:new(tag))
		end
	end
end

-- Block handlers.

-- These are blocks which can affect template/argument parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).

-- All blocks (including templates/arguments) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink.

-- Note that nesting also affects pipes and equals signs, in addition to block closures.

-- These blocks can be nested to any degree, so "{{template| [[ [[ [[ ]] }}" will not work, since only one of the three wikilink blocks has been closed. On the other hand, "{{template| [[ [[ [[ ]] ]] ]] }}" will work.

-- All blocks are implicitly closed by the end of the text, since their
-- validity is irrelevant at this stage.
do
	-- Headings
	-- Opens with "\n=" (or "=" at the start of the text), and closes with
	-- "\n". Note that it doesn't matter whether the heading will fail to
	-- process due to a premature newline (e.g. if there are no closing signs),
	-- so at this stage the only thing that matters for closure is the newline.
	-- Note: if directly inside a template block, a newline followed by a
	-- single equals sign is parsed as a parameter equals sign, not the opening
	-- of a new L1 heading block. This does not apply to any other heading
	-- levels. As such, "\n=}}" will successfully close a template, but
	-- "\n==}}" will not, since in the latter case the "}}" would fall inside
	-- the new heading block.
	local function handle_heading_block(self, this)
		if this == "\n" then
			self:emit("\n")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Language conversion block.
	-- Opens with "-{" and closes with "}-". However, templates/arguments take
	-- priority, so "-{{" is parsed as "-" followed by the opening of a
	-- template/argument block (depending on what comes after).
	-- Note: Language conversion blocks aren't actually enabled on the English
	-- Wiktionary, but Parsoid still parses them at this stage.
	local function handle_language_conversion_block(self, this)
		if this == "}" and self:read(1) == "-" then
			self:advance()
			self:emit("}")
			self:emit("-")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Wikilink block.
	-- Opens with "[[" and closes with "]]".
	local function handle_wikilink_block(self, this)
		if this == "]" and self:read(1) == "]" then
			self:advance()
			self:emit("]")
			self:emit("]")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Checks whether `this` opens a language conversion, heading or wikilink
	-- block. If so, the block's tokens are collected with the matching handler
	-- and emitted into the current layer. Otherwise `this` is passed on to the
	-- main handler.
	function Parser:block_handler(this)
		if this == "-" and self:read(1) == "{" then
			self:advance()
			self:emit("-")
			-- "-{{": templates/arguments take priority over conversion blocks.
			if self:read(1) == "{" then
				self:template_or_argument()
			else
				self:emit_tokens(self:get(handle_language_conversion_block))
			end
		elseif this == "=" and (
			self:read(-1) == "\n" or
			self:read(-1) == ""
		) then
			self:advance()
			self:emit("=")
			self:emit_tokens(self:get(handle_heading_block))
		elseif this == "[" and self:read(1) == "[" then
			self:advance()
			self:emit("[")
			self:emit_tokens(self:get(handle_wikilink_block))
		else
			return self:main_handler(this)
		end
	end
end

-- Default handler for a token.
-- * "<!--" starts an HTML comment, which is skipped (nothing is emitted) up to
--   "-->" or the end of the text.
-- * If self.onlyinclude is set, "</onlyinclude>" starts a stretch of skipped
--   text which runs until the next "<onlyinclude>".
-- * Any other "<" is tried as a tag.
-- * "{{" starts a template or argument.
-- * The end of the text ("") pops the current layer.
-- * Anything else is emitted unchanged.
function Parser:main_handler(this)
	if this == "<" then
		if (
			self:read(1) == "!" and
			self:read(2) == "-" and
			self:read(3) == "-"
		) then
			self:advance(4)
			-- Three-token window used to look for "-", "-", ">".
			local cur, nxt, nxt2 = self:read(), self:read(1), self:read(2)
			while not (
				cur == "" or
				cur == "-" and nxt == "-" and nxt2 == ">"
			) do
				self:advance()
				cur, nxt, nxt2 = nxt, nxt2, self:read(2)
			end
			self:advance(2)
		elseif (
			self.onlyinclude and
			self:read(1) == "/" and
			self:read(2) == "onlyinclude" and
			self:read(3) == ">"
		) then
			self:advance(4)
			self:not_onlyinclude()
		else
			self:tag()
		end
	elseif this == "{" and self:read(1) == "{" then
		self:template_or_argument()
	elseif this == "" then
		return self:pop()
	else
		self:emit(this)
	end
end

do
	-- If `transcluded` is true, then the text is checked for onlyinclude tags.
	-- If these are found, then the start of the page is treated as though it
	-- is preceded by a closing onlyinclude tag.
	-- Note that onlyinclude tags *can* be implicitly closed by the end of the
	-- text, but the hard requirement above means that this can only happen if
	-- there are multiple onlyinclude blocks.
	-- NOTE(review): the two patterns below were reduced to " " in the
	-- extracted source. They have been reconstructed as the literal tags which
	-- the rest of the parser recognises. The original comment also said the
	-- tags must be "in the correct order", which these two independent checks
	-- do not verify - confirm against the upstream module.
	local function do_parse(self, str, transcluded, title)
		rawset(self, "title", title)
		if transcluded then
			rawset(self, "transcluded", true)
			if match(str, "<onlyinclude>") and match(str, "</onlyinclude>") then
				rawset(self, "onlyinclude", true)
				-- Skip everything before the first opening tag, then step past
				-- the tag's ">".
				self:not_onlyinclude()
				self:advance()
			end
		end
	end

	-- Parses the wikitext `str` and returns the result of Parser:parse.
	-- `transcluded` says whether the text should be parsed as though it were
	-- being transcluded; `title` is stored on the parser as-is.
	--
	-- The string is first split into tokens. Every whitespace character and
	-- each of the characters !"'-/<=>[]{|} becomes a token of its own, and
	-- each run of other characters between them becomes a single token.
	function export.parse(str, transcluded, title)
		local text, start, n = {}, 1, 0
		-- The leading "()" is a position capture, which supplies `loc`.
		for loc, char in gmatch(str, "()([%s!\"'%-/<=>%[%]{|}])") do
			if loc > start then
				n = n + 1
				text[n] = sub(str, start, loc - 1)
			end
			n = n + 1
			text[n] = char
			start = loc + 1
		end
		-- Trailing run after the last special character (or the whole string,
		-- if it has none).
		if #str >= start then
			n = n + 1
			text[n] = sub(str, start)
		end
		return Parser:parse(
			text,
			Parser.main_handler,
			do_parse,
			str,
			transcluded,
			title
		)
	end
end

-- Parses `text` and, if the result is a single template, returns two values:
-- the template name (preprocessed by the current frame, then trimmed) and the
-- table of its parameters. Returns nothing otherwise.
-- The text is parsed as transcluded unless `not_transcluded` is truthy.
function export.parseTemplate(text, not_transcluded)
	text = export.parse(text, not not_transcluded)
	if text and text.type == "template" then
		return trim(frame:preprocess(tostring(text[1]))), text:get_params()
	end
end

-- Parses `text` and returns an iterator over the templates found in it. Each
-- step yields four values: the template name (preprocessed by the current
-- frame, then trimmed), the table of its parameters, the template's text and
-- its `pos` field. The iterator returns nothing once no templates are left.
-- The text is parsed as transcluded unless `not_transcluded` is truthy.
function export.findTemplates(text, not_transcluded)
	text = export.parse(text, not not_transcluded)
	text:new_iter("next_node")
	-- If text is itself a template object, return it on the first iteration.
	local self_ret, node = text.type == "template"
	return function()
		if self_ret then
			self_ret = false
			return trim(frame:preprocess(tostring(text[1]))), text:get_params(), tostring(text), 1
		end
		-- Step through the nodes until the next template, or until the nodes
		-- run out.
		repeat
			node = text:iterate()
		until not node or node.type == "template"
		if node then
			return trim(frame:preprocess(tostring(node[1]))), node:get_params(), tostring(node), node.pos
		end
	end
end

return export