-- Module:User:Theknightwho/templateparser

local require = require local concat = table.concat local gmatch = string.gmatch local insert = table.insert local lower = string.lower local match = string.match local rawset = rawset local type = type local ulower = string.ulower

local m_parser = require("Module:parser")

-- Set of tag names that Parser:tag treats as tags (looked up in lower case
-- by the tag-start handler). Built from a list so that each name appears
-- exactly once.
local TAGS = {}
for _, tag_name in ipairs{
	"categorytree",
	"ce",
	"charinsert",
	"chem",
	"dynamicpagelist",
	"gallery",
	"graph",
	"hiero",
	"imagemap",
	"indicator",
	"inputbox",
	"langconvert",
	"mapframe",
	"maplink",
	"math",
	"nowiki",
	"poem",
	"pre",
	"ref",
	"references",
	"score",
	"section",
	"source",
	"syntaxhighlight",
	"talkpage",
	"templatedata",
	"templatestyles",
	"thread",
	"timeline",
} do
	TAGS[tag_name] = true
end

local export = {}

-- -- Helper functions --

-- The six single-character tokens that count as whitespace.
local WHITESPACE = {
	[" "] = true,
	["\t"] = true,
	["\n"] = true,
	["\v"] = true,
	["\f"] = true,
	["\r"] = true,
}

-- Returns true if `this` is exactly one of the whitespace characters above,
-- and false for anything else (including nil and multi-character strings).
local function is_space(this)
	return WHITESPACE[this] == true
end

-- Converts `text` to a number when it is a string that tonumber() accepts,
-- except for the spellings "inf", "-inf", "nan" and "-nan" (compared
-- case-insensitively), which are always left as strings. Non-string input
-- and unconvertible strings are returned unchanged.
local function tonumber_loose(text)
	if type(text) ~= "string" then
		return text
	end
	local folded = lower(text)
	if (
		folded == "inf" or
		folded == "-inf" or
		folded == "nan" or
		folded == "-nan"
	) then
		return text
	end
	return tonumber(text) or text
end

-- -- Nodes --

local Node = m_parser.Node

local Wikitext = m_parser.Wikitext

local Tag = Node:new("tag")

-- Serialises a tag node back to text.
--   * Ignored tags (self.ignored) serialise to the empty string.
--   * Attributes are written as ` name="value"`; they are walked with
--     pairs(), so their order is unspecified.
--   * Self-closing tags end in "/>" and have no body or closing tag.
--   * Otherwise the concatenated children are followed by the closing tag.
-- NOTE(review): the closing-tag literal had been stripped from this file
-- (the expression ended in `.. ""`); it is restored here as
-- "</" .. self.name .. ">" - confirm against the upstream module.
function Tag:__tostring()
	if self.ignored then
		return ""
	end
	local open_tag = {"<", self.name}
	if self.attributes then
		for attr, value in pairs(self.attributes) do
			insert(open_tag, " " .. attr .. "=\"" .. value .. "\"")
		end
	end
	if self.self_closing then
		insert(open_tag, "/>")
		return concat(open_tag)
	end
	insert(open_tag, ">")
	return concat(open_tag) .. concat(self) .. "</" .. self.name .. ">"
end

local Argument = Node:new("argument")

-- Serialises an argument node back to its triple-brace form.
-- NOTE(review): the brace literals and the loop over the parts had been
-- stripped from this file, leaving unbalanced brackets. The body below is
-- rebuilt by analogy with Template:__tostring (parts joined with "|" between
-- the opening and closing braces) - confirm against the upstream module.
function Argument:__tostring()
	if self[2] then
		local output, i = {"{{{", tostring(self[1])}, 2
		while self[i] do
			insert(output, "|")
			insert(output, tostring(self[i]))
			i = i + 1
		end
		insert(output, "}}}")
		return concat(output)
	elseif self[1] then
		return "{{{" .. tostring(self[1]) .. "}}}"
	else
		return "{{{}}}"
	end
end

-- Iterator step: advances self.i and returns the child at that index, but
-- only for the first two children; returns nothing once self.i exceeds 2.
function Argument:next()
	self.i = self.i + 1
	if self.i <= 2 then
		return self[self.i]
	end
end

local Parameter = Node:new("parameter")

-- Serialises a template parameter. A parameter with a key is written as
-- `key=value`; one without a key is written as its value alone. The value
-- text comes from Node.__tostring applied to this node.
function Parameter:__tostring()
	if self.key then
		return tostring(self.key) .. "=" .. Node.__tostring(self)
	end
	return Node.__tostring(self)
end

local Template = Node:new("template")

-- Serialises a template node back to its double-brace form: the name
-- (self[1]) followed by each parameter, separated by "|".
-- NOTE(review): the single-name and empty-template literals had been
-- stripped from this file and are restored here to match the multi-part
-- branch - confirm against the upstream module.
function Template:__tostring()
	if self[2] then
		local output, n = {"{{", tostring(self[1])}, 2
		if self.colon then
			-- The part after the colon is self[2]; the remaining parts start
			-- at index 3. (Previously self[3] was written here, which dropped
			-- self[2] and emitted self[3] twice.)
			insert(output, ":")
			insert(output, tostring(self[2]))
			n = 3
		end
		for i = n, #self do
			insert(output, "|")
			insert(output, tostring(self[i]))
		end
		insert(output, "}}")
		return concat(output)
	elseif self[1] then
		return "{{" .. tostring(self[1]) .. "}}"
	else
		return "{{}}"
	end
end

-- Builds a key -> value table from the template's parameters (self[2]
-- onwards).
--   * Parameters with a key are stored under that key, converted with
--     tonumber_loose so numeric-looking keys become numbers.
--   * Parameters without a key are numbered 1, 2, 3, ... in order.
--   * Later parameters overwrite earlier ones with the same key.
-- Values are strings. For keyed parameters the value is taken with
-- Node.__tostring so that it does not include the `key=` prefix that
-- Parameter:__tostring adds.
-- `args` is accepted for interface compatibility and is not used.
function Template:get_params(args)
	local params, implicit = {}, 0
	for i = 2, #self do
		local param = self[i]
		local key, value
		if param.key then
			key = tonumber_loose(tostring(param.key))
			value = Node.__tostring(param)
		else
			implicit = implicit + 1
			key = implicit
			value = tostring(param)
		end
		params[key] = value
	end
	return params
end

-- -- Parser --

local Parser = m_parser.Parser

-- Argument.
do
	-- Token handler used while inside a candidate triple-brace argument.
	--   "|"    closes the current sublayer as a Wikitext node, starts a new one.
	--   "}}}"  closes the last sublayer, skips the braces and pops the layer.
	--   "}}"   not followed by a third "}" fails the route.
	--   ""     (end of input) fails the route.
	-- Any other token is passed to Parser:block_handler.
	local function handle_argument(self, this)
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			if self:read(2) == "}" then
				self:emit(Wikitext:new(self:pop_sublayer()))
				self:advance(2)
				return self:pop()
			end
			return self:fail_route()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Attempts to parse an argument at the current position. If the route
	-- fails, falls back to Parser:template. On success, three braces are
	-- deducted from the brace counters, the node's `pos` is set to the
	-- updated brace_head, and an Argument node is emitted.
	function Parser:argument()
		local argument = self:get(handle_argument, self.push_sublayer)
		if argument == self.bad_route then
			self:template()
		else
			-- When the layer already holds an item at emit_pos, take it out
			-- and make it the first element of the argument's first part.
			-- NOTE(review): presumably this is a node parsed earlier in the
			-- same run of braces - confirm against Module:parser's `remove`.
			if #self:layer() == self.emit_pos then
				local inner = self:remove()
				if type(argument[1]) == "table" then
					insert(argument[1], 1, inner)
				else
					argument[1] = Wikitext:new{inner, argument[1]}
				end
			end
			self.braces = self.braces - 3
			self.brace_head = self.brace_head - 3
			argument.pos = self.brace_head
			self:emit(Argument:new(argument))
		end
	end
end

-- Template.
do
	local handle_name
	local handle_parameter

	-- Token handler for the template name (everything before the first "|").
	--   "|"   closes the name as a Wikitext node and switches to
	--         handle_parameter with a fresh sublayer.
	--   "}}"  closes the name, skips the second brace and pops the layer.
	--   ""    (end of input) fails the route.
	-- Any other token is passed to Parser:block_handler.
	function handle_name(self, this)
		if this == "|" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self.handler = handle_parameter
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Token handler for template parameters.
	--   "="   when no key has been recorded yet, turns what has been
	--         collected so far into the parameter's key and starts a new
	--         sublayer for the value. An "=" that is followed by another "="
	--         and preceded by a newline or the start of input is not treated
	--         as a key separator.
	--   "|"   closes the current parameter and starts the next one.
	--   "}}"  closes the current parameter and pops the layer.
	--   ""    (end of input) fails the route.
	-- Any other token is passed to Parser:block_handler.
	function handle_parameter(self, this)
		if this == "=" and not self.key and (
			self:read(1) ~= "=" or
			self:read(-1) ~= "\n" and self:read(-1) ~= ""
		) then
			local key = self:pop_sublayer()
			self:push_sublayer()
			rawset(self:layer(), "key", Wikitext:new(key))
		elseif this == "|" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:push_sublayer()
		elseif this == "}" and self:read(1) == "}" then
			self:emit(Parameter:new(self:pop_sublayer()))
			self:advance()
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		else
			return self:block_handler(this)
		end
	end

	-- Attempts to parse a template at the current position. If the route
	-- fails, steps back one token, emits the outstanding opening braces as
	-- literal "{" at emit_pos and zeroes the brace count. On success, two
	-- braces are deducted from the brace counters, the node records the
	-- parser's title and its `pos`, and a Template node is emitted.
	function Parser:template()
		local template = self:get(handle_name, self.push_sublayer)
		if template == self.bad_route then
			self:advance(-1)
			for _ = 1, self.braces do
				self:emit(self.emit_pos, "{")
			end
			self.braces = 0
		else
			-- When the layer already holds an item at emit_pos, take it out
			-- and make it the first element of the template's name.
			-- NOTE(review): presumably this is a node parsed earlier in the
			-- same run of braces - confirm against Module:parser's `remove`.
			if #self:layer() == self.emit_pos then
				local inner = self:remove()
				if type(template[1]) == "table" then
					insert(template[1], 1, inner)
				else
					template[1] = Wikitext:new{inner, template[1]}
				end
			end
			template.title = self.title
			self.braces = self.braces - 2
			self.brace_head = self.brace_head - 2
			template.pos = self.brace_head
			self:emit(Template:new(template))
		end
	end

	-- Entry point for a run of two or more opening braces. Counts the
	-- braces, records where output for this run starts (emit_pos) and the
	-- raw position of the last opening brace (brace_head), then repeatedly
	-- parses templates (two braces left) or arguments (three or more) until
	-- every brace is accounted for. A single leftover brace is emitted as a
	-- literal "{".
	function Parser:template_or_argument()
		self:advance(2)
		self.braces = 2
		while self:read() == "{" do
			self:advance()
			self.braces = self.braces + 1
		end
		self.emit_pos = #self:layer() + 1
		self.brace_head = self.raw_head
		repeat
			if self.braces == 1 then
				self:emit(self.emit_pos, "{")
				break
			elseif self.braces == 2 then
				self:template()
			else
				self:argument()
			end
			self:advance()
		until self.braces == 0
		self:advance(-1)
	end
end

-- Text not in <onlyinclude></onlyinclude>.
-- Skips tokens without emitting them until either the end of input or the
-- token sequence "<", "onlyinclude", ">" is reached, then advances two more
-- tokens so the parser is left on the closing ">" of that opening tag.
function Parser:not_onlyinclude()
	local this, nxt, nxt2 = self:read(0, 1, 2)
	while not (
		this == "" or
		this == "<" and nxt == "onlyinclude" and nxt2 == ">"
	) do
		self:advance()
		-- Slide the three-token window along by one.
		this, nxt, nxt2 = nxt, nxt2, self:read(2)
	end
	self:advance(2)
end

-- Tag.
do
	-- Whether a tag with lower-cased name `check` is one whose tag markup is
	-- skipped in the current mode: "includeonly" when transcluded,
	-- "noinclude" and "onlyinclude" when not.
	local function is_ignored_tag(self, check)
		return self.transcluded and check == "includeonly" or
			not self.transcluded and (
				check == "noinclude" or
				check == "onlyinclude"
			)
	end

	-- Handlers.
	local handle_start
	local handle_ignored_tag_start
	local handle_ignored_tag
	local handle_after_tag_name
	local handle_before_attribute_name
	local handle_attribute_name
	local handle_before_attribute_value
	local handle_quoted_attribute_value
	local handle_unquoted_attribute_value
	local handle_after_attribute_value
	local handle_tag_block
	local handle_end

	-- First token after "<". A leading "/" is only accepted for the ignored
	-- tags (i.e. their closing tags); any other closing tag fails the route.
	-- Otherwise the token must be an ignored tag, the opposite-mode
	-- inclusion tag, or a name listed in TAGS.
	function handle_start(self, this)
		if this == "/" then
			local check = lower(self:read(1))
			if is_ignored_tag(self, check) then
				self.name = check
				self.ignored = true
				self:advance()
				self.handler = handle_ignored_tag_start
				return
			end
			return self:fail_route()
		end
		local check = lower(this)
		if is_ignored_tag(self, check) then
			self.name = check
			self.ignored = true
			self.handler = handle_ignored_tag_start
		elseif (
			check == "noinclude" and self.transcluded or
			check == "includeonly" and not self.transcluded
		) then
			-- The tag is marked ignored but is still parsed through to its
			-- closing tag like an ordinary tag.
			self.name = check
			self.ignored = true
			self.handler = handle_after_tag_name
		elseif TAGS[check] then
			self.name = check
			self.handler = handle_after_tag_name
		else
			return self:fail_route()
		end
	end

	-- Directly after the name of an ignored tag: accepts ">", "/>" or
	-- whitespace; anything else fails the route.
	function handle_ignored_tag_start(self, this)
		if this == ">" then
			return self:pop()
		elseif this == "/" and self:read(1) == ">" then
			self.self_closing = true
			self:advance()
			return self:pop()
		elseif is_space(this) then
			self.handler = handle_ignored_tag
		else
			return self:fail_route()
		end
	end

	-- Inside an ignored tag after whitespace: discards everything up to ">".
	function handle_ignored_tag(self, this)
		if this == ">" then
			return self:pop()
		elseif this == "" then
			return self:fail_route()
		end
	end

	-- Directly after the tag name: accepts "/>", ">" or whitespace.
	function handle_after_tag_name(self, this)
		if this == "/" and self:read(1) == ">" then
			self.self_closing = true
			self:advance()
			return self:pop()
		elseif this == ">" then
			self.handler = handle_tag_block
		elseif is_space(this) then
			self.handler = handle_before_attribute_name
		else
			return self:fail_route()
		end
	end

	-- Between attributes: skips whitespace and stray "/", and starts
	-- collecting an attribute name on any other token.
	-- NOTE(review): the final `this == ""` branch cannot be reached, because
	-- "" already satisfies the preceding condition; end of input is instead
	-- caught by handle_attribute_name. Left as in the original.
	function handle_before_attribute_name(self, this)
		if this == "/" and self:read(1) == ">" then
			self.self_closing = true
			self:advance()
			return self:pop()
		elseif this == ">" then
			self.handler = handle_tag_block
		elseif this ~= "/" and not is_space(this) then
			self:push_sublayer(handle_attribute_name)
			return self:consume()
		elseif this == "" then
			return self:fail_route()
		end
	end

	-- Collects an attribute name. A name that is not followed by "=" is
	-- discarded; otherwise the lower-cased name is stored in attr_name.
	function handle_attribute_name(self, this)
		if this == "/" or this == ">" or is_space(this) then
			self:pop_sublayer()
			return self:consume()
		elseif this == "=" then
			self.attr_name = ulower(concat(self:pop_sublayer()))
			self.handler = handle_before_attribute_value
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Directly after "=": decides whether the value is empty, quoted or
	-- unquoted.
	function handle_before_attribute_value(self, this)
		if this == "/" or this == ">" then
			handle_after_attribute_value(self, "")
			return self:consume()
		elseif is_space(this) then
			handle_after_attribute_value(self, "")
		elseif this == "\"" or this == "'" then
			self:push_sublayer(handle_quoted_attribute_value)
			-- Remember which quote character opened the value.
			rawset(self:layer(), "quoter", this)
		elseif this == "" then
			return self:fail_route()
		else
			self:push_sublayer(handle_unquoted_attribute_value)
			return self:consume()
		end
	end

	-- Collects a quoted value, ended by the matching quote or by ">".
	function handle_quoted_attribute_value(self, this)
		if this == ">" then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
			return self:consume()
		elseif this == self.quoter then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Collects an unquoted value, ended by "/", ">" or whitespace.
	function handle_unquoted_attribute_value(self, this)
		if this == "/" or this == ">" then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
			return self:consume()
		elseif is_space(this) then
			handle_after_attribute_value(self, concat(self:pop_sublayer()))
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Stores attr_value under the pending attr_name (creating the
	-- attributes table on first use), clears attr_name and returns to
	-- looking for the next attribute.
	function handle_after_attribute_value(self, attr_value)
		self.attributes = self.attributes or {}
		self.attributes[self.attr_name] = attr_value
		self.attr_name = nil
		self.handler = handle_before_attribute_name
	end

	-- Tag body: emits tokens verbatim until "</" followed by this tag's name
	-- (case-insensitive) is found and the rest of the closing tag parses.
	function handle_tag_block(self, this)
		if (
			this == "<" and
			self:read(1) == "/" and
			lower(self:read(2)) == self.name
		) then
			local tag_end = self:get(handle_end, self.advance, 3)
			if tag_end == self.bad_route then
				self:emit("<")
			else
				return self:pop()
			end
		elseif this == "" then
			return self:fail_route()
		else
			self:emit(this)
		end
	end

	-- Remainder of a closing tag after its name: only whitespace may
	-- precede the final ">".
	function handle_end(self, this)
		if this == ">" then
			return self:pop()
		elseif not is_space(this) then
			return self:fail_route()
		end
	end

	-- Attempts to parse a tag starting at the current "<". Emits a Tag node
	-- on success, or a literal "<" if the route fails.
	function Parser:tag()
		local tag = self:get(handle_start, self.advance)
		if tag == self.bad_route then
			self:emit("<")
		else
			self:emit(Tag:new(tag))
		end
	end
end

-- Block handlers.
do
	-- Inside a heading line: a newline ends the block.
	local function handle_heading_block(self, this)
		if this == "\n" then
			self:emit("\n")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Inside -{ ... }-: the sequence "}-" ends the block.
	local function handle_language_conversion_block(self, this)
		if this == "}" and self:read(1) == "-" then
			self:advance()
			self:emit("}", "-")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Inside [[ ... ]]: the sequence "]]" ends the block.
	local function handle_wikilink_block(self, this)
		if this == "]" and self:read(1) == "]" then
			self:advance()
			self:emit("]", "]")
			return self:pop()
		else
			return self:block_handler(this)
		end
	end

	-- Recognises the start of a nested block and parses it with the matching
	-- handler above, emitting the resulting tokens into the current layer:
	--   "-{"  language conversion block (or, when a second "{" follows,
	--         a template/argument preceded by a literal "-");
	--   "="   at the start of a line or of the input: heading;
	--   "[["  wikilink.
	-- Everything else is passed to Parser:main_handler.
	function Parser:block_handler(this)
		if this == "-" and self:read(1) == "{" then
			self:advance()
			self:emit("-")
			if self:read(1) == "{" then
				self:template_or_argument()
			else
				self:emit_tokens(self:get(handle_language_conversion_block))
			end
		elseif this == "=" and (
			self:read(-1) == "\n" or
			self:read(-1) == ""
		) then
			self:advance()
			self:emit("=")
			self:emit_tokens(self:get(handle_heading_block))
		elseif this == "[" and self:read(1) == "[" then
			self:advance()
			self:emit("[")
			self:emit_tokens(self:get(handle_wikilink_block))
		else
			return self:main_handler(this)
		end
	end
end

-- Top-level token handler.
--   "<!--"            skips tokens up to and including the closing "-->"
--                     (or to the end of input) without emitting anything.
--   "</onlyinclude>"  when self.onlyinclude is set: skips the tag, then
--                     skips ahead via Parser:not_onlyinclude.
--   "<" otherwise     tries to parse a tag.
--   "{{"              parses a template or argument.
--   ""                (end of input) pops the layer.
-- Any other token is emitted unchanged.
function Parser:main_handler(this)
	if this == "<" then
		if (
			self:read(1) == "!" and
			self:read(2) == "-" and
			self:read(3) == "-"
		) then
			self:advance(4)
			local this, nxt, nxt2 = self:read(0, 1, 2)
			while not (
				this == "" or
				this == "-" and nxt == "-" and nxt2 == ">"
			) do
				self:advance()
				-- Slide the three-token window along by one.
				this, nxt, nxt2 = nxt, nxt2, self:read(2)
			end
			self:advance(2)
		elseif (
			self.onlyinclude and
			self:read(1) == "/" and
			self:read(2) == "onlyinclude" and
			self:read(3) == ">"
		) then
			self:advance(4)
			self:not_onlyinclude()
		else
			self:tag()
		end
	elseif this == "{" and self:read(1) == "{" then
		self:template_or_argument()
	elseif this == "" then
		return self:pop()
	else
		self:emit(this)
	end
end

do
	-- Set-up callback handed to Parser:parse. Records the page title and,
	-- when parsing as a transclusion, the `transcluded` flag. If the text
	-- contains both an opening and a closing onlyinclude tag, onlyinclude
	-- mode is switched on and everything before the first opening tag is
	-- skipped.
	-- NOTE(review): the two patterns had been stripped to " " in this file;
	-- they are restored to the literal tags that Parser:not_onlyinclude and
	-- Parser:main_handler scan for - confirm against the upstream module.
	local function do_parse(self, str, title, transcluded)
		rawset(self, "title", title)
		if transcluded then
			rawset(self, "transcluded", true)
			if match(str, "<onlyinclude>") and match(str, "</onlyinclude>") then
				rawset(self, "onlyinclude", true)
				self:not_onlyinclude()
				self:advance()
			end
		end
	end

	-- Parses wikitext `str` and returns whatever Parser:parse produces.
	--   str         the wikitext to parse.
	--   title       stored on the parser and copied onto template nodes.
	--   transcluded truthy to parse the text as it would be transcluded.
	-- The text is first split into tokens: each run of ordinary characters
	-- is one token, and every whitespace character or one of
	-- ! " ' - / < = > [ ] { | } is a token of its own.
	function export.parse(str, title, transcluded)
		local text = {}
		for chunk, char in gmatch(str, "([^%s!\"'%-/<=>%[%]{|}]*)(.?)") do
			-- Either capture may be empty; only non-empty ones become tokens.
			if #chunk > 0 then
				insert(text, chunk)
			end
			if #char > 0 then
				insert(text, char)
			end
		end
		local tokens = Parser:parse(
			text,
			Parser.main_handler,
			do_parse,
			str,
			title,
			transcluded
		)
		return tokens
	end
end

-- Parses `text` and, if the parse result has type "template", returns the
-- template's name as a string followed by its parameter table (see
-- Template:get_params). Returns nothing otherwise.
function export.parseTemplate(text)
	text = export.parse(text)
	if text and text.type == "template" then
		return tostring(text[1]), text:get_params()
	end
end

-- Returns an iterator over the template nodes found in `text`. Each call
-- yields four values for the next template: its name as a string, its
-- parameter table (see Template:get_params), its full serialised text, and
-- its `pos` field. The iterator returns nothing once no nodes remain.
-- NOTE(review): the `i` and `_next` fields set below are presumably what
-- the node's `iterate` method reads - confirm against Module:parser.
function export.findTemplates(text)
	text = export.parse(text)
	text.i = 0
	text._next = text.next
	local _, node
	return function()
		-- Skip over anything that is not a template node.
		repeat
			_, node = text:iterate()
		until not node or (type(node) == "table" and node.type == "template")
		if node then
			return tostring(node[1]), node:get_params(), tostring(node), node.pos
		end
	end
end

return export