-- Module:User:Theknightwho/wikitext parser/data

local ipairs = ipairs local pairs = pairs

--- Shallow-merge any number of tables into one freshly created table.
-- Arguments are processed left to right, so when the same key occurs in
-- several arguments the value from the right-most one is kept.
-- `nil` arguments are skipped; they no longer cut the merge short (iterating
-- `ipairs{...}` stops at the first nil and drops every later argument).
-- @param ... tables to merge; nil entries are ignored
-- @return a new table; none of the arguments is modified
local function merge(...)
	local new = {}
	local sources = {...}
	-- select("#", ...) counts every argument, including nils, so a hole in
	-- the argument list cannot hide the tables that follow it.
	for i = 1, select("#", ...) do
		local t = sources[i]
		if t ~= nil then
			for k, v in pairs(t) do
				new[k] = v
			end
		end
	end
	return new
end

--- Turn a list into a lookup table.
-- @param list sequence of values; traversal uses ipairs, so it ends at the
--             first nil element
-- @return a new table in which every value of `list` is a key mapped to true
local function set(list)
	local members = {}
	for _, value in ipairs(list) do
		members[value] = true
	end
	return members
end

-- Container for every constant exported by this file.
local d = {}

--------------------------------------------------------------------------------
-- Character classes
--------------------------------------------------------------------------------

-- Bidirectional formatting characters, stored as UTF-8 byte sequences.
d.BIDI = set({
	"\226\128\142", -- U+200E
	"\226\128\143", -- U+200F
	"\226\128\170", -- U+202A
	"\226\128\171", -- U+202B
	"\226\128\172", -- U+202C
	"\226\128\173", -- U+202D
	"\226\128\174", -- U+202E
})

-- Characters listed as UTF-8 byte sequences; judging by the name these are
-- ignored inside URIs (NOTE(review): confirm against the consumer).
-- Also U+E0000–E0FFF, which are not enumerated in this table.
d.IGNORED_IN_URI = set{
	-- Two-byte sequences.
	"\194\133", "\194\173", "\205\143", "\216\156",
	-- Three-byte sequences with lead byte 225.
	"\225\133\159", "\225\133\160", "\225\158\180", "\225\158\181",
	"\225\160\139", "\225\160\140", "\225\160\141", "\225\160\142",
	-- Three-byte sequences with lead byte 226.
	"\226\128\139", "\226\128\140", "\226\128\141", "\226\128\142",
	"\226\128\143", "\226\128\168", "\226\128\169", "\226\128\170",
	"\226\128\171", "\226\128\172", "\226\128\173", "\226\128\174",
	"\226\129\160", "\226\129\161", "\226\129\162", "\226\129\163",
	"\226\129\164", "\226\129\165", "\226\129\166", "\226\129\167",
	"\226\129\168", "\226\129\169", "\226\129\170", "\226\129\171",
	"\226\129\172", "\226\129\173", "\226\129\174", "\226\129\175",
	-- Three-byte sequence with lead byte 227.
	"\227\133\164",
	-- Three-byte sequences with lead byte 239.
	"\239\184\128", "\239\184\129", "\239\184\130", "\239\184\131",
	"\239\184\132", "\239\184\133", "\239\184\134", "\239\184\135",
	"\239\184\136", "\239\184\137", "\239\184\138", "\239\184\139",
	"\239\184\140", "\239\184\141", "\239\184\142", "\239\184\143",
	"\239\187\191", "\239\190\160",
	"\239\191\176", "\239\191\177", "\239\191\178", "\239\191\179",
	"\239\191\180", "\239\191\181", "\239\191\182", "\239\191\183",
	"\239\191\184",
	-- Four-byte sequences.
	"\240\155\178\160", "\240\155\178\161", "\240\155\178\162",
	"\240\155\178\163",
	"\240\157\133\179", "\240\157\133\180", "\240\157\133\181",
	"\240\157\133\182", "\240\157\133\183", "\240\157\133\184",
	"\240\157\133\185", "\240\157\133\186",
}

-- ASCII punctuation plus U+00A0.
d.NOWIKI = set({
	"\"", "&", "'", ";", "<", "=", ">", "[", "]", "{", "|", "}",
	"\194\160", -- U+00A0
})

-- ASCII whitespace plus "#", "*" and ":".
d.NOWIKI_START = set({"\t", "\n", "\r", " ", "#", "*", ":"})

-- Tab, space and the non-ASCII space characters listed below.
d.SPACE_SEPARATOR = set({
	"\t", " ",
	"\194\160", -- U+00A0
	"\225\154\128", -- U+1680
	-- U+2000 through U+200A.
	"\226\128\128", "\226\128\129", "\226\128\130", "\226\128\131",
	"\226\128\132", "\226\128\133", "\226\128\134", "\226\128\135",
	"\226\128\136", "\226\128\137", "\226\128\138",
	"\226\128\175", -- U+202F
	"\226\129\159", -- U+205F
	"\227\128\128", -- U+3000
})

-- Space, underscore and the non-ASCII characters listed below.
d.WIKILINK_SPACE = set({
	" ", "_",
	"\194\160", -- U+00A0
	"\225\154\128", -- U+1680
	"\225\160\142", -- U+180E
	-- U+2000 through U+200A.
	"\226\128\128", "\226\128\129", "\226\128\130", "\226\128\131",
	"\226\128\132", "\226\128\133", "\226\128\134", "\226\128\135",
	"\226\128\136", "\226\128\137", "\226\128\138",
	"\226\128\168", -- U+2028
	"\226\128\169", -- U+2029
	"\226\128\175", -- U+202F
	"\226\129\159", -- U+205F
	"\227\128\128", -- U+3000
})

--------------------------------------------------------------------------------
-- Strip markers
--------------------------------------------------------------------------------

-- NOTE(review): HEX/DEC presumably describes how the marker's numeric ID is
-- written for each of these names; confirm against the parser that reads them.
d.STRIP_MARKERS_HEX = set({
	"categorytree", "ce", "charinsert", "chem",
	"dynamicpagelist",
	"gallery", "graph",
	"hiero",
	"imagemap", "indicator", "inputbox",
	"langconvert",
	"mapframe", "maplink", "math",
	"nowiki",
	"poem", "pre",
	"ref", "references",
	"score", "section", "source", "syntaxhighlight",
	"talkpage", "templatedata", "templatestyles", "thread", "timeline",
})

d.STRIP_MARKERS_DEC = set({"h", "item"})

--------------------------------------------------------------------------------
-- Magic links
--------------------------------------------------------------------------------

-- Keywords recognised as magic-link prefixes.
d.MAGIC_LINKS = set({
	"ISBN",
	"PMID",
	"RFC",
})

--------------------------------------------------------------------------------
-- Magic words
--------------------------------------------------------------------------------

-- NOTE(review): "CS" presumably stands for case-sensitive; confirm against
-- the code that consults these two sets.
d.MAGIC_WORDS_CS = set({
	"DISAMBIG",
	"EXPECTED_UNCONNECTED_PAGE",
	"EXPECTUNUSEDCATEGORY",
	"HIDDENCAT",
	"INDEX",
	"NEWSECTIONLINK",
	"NOGLOBAL",
	"NOINDEX",
	"NONEWSECTIONLINK",
	"STATICREDIRECT",
})

d.MAGIC_WORDS_NOT_CS = set({
	"ARCHIVEDTALK",
	"FORCETOC",
	"NOCC",
	"NOCONTENTCONVERT",
	"NOEDITSECTION",
	"NOGALLERY",
	"NOTALK",
	"NOTC",
	"NOTITLECONVERT",
	"NOTOC",
	"TOC",
})

--------------------------------------------------------------------------------
-- External links
--------------------------------------------------------------------------------

-- NOTE(review): presumably the schemes that are followed by "//" in a URL;
-- confirm against the code that consults this set.
d.EL_SCHEMES_SLASHED = set({
	"ftp", "ftps",
	"git", "gopher",
	"http", "https",
	"irc", "ircs",
	"mms",
	"nntp",
	"redis",
	"sftp", "ssh", "svn",
	"telnet",
	"worldwind",
})

-- NOTE(review): presumably the schemes written without "//"; confirm as above.
d.EL_SCHEMES_UNSLASHED = set({
	"bitcoin",
	"geo",
	"magnet", "mailto", "matrix",
	"news",
	"sip", "sips", "sms",
	"tel",
	"urn",
	"xmpp",
})

--------------------------------------------------------------------------------
-- HTML tags
--------------------------------------------------------------------------------

-- Attributes

-- Base set of attribute names. It is merged into the other ATTR_* sets
-- defined below, so it has to be a real local: if it were left undefined,
-- those merges would be handed nil instead of this table.
local ATTR_COMMON = set{
	"about",
	"aria-describedby", "aria-flowto", "aria-hidden", "aria-label",
	"aria-labelledby", "aria-level", "aria-owns",
	"class",
	"datatype", "dir",
	"id", "itemid", "itemprop", "itemref", "itemscope", "itemtype",
	"lang",
	"property",
	"resource", "role",
	"style",
	"tabindex", "title", "typeof",
}

-- Each set below is ATTR_COMMON extended with a few extra attribute names.
-- merge() returns a new table, so ATTR_COMMON itself is left untouched.
local ATTR_BLOCK = merge(ATTR_COMMON, set({"align"}))

local ATTR_CHANGES = merge(ATTR_COMMON, set({"cite", "datetime"}))

local ATTR_HR_PRE = merge(ATTR_COMMON, set({"width"}))

local ATTR_QUOTE = merge(ATTR_COMMON, set({"cite"}))

local ATTR_TABLE_CELL = merge(ATTR_COMMON, set({
	"abbr", "align", "axis",
	"bgcolor",
	"colspan",
	"headers", "height",
	"nowrap",
	"rowspan",
	"scope",
	"valign",
	"width",
}))

-- Tags

-- Maps each tag name to the set of attribute names listed for it.
-- Entries that point straight at an ATTR_* table share that single table
-- object, so a change made through one entry would show up under every tag
-- that uses the same ATTR_* table.
d.TAGS = {
	abbr = ATTR_COMMON,
	b = ATTR_COMMON,
	bdi = ATTR_COMMON,
	bdo = ATTR_COMMON,
	big = ATTR_COMMON,
	blockquote = ATTR_QUOTE,
	br = merge(
		ATTR_COMMON,
		set{"clear"}
	),
	caption = ATTR_BLOCK,
	center = ATTR_COMMON,
	cite = ATTR_COMMON,
	code = ATTR_COMMON,
	data = merge(
		ATTR_COMMON,
		set{"value"}
	),
	dd = ATTR_COMMON,
	del = ATTR_CHANGES,
	dfn = ATTR_COMMON,
	div = ATTR_BLOCK,
	dl = ATTR_COMMON,
	dt = ATTR_COMMON,
	em = ATTR_COMMON,
	font = merge(
		ATTR_COMMON,
		set{"color", "face", "size"}
	),
	h1 = ATTR_BLOCK,
	h2 = ATTR_BLOCK,
	h3 = ATTR_BLOCK,
	h4 = ATTR_BLOCK,
	h5 = ATTR_BLOCK,
	h6 = ATTR_BLOCK,
	hr = ATTR_HR_PRE,
	i = ATTR_COMMON,
	ins = ATTR_CHANGES,
	kbd = ATTR_COMMON,
	li = merge(
		ATTR_COMMON,
		set{"type", "value"}
	),
	-- link and meta do not include ATTR_COMMON.
	link = set{"href", "itemprop", "title"},
	mark = ATTR_COMMON,
	meta = set{"content", "itemprop"},
	ol = merge(
		ATTR_COMMON,
		set{"reversed", "start", "type"}
	),
	p = ATTR_BLOCK,
	pre = ATTR_HR_PRE,
	q = ATTR_QUOTE,
	rb = ATTR_COMMON,
	rp = ATTR_COMMON,
	rt = ATTR_COMMON,
	rtc = ATTR_COMMON,
	ruby = ATTR_COMMON,
	s = ATTR_COMMON,
	samp = ATTR_COMMON,
	small = ATTR_COMMON,
	source = merge(
		ATTR_COMMON,
		set{"src", "type"}
	),
	span = ATTR_COMMON,
	strike = ATTR_COMMON,
	strong = ATTR_COMMON,
	sub = ATTR_COMMON,
	sup = ATTR_COMMON,
	table = merge(
		ATTR_COMMON,
		set{"align", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "rules", "summary", "width"}
	),
	td = ATTR_TABLE_CELL,
	th = ATTR_TABLE_CELL,
	time = merge(
		ATTR_COMMON,
		set{"datetime"}
	),
	tr = merge(
		ATTR_COMMON,
		set{"align", "bgcolor", "valign"}
	),
	tt = ATTR_COMMON,
	u = ATTR_COMMON,
	ul = merge(
		ATTR_COMMON,
		set{"type"}
	),
	var = ATTR_COMMON,
	wbr = ATTR_COMMON
}

-- NOTE(review): presumably the tags that may appear without a closing tag;
-- confirm against the parser that reads this set.
d.TAGS_SINGLE = set({
	"br",
	"dd", "dt",
	"hr",
	"li", "link",
	"meta",
	"td", "th", "tr",
	"wbr",
})

-- Every name here is also a member of d.TAGS_SINGLE.
-- NOTE(review): presumably the tags that never take a closing tag; confirm.
d.TAGS_SINGLE_ONLY = set({
	"br",
	"hr",
	"link",
	"meta",
	"wbr",
})

-- The module's value is the table of constants built above.
return d