-- Module:User:Erutuon/03

local export = {}

local Array = require "Module:array"

-- Lookup table of abbreviation-type section headers, built with Array's
-- to_set method; export.show_from_json indexes it by header text.
local abbreviation_headers = Array(
	"Abbreviation", "Abbreviations", "Acronym", "Initialism"
):to_set()

--- Builds a comma-separated list of tagged links from a newline-separated
-- list of titles.
-- frame.args[1] is trimmed and split on "\n"; each piece is passed to
-- tag_link from Module:script tag link and suffixed with " (e)".
-- @param frame Scribunto frame; reads frame.args[1].
-- @return the pieces joined with ", ".
function export.script_tag_list(frame)
	local function tag_title(title)
		local tagged = require 'Module:script tag link'.tag_link(title)
		return tagged .. " (e)"
	end

	local raw = mw.text.trim(frame.args[1])
	local titles = Array(mw.text.split(raw, "\n"))
	return titles:map(tag_title):concat(", ")
end

--- Renders the "possibly incorrect headers" listing.
-- For every record whose title list is non-empty after filtering, a level-2
-- wikitext heading with the escaped header text is emitted.
-- @param header_and_titles sequence of records, each with a `header` string
--   and a `titles` list of page-title strings.
-- @return the concatenated output.
function export.print_possibly_incorrect_header_table(header_and_titles)
	-- Turns a single byte into a decimal numeric character reference.
	local function HTML_escape_helper(char)
		return '&#' .. char:byte() .. ';'
	end

	-- Escapes every byte of `chars`; the parentheses drop gsub's count.
	local function HTML_escape(chars)
		return (chars:gsub('.', HTML_escape_helper))
	end

	-- Escapes the characters [ ] { } ' | < > in a header and replaces each
	-- space in a run of two or more spaces with &#x20;.
	local function header_escape(header)
		return (header:gsub('[%[%]{}\'|<>]', HTML_escape)
			:gsub('  +', function (spaces) return ('&#x20;'):rep(#spaces) end))
	end

	-- An approximation of the Lua 5.2 and Lua 5.3 string.rep, which allows
	-- you to specify a string to separate the copies of the string.
	local function string_rep_5_3(str, n, sep)
		local vals = Array()
		for _ = 1, n do
			vals:insert(str)
		end
		return vals:concat(sep)
	end

	local U = mw.ustring.char
	-- Root patterns: exactly three or four letters from the given code point
	-- range, separated by a space (Arabic) or by "־" (Hebrew).
	local Arabic_letter = "[" .. U(0x621) .. "-" .. U(0x64A) .. "]"
	local Arabic_root3 = "^" .. string_rep_5_3(Arabic_letter, 3, " ") .. "$"
	local Arabic_root4 = "^" .. string_rep_5_3(Arabic_letter, 4, " ") .. "$"
	local Hebrew_letter = "[" .. U(0x5D0) .. "-" .. U(0x5EA) .. "]"
	local Hebrew_root3 = "^" .. string_rep_5_3(Hebrew_letter, 3, "־") .. "$"
	local Hebrew_root4 = "^" .. string_rep_5_3(Hebrew_letter, 4, "־") .. "$"

	local utf8_find = mw.ustring.find

	local output = Array()
	output:insert(require "Module:TemplateStyles"
		"Template:User:Erutuon/possibly incorrect headers.css")
	-- NOTE(review): this literal and the "\n\n" and " " literals below contain
	-- only whitespace; markup may have been lost from them -- confirm against
	-- the published module.
	output:insert '\n \n'

	for _, record in ipairs(header_and_titles) do
		local header, titles = record.header, record.titles
		-- Drop "/derived terms" subpages and titles that look like
		-- Arabic or Hebrew roots.
		titles = Array(titles)
			:filter(
				function (title)
					return not (
						title:find("/derived terms$")
						or utf8_find(title, Arabic_root3)
						or utf8_find(title, Arabic_root4)
						or utf8_find(title, Hebrew_root3)
						or utf8_find(title, Hebrew_root4))
				end)

		-- As written, the filtered titles only decide whether the heading
		-- is emitted; they are not themselves added to the output.
		if #titles > 0 then
			output:insert("==" .. header_escape(header) .. "==\n")
			output:insert(
				"\n\n")
		end
	end

	output:insert " "

	return output:concat()
end

--- Parses tab-separated header data and renders it with
-- export.print_possibly_incorrect_header_table.
-- The text is taken from frame.args[1] or, when that is absent, from the
-- content of the data page. Each line must have the form
-- "header<TAB>title[<TAB>title...]".
-- @param frame Scribunto frame; reads frame.args[1].
-- @return the rendered output.
-- Raises an error naming the first line that does not match the pattern.
function export.show(frame)
	local header_and_titles = Array()

	local pagename = "User:Erutuon/mainspace headers/possibly incorrect/data"
	local incorrect_headers = frame.args[1]
		or mw.title.new(pagename):getContent()
	incorrect_headers = mw.text.trim(mw.text.unstripNoWiki(incorrect_headers))

	for line in incorrect_headers:gmatch "[^\n]+" do
		local header, titles = line:match "^([^\t]+)\t(.+)$"
		if not header then
			error(("Line '%s' does not match pattern"):format(line))
		end

		local title_list = Array()
		header_and_titles:insert{ header = header, titles = title_list }
		for title in titles:gmatch '[^\t]+' do
			title_list:insert(title)
		end
	end

	return export.print_possibly_incorrect_header_table(header_and_titles)
end

--- Decodes JSON header data, filters it, and renders it with
-- export.print_possibly_incorrect_header_table.
-- The JSON text is taken from frame.args[1] or, when that is absent, from
-- the content of the JSON data page.
-- @param frame Scribunto frame; reads frame.args[1], frame.args.abbreviation
--   and frame.args.numbered_pos.
-- @return the rendered output.
function export.show_from_json(frame)
	local pagename = "User:Erutuon/mainspace headers/possibly incorrect/json"
	local incorrect_headers = frame.args[1]
		or mw.title.new(pagename):getContent()
	local header_and_titles = mw.text.jsonDecode(incorrect_headers)

	local yesno = require "Module:yesno"
	local include_abbreviations = yesno(frame.args.abbreviation)
	local include_numbered_pos = yesno(frame.args.numbered_pos)

	local poses = {
		"Adjective", "Adverb", "Determiner", "Kanji", "Noun", "Participle",
		"Particle", "Prefix", "Pronoun", "Proper noun", "Root", "Suffix",
		"Verb"
	}

	-- For a header ending in digits, returns whether the text before the
	-- digits (and any whitespace) is in `poses`; otherwise returns nothing.
	local function is_numbered_pos(header)
		local before_number = header:match("^(.-)%s*%d+$")
		if before_number then
			return require "Module:table".contains(poses, before_number)
		end
	end

	local new_header_and_titles = {}
	for _, value in ipairs(header_and_titles) do
		-- Both tests use ==, so nil, false and true are three distinct
		-- states on either side.
		-- NOTE(review): presumably yesno yields nil for an absent argument;
		-- confirm that the resulting three-way matching is intended.
		if abbreviation_headers[value.header] == include_abbreviations
			and is_numbered_pos(value.header) == include_numbered_pos then
			table.insert(new_header_and_titles, value)
		end
	end
	header_and_titles = new_header_and_titles

	return export.print_possibly_incorrect_header_table(header_and_titles)
end

-- Header level that index 1 of a record's `counts` array stands for;
-- assigned by get_date_and_minimum_header_level.
local minimum_header_level

--- Collects the header levels that have a positive count in any record.
-- Index i of a record's `counts` array is treated as header level
-- i + minimum_header_level - 1.
-- @param header_data sequence of records, each with a `counts` array.
-- @return the result of Array.keys on the set of levels found.
local function get_header_levels(header_data)
	local seen = {}
	for _, entry in ipairs(header_data) do
		for index, occurrences in ipairs(entry.counts) do
			if occurrences > 0 then
				local header_level = index + minimum_header_level - 1
				seen[header_level] = true
			end
		end
	end
	return Array.keys(seen)
end

--- Validates a dump date and sets minimum_header_level accordingly.
-- The date must look like 20YY-MM-DD with the third digit of the year being
-- 1 or 2, a month from 01 to 12, and a day of 01 or 20. Dates that sort
-- before "2019-08-20" give a minimum header level of 2, later ones 1.
-- @param date date string.
-- @return the date unchanged.
-- Raises an error when the date is missing or fails any check.
local function get_date_and_minimum_header_level(date)
	if not date then
		error("Provide date")
	end

	local month_text, day_text = date:match "^20[12]%d%-(%d%d)%-(%d%d)$"
	if month_text == nil then
		error("Invalid date or date format")
	end

	local month, month_day = tonumber(month_text), tonumber(day_text)
	if month < 1 or month > 12 then
		error("Invalid month")
	end
	if month_day ~= 1 and month_day ~= 20 then
		error("Dumps come out on 1st or 20th day of month")
	end

	-- ISO-style dates compare correctly as plain strings.
	minimum_header_level = date < "2019-08-20" and 2 or 1

	return date
end

-- id attribute can't contain ASCII whitespace
-- https://html.spec.whatwg.org/multipage/dom.html#global-attributes
-- Replacement for each character matched in escape_id_attribute: double
-- quote and ampersand become character references, and every ASCII
-- whitespace character (tab, LF, FF, CR, space) becomes an underscore.
local id_attribute_replacements = {
	['"'] = "&quot;",
	["&"] = "&amp;",
	[" "] = "_",
	["\t"] = "_",
	["\n"] = "_",
	["\f"] = "_",
	["\r"] = "_",
}

--- Makes a string usable inside a double-quoted id="..." attribute.
-- @param value string to escape.
-- @return the escaped string (the parentheses drop gsub's match count).
local function escape_id_attribute(value)
	return (value:gsub('["& \t\n\f\r]', id_attribute_replacements))
end

--- Renders a sortable wikitable of mainspace headers that are not canonical
-- language names, with one count column per header level plus a total.
-- @param frame Scribunto frame; frame.args[1] is the dump date, validated by
--   get_date_and_minimum_header_level.
-- @return the wikitext of the table.
-- Raises an error when the data page cannot be read, does not match the
-- expected wrapper pattern, or does not contain valid JSON.
function export.non_language_header_table(frame)
	local date = get_date_and_minimum_header_level(frame.args[1])

	local data_page = "User:Erutuon/mainspace headers/data"
	local page = mw.title.new(data_page)
	local content = page:getContent()
	assert(content, "could not read " .. data_page)
	-- NOTE(review): the pattern's wrapper parts are whitespace only; wrapper
	-- markup around the JSON may have been lost from this literal -- confirm
	-- against the published module.
	local json = assert(
		content:match("^\n* \n*(.-)\n*  \n*$"),
		"pattern failed to match content")

	local data
	assert(pcall(function () data = mw.text.jsonDecode(json) end),
		"bad JSON in " .. data_page)
	data = Array(data)

	local language_names = require "Module:languages/canonical names"
	data = data:filter(
		function (record)
			return not language_names[record.header]
		end)

	local levels_found = get_header_levels(data)

	-- NOTE(review): the table-opening "{|", the caption marker "|+" and the
	-- row separator "|-" are reconstructed; the order (caption before the
	-- header row) follows the format arguments (date first, then colspan).
	local header = ([[
{| class="wikitable sortable"
|+ Non-language headers in the mainspace from the %s dump
! rowspan="2" | header !! colspan="%d" | count by header level
|-
]]):format(date, #levels_found + 1)

	local output = Array()
	output:insert(header)
	output:insert("! " .. levels_found:concat(" !! ") .. " !! total\n")

	for _, record in ipairs(data) do
		output:insert('|- id="' .. escape_id_attribute(record.header) .. '"\n|')
		output:insert(mw.text.nowiki(record.header))

		local total = 0
		for _, level in ipairs(levels_found) do
			-- Convert the header level back to an index into `counts`.
			local count = record.counts[level - minimum_header_level + 1]
			total = total + count
			output:insert " || "
			output:insert(count)
		end

		output:insert " || "
		output:insert(total)
		output:insert "\n"
	end

	output:insert "|}\n"

	return output:concat()
end

--- Renders a sortable wikitable of mainspace headers that are canonical
-- language names, with the value looked up for each name and one count
-- column per header level; a total column is added only when more than one
-- level was found.
-- @param frame Scribunto frame; frame.args[1] is the dump date, validated by
--   get_date_and_minimum_header_level.
-- @return the wikitext of the table, preceded by the result of calling
--   Module:TemplateStyles with the stylesheet page title.
-- Raises an error when the data page cannot be read, does not match the
-- expected wrapper pattern, or does not contain valid JSON.
function export.language_header_table(frame)
	local date = get_date_and_minimum_header_level(frame.args[1])

	local data_page = "User:Erutuon/mainspace headers/data"
	local page = mw.title.new(data_page)
	local content = page:getContent()
	assert(content, "could not read " .. data_page)
	-- NOTE(review): the pattern's wrapper parts are whitespace only; wrapper
	-- markup around the JSON may have been lost from this literal -- confirm
	-- against the published module.
	local json = assert(
		content:match("^\n* \n*(.-)\n*  \n*$"),
		"pattern failed to match content")

	local data
	assert(pcall(function () data = mw.text.jsonDecode(json) end),
		"bad JSON in " .. data_page)
	data = Array(data)

	local language_names = require "Module:languages/canonical names"
	data = data:filter(
		function (record)
			return language_names[record.header]
		end)

	local levels_found = get_header_levels(data)

	-- NOTE(review): the caption marker "|+" and the row separator "|-" are
	-- reconstructed; the order (caption before the header row) follows the
	-- format arguments (date first, then colspan).
	local header = ([[
{| class="wikitable sortable"
|+ Language headers in the mainspace from the %s dump
! rowspan="2" | name !! rowspan="2" | code !! colspan="%d" | count by header level
|-
]]):format(date, #levels_found == 1 and 1 or #levels_found + 1)

	local output = Array()
	output:insert(require "Module:TemplateStyles" "Template:User:Erutuon/language headers.css")
	output:insert(header)
	output:insert("! " .. levels_found:concat(" !! "))
	if #levels_found > 1 then
		output:insert(" !! total")
	end
	output:insert "\n"

	for _, record in ipairs(data) do
		output:insert '|- id="'
		output:insert(escape_id_attribute(record.header))
		output:insert '"\n|'
		output:insert(mw.text.nowiki(record.header))
		output:insert " || "
		output:insert(mw.text.nowiki(language_names[record.header]))

		local total = 0
		for _, level in ipairs(levels_found) do
			-- Convert the header level back to an index into `counts`.
			local count = record.counts[level - minimum_header_level + 1]
			-- Log the offending record before the addition below fails.
			if not count then
				mw.logObject(record, "record")
			end
			total = total + count
			output:insert ' || class="count" | '
			output:insert(count)
		end

		if #levels_found > 1 then
			output:insert ' || class="count" | '
			output:insert(total)
		end
		output:insert "\n"
	end

	output:insert "|}\n"

	return output:concat()
end

return export