-- Module:User:Erutuon/07

-- Table of functions exported by this module.
local export = {}

-- Upper bound that match_to_array compares its running word count against.
local word_limit = 2000

local m_fun = require "Module:fun"
local m_table = require "Module:table"

local decompose = mw.ustring.toNFD

local U = mw.ustring.char
local acute = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local grave = U(0x300) -- U+0300 COMBINING GRAVE ACCENT
local circumflex = U(0x342) -- U+0342 COMBINING GREEK PERISPOMENI

-- Each definition below sits on its own line so that the preceding comment
-- cannot swallow it.

-- matches U+0300-U+037F
local diacritic = "[\204-\205][\128-\191]"

-- Matches one UTF-8 encoded character: a single-byte character or lead byte,
-- followed by any continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

-- Raised dot; presumably the Greek punctuation mark that plays the role of a
-- semicolon (confirm against the source text).
local semicolon = "·"

-- Collects the matches of a pattern in a string into an array.
--
-- str:          string to scan.
-- patt:         Lua pattern handed to string.gmatch.
-- filter_func:  called with each match; the match is kept only when the
--               result is truthy.
-- process_func: called with each kept match; its first return value is what
--               gets stored.
-- limit:        optional maximum number of stored items; defaults to the
--               module-level word_limit.
--
-- Returns the array of processed matches, in order of occurrence.
local function match_to_array(str, patt, filter_func, process_func, limit)
	limit = limit or word_limit

	local array = {}
	local i = 0
	for match in str:gmatch(patt) do
		if filter_func(match) then
			i = i + 1
			array[i] = process_func(match)
			-- Stop scanning once the cap is reached. This check must be live
			-- code: as a trailing line comment it also hid the closing
			-- "end"s and the return statement.
			if i == limit then
				break
			end
		end
	end
	return array
end

-- Per-character substitutions looked up by process_word.
-- A grave accent becomes an acute and a straight apostrophe becomes a curly
-- one; every other listed character maps to the empty string (is deleted).
local replacements = {
	[grave] = acute,
	["'"] = "’",
}
for _, deleted in ipairs { "[", "]", '"', ",", semicolon, ".", "«", "»", ";" } do
	replacements[deleted] = ""
end

-- Normalizes one word of the text (memoized through m_fun.memoize).
--
-- The word is decomposed with toNFD and then rewritten one UTF-8 character at
-- a time:
--   * the first acute, grave or circumflex is kept (a grave is turned into an
--     acute); any later one is removed;
--   * any other character is looked up in the replacements table and left
--     unchanged when it has no entry.
--
-- The inner function returns whatever string.gsub returns, i.e. the rewritten
-- string followed by the substitution count.
local process_word = m_fun.memoize(function (word)
	local found_accent = false
	return decompose(word):gsub(
		UTF8_char,
		function (char)
			if char == acute or char == grave or char == circumflex then
				-- Remove all but first accent in word.
				if found_accent then
					return ""
				end
				found_accent = true
				if char == grave then
					return acute
				end
				-- A nil result makes gsub keep the original character.
				return nil
			end
			-- Use replacements table.
			return replacements[char]
		end)
end)

-- No macrons or breves in Odyssey text.
-- Replaces every curly apostrophe in the word with a straight apostrophe and
-- returns the resulting string.
local function make_entry_name(word)
	-- The parentheses discard gsub's second result (the substitution count),
	-- so exactly one value is returned.
	return (word:gsub("’", "'"))
end

-- Returns the text with a single space added on each side.
-- NOTE(review): despite the name, no link markup is produced here; the
-- wikilink brackets may have been lost when this source was extracted.
-- Confirm against the original module.
local function link(text)
	local padding = ' '
	return padding .. text .. padding
end

-- Tallies how many times each item occurs in an array.
-- Returns a table mapping each distinct item to its number of occurrences.
local function count(array)
	local tally = {}
	for _, entry in ipairs(array) do
		local seen = tally[entry]
		if seen then
			tally[entry] = seen + 1
		else
			tally[entry] = 1
		end
	end
	return tally
end

-- Formats one output line: a "* " bullet, the word passed through link(),
-- and the count in parentheses.
local function process_count(count, word)
	local pieces = { "* ", link(word), " (", count, ")" }
	return table.concat(pieces)
end

local ugsub = mw.ustring.gsub
local ulower = mw.ustring.lower

-- Memoized helper: lowercases the word with mw.ustring.lower, then deletes
-- every byte sequence matching the diacritic pattern. Because the result of
-- string.gsub is returned directly, the substitution count follows the string
-- as a second value.
local remove_diacritics = m_fun.memoize(function (word)
	local lowered = ulower(word)
	return lowered:gsub(diacritic, "")
end)

-- Builds a comparison function over words for the given word -> count table.
-- A word with a higher count sorts first; words with equal counts are ordered
-- by comparing their remove_diacritics() forms with "<".
local function count_comp_gen(count)
	return function(first, second)
		local first_count = count[first]
		local second_count = count[second]
		if first_count ~= second_count then
			return first_count > second_count
		end
		return remove_diacritics(first) < remove_diacritics(second)
	end
end

-- Entry point: produces a bulleted list of word counts.
--
-- Reads the content of "Module:User:Erutuon/07/documentation", takes the body
-- of the first HTML comment in it as the text, splits the text on whitespace,
-- keeps only the words that contain at least one non-ASCII byte, normalizes
-- them with process_word, counts them, and returns one process_count line per
-- distinct word, joined with newlines.
--
-- frame: the Scribunto frame object (not used).
--
-- Raises an error when the page content cannot be read or contains no HTML
-- comment.
function export.show(frame)
	local source_page = "Module:User:Erutuon/07/documentation"

	local title = mw.title.new(source_page)
	-- getContent must actually be called; a bare "title:getContent" with no
	-- argument list is a syntax error.
	local content = title and title:getContent()
	if not content then
		error("Could not read the content of " .. source_page)
	end

	local Odyssey1 = content:match "<!%-%-(.-)%-%->"
	if not Odyssey1 then
		error("No HTML comment found in " .. source_page)
	end

	-- Keep only words with at least one byte outside the ASCII range.
	local function has_non_ascii(word)
		return word:find "[\128-\255]"
	end

	-- Named "counts" so the count() function is not shadowed.
	local counts = count(match_to_array(Odyssey1, "%S+", has_non_ascii, process_word))

	-- NOTE(review): process_count takes (count, word), so this relies on
	-- mapIter passing the value before the key; confirm against Module:fun.
	local lines = m_fun.mapIter(
		process_count,
		m_table.sortedPairs(counts, count_comp_gen(counts)))
	return table.concat(lines, "\n")
end

return export