-- Module:User:Malku H₂n̥rés/he-filter

-- Table of functions returned by this module.
local export = {}

-- Transliteration function for generated romanizations; export.convert feeds
-- every headword through it.
local MH = require("Module:User:Sartma/he-translit").MH_tr --TO CHANGE WHEN MOVED
-- Language object for Hebrew ("he"); used for makeEntryName.
local lang = require("Module:languages").getByCode("he")
-- Escapes text so it is shown literally instead of being parsed as wikitext.
local N = mw.text.nowiki

-- Short aliases for the Unicode-aware string functions.
local s = mw.ustring.gsub
local l = mw.ustring.lower
local M = mw.ustring.match

local U = mw.ustring.char
-- Body of a character set listing what counts as a word boundary: whitespace,
-- NUL, the bidirectional control characters U+061C, U+200E, U+200F,
-- U+202A-U+202E and U+2066-U+2069, and a literal hyphen.
local bcc = "%s%z" .. U(0x061C) .. U(0x200E) .. U(0x200F) .. U(0x202A) .. "-" .. U(0x202E) .. U(0x2066) .. "-" .. U(0x2069) .. "%-"
-- Frontier patterns: position right before / right after a boundary character.
local word_end = "%f[" .. bcc .. "]"
local word_start = "%f[^" .. bcc .. "]"

-- Stressed vowels, and all vowels (plain or stressed).
local a = "[áéíóú]"
local V = "[aeiouáéíóú]"

-- Rewrite rules for manual romanizations (for what may appear on wiki).
-- Each entry is {pattern, replacement}. export.convert applies the entries in
-- order with mw.ustring.gsub to the lower-cased romanization, so the order
-- matters and the upper-case replacements (K, C, Z, S, T) cannot collide with
-- letters already present in the input.
local c = {
	{"(.)%1", "%1"}, -- collapse any doubled character
	{"[ḇw]", "v"},
	{"ḡ", "g"},
	{"ḏ", "d"},
	{"[ḵẖḥħx]", "K"},
	{"p̄", "f"},
	{"[ṯṭ]", "t"},
	{"[kc]h", "K"},
	{"č", "C"},
	{"ž", "Z"},
	{"zh", "Z"},
	{"š", "S"},
	{"d[Zj]", "j"}, -- runs after ž/zh have become Z
	{"ṣ", "T"},
	{"t[sz]", "T"},
	{"[qḳ]", "k"},
	{"ś", "s"},
	{"sh", "S"},
	-- NOTE(review): the next two patterns are two-character sequences, not
	-- bracketed sets, so each only matches the two characters side by side.
	-- Confirm whether a set ("[...]") was intended.
	{"‘׳", "'"},
	{"“״", "″"},
	{"['‘ʾʿʼʻʔʕˀˤə-]", ""}, --glottal stops, schwa, hyphen, 3ayin, hyphen
	-- Long vowel carrying an acute accent -> plain stressed vowel.
	{"ā́", "á"},
	{"ḗ", "é"},
	{"ī́", "í"},
	{"ṓ", "ó"},
	{"ū́", "ú"},
	-- Remaining length/quality diacritics are dropped.
	{"[āăâ]", "a"},
	{"[ēĕê]", "e"},
	{"[īĭî]", "i"},
	{"[ōŏô]", "o"},
	{"[ūŭû]", "u"},
	{"(" .. V .. ")i", "%1y"}, -- vowel + i -> vowel + y
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"} -- drop h after a vowel at the end of a word
}

-- Rewrite rules for the generated MH transliteration.
-- Each entry is {pattern, replacement}; export.convert applies the entries in
-- order with mw.ustring.gsub to the lower-cased transliteration. The
-- upper-case replacements are the same stand-ins used for manual
-- romanizations, so both sides end up comparable.
local r = { --for generated MH tr
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"}, -- drop h after a vowel at the end of a word
	{"['-]", ""}, --glottal stop & hyphen
	-- Same rule again: removing ' above can leave a new vowel + h at a word end.
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"},
	-- NOTE(review): every ' has already been deleted two rules earlier, so this
	-- pattern cannot match at this position. Confirm the intended order.
	{"([zsck])'h", "%1h"},
	{"[kc]h", "K"},
	{"zh", "Z"},
	{"sh", "S"},
	{"ts", "T"}
}

-- Lookup table for removing stress: maps each acute-accented vowel to its
-- plain counterpart. Passed as the replacement table to gsub with the pattern
-- ".", so characters without an entry are left unchanged.
local o = { --removing stress
	["á"] = "a",
	["é"] = "e",
	["í"] = "i",
	["ó"] = "o",
	["ú"] = "u"
}

--- Reduces a headword and a manual romanization to comparable forms.
-- @param g headword spelling; it is transliterated with MH, lower-cased and
--   rewritten with the rules in `r`
-- @param m manual romanization; it is lower-cased and rewritten with the
--   rules in `c`
-- @return the normalised generated transliteration
-- @return the normalised manual romanization
function export.convert(g, m)
	m = l(m)
	g = l(MH(g))
	for z = 1, #c do
		m = s(m, c[z][1], c[z][2])
	end
	for z = 1, #r do
		g = s(g, r[z][1], r[z][2])
	end

	--removing acute accents...
	-- Disabled: ...on the generated tr, if there's none on the manual
	--	if (not M(m, a)) and M(g, a) then
	--		g = s(g, ".", o)
	--	end
	if (not M(g, a)) and M(m, a) then --...on the manual one if monosyllabic
		-- The generated form carries no stress mark, so the manual one must
		-- not be penalised for having one.
		m = s(m, ".", o)
	end
	return g, m
end

--- Tells whether a manual romanization agrees with the generated one.
-- For a POS section: g = headword spelling, m = manual romanization.
-- @param g headword spelling
-- @param m manual romanization
-- @return true when both normalise to the same string (see export.convert),
--   false otherwise
function export.filter(g, m)
	local generated, manual = export.convert(g, m)
	return manual == generated
end

--- Lists every "headword<TAB>romanization" line of a data page together with
-- the generated transliteration and the result of export.filter.
-- @param frame frame object; args[1] is the title of the data page, args[2]
--   (optional, numeric) the maximum number of lines to process
--   (default 1000000)
-- @return wikitext: one numbered-list item per processed line
-- Raises an error if the page cannot be read, has no <pre> block, or contains
-- a line that is not exactly two tab-separated fields.
function export.print_filter(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	local title = mw.title.new(data_page_title)
	-- mw.title.new yields nil for an invalid title and getContent yields nil
	-- for a missing page; fail with a readable message in both cases.
	local content = assert(title and title:getContent(), "could not read data page: " .. tostring(data_page_title))
	-- The data lives inside the page's <pre> block.
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local output = {}
	local i = 0

	for line in data:gmatch("[^\n]+") do
		local headword, translit = line:match("^([^\t]+)\t([^\t]+)$")
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		output[#output + 1] = ("# %s %s | %s: %s"):format(
			N(headword),
			N(translit),
			MH(headword),
			tostring(export.filter(headword, translit))
		)
		i = i + 1
		if i >= max_lines then
			break
		end
	end
	return table.concat(output, "\n")
end

--- Lists every "headword<TAB>other" line of a data page as a link to the
-- entry, an edit link, and the second field.
-- @param frame frame object; args[1] is the title of the data page, args[2]
--   (optional, numeric) the maximum number of lines to process
--   (default 1000000)
-- @return wikitext: one numbered-list item per processed line
-- Raises an error if the page cannot be read, has no <pre> block, or contains
-- a line that is not exactly two tab-separated fields.
function export.spelling(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	local title = mw.title.new(data_page_title)
	-- mw.title.new yields nil for an invalid title and getContent yields nil
	-- for a missing page; fail with a readable message in both cases.
	local content = assert(title and title:getContent(), "could not read data page: " .. tostring(data_page_title))
	-- The data lives inside the page's <pre> block.
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local output = {}
	local i = 0

	for line in data:gmatch("[^\n]+") do
		local headword, others = line:match("^([^\t]+)\t([^\t]+)$")
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		local entry = lang:makeEntryName(headword)
		-- One placeholder per argument: entry link shown as the headword, an
		-- edit link for the entry, then the second field.
		-- NOTE(review): the link markup is reconstructed so that all four
		-- arguments are used; confirm the exact wikitext wanted.
		output[#output + 1] = ("# [[%s|%s]] [[Special:EditPage/%s|edit]] %s"):format(entry, headword, entry, N(others))

		i = i + 1
		if i >= max_lines then
			break
		end
	end
	return table.concat(output, "\n")
end

--- Lists every "headword<TAB>" line (empty second field) of a data page as a
-- link to the entry, an edit link, and the generated transliteration.
-- @param frame frame object; args[1] is the title of the data page, args[2]
--   (optional, numeric) the maximum number of lines to process
--   (default 1000000)
-- @return wikitext: one numbered-list item per processed line
-- Raises an error if the page cannot be read, has no <pre> block, or contains
-- a line that is not a single field followed by a tab.
function export.dunno(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	local title = mw.title.new(data_page_title)
	-- mw.title.new yields nil for an invalid title and getContent yields nil
	-- for a missing page; fail with a readable message in both cases.
	local content = assert(title and title:getContent(), "could not read data page: " .. tostring(data_page_title))
	-- The data lives inside the page's <pre> block.
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local output = {}
	local i = 0

	for line in data:gmatch("[^\n]+") do
		local headword = line:match("^([^\t]+)\t$")
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		local entry = lang:makeEntryName(headword)
		-- One placeholder per argument: entry link shown as the headword, an
		-- edit link for the entry, then the generated transliteration.
		-- NOTE(review): the link markup is reconstructed so that all four
		-- arguments are used; confirm the exact wikitext wanted.
		output[#output + 1] = ("# [[%s|%s]] [[Special:EditPage/%s|edit]] %s"):format(entry, headword, entry, MH(headword))

		i = i + 1
		if i >= max_lines then
			break
		end
	end
	return table.concat(output, "\n")
end

-- Picks the "To check" subsection for a pair of normalised forms that may
-- differ. The tests run in a fixed order and the first one that holds wins.
-- Returns nil when both forms are equal, otherwise the index (2..13) of the
-- subsection in the list built by export.draft.
local function draft_category(headword, g, m)
	if g == m then
		return nil
	elseif not M(headword, "[ְֱֲֳִֵֶַָׇֹֻ״־]") then --unpointed
		return 2
	elseif M(g, "[\\/,]") then --several spellings
		return 3
	elseif M(m, "[\\/,]") then --several romanizations
		return 4
	elseif s(g, "″", "") == m then --lacking gershayim
		return 5
	elseif M(m, "&[aeiou]acute;") then --bad acute accent
		return 6
	elseif s(m, "[- ]", "") == s(g, "[- ]", "") then --hyphen
		return 7
	elseif M(g, " ") then --multiword
		return 8
	elseif not M(m, a) then --lacking stress
		return 9
	elseif s(m, ".", o) == s(g, ".", o) then --stress position
		return 10
	elseif s(m, "e", "") == s(g, "e", "") then --schwa
		return 11
	elseif s(s(m, "[áó]", "X"), "[ao]", "x") == s(s(g, "[áó]", "X"), "[ao]", "x") then --kamats (a/o)
		return 12
	end
	return 13
end

--- Builds a report that sorts the "headword<TAB>romanization" lines of a data
-- page into "Correct" and several "To check" subsections.
-- @param frame frame object; args[1] is the title of the data page, args[2]
--   (optional, numeric, default 0) the first line number to process, args[3]
--   (optional, numeric, default 1500) the line number at which to stop; that
--   line itself is not processed
-- @return wikitext: an "Interval" line, the "To check" section with its
--   subsections, then the "Correct" section; every heading shows
--   "(count/number of processed lines)"
-- Raises an error if the page cannot be read, has no <pre> block, or a
-- processed line is not exactly two tab-separated fields.
function export.draft(frame)
	local data_page_title = frame.args[1]
	local min_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 0
	local max_lines = frame.args[3] and assert(tonumber(frame.args[3])) or 1500
	local title = mw.title.new(data_page_title)
	-- mw.title.new yields nil for an invalid title and getContent yields nil
	-- for a missing page; fail with a readable message in both cases.
	local content = assert(title and title:getContent(), "could not read data page: " .. tostring(data_page_title))
	-- The data lives inside the page's <pre> block.
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0 -- number of the line being read (1-based)
	local length = 0 -- lines actually classified

	-- T: heading followed by the correct items.
	-- F: heading followed by one {heading, items...} list per problem kind.
	local T = {"Correct"}
	local F = {
		"To check",
		{"Unpointed"},
		{"Several spellings"},
		{"Several romanizations"},
		{"Lacking gershayim"},
		{"Bad acute accent"},
		{"Hyphen"},
		{"Multiword"},
		{"Lacking stress"},
		{"Stress position"},
		{"Schwa"},
		{"Kamats (a/o)"},
		{"Other"}
	} --to add: lacking pointing, lacking translit

	for line in data:gmatch("[^\n]+") do
		i = i + 1
		if min_lines <= i then
			if i >= max_lines then
				i = i - 1
				break
			end
			local headword, translit = line:match("^([^\t]+)\t([^\t]+)$")
			if not headword then
				error("Following line did not match pattern:\n" .. line)
			end
			local g, m = export.convert(headword, translit)
			headword = N(headword)
			local entry = lang:makeEntryName(headword)
			-- One placeholder per argument: entry link shown as the headword,
			-- an edit link, the manual romanization, the generated one.
			-- NOTE(review): the link markup is reconstructed so that all five
			-- arguments are used; confirm the exact wikitext wanted.
			local t = ("# [[%s|%s]] [[Special:EditPage/%s|edit]] %s • %s"):format(entry, headword, entry, N(translit), MH(headword))
			--t = ("# %s %s • %s (%s | %s)"):format(N(headword), N(translit), MH(headword), m, g)
			length = length + 1
			local category = draft_category(headword, g, m)
			if category then
				table.insert(F[category], t)
			else
				table.insert(T, t)
			end
		end
	end
	i = i + 1 -- exclusive upper bound of the interval reported below

	-- Counting the classified lines directly keeps the denominators right
	-- even when min_lines is 0 or lies beyond the end of the data.
	T[1] = "==" .. T[1] .. " (" .. (#T - 1) .. "/" .. length .. ")=="
	F[1] = "==" .. F[1] .. " (" .. (length - #T + 1) .. "/" .. length .. ")=="
	for z = 2, #F do
		F[z][1] = "===" .. F[z][1] .. " (" .. (#F[z] - 1) .. "/" .. length .. ")==="
		F[z] = table.concat(F[z], "\n")
	end
	return "Interval: [" .. min_lines .. ", " .. i .. "[\n" .. table.concat(F, "\n\n") .. "\n" .. table.concat(T, "\n")
end

return export