-- Module:he-utilities

-- Table of everything this module exposes; returned at the end of the file.
-- (Declared exactly once: a second `local export = {}` would shadow this one.)
local export = {}

-- String helpers shared across modules.
local m_str_utils = require("Module:string utilities")

-- Module:utilities is loaded once and reused for both the module handle and
-- the `catfix` shortcut.
local m_utilities = require("Module:utilities")
local catfix = m_utilities.catfix
local codepoint = m_str_utils.codepoint
local gsub = m_str_utils.gsub
local u = m_str_utils.char

-- Script and language objects looked up by the codes "Hebr" and "he".
local sc = require("Module:scripts").getByCode("Hebr")
local lang = require("Module:languages").getByCode("he")

local m_links = require("Module:links")
local m_headword = require("Module:headword")
local rsplit = mw.text.split

-- A wrapper function allowing the contents of this module to be called from
-- templates. For example, '{{#invoke:he-utilities|main|letters|kafSofit}}'
-- produces 'ך', as does '{{#invoke:he-utilities|main|otSofit|כ}}'.
--
-- frame.args[1] names an exported member of this module. If that member is a
-- function it is called with frame.args[2]; if it is a table it is indexed
-- with frame.args[2]. Any other name raises a descriptive error instead of
-- the opaque "attempt to index a nil value".
function export.main(frame)
	local name = frame.args[1]
	local member = export[name]
	if type(member) == 'function' then
		return member(frame.args[2])
	elseif type(member) == 'table' then
		return member[frame.args[2]]
	end
	error("No function or table named '" .. tostring(name) .. "' is exported by this module.")
end

-- Lookup table from letter names (e.g. "alef") to the corresponding letter
-- as a string (e.g. "\215\144", the UTF-8 encoding of the single character
-- U+05D0 HEBREW LETTER ALEF). The names are listed in code-point order
-- starting at U+05D0, so the n-th name maps to U+05D0 + (n - 1).
export.letters = {}
do
	local letter_names = {
		'alef', 'bet', 'gimel', 'dalet', 'hei', 'vav', 'zayen',
		'khet', 'tet', 'yud', 'kafSofit', 'kaf', 'lamed',
		'memSofit', 'mem', 'nunSofit', 'nun', 'samekh', 'ayin',
		'peiSofit', 'pei', 'tsadiSofit', 'tsadi', 'kuf', 'resh',
		'shin', 'tav',
	}
	for position = 1, #letter_names do
		export.letters[letter_names[position]] = u(0x05D0 + position - 1)
	end
end
-- "sin" is an alias: it is the same base letter as "shin".
export.letters.sin = export.letters.shin

-- Lookup table from vowel-point names to the corresponding mark as a string.
-- (It also includes a few marks and diacritics that aren't quite "vowels",
-- but sit in the same run of Unicode characters.) The names are listed in
-- code-point order starting at U+05B0; 'ignoreMe' only holds a position in
-- that sequence and is removed again afterwards.
export.vowels = {}
do
	local mark_names = {
		'shva', 'khatafSegol', 'khatafPatakh', 'khatafKamats',
		'khirik', 'tseirei', 'segol', 'patakh', 'kamats',
		'kholam', 'ignoreMe', 'kubuts', 'dagesh', 'meteg',
		'makaf', 'rafe', 'pasek', 'shinDot', 'sinDot',
		'sofPasuk', 'upperDot',
	}
	for position = 1, #mark_names do
		export.vowels[mark_names[position]] = u(0x05B0 + position - 1)
	end
end
export.vowels.ignoreMe = nil
-- "mapik" is an alias for the same character as "dagesh".
export.vowels.mapik = export.vowels.dagesh

-- Converts kaf, mem, nun, pei or tsadi to its final (sofit) form; any other
-- value is returned unchanged. Each sofit form is the code point immediately
-- before its regular form, hence the "- 1".
function export.otSofit(letter)
	local names = export.letters
	local has_final_form = letter == names.kaf
		or letter == names.mem
		or letter == names.nun
		or letter == names.pei
		or letter == names.tsadi
	if not has_final_form then
		return letter
	end
	return u(codepoint(letter) - 1)
end

-- Like export.otSofit, but when the result is kaf sofit (i.e. the input was
-- kaf or kaf sofit) a sh'va is appended, since kaf sofit is written with a
-- sh'va when it has no other vowel.
function export.otSofitShva(letter)
	local final_form = export.otSofit(letter)
	if final_form ~= export.letters.kafSofit then
		return final_form
	end
	return final_form .. export.vowels.shva
end

-- Converts kaf sofit, mem sofit, nun sofit, pei sofit or tsadi sofit to its
-- regular (non-final) form; any other value is returned unchanged. Each
-- regular form is the code point immediately after its sofit form, hence
-- the "+ 1".
function export.otLoSofit(letter)
	local names = export.letters
	local is_final_form = letter == names.kafSofit
		or letter == names.memSofit
		or letter == names.nunSofit
		or letter == names.peiSofit
		or letter == names.tsadiSofit
	if not is_final_form then
		return letter
	end
	return u(codepoint(letter) + 1)
end

-- Appends a dagesh to bet, gimel, dalet, kaf, pei or tav; any other value is
-- returned unchanged.
function export.dageshKal(letter)
	local names = export.letters
	local takes_dagesh = letter == names.bet
		or letter == names.gimel
		or letter == names.dalet
		or letter == names.kaf
		or letter == names.pei
		or letter == names.tav
	if not takes_dagesh then
		return letter
	end
	return letter .. export.vowels.dagesh
end

-- If letter is shin plus a shin dot or sin dot, returns shin (without the
-- dot); otherwise, just returns letter.
--
-- The dots are stored in export.vowels, not export.letters. Looking them up
-- in export.letters yields nil and makes the concatenation below raise an
-- error on every call.
function export.dotlessShin(letter)
	local shin = export.letters.shin -- export.letters.sin is the same string
	if letter == shin .. export.vowels.shinDot
		or letter == shin .. export.vowels.sinDot then
		return shin
	end
	return letter
end

-- Pattern fragments used to split a root string into radicals.
--   letters:    a character class covering the range from alef to tav.
--   modifiers:  an optional single mark from a fixed set of combining
--               characters (presumably dagesh/mapik and the shin/sin dots
--               among them; verify against the literal).
--   separators: an optional single separator: hyphen, maqaf (־), space,
--               period, comma, exclamation mark or vertical bar.
--   regex:      captures one letter together with its optional modifier, and
--               consumes (without capturing) one optional trailing separator.
-- Despite the name, `regex` is a Lua-style pattern, not a regular expression.
local letters   = "[א-ת]" local modifiers = "[ּׁׂׄ]?" local separators = "[-־ %.,!|]?" local regex = "(" .. letters .. modifiers .. ")" .. separators

-- Radicals accepted at each position of a root. A radical is accepted only
-- when its entry is true; entries set to false, and keys that are absent,
-- are rejected.
--   medial_radicals:  uses the regular (non-final) letter forms; the dotted
--                     he (הּ) is explicitly false.
--   initial_radicals: the very same table as medial_radicals (an alias, not
--                     a copy).
--   final_radicals:   uses the final forms ך ם ן ף ץ instead of כ מ נ פ צ;
--                     the dotted he (הּ) is true, while vav and yud are
--                     explicitly false.
local medial_radicals = { ["א"] = true, ["ב"] = true, ["ג"] = true, ["ד"] = true, ["ה"] = true, ["הּ"] = false, ["ו"] = true, ["ז"] = true, ["ח"] = true, ["ט"] = true, ["י"] = true, ["כ"] = true, ["ל"] = true, ["מ"] = true, ["נ"] = true, ["ס"] = true, ["ע"] = true, ["פ"] = true, ["צ"] = true, ["ק"] = true, ["ר"] = true, ["שׁ"] = true, ["שׂ"] = true, ["ת"] = true, } local initial_radicals = medial_radicals local final_radicals = { ["א"] = true, ["ב"] = true, ["ג"] = true, ["ד"] = true, ["ה"] = true, ["הּ"] = true, ["ו"] = false, ["ז"] = true, ["ח"] = true, ["ט"] = true, ["י"] = false, ["ך"] = true, ["ל"] = true, ["ם"] = true, ["ן"] = true, ["ס"] = true, ["ע"] = true, ["ף"] = true, ["ץ"] = true, ["ק"] = true, ["ר"] = true, ["שׁ"] = true, ["שׂ"] = true, ["ת"] = true, }

-- Romanization table for root radicals. Each key is used as a pattern (not a
-- plain string) in a gsub call, which is why letters with a final form are
-- written as a character class such as "[כך]" that matches both forms. Each
-- value is the Latin-script replacement. Shin and sin are keyed as the
-- letter plus its dot, so a bare ש has no entry here.
local radical_romanizations = { ["א"] = "ʾ", ["ב"] = "b", ["ג"] = "g", ["ד"] = "d", ["ה"] = "h", ["ו"] = "w", ["ז"] = "z", ["ח"] = "kh", ["ט"] = "ṭ", ["י"] = "y", ["[כך]"] = "k", ["ל"] = "l", ["[מם]"] = "m", ["[נן]"] = "n", ["ס"] = "s", ["ע"] = "ʿ", ["[פף]"] = "p", ["[ץצ]"] = "ts", ["ק"] = "q", ["ר"] = "r", ["שׁ"] = "sh", ["שׂ"] = "s", ["ת"] = "t", }

-- Romanizes a root string: every maqaf (־) becomes an ASCII hyphen, then each
-- pattern in radical_romanizations is replaced by its Latin equivalent.
-- The patterns match only Hebrew characters and the replacements are Latin,
-- so the unspecified iteration order of pairs() does not affect the result.
local function transliterate_root(root_string)
	-- Parentheses keep only the first result of gsub (the string, not the count).
	local result = (root_string:gsub("־", "-"))
	for hebrew_pattern, latin in pairs(radical_romanizations) do
		result = gsub(result, hebrew_pattern, latin)
	end
	return result
end

-- Normalizes the root given in frame.args[1]. The string is split into
-- radicals (a letter plus an optional modifier, each optionally followed by
-- one separator), every radical is checked against the table for its
-- position, and the radicals are returned joined with maqaf (־).
--
-- Raises an error if anything is left over after removing the radicals, if
-- there are fewer than two radicals, or if a radical is not allowed at its
-- position.
function export.plain_root(frame)
	local collected = {}
	-- Each match is recorded and replaced by "", so whatever survives the
	-- substitution is text the pattern did not recognize.
	local leftover = gsub(frame.args[1], regex, function(radical)
		collected[#collected + 1] = radical
		return ""
	end)
	local count = #collected

	if leftover ~= "" then
		error("Unrecognized characters in root.")
	end
	if count < 2 then
		error("Root must have at least two radicals.")
	end

	for position, radical in ipairs(collected) do
		local allowed, complaint
		if position == 1 then
			allowed, complaint = initial_radicals, "Unrecognized initial radical "
		elseif position == count then
			allowed, complaint = final_radicals, "Unrecognized final radical "
		else
			allowed, complaint = medial_radicals, "Unrecognized medial radical "
		end
		if not allowed[radical] then
			error(complaint .. radical .. ".")
		end
	end

	return table.concat(collected, "־")
end

-- Validates and normalizes the root in frame.args[1] via export.plain_root,
-- then returns its romanization.
function export.romanized_root(frame)
	return transliterate_root(export.plain_root(frame))
end

-- Returns the result of the shared `catfix` helper for this module's language
-- and script objects. The parameter list `()` is required by Lua syntax;
-- without it the whole module fails to compile.
function export.catfix()
	return catfix(lang, sc)
end

-- Builds a formatted link to `term` in this module's language via
-- m_links.full_link. `alt`, `tr` and `id` are forwarded unchanged under the
-- keys of the same names.
--
-- An empty term or the placeholder "&mdash;" is returned as is rather than
-- linked. The guard must test `term`: the name `word` is not defined in this
-- function, so testing it never matched. The stray second argument `face`
-- (likewise undefined, hence nil) is dropped; omitting it is equivalent.
local function link(term, alt, tr, id)
	if term == "" or term == "&mdash;" then
		return term
	end
	return m_links.full_link({
		term = term,
		alt = alt,
		tr = tr,
		lang = lang,
		id = id,
	})
end

-- English ordinals used in validateRoot's error message. Positions beyond
-- the end of this list fall back to "<n>th".
local ordinal = { "first", "second", "third", "fourth", "fifth", "sixth" }

-- Checks that `rootTable` is a table whose entries are each at most one
-- character long (as counted by mw.ustring.len).
--
-- rootTable:   array of root letters.
-- joined_root: the whole root as a string; used only in the error message.
--
-- Raises an error, reported at the caller's position, if rootTable is not a
-- table, and an error naming the offending letter and its position if any
-- entry is longer than one character.
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		error("rootTable is not a table", 2)
	end

	for i, letter in ipairs(rootTable) do
		if mw.ustring.len(letter) > 1 then
			local position = ordinal[i] or (i .. "th")
			error("'" .. letter .. "', the " .. position ..
				" letter in the root '" .. joined_root ..
				"' should be a single letter.")
		end
	end
end

-- Template entry point that displays a Hebrew root.
--
-- Arguments are read from the parent frame (the calling template):
--   1       the root, with radicals separated by maqaf (־); optional.
--   nocat   boolean; return the text without any categories.
--   plain   boolean; return only the link, without box or categories.
--   notext  boolean; accepted but not used in this function.
--   sense   optional sense label, appended in parentheses to the displayed
--           root and to the category name, and used as the link id.
--
-- Without argument 1 the root is taken from the current page title, except
-- in the Template namespace where the demo root כ־ת־ב is used.
--
-- On the root's own page a headword line is returned. Elsewhere the result
-- is a link to the root, by default wrapped in a small box together with a
-- count of the pages in the root's category, followed by the category markup.
function export.root(frame)
	local output = {}
	local categories = {}
	local title = mw.title.getCurrentTitle()
	local fulltitle = title.fullText
	local namespace = title.nsText

	local params = {
		[1] = {},
		["nocat"] = {type = "boolean"},
		["plain"] = {type = "boolean"},
		["notext"] = {type = "boolean"},
		["sense"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local rootLetters

	if not args[1] and namespace == "Template" then
		rootLetters = {"כ", "ת", "ב"}
	elseif args[1] then
		rootLetters = rsplit(args[1], "־")
	else
		rootLetters = rsplit(fulltitle, "־")
	end

	local joined_root = table.concat(rootLetters, "־")
	validateRoot(rootLetters, joined_root)

	local sense = args["sense"]
	local sense_formatted = ""
	if sense ~= nil then
		sense_formatted = " (" .. sense .. ") "
	end

	if fulltitle == joined_root then
		-- We are on the root's own page: emit a headword line.
		table.insert(output, m_headword.full_headword({
			lang = lang,
			pos_category = "roots",
			categories = {},
			heads = { fulltitle },
			nomultiwordcat = true,
		}))

		-- `categories` is never filled on this path, so the result is the
		-- same whether or not "nocat" is set.
		return table.concat(output)
	end

	local category_name = lang:getCanonicalName() ..
		" terms belonging to the root " .. joined_root .. sense_formatted

	table.insert(output, link(
		joined_root,
		joined_root .. sense_formatted,
		transliterate_root(joined_root),
		sense
	))

	table.insert(
		categories,
		m_utilities.format_categories({category_name}, lang)
	)

	if args["nocat"] or args["plain"] then
		return table.concat(output)
	end

	local term_count = mw.site.stats.pagesInCategory(category_name, "pages")

	-- NOTE(review): the markup of this return statement was lost from the
	-- source (only `' " .. table.concat(categories)` survived). The box below
	-- is a reconstruction; confirm the exact HTML and wording against the
	-- page history before deploying.
	return '<table class="wikitable" style="float: right; clear: right; text-align: center;">' ..
		'<tr><th>[[w:Semitic root|Root]]</th></tr>' ..
		'<tr><td>' .. table.concat(output) .. '</td></tr>' ..
		'<tr><td>[[:Category:' .. category_name .. '|' ..
		term_count .. ' term' .. (term_count == 1 and '' or 's') ..
		']]</td></tr></table>' ..
		table.concat(categories)
end

return export