-- Module:he-translit

-- Table of public functions; it is populated below and returned at the end of the file.
local export = {}

--Contributors: Malku H₂n̥rés, Sartma, Erutuon, Metaknowledge

local m_str_utils = require("Module:string utilities")

-- Short local aliases for the helpers taken from m_str_utils.
-- In the code visible in this file, gcodepoint is only referenced by the
-- commented-out debugging code at the bottom.
local gcodepoint = m_str_utils.gcodepoint
local match = m_str_utils.match
-- `s` is the substitution helper used by every transliteration step below.
local s = m_str_utils.gsub
local U = m_str_utils.char

-- Bidirectional control characters should be avoided as much as possible,
-- but they are easily picked up when copying and pasting, so the module needs
-- to account for them.
-- This list is from Bidirectional control character.
-- The string is meant to be spliced into a pattern character set, which is why
-- the "-" pieces are kept: inside a set they form the ranges U+202A-U+202E and
-- U+2066-U+2069.
local bidirectional_control_characters = table.concat({
	U(0x061C),
	U(0x200E),
	U(0x200F),
	U(0x202A), "-", U(0x202E),
	U(0x2066), "-", U(0x2069),
})

-- Shared tail of the two frontier sets below: whitespace, %z, the
-- bidirectional controls and a literal hyphen.
local boundary_set_tail = "%s%z" .. bidirectional_control_characters .. "%-]"

-- Frontier pattern for the position where a word ends.
local word_end = "%f[" .. boundary_set_tail
-- Frontier pattern for the position where a word starts (negated set).
local word_start = "%f[^" .. boundary_set_tail

-- Pattern fragment for one transliterated vowel: a base letter from the first
-- set, optionally followed by one combining mark from the second set, then an
-- optional further combining mark (presumably the acute used for stress; see
-- the stress-marking rules in `b`).
local V = "[aɔɛeiăəou‌āēīōūêôáéíóúḗṓếố][̂̄̆]?́?"
-- Pattern set for one transliterated consonant symbol. It also contains the
-- single-character placeholders (T, Z, C, D, K, ...) that the last rules of
-- `l` expand into digraphs for display.
local C = "[ʔḇḡḏhwzḥṭylsʕqrśšṯ'ḵmnfṣbdgptkjc″vḫẓġTZCDK]"

-- Per-character lookup table: export.BH passes it to s(text, '.', c), so each
-- single character found here is replaced by its intermediate symbol.
-- Letters that have a distinct final form are not listed; they are handled by
-- the first rules of `b`.
local c = { --direct translit
	--full char ie. C
	["א"] = "ʔ", ["ב"] = "ḇ", ["ג"] = "ḡ", ["ד"] = "ḏ", ["ה"] = "h",
	["ו"] = "w", ["ז"] = "z", ["ח"] = "ḥ", ["ט"] = "ṭ", ["י"] = "y",
	["ל"] = "l", ["ס"] = "s", ["ע"] = "ʕ", ["ק"] = "q", ["ר"] = "r",
	["ש"] = "ß", ["ת"] = "ṯ",
	--miscellaneous:
	["׳"] = "'", --geresh
	["־"] = "-", --hyphen
	["׃"] = " .", --dot
	["ׂ"] = "ˊ", --sin dot
	["ׁ"] = "ˇ", --shin dot
	["ּ"] = "·", --dagesh
	["֫"] = "^", --oleh
	["ֽ"] = "+", --meteg
	--niqqud ie. V
	["ַ"] = "a", ["ָ"] = "ɔ", ["ֶ"] = "ɛ", ["ֵ"] = "e", ["ִ"] = "i",
	["ֳ"] = "ɔ̆", ["ֲ"] = "ă", ["ֱ"] = "ɛ̆", ["ְ"] = "ə", ["ֹ"] = "o",
	["ֻ"] = "u", ["ׇ"] = "ɔ",
}

-- Ordered list of {pattern, replacement} pairs. export.BH applies them one
-- after another with s(), so the order of the entries matters: later rules
-- rely on the temporary symbols (·, ˊ, ˇ, ^, +, E, U, H, Ə, ß) produced by `c`
-- and by earlier rules.
local b = { --BH
	--when different final form
	{"[כך]", "ḵ"}, {"[מם]", "m"}, {"[נן]", "n"}, {"[פף]", "f"}, {"[צץ]", "ṣ"},

	{"(" .. V .. ")(·?)(+?)(^?)([ˊˇ]?'?)", "%5%2%1%4%3"}, --order: s(h)in dot, geresh, dagesh, vowel (niqqud), oleh, meteg
	--bgdkft: fricative + dagesh > stop
	{"ḇ·", "b"}, {"ḡ·", "g"}, {"ḏ·", "d"}, {"ṯ·", "t"}, {"ḵ·", "k"}, {"f·", "p"},
	--s(h)in dot
	{"ß(·?)ˇ", "š%1"}, {"ß(·?)ˊ", "ś%1"},
	--vowel lengthenings
	{"i([+^]?)y", "ī%1"}, --V > long / _{jw}{no V no dagesh}
	{"ī([+^]?" .. V .. ")", "iy%1"}, {"ī·", "iy·"},
	{"e([+^]?)y", "ē%1"}, {"ē([+^]?" .. V .. ")", "ey%1"},
	{"ɛ([+^]?)y", "E%1"}, --see E > ɛ̄ below
	{"E([+^]?" .. V .. ")", "ɛy%1"},
	{"(" .. C .. "·?)wo", "%1ō"},
	{"(" .. V .. "[+^]?)w·", "%1U"}, {"w·", "ū"}, {"U", "w·"},
	{"(" .. C .. "·?)y·", "%1ī"},
	--h > circumflex / V_{no V no dagesh}
	{"(" .. V .. "[+^]?)h", "%1H"}, {"H(" .. V .. ")", "h%1"}, {"H·", "h"},
	{"e([+^]?)H", "ê%1"}, {"o([+^]?)H", "ô%1"}, {"ɛ([+^]?)H", "ɛ̂%1"}, {"ɔ([+^]?)H", "ɔ̂%1"}, {"a([+^]?)H", "â%1"},

	{"(" .. V .. "[+^]?%s?)(.)·(%s?" .. V .. ")", "%1%2%2%3"}, --dagesh gemination
	{"[·ß]", ""}, --deletion of unpointed s(h)ins and useless dageshim
	--schwa: Ə means "kept"
	{"ə" .. word_end, ""}, {"ə([ḇḡḏḵfṯ])", "Ə%1"},
	{"([+‌āēīōūoE])(" .. C .. ")ə", "%1%2Ə"},
	{"E", "ɛ̄"}, --see >E above
	{"(" .. C .. "ə?" .. C .. ")ə", "%1Ə"},
	{"(" .. C .. ")Ə(" .. C .. ")([Əə])", "%1ə%2Ə"},
	{word_start .. "([ūw]?a?" .. C .. ")ə", "%1Ə"},
	{"ə", ""}, {"Ə", "ə"},

	{"([ʕhḥ])a(" .. word_end .. ")", "^a%1%2"}, --final /a/-guttural inversion
	--penultimate stress: segolates & -áyiC
	{"(" .. C .. "[eɛo])(%+?".. C .. "ɛ" .. C .. ")" .. word_end, "%1^%2"},
	{"(" .. C .. "a)(%+?".. C .. C .. "?a" .. C ..")" .. word_end, "%1^%2"},
	{"ayi(" .. C .. ")" .. word_end, "a^yi%1"},
	--stress marking
	{"a^", "á"}, {"e^", "é"}, {"i^", "í"}, {"o^", "ó"}, {"u^", "ú"}, {"ɛ^", "ɛ́"}, {"ɔ^", "ɔ́"},
	{"ā^", "ā́"}, {"ē^", "ḗ"}, {"ī^", "ī́"}, {"ō^", "ṓ"}, {"ū^", "ū́"}, {"ɛ̄^", "ɛ̄́"}, {"ɔ̄^", "ɔ̄́"},
	{"ê^", "ế"}, {"ô^", "ố"}, {"ɛ̂^", "ɛ̂́"}, {"ɔ̂^", "ɔ̂́"},

	{"ɔyw(" .. word_end .. ")", "ɔw%1"}, --irregular…
	{"(" .. V .. "[+^]?)([bdgptk])(" .. V .. ")", "%1%2%2%3"}, --dagesh bgdkft gemination
	{"f", "p̄"}, --bc p̄ are 2 chars
	{"%s%.", "."}, --quotes: " ." > "." (esthetics)
}

--MH
-- Per-character lookup table: export.MH_tr passes it to s(text, '.', m) on
-- the output of export.BH, before the ordered rules in `l` are applied.
local m = { --direct change
	["ḏ"] = "d", ["ḡ"] = "g", ["ś"] = "s",
	["״"] = "″", --gershayim
	["q"] = "k", ["ī"] = "i", ["ū"] = "u",
	["́"] = "^", --stress marking conversion below
}

-- Ordered list of {pattern, replacement} pairs. export.MH_tr applies them one
-- after another with s(), after the per-character table `m`, so the order of
-- the entries matters. Entries written as `-- {...}` are deliberately
-- disabled rules and are kept as comments.
local l = { --indirect
	{"p̄", "f"}, {"[̂̆̄]", ""},
	{"ḥ'", "ḫ"}, {"ṯ'", "T"}, {"ṭ'", "ẓ"}, {"g'", "j"}, {"z'", "Z"}, {"ṣ'", "C"}, {"d'", "D"}, {"[rʕ]'", "ġ"},
	{"(.)%1", "%1"},
	{"[ḇw]", "v"}, {"[ḵḥ]", "K"}, {"[ṯṭ]", "t"}, {"'", ""},
	{"[ʔʕ]", "'"},
	--above: loss of vowel length, loss of gemination, turning n-grams into 1 char, MH mergers.

	--schwa
	--prefixes
--	{word_start .. "([bvkKlšdm])ə", "%1e"},
--	{"(u[bvkKlšdm])ə", "%1e"},
	--initial C clusters
	{word_start .. "([rnmly])ə", "%1e"},
	{word_start .. "(" .. C .. ")ə([h'])", "%1e%2"},
	--internal
	{"([ə+]" .. C .. ")ə", "%1e"},
	{"(" .. C .. C .. ")ə", "%1e"},
	{"[ə+]", ""}, --deletion of remaining schwa and metegim

	--put here not above to avoid e/ə confusion
	{"[āâă]", "a"}, {"[ēêɛ]", "e"}, {"[ōô]", "o"}, {"[ḗế]", "é"}, {"[ṓố]", "ó"},

	{"(" .. word_start .. "[^áéíóú^]-[aeiouɔ])(" .. C .. "?" .. C .. "?)" .. word_end, "%1^%2"}, --module-explicit default final stress...
	--same articulation > schwa insertion
	{"([bp])([bp])", "%1e%2"}, {"([vf])([vf])", "%1e%2"}, {"([dt])([dt])", "%1e%2"}, {"([DTṣ])([DTṣ])", "%1e%2"},
	{"([zs])([zs])", "%1e%2"}, {"([Zš])([Zš])", "%1e%2"}, {"([jC])([jC])", "%1e%2"}, {"([gk])([gk])", "%1e%2"},
	{"(K)(K)", "%1e%2"}, {"(r)(r)", "%1e%2"}, {"''", "'e'"},

	--a/o, including kol
	{"ɔ(" .. C .. C .. ")", "o%1"}, {"ɔ(" .. C .. ")" .. word_end, "o%1"},
	{"(" .. word_start .. "[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
	{"([bvkKlšd][ea][kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
--	{"(m[ei][kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
	{"(" .. word_start .. "u[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
	{"(ha[kK])ɔ(^l" .. word_end .. ")", "%1o%2"},
	{"ɔ", "a"},

	{"(" .. word_start .. C .. C .. "?" .. V .. ")^(" .. C .. "?" .. C .. "?" .. word_end .. ")", "%1%2"}, --…reader-implicit acute accent in monosyllabic
	--stress marking
	{"a^", "á"}, {"e^", "é"}, {"i^", "í"}, {"o^", "ó"}, {"u^", "ú"},
	--glottal stops: kept when {CV}'V,
	{"(" .. word_start .. ")'", "%1"}, {"'(" .. C .. ")", "%1"}, {"'(" .. word_end .. ")", "%1"},
	--fake digraphs
	{"([szck])h", "%1'h"},
	--one char > displaying
	{"ṣ", "ts"}, {"š", "sh"}, {"T", "t'"}, {"Z", "zh"}, {"C", "ch"}, {"D", "d'"}, {"K", "kh"},
}

-- Core transliteration shared by BH_tr and MH_tr.
-- Steps: (1) map every single character through `c`, (2) strip the
-- cantillation marks listed in the character set, (3) apply every
-- {pattern, replacement} rule of `b` in order.
-- The result still contains the "+" symbols produced for metegim; BH_tr
-- removes them, MH_tr relies on them.
-- @param text string to transliterate
-- @return the transliterated string
function export.BH(text)
	text = s(s(text, '.', c), "[֣֖֣֑֣֣֧֛֖֥֧֛֥֖֑֣֖֥֔֗֗֙֔]", "") --remove cantillation marks so that it works for quotes too
	for _, rule in ipairs(b) do
		text = s(text, rule[1], rule[2])
	end
	return text
end

-- Final form of the BH transliteration: the output of export.BH with every
-- "+" (the symbol `c` produces for a meteg) removed.
-- A lone "+" in a pattern has nothing to quantify, so it matches a literal
-- plus sign.
-- @param text string to transliterate
-- @return the transliterated string without meteg symbols
function export.BH_tr(text)
	return s(export.BH(text), "+", "") --metegim kept for MH
end

-- MH transliteration: runs export.BH, maps every single character through
-- `m`, then applies every {pattern, replacement} rule of `l` in order.
-- A text that contains a gershayim (″) but no vowel matching V is treated as
-- an acronym: its fricative symbols are first turned into stops and the final
-- result is upper-cased.
-- @param text string to transliterate
-- @return the transliterated string
function export.MH_tr(text)
	local acronym = false
	text = s(export.BH(text), '.', m) --.BH to keep metegim, m is applied
	if match(text, "″") and not match(text, V) then --acronym = gershayim & no V
		text = s(s(s(text, "p̄", "p"), "ḇ", "b"), "ḵ", "k")
		acronym = true
	end
	for _, rule in ipairs(l) do --in any case, l is applied
		text = s(text, rule[1], rule[2])
	end
	if acronym then
		text = mw.ustring.upper(text)
	end
	return text
end

-- Dispatching entry point.
-- @param text string to transliterate
-- @param lang language code; "he" selects MH_tr, "hbo" selects BH_tr
-- @param sc   script code; when absent it is looked up through
--             Module:languages (getByCode(lang):findBestScript(text):getCode())
-- @return the transliteration, or nil when the script is not "Hebr", when the
--         text contains none of the characters in the set below (vowel points,
--         gershayim, maqaf), or when lang is neither "he" nor "hbo"
function export.tr(text, lang, sc)
	if not sc then
		-- getCode is a method and must be called; without the parentheses the
		-- statement does not even parse.
		sc = require("Module:languages").getByCode(lang):findBestScript(text):getCode()
	end
	if sc ~= "Hebr" or not match(text, "[ְֱֲֳִֵֶַָׇֹֻ״־]") then
		return nil
	elseif lang == "he" then
		return export.MH_tr(text)
	elseif lang == "hbo" then --though useless
		return export.BH_tr(text)
	end
end

-- Returns both transliterations of the first argument, joined by ", ":
-- the BH_tr result first, then the MH_tr result.
-- @param frame object whose `args[1]` holds the text to transliterate
-- @return "<BH_tr result>, <MH_tr result>"
function export.tr_all(frame)
	local biblical = export.BH_tr(frame.args[1])
	local modern = export.MH_tr(frame.args[1])
	return biblical .. ", " .. modern
end

--Erutuon's code for code points below

--[[
local Array = require "Module:array"
local function show_code_point_names(text)
	if not text then return "" end
	local names = Array()
	for cp in gcodepoint(text) do
		-- Remove HEBREW LETTER, HEBREW POINT, etc.
		local name = require "Module:Unicode data".lookup_name(cp)
			:gsub(
				"^HEBREW (%w+) ",
				function(type)
					if type == "ACCENT" then return "ACCENT " else return "" end
				end)
			:lower()
		names:insert(name)
	end
	return names:concat ", "
end

local old_s = s
function s(...)
	local old = ...
	local new = old_s(...)
	if old ~= new then
		mw.log(show_code_point_names(old), show_code_point_names(new), ...)
	end
	return new
end
--]]

-- Hand the table of public functions (BH, BH_tr, MH_tr, tr, tr_all) to the caller of require().
return export