-- Module:User:Sarri.greek/grk-translit-modern

-- 2024.03.06. wikt:en:User:Sarri.greek
-- tests at Module talk:User:Sarri.greek/grk-translit-modern
-- This is a version of Module:grc-translit
-- See Module:el-translit
--[=[
	-- script polytonic Greek
-- script monotonic or polytonic Greek (any script may be found in quotations)
 * grk-translit-classic = for Ancient Greek grc, Koine, grc-koi, learned Medieval & their dialects, Katharevousa el-kth
 * grk-translit-modern = for any Medieval Greek gkm, Modern Greek & their dialects,

Transliterate like Modern Greek ISO843 (TypeB, slightly more phonemic than TypeA,
	i macron ī for eta, o macron ō for omega
	with corrections γ=gh, δ=dh, χ=kh as proposed for a mixed type C)
	Pronunciation as at Template:R:gkm:Grammar Cambridge
In modern, prosody marks are not needed, but are kept for possible examples of metrics in poetry.
 * Learned Medieval Greek is transliterated exactly as Ancient Greek script (rho with daseia/rough, hypogegrammeni)
 * Main Medieval Greek (vulgar) rho was written with or without daseia.

CORRECTIONS - PROBLEMS
 * add ligatures for quotations only? -- no, we can use param substitute at Template:quote

HOW it is USED?? Template:xlit has: ]=]--

local export = {}

local m_data = require('Module:grc-utilities/data')

-- Break Greek text into units of a single consonant or monophthong letter,
-- or diphthong, with any diacritics.
-- (Description taken from the original note; the implementation lives in
-- Module:grc-utilities and is not visible from this file.)
local tokenize = require('Module:grc-utilities').tokenize

--local ufind = mw.ustring.find -- --local ugsub = mw.ustring.gsub -- --local U = mw.ustring.char -- --local ulower = mw.ustring.lower -- --local uupper = mw.ustring.upper --

-- Byte-level Lua pattern matching exactly one UTF-8 encoded character:
--   * one lead byte: NUL (%z), an ASCII byte (\1-\127), or the first byte of
--     a multi-byte sequence (\194-\244),
--   * followed by any number of continuation bytes (\128-\191).
-- It lets the plain (byte-oriented) string.gsub walk a string one character
-- at a time without going through mw.ustring.
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'

-- Diacritics from Module:grc-utilities/data
--[=[ it says:
local U = require("Module:string/char")
]=]--
-- Table of named combining diacritics. The code points quoted below are the
-- original author's notes about that data module; they are not verifiable
-- from this file.
local diacritics = m_data.named

-- Greek
local acute = diacritics.acute			-- U(0x301) okseia ´ and the overall tonos
local grave = diacritics.grave			-- U(0x300) bareia `
local circumflex = diacritics.circum	-- U(0x342) perispomeni ῀ -- Latin_circum = U(0x302)
local diaeresis = diacritics.diaeresis	-- U(0x308) dialytika ¨
local smooth = diacritics.smooth		-- U(0x313) psile ᾿
local rough = diacritics.rough			-- U(0x314) daseia ῾
-- macron: normally not needed, needed exceptionally in quotations
-- spacing_macron = U(0xAF) -- modifier_macron = U(0x2C9)
local macron = diacritics.macron		-- U(0x304) macron ˉ
-- breve: normally not needed
-- spacing_breve = U(0x2D8)
local breve = diacritics.breve			-- U(0x306) brachy ˘
-- ?? (adscript prosgegrammene is written out with i??) see a_subscript
local subscript = diacritics.subscript	-- U(0x345) hypogegrammene
-- The data module ALSO has:
-- coronis = U(0x343)
-- undertie = U(0x35C) -- actually "combining double breve below"

-- Latin
local hat = diacritics.Latin_circum		-- Latin_circum = U(0x302)

-- Pattern: a macron, an optional diaeresis, then the Latin circumflex.
-- export.translit uses it to detect a transliterated vowel that carries both
-- a macron and a circumflex, so that the macron can be removed.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat

-- Pattern for a token that starts with alpha and ends with the iota
-- subscript (i.e. ᾳ with any diacritics in between); used by export.translit.
local a_subscript = '^[αΑ].*' .. subscript .. '$'

-- The velar consonant letters. In this file the list is referenced only from
-- commented-out (ancient Greek) rules.
local velar = 'κγχξ'

-- Transliteration table: maps one lowercase Greek letter or one combining
-- diacritic to its Latin replacement. export.translit applies it to every
-- UTF-8 character of a lowercased token via gsub(UTF8char, tt); characters
-- without an entry are left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	-- the 'ī' with macron looks bad, like perispomeni / The classic ē reminds more of 'eta'
	["η"] = "i" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron, -- ō

	-- Consonants
	["β"] = "v", -- instead of ancient = b
	["γ"] = "gh", -- instead of g
	["δ"] = "dh", -- instead of d
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "ks", --?? instead of x
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "f", -- instead of latinization ph ?
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Archaic letters (AncGr)
	-- at modern, may be found in quotations and some, for numbering system
	["ϝ"] = "Ϝ", -- do not transliterate to "w" -- this is always the capital Ϝ
	["ϻ"] = "ϻ", -- do not transliterate to "ś"
	["ϙ"] = "Ϙ", -- do not transliterate to "q" -- this is always the capital Ϙ
	["ϡ"] = "ϡ", -- do not transliterate to "š"
	["ͷ"] = "ͷ", -- do not transliterate to "v" number

	-- special characters, for quotations only
	-- Incorrect characters: see About Ancient Greek.
	-- These are tracked by Module:script utilities.
	["ϐ"] = "v", -- instead of 'b'
	["ϑ"] = "th",
	["ϰ"] = "k",
	["ϱ"] = "r",
	["ϲ"] = "s",
	["ϕ"] = "f", -- instead of ph

	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	[breve] = '', -- brachy
	[smooth] = '', -- psile
	[rough] = '', -- daseia
	[circumflex] = hat, -- perispomene
	[subscript] = 'i', -- hypogegrammene
}

-- change name from export.tr to export.translit

-- Report whether a digraph begins a word, judged from the token before it.
-- True at the very start of the text (no previous token) or when the previous
-- token contains no letter (space, hyphen, punctuation, empty string).
-- Assumes tokenize emits whitespace and punctuation as their own tokens
-- -- TODO confirm against Module:grc-utilities.
local function starts_word(prev_token)
	return prev_token == nil or not mw.ustring.find(prev_token, "%a")
end

-- Transliterate Medieval / Modern Greek text into Latin characters.
--
-- The text is split with tokenize; every token is lowercased and mapped
-- character by character through tt. The following modern digraph rules then
-- override the per-letter result (capitals are handled by comparing the
-- lowercased tokens and re-capitalizing at the end):
--   * μπ      -> "b" at the start of a word, "mb" elsewhere
--   * γκ      -> "g" at the start of a word, "nk" elsewhere
--   * γγ      -> "ng" (not "ngh"), in any position
--   * γχ, γξ  -> "nkh", "nks" (γ becomes "n", second letter is normal)
--   * ντ      -> TODO, not implemented yet
--
-- Parameters:
--   text  string to transliterate
--   lang  unused here; kept for the standard translit-module signature
--   sc    unused here; kept for the standard translit-module signature
-- Returns the transliterated string.
function export.translit(text, lang, sc)
	-- daseia on its own
	-- ANC: return h; in Koine a grey h; from Medieval Greek onwards nothing.
	if text == '῾' then
		return '' -- instead of h
	end

	-- Replace semicolon or Greek question mark with regular question mark,
	-- except after an ASCII alphanumeric character (to avoid converting
	-- semicolons in HTML entities).
	text = mw.ustring.gsub(text, "([^A-Za-z0-9])[;" .. mw.ustring.char(0x37E) .. "]", "%1?")

	-- Handle the middle dot = semicolon. In AncGr it is equivalent to
	-- semicolon or colon, but semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- now read the tokens
	local output = {}
	-- Set when the current token was already absorbed into the digraph
	-- started by the previous token (e.g. the π of μπ).
	local consumed = false

	for i, token in ipairs(tokens) do
		if consumed then
			consumed = false
		else
			-- Convert token to lowercase and substitute each character
			-- for its transliteration.
			local lower_token = mw.ustring.lower(token)
			local translit = lower_token:gsub(UTF8char, tt)

			local next_token = tokens[i + 1]
			local next_lower = next_token and mw.ustring.lower(next_token)

			-- CONDITIONS for modern transliteration
			-- tests: Module_talk:User:Sarri.greek/grk-translit-modern
			if lower_token == 'μ' and next_lower == 'π' then
				-- μπ: decided per word, not per whole text.
				translit = starts_word(tokens[i - 1]) and 'b' or 'mb'
				consumed = true
			elseif lower_token == 'γ' and next_lower == 'κ' then
				if starts_word(tokens[i - 1]) then
					-- e.g. Γκάνα
					translit = 'g'
					consumed = true
				else
					-- γ before a velar is n, but not at the beginning of a
					-- word; κ is then transliterated normally.
					-- NOTE(review): medial γκ therefore gives "nk"; confirm
					-- this is preferred over "ng".
					translit = 'n'
				end
			elseif lower_token == 'γ' and next_lower == 'γ' then
				-- γγ = ng NOT ngh, also after elision ('γγίζω).
				translit = 'ng'
				consumed = true
			elseif lower_token == 'γ' and (next_lower == 'χ' or next_lower == 'ξ') then
				-- γχ = nkh, γξ = nks. These are always medial: άγχος, ελέγξω.
				translit = 'n'
			elseif mw.ustring.find(token, a_subscript) then
				-- ᾳ is written plain "a" (ANC added a macron instead).
				-- NOTE(review): this also discards any accent on the alpha;
				-- confirm that is intended.
				translit = 'a'
			end
			-- nu and ντ: todo.
			-- ρρ needs no rule here (ANC gave "rrh"): each ρ is "r" via tt.
			-- Rough breathing needs no rule either (ANC added "h"): tt
			-- already maps it to the empty string.

			-- AncGr, kept just in case:
			-- remove macron from a vowel that has a circumflex.
			if mw.ustring.find(translit, macron_diaeresis) then
				translit = translit:gsub(macron, '')
			end

			-- Capitalize first character of transliteration.
			if token ~= lower_token then
				translit = translit:gsub("^" .. UTF8char, mw.ustring.upper)
			end

			output[#output + 1] = translit
		end
	end

	return table.concat(output)
end

-- Module_talk:User:Sarri.greek/grk-translit-modern
-- ============= use it with arguments =============== --
-- Entry point for {{#invoke:}}: transliterates the first positional argument
-- with export.translit.
--
-- Parameters:
--   frame  the frame object passed by {{#invoke:}}
-- Returns the transliteration, or '' when the argument is missing or empty.
function export.get_tr(frame)
	-- local args = frame:getParent().args	-- for Templates
	local args = frame.args					-- invoke

	-- lemma
	-- Positional arguments are documented as numeric keys; the string key is
	-- kept as a fallback for backward compatibility.
	local text = args[1] or args['1']
	if text == nil or text == '' then
		return ''
	end

	return export.translit(text)
end

return export

-- check Module:el-translit for αυ, ευ, ηυ, μπ initial,
-- add ντ initial, γκ initial (we have delta = dh, and gamma = gh)
--[=[
	text = gsub(text, "([αεηΑΕΗ])([υύ])",
				function (vowel, upsilon, position)
					-- Find next character that is not whitespace or punctuation.
					local following = ""
					while true do
						local next = mw.ustring.sub(text, position, position)
						if next == "" then -- reached end of string
							break
						elseif next:find "[%s%p]" then
							position = position + 1
						else
							following = next
							break
						end
					end
					return tt[vowel]
						.. (upsilon == "ύ" and acute or "")
						.. ((following == "" or ("θκξπσςτφχψ"):find(following, 1, true)) and "f" or "v")
				end)

text = gsub(text, "([αεοωΑΕΟΩ])([ηή])",				function (vowel, ita)					if ita == "ή" then						return tt[vowel] .. "i" .. diaeresis .. acute					else						return tt[vowel] .. "i" .. diaeresis					end				end)

text = gsub(text, "[ωΩ][ιί]",				{["ωι"] = "oï", ["ωί"] = "oḯ",				 ["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})

text = gsub(text, "[οΟ][υύ]",				{["ου"] = "ou", ["ού"] = "oú",				 ["Ου"] = "Ou", ["Ού"] = "Oú"})

text = gsub(text, "(.?)([μΜ])π",				function (before, mi)					if before == "" or before == " " or before == "-" then						if mi == "Μ" then							return before .. "B"						else							return before .. "b"						end					end				end)

]=]--