-- Module:User:Sarri.greek/el-transcript

-- 2024.04.05. wikt:en:User:Sarri.greek
--[=[
This is a version of el-IPA with the accents placed ON the vowels and with no
syllabification. IPA symbols have no capitals.
It is used for the parameter ts= (in texts, for quotations, in inflectional
tables, etc.).
]=]--

local export = {}

-- ===========================================================================
-- Data
-- ===========================================================================

local ustring = mw.ustring
local U = ustring.char

-- Combining marks as they appear after NFD decomposition, and what to do with
-- each one when it follows a Greek letter:
--   "acute"     -> the vowel is stressed (okseia/tonos, bareia, perispomene)
--   "diaeresis" -> the vowel does not form a digraph with the preceding vowel
--   "drop"      -> not pronounced (psile, daseia, macron, brachy, hypogegrammene)
local MARK_ROLE = {
	[U(0x301)] = "acute",     -- okseia / tonos
	[U(0x300)] = "acute",     -- bareia
	[U(0x342)] = "acute",     -- perispomene
	[U(0x308)] = "diaeresis", -- dialytika
	[U(0x313)] = "drop",      -- psile
	[U(0x314)] = "drop",      -- daseia
	[U(0x304)] = "drop",      -- macron
	[U(0x306)] = "drop",      -- brachy (breve)
	[U(0x345)] = "drop",      -- hypogegrammene
}

-- Spacing diacritics, spirits and apostrophes that stand alone in the text
-- (some editions print them after the letter). They are removed; attaching
-- them to the previous vowel has to be done manually.
local STRAY_DIACRITICS = "[" .. U(
	0xB4, 0x384, 0x1FFD, 0x60, 0x1FEF, 0x1FC0,  -- accents: okseia/tonos, bareia, perispomene
	0x1FBF, 0x1FCE, 0x1FCD, 0x1FCF,             -- psile, alone and with accents
	0x1FFE, 0x1FDE, 0x1FDD, 0x1FDF,             -- daseia, alone and with accents
	0xA8, 0x385, 0x1FEE, 0x1FED, 0x1FC1,        -- dialytika, alone and with accents
	0x27, 0x2019, 0x2C9, 0x2D8                  -- apostrophe, curly apostrophe, macron, brachy
) .. "]"

-- Calligraphic / variant letter forms (quotations only) mapped to the
-- ordinary letter before any rule is applied.
local VARIANTS = {
	[U(0x3D0)] = "β", -- calligraphic beta
	[U(0x3D1)] = "θ", -- calligraphic theta
	[U(0x3F0)] = "κ", -- calligraphic kappa
	[U(0x3F1)] = "ρ", -- calligraphic rho
	[U(0x3D5)] = "φ", -- calligraphic phi
	[U(0x3F5)] = "ε", -- lunate epsilon
	[U(0x3F2)] = "σ", -- lunate sigma
}

-- Isolated vowels. Vowel length (macron/brachy) is ignored.
local VOWELS = {
	["α"] = "a", ["ε"] = "e", ["η"] = "i", ["ι"] = "i",
	["ο"] = "o", ["υ"] = "i", ["ω"] = "o",
}

-- Stressed counterpart of every vowel sound.
local ACCENTED = { a = "á", e = "é", i = "í", o = "ó", u = "ú" }

-- Vowel digraphs. They apply only when the first vowel carries no accent and
-- the second one carries no dialytika; the accent is read from the second.
local DIGRAPHS = {
	["αι"] = "e",
	["ει"] = "i", ["οι"] = "i", ["υι"] = "i",
	["ου"] = "u",
}

-- Vowels that combine with a following upsilon into vowel + f/v.
local UPSILON_HOSTS = { ["α"] = true, ["ε"] = true, ["η"] = true }

-- Consonants after which αυ/ευ/ηυ end in "f"; anything else gives "v".
local VOICELESS = {
	["θ"] = true, ["κ"] = true, ["ξ"] = true, ["π"] = true, ["σ"] = true,
	["ς"] = true, ["τ"] = true, ["φ"] = true, ["χ"] = true, ["ψ"] = true,
}

-- Isolated consonants. κ, γ and χ hold their default (non-palatal) value here;
-- see PALATAL for the value used before an e or i sound.
local CONSONANTS = {
	["β"] = "v", ["γ"] = "ɣ", ["δ"] = "ð", ["ζ"] = "z", ["θ"] = "θ",
	["κ"] = "k", ["λ"] = "l", ["μ"] = "m", ["ν"] = "n", ["ξ"] = "ks",
	["π"] = "p", ["ρ"] = "r", ["σ"] = "s", ["ς"] = "s", ["τ"] = "t",
	["φ"] = "f", ["χ"] = "x", ["ψ"] = "ps",
	-- archaic letters (AncGr)
	[U(0x3DD)] = "w",  -- digamma
	[U(0x3FB)] = "s",  -- san
	[U(0x3D9)] = "k",  -- koppa
	[U(0x3DB)] = "st", -- stigma
}

-- { before an e or i sound, elsewhere }
local PALATAL = {
	["κ"] = { "c", "k" },
	["γ"] = { "ʝ", "ɣ" },
	["χ"] = { "ç", "x" },
}

-- Consonant digraphs whose value does not depend on the context.
-- TODO: σμ is only usually "zm"; an override is needed.
local FIXED_PAIRS = {
	["γξ"] = "ŋks",
	["μβ"] = "ɱv",
	["μφ"] = "ɱf",
	["σμ"] = "zm",
	["τσ"] = "t͡s", ["τς"] = "t͡s",
	["τζ"] = "d͡z",
}

-- Whole words whose pronunciation does not follow their spelling.
-- ανά, κατά: the written accent is not pronounced.
-- γεια: written without accent but pronounced with one.
-- TODO: δια/διά and μετά have more than one option and must be given manually.
local SPECIAL_WORDS = {
	["ανα" .. U(0x301)] = "ana",
	["κατα" .. U(0x301)] = "kata",
	["γεια"] = "ʝá",
}

-- Punctuation that changes; every other non-Greek character (comma, full
-- stop, brackets, hellip, spaces, ...) is copied unchanged.
local MIDDLE_DOT = U(0xB7)

-- ===========================================================================
-- Helpers
-- ===========================================================================

-- Split the text into units: one unit per character, with the combining
-- marks of Greek letters folded into the unit of the letter they belong to.
-- Greek letters are lower-cased (IPA has no capitals); other characters are
-- kept exactly as written.
local function to_units(text)
	local units = {}
	for ch in ustring.gmatch(ustring.toNFD(text), ".") do
		local previous = units[#units]
		local role = MARK_ROLE[ch]
		if role and previous and previous.greek then
			-- Accents on consonants and all unpronounced marks are discarded.
			if role ~= "drop" and VOWELS[previous.base] then
				previous[role] = true
			end
		else
			local letter = ustring.lower(ch)
			letter = VARIANTS[letter] or letter
			if VOWELS[letter] or CONSONANTS[letter] then
				units[#units + 1] = { base = letter, greek = true }
			else
				units[#units + 1] = { base = ch }
			end
		end
	end
	return units
end

-- Decide between "f" and "v" for αυ/ευ/ηυ by looking at the next character
-- from position `from` that is neither whitespace nor punctuation.
local function upsilon_coda(units, from)
	local j = from
	while units[j] and ustring.find(units[j].base, "^[%s%p]$") do
		j = j + 1
	end
	local following = units[j]
	if following == nil or VOICELESS[following.base] then
		return "f"
	end
	return "v" -- before a vowel or γ δ ζ λ μ ν ρ
end

-- Transcribe the vowel (or vowel digraph) starting at units[i].
-- Returns the IPA string and the number of units consumed, or nil when
-- units[i] is not a Greek vowel.
local function vowel_at(units, i)
	local first = units[i]
	local sound = first and VOWELS[first.base]
	if not sound then
		return nil
	end

	local consumed, stressed, coda = 1, first.acute, ""
	local second = units[i + 1]
	if second and not first.acute and not second.diaeresis then
		local digraph = DIGRAPHS[first.base .. second.base]
		if digraph then
			sound, consumed, stressed = digraph, 2, second.acute
		elseif second.base == "υ" and UPSILON_HOSTS[first.base] then
			consumed, stressed = 2, second.acute
			local third = units[i + 2]
			if third and third.base == "φ" then
				coda, consumed = "f", 3 -- αυφ = af: the φ is not doubled
			elseif third and third.base == "β" then
				coda, consumed = "v", 3 -- αυβ = av: the β is not doubled
			else
				coda = upsilon_coda(units, i + 2)
			end
		end
	end

	if stressed then
		sound = ACCENTED[sound]
	end
	return sound .. coda, consumed
end

-- True when the sound starting at units[i] is an e or i sound.
local function is_front(units, i)
	local sound = vowel_at(units, i)
	return sound ~= nil and ustring.find(sound, "^[eéií]") ~= nil
end

-- Transcribe the consonant (or consonant digraph) starting at units[i].
-- Returns the IPA string and the number of units consumed.
-- TODO: μπ, ντ, γκ in the middle of a word are only mostly mb, nd, ŋɡ
-- (e.g. αλέγκρο is aléɡɾo); an override is needed.
local function consonant_at(units, i)
	local base = units[i].base
	local following = units[i + 1]
	local pair = following and (base .. following.base) or ""
	local word_initial = not (units[i - 1] and units[i - 1].greek)

	if pair == "γκ" or pair == "γγ" then
		local stop = is_front(units, i + 2) and "ɟ" or "ɡ"
		return (word_initial and "" or "ŋ") .. stop, 2
	elseif pair == "γχ" then
		return "ŋ" .. (is_front(units, i + 2) and "ç" or "x"), 2
	elseif pair == "μπ" then
		return word_initial and "b" or "mb", 2
	elseif pair == "ντ" then
		return word_initial and "d" or "nd", 2
	elseif FIXED_PAIRS[pair] then
		return FIXED_PAIRS[pair], 2
	end

	local palatal = PALATAL[base]
	if palatal then
		return is_front(units, i + 1) and palatal[1] or palatal[2], 1
	end
	return CONSONANTS[base], 1
end

-- Look up the whole word that starts at units[i] in SPECIAL_WORDS.
-- Returns the IPA string and the number of units consumed, or nil.
local function special_word_at(units, i)
	local key, j = {}, i
	while units[j] and units[j].greek do
		local unit = units[j]
		key[#key + 1] = unit.base
			.. (unit.diaeresis and U(0x308) or "")
			.. (unit.acute and U(0x301) or "")
		j = j + 1
	end
	local ipa = SPECIAL_WORDS[table.concat(key)]
	if ipa then
		return ipa, j - i
	end
	return nil
end

-- Transcribe a character that is not a Greek letter.
local function other_at(units, i)
	local ch = units[i].base
	if ch == ";" then
		-- The Greek question mark, except after a Latin letter or a digit
		-- (e.g. the semicolon that closes an HTML entity).
		local previous = units[i - 1]
		if previous and string.find(previous.base, "^[A-Za-z0-9]$") then
			return ";"
		end
		return "?"
	elseif ch == MIDDLE_DOT then
		return ";" -- the middle dot is a semicolon
	end
	return ch
end

-- Transcribe a whole Greek text into IPA symbols.
local function transcribe(text)
	local units = to_units(ustring.gsub(text, STRAY_DIACRITICS, ""))
	local out = {}
	local i = 1
	while i <= #units do
		local ipa, consumed
		if units[i].greek then
			if not (units[i - 1] and units[i - 1].greek) then
				ipa, consumed = special_word_at(units, i) -- only at a word start
			end
			if not ipa then
				ipa, consumed = vowel_at(units, i)
			end
			if not ipa then
				ipa, consumed = consonant_at(units, i)
			end
		else
			ipa, consumed = other_at(units, i), 1
		end
		out[#out + 1] = ipa
		i = i + consumed
	end
	-- Recompose any mark that was passed through after a non-Greek letter.
	return ustring.toNFC(table.concat(out))
end

-- ===========================================================================
-- Entry point
-- ===========================================================================

--- Transcribe Greek text (monotonic or polytonic) into IPA symbols with the
-- accents on the vowels, without syllabification and without capitals.
--
-- The text is read from the calling template's arguments when there is one
-- (frame:getParent().args), otherwise from the #invoke arguments (frame.args).
-- NOTE(review): the source only says "text = any greek text"; this reads the
-- parameter `text` and falls back to the first unnamed parameter -- confirm
-- against the calling template.
--
-- TODO: per-word override arguments (Template:User:Sarri.greek/tlse).
--
-- @param frame the Scribunto frame object
-- @return the transcription as a string ("" when no text is given)
function export.ts(frame)
	local parent = frame.getParent and frame:getParent()
	local candidates = { parent and parent.args or false, frame.args or false }

	local text
	for _, args in ipairs(candidates) do
		if args then
			local value = args.text or args[1]
			if value and value ~= "" then
				text = value
				break
			end
		end
	end

	if not text then
		return ""
	end
	return transcribe(text)
end

return export