-- Module:User:Sarri.greek/grk-stems/data

-- tests at Module talk:User:Sarri.greek/grk-stems
--[=[
-- Explanations, at bottom of this page.
-- See everything veryyyy big, zoom your browser 200% or see at wikt:en:User:Sarri.greek/fonts
-- check codes at wikt:en:Module:grc-utilities & wikt:en:Module:grc-utilities/data

INSTRUCTIONS
	Load this module using require, not using mw.loadData.
	USE e.g.:	local m_data = require("Module:XXX/data")
	IF:	local module_path = 'Module:Yyyy'
	USE:	local m_data = require(module_path .."/data")
	DO NOT USE:	local m_data = mw.loadData("XXX")
	HOW to call it: m_data.xxxxxx e.g. m_data.unaccented_to_oxia

CONTENTS
	a) simple sequences
	b) conversions
		accented_to_unaccented // unaccented_to_oxia // perispomeni_to_oxia // oxia_to_perispomeni
	c) diphthongs and digraphs (2-vowel-sequences)
		digraphs // digraphs_accent_back // digraphs_accented_to_unaccented

PROBLEMS SOLVED
	UNORTHODOX characters: They exist only in some old editions, where whole words are in capitals, retaining their diacritics
	= Principle: when you do not use unicode at one part, then do not use unicode to the other part either
	= write those at a .txt, show it at .htm and copypaste
	= copy the capital.with.diaeresis (as one character) & copy next to it the invisible tonos
	= the tonos unicodes are invisible
	Example: IOTA.with.diaeresis+tonos as Ϊ + the invisible oxeia  - ◌́ (U+0301)
	Example Άͅ ALPHA.prosdiegrammene+tonos as ᾼ + the invisible oxia (U+0301, &#x0301;)
		write at .txt, show it at .htm and copy

PROBLEMS
	cf ?? in case a reader has a personal css with such a font. Can this be controlled?
 * CAPITAL+diaeresis+tonos.
 * CAPITAL+prosdiegrammeno iota+tonos
 * FORBID all family-fonts that present the accent tonos or oxia as a small vertical line. E.g. Verdana.
 * Do it with that U? unicode? Show how it is written.
 * For dichronon_oxia I do not know how to write all prosodies.
]=]--

-- Module table: the data sets defined in this file are attached to it as
-- fields (export.xxx), and it is what the module hands back via `return export`.
local export = {}

-- NEED: FORBID all family-fonts that present the accent tonos or oxia as a small vertical line.

-- --                        a) SIMPLE SEQUENCES                          -- -- --?? DO i need all UNORTHODOX in here? bahh

-- vowel+perispomeni (circumflex)
-- These are always macra (macron), no need for prosody marks
-- see big wikt:en:User:Sarri.greek/fonts
--
-- Pattern character class listing every vowel that carries a perispomeni.
-- NOTE: the members are multi-byte UTF-8 characters; Lua's built-in string
-- functions work on bytes, so this class is only meaningful with a
-- Unicode-aware matcher (presumably mw.ustring -- confirm against the caller).
export.vowel_perispomeni = '[ᾶᾷἆᾆἇᾇῆῇἦᾖἧᾗῖἶἷῗῦὖὗῧῶῷὦᾦὧᾧἎᾎἏᾏἮᾞἯᾟἾἿὟὮᾮὯᾯ]'

-- brachy(short)+oxia (oxia acute, or baria grave; all these accents are called tonos)
-- There are no prosody marks.
--
-- Pattern character class: epsilon and omicron (always short) carrying an
-- acute or grave accent, with or without a breathing, lower and upper case.
-- NOTE: multi-byte UTF-8 members; needs a Unicode-aware matcher
-- (presumably mw.ustring -- confirm against the caller).
export.brachy_oxia = '[έὲἔἒἕἓόὸὄὂὅὃΈῈἜἚἝἛΌῸὌὊὍὋ]'

-- macron(long)+oxia (oxia acute, or baria grave; all these accents are called tonos)
-- There are no prosody marks.
--
-- Pattern character class: eta and omega (always long) carrying an acute or
-- grave accent, with or without breathing / iota subscript, lower and upper case.
-- Capital omega with tonos (Ώ) is listed next to its grave form, mirroring the
-- Ή / Ὴ pair of eta.
-- NOTE: multi-byte UTF-8 members; needs a Unicode-aware matcher
-- (presumably mw.ustring -- confirm against the caller).
export.macron_oxia = '[ήῄὴῂἤᾔἢᾒἥᾕἣᾓώῴὼῲὤᾤὢᾢὥᾥὣᾣΉῊἬᾜἪᾚᾚἭᾝἫᾛΏῺὬᾬὪᾪὭᾭὫᾫ]'

-- diphthong (2 vowels together) + any tonos (okseia, bareia, perispomene)
-- NOT dialytics ΐῒῗΰῢῧ
-- These are always macra (macron), no need for prosody marks
--
-- Two consecutive pattern character classes: an unaccented first vowel
-- followed by an ACCENTED iota or upsilon. Only accented second vowels belong
-- here, so the bare rough-breathing upsilon (no accent) is not a member.
-- NOTE: multi-byte UTF-8 members; needs a Unicode-aware matcher
-- (presumably mw.ustring -- confirm against the caller).
export.diphthong_tonos = '[αΑεΕηΗοΟ][ίὶῖἴἲἶἵἳἷΊῚἼἺἾἽἻἿύὺῦὔὒὖὕὓὗΎῪὝὛὟ]'
-- ΝΟΤ ALL of them [ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗῐῑΙΊῚἸἼἺἾἹἽἻἿῙῘυύὺῦὐὔὒὖὑὕὓὗϋΰῢῧῠῡΥΎῪὙὝὛὟῩῨ]') --

-- ?? Do I NEED to write IN the function the ones with prosodies?
--[=[
-- The 3 ambiguous dichrona (dichronon = with 2 possible prosodies) are α ι υ
-- Here, we also need the characters with BOTH PROSODIES
	short alpha+tonos	ᾰ̓́ -  Ᾰ̓́  -  ᾰ̔́  -  Ᾰ̔́ 	iota	upsilon	 copypaste from a .txt
	long alpha+tonos	.. iota	upsilon	 copypaste from a .txt
]=]--
-- dichronon+oxia (oxia acute, or baria grave; all these accents are called tonos)
--
-- Pattern character class: alpha, iota and upsilon carrying an acute or grave
-- accent (no prosody-marked variants yet, see the note above).
-- NOTE: multi-byte UTF-8 members; needs a Unicode-aware matcher
-- (presumably mw.ustring -- confirm against the caller).
export.dichr_oxia = '[άᾴὰᾲἄᾄἂᾂἅᾅἃᾃίὶἴἲἵἳΐῒύὺὔὒὕὓΰῢΆᾺἌᾌἊᾊᾊἍᾍἋᾋΊῚἼἺἽἻΎῪὝὛ]'

-- all vowels+oxia or baria, or perispomeni (any kind of tonos accent)
--
-- Pattern character class made of the members of vowel_perispomeni,
-- brachy_oxia, macron_oxia and dichr_oxia written out literally, in that order.
-- Capital omega with tonos (Ώ) is included alongside Ή.
-- NOTE: multi-byte UTF-8 members; needs a Unicode-aware matcher
-- (presumably mw.ustring -- confirm against the caller).
export.tonos = '[ᾶᾷἆᾆἇᾇῆῇἦᾖἧᾗῖἶἷῗῦὖὗῧῶῷὦᾦὧᾧἎᾎἏᾏἮᾞἯᾟἾἿὟὮᾮὯᾯέὲἔἒἕἓόὸὄὂὅὃΈῈἜἚἝἛΌῸὌὊὍὋήῄὴῂἤᾔἢᾒἥᾕἣᾓώῴὼῲὤᾤὢᾢὥᾥὣᾣΉῊἬᾜἪᾚᾚἭᾝἫᾛΏῺὬᾬὪᾪὭᾭὫᾫάᾴὰᾲἄᾄἂᾂἅᾅἃᾃίὶἴἲἵἳΐῒύὺὔὒὕὓΰῢΆᾺἌᾌἊᾊᾊἍᾍἋᾋΊῚἼἺἽἻΎῪὝὛ]'

-- --          b) CONVERSIONS (change the characters)                     -- -- -- to see them, zoom in 170% or 200%

-- -- ? please write notes for unicodes or whatever code too
-- remove accent from accented
--
-- Lookup table: accented vowel (acute or perispomeni) -> the same vowel without
-- the accent. Breathings, iota subscript and dialytics are kept on the result.
-- Grave (baria) forms are not listed.
export.accented_to_unaccented = {
	-- alpha ambiguous dichrononon -- do I need +prosodies here?
	-- α no spirits
	['ά'] = 'α', ['Ά'] = 'Α',
	['ᾴ'] = 'ᾳ',
	['Άͅ'] = 'ᾼ', -- UNORTHODOX write ALPHA.with.iota + invisible unicode tonos at .txt, show it at .htm and copypaste
	['ᾶ'] = 'α', -- ?? ALPHA + perispomeni -- UNORTHODOX
	['ᾷ'] = 'ᾳ', -- ?? ALPHA.with.i + perispomeni -- UNORTHODOX
	-- with psile
	['ἄ'] = 'ἀ', ['Ἄ'] = 'Ἀ', ['ᾄ'] = 'ᾀ', ['ᾌ'] = 'ᾈ',
	['ἆ'] = 'ἀ', ['Ἆ'] = 'Ἀ', ['ᾆ'] = 'ᾀ', ['ᾎ'] = 'ᾈ',
	-- with dasia
	['ἅ'] = 'ἁ', ['Ἅ'] = 'Ἁ', ['ᾅ'] = 'ᾁ', ['ᾍ'] = 'ᾉ',
	['ἇ'] = 'ἁ', ['Ἇ'] = 'Ἁ', ['ᾇ'] = 'ᾁ', ['ᾏ'] = 'ᾉ',

	-- ε epsilon (always brachy = short = never perispomene circumflex)
	['έ'] = 'ε', ['Έ'] = 'Ε',
	['ἔ'] = 'ἐ', ['Ἔ'] = 'Ἐ',
	['ἕ'] = 'ἑ', ['Ἕ'] = 'Ἑ',

	-- η eta (always macron = long)
	['ή'] = 'η', ['Ή'] = 'Η',
	['ῄ'] = 'ῃ', -- ?? ETA.with.i + oxia -- UNORTHODOX
	['ῆ'] = 'η', -- ?? ETA + perispomeni -- UNORTHODOX
	['ῇ'] = 'ῃ', -- ?? ETA.with.i + perispomeni -- UNORTHODOX
	-- with psile
	['ἤ'] = 'ἠ', ['Ἤ'] = 'Ἠ', ['ᾔ'] = 'ᾐ', ['ᾜ'] = 'ᾘ',
	['ἦ'] = 'ἠ', ['Ἦ'] = 'Ἠ', ['ᾖ'] = 'ᾐ', ['ᾞ'] = 'ᾘ',
	-- with dasia
	-- The 4th column is the capital with iota (ᾝ, ᾟ), parallel to the psile
	-- rows; it used to repeat the plain capitals Ἥ / Ἧ as duplicate keys.
	['ἥ'] = 'ἡ', ['Ἥ'] = 'Ἡ', ['ᾕ'] = 'ᾑ', ['ᾝ'] = 'ᾙ',
	['ἧ'] = 'ἡ', ['Ἧ'] = 'Ἡ', ['ᾗ'] = 'ᾑ', ['ᾟ'] = 'ᾙ',

	-- iota ambiguous dichrononon -- do I need +prosodies here?
	-- ι no spirits -- possible diaeresis (dialytics)
	['ί'] = 'ι', ['Ί'] = 'Ι',
	['ΐ'] = 'ϊ',
	-- IOTA+dialytics+tonos -- UNORTHODOX
	-- https://www.compart.com/en/unicode/U+0390 decomposed as Ι (U+0399) - ◌̈ (U+0308) - ◌́ (U+0301)
	-- 1.FAILED write this at .txt, show at .htm and copy:   Ι&#x0308;&#x0301;
	-- 2.FAILED write this at .txt, show at .htm and copy: Ϊ&#x0301;  which is= Ϊ (IOTA.diaeresis) +  (U+0308) - ◌́ (U+0301)
	-- 3.YES copypaste IOTAwithdialytics+ copypaste invisible tonos Ϊ&#x0301; that is Ϊ +
	-- = when you do not use unicode at one part, then do not use unicode to the other part either
	['Ϊ́'] = 'Ϊ', -- this is 3.
	['ῖ'] = 'ι', -- ?? IOTA + perispomeni -- UNORTHODOX
	['ῗ'] = 'ϊ', -- ?? IOTA.with.dialytics + perispomeni -- UNORTHODOX
	-- with psile
	-- ?? psile okseia, psile perisp does not convert to IOTA WITH PSILI (U+1F38) in accent shifts
	['ἴ'] = 'ἰ', ['Ἴ'] = 'Ἰ', --	['Ἴ'] = '&#x1F38;'
	['ἶ'] = 'ἰ', ['Ἶ'] = 'Ἰ', -- ['Ἶ'] = '&#x1F38;',
	-- with dasia
	['ἵ'] = 'ἱ', ['Ἵ'] = 'Ἱ',
	['ἷ'] = 'ἱ', ['Ἷ'] = 'Ἱ',
	-- dialytics ???

	-- omicron (always brachy = short = never perispomene circumflex)
	['ό'] = 'ο', ['Ό'] = 'Ο',
	['ὄ'] = 'ὀ', ['Ὄ'] = 'Ὀ',
	['ὅ'] = 'ὁ', ['Ὅ'] = 'Ὁ',

	-- upsilon ambiguous dichrononon -- do I need +prosodies here?
	-- υ no spirits -- possible diaeresis (dialytics)
	['ύ'] = 'υ', ['Ύ'] = 'Υ',
	['ΰ'] = 'ϋ', -- ?? UPSILON.with.diaeresis + oxia -- UNORTHODOX
	['ῦ'] = 'υ', -- ?? UPSILON + perispomeni -- UNORTHODOX
	['ῧ'] = 'ϋ', -- ?? UPSILON.with.diaeresis + perispomeni -- UNORTHODOX (parallel to ῗ)
	-- with psile
	['ὔ'] = 'ὐ',
	['ὖ'] = 'ὐ',
	-- with daseia
	['ὕ'] = 'ὑ', ['Ὕ'] = 'Ὑ',
	['ὗ'] = 'ὑ', ['Ὗ'] = 'Ὑ',

	-- ω omega (always macron = long)
	['ώ'] = 'ω', ['Ώ'] = 'Ω',
	['ῴ'] = 'ῳ', -- ?? OMEGA.with.i + oxeia -- UNORTHODOX
	['ῶ'] = 'ω',
	['ῷ'] = 'ῳ',
	-- with psile
	['ὤ'] = 'ὠ', ['Ὤ'] = 'Ὠ', ['ᾤ'] = 'ᾠ', ['ᾬ'] = 'ᾨ',
	['ὦ'] = 'ὠ', ['Ὦ'] = 'Ὠ', ['ᾦ'] = 'ᾠ', ['ᾮ'] = 'ᾨ',
	-- with daseia
	['ὥ'] = 'ὡ', ['Ὥ'] = 'Ὡ', ['ᾥ'] = 'ᾡ', ['ᾭ'] = 'ᾩ',
	['ὧ'] = 'ὡ', ['Ὧ'] = 'Ὡ', ['ᾧ'] = 'ᾡ', ['ᾯ'] = 'ᾩ',
}

-- -- place accent (okseia) on unaccented
-- for unaccented-to-perispomeni circumflex (for polytonic): see oxia_to_perispomeni
-- ?? NEED: get more pairs & all UNORTHODOX
--
-- Lookup table: unaccented vowel -> the same vowel carrying an acute accent.
-- Breathings, iota subscript and dialytics are kept on the result.
export.unaccented_to_oxia = {
	-- alpha
	['α'] = 'ά', ['Α'] = 'Ά',
	['ᾳ'] = 'ᾴ',
	['ἀ'] = 'ἄ',
	['ἁ'] = 'ἅ',
	-- epsilon
	['ε'] = 'έ', ['Ε'] = 'Έ',
	['ἐ'] = 'ἔ',
	['ἑ'] = 'ἕ',
	-- eta
	['η'] = 'ή', ['Η'] = 'Ή',
	['ῃ'] = 'ῄ',
	['ἠ'] = 'ἤ',
	['ἡ'] = 'ἥ',
	-- iota
	['ι'] = 'ί', ['Ι'] = 'Ί',
	['ϊ'] = 'ΐ',
	['ἰ'] = 'ἴ',
	['ἱ'] = 'ἵ',
	-- omicron
	['ο'] = 'ό', ['Ο'] = 'Ό',
	['ὀ'] = 'ὄ',
	['ὁ'] = 'ὅ',
	-- upsilon
	-- Capital upsilon takes the acute form Ύ, like every other capital in
	-- this table; the grave form (Ὺ) does not belong in an oxia mapping.
	['υ'] = 'ύ', ['Υ'] = 'Ύ',
	['ϋ'] = 'ΰ',
	-- with psile
	['ὐ'] = 'ὔ',
	-- with daseia
	['ὑ'] = 'ὕ',
	-- omega
	['ω'] = 'ώ', ['Ω'] = 'Ώ',
	['ῳ'] = 'ῴ',
	-- with psile
	['ὠ'] = 'ὤ',
	['ᾠ'] = 'ᾤ', -- ᾤα
	-- with daseia
	['ὡ'] = 'ὥ',
}

-- -- replace perispomeni (circumflex) with okseia (acute)
-- this is for polytonic
--
-- Lookup table: vowel with perispomeni -> the same vowel with an acute accent.
-- Breathings and iota subscript are kept on the result.
export.perispomeni_to_oxia = {
	-- alpha
	['ᾶ'] = 'ά',
	['ᾷ'] = 'ᾴ',
	-- with psile
	['ἆ'] = 'ἄ', ['Ἆ'] = 'Ἄ',
	['ᾆ'] = 'ᾄ',
	-- with daseia
	['ἇ'] = 'ἅ', ['Ἇ'] = 'Ἅ',
	['ᾇ'] = 'ᾅ',
	-- eta
	['ῆ'] = 'ή',
	['ῇ'] = 'ῄ',
	-- with psile
	['ἦ'] = 'ἤ', ['Ἦ'] = 'Ἤ',
	['ᾖ'] = 'ᾔ',
	-- with daseia
	['ἧ'] = 'ἥ', ['Ἧ'] = 'Ἥ',
	['ᾗ'] = 'ᾕ',
	-- iota
	['ῖ'] = 'ί',
	-- with psile
	['ἶ'] = 'ἴ', ['Ἶ'] = 'Ἴ', -- psile perispomeni (capital is U+1F3E)
	-- and dialytics?
	-- with daseia
	['ἷ'] = 'ἵ', ['Ἷ'] = 'Ἵ',
	-- and dialytics?
	-- upsilon
	['ῦ'] = 'ύ',
	-- and dialytics?
	-- with psile
	['ὖ'] = 'ὔ',
	-- with daseia
	['ὗ'] = 'ὕ', ['Ὗ'] = 'Ὕ',
	-- omega
	['ῶ'] = 'ώ',
	['ῷ'] = 'ῴ',
	-- with psile
	['ὦ'] = 'ὤ', ['Ὦ'] = 'Ὤ',
	['ᾦ'] = 'ᾤ',
	-- with daseia
	['ὧ'] = 'ὥ', ['Ὧ'] = 'Ὥ',
	['ᾧ'] = 'ᾥ',
}

-- -- ?? add all missing capitals, add unorthodox?
-- replace oxeia (acute) with perispomene (circumflex)
--
-- Lookup table: vowel with an acute accent -> the same vowel with perispomeni.
-- Only α η ι υ ω appear as keys; lower case only for now.
export.oxia_to_perispomeni = {
	-- alpha
	['ά'] = 'ᾶ', ['ᾴ'] = 'ᾷ',
	['ἄ'] = 'ἆ', ['ᾄ'] = 'ᾆ',
	['ἅ'] = 'ἇ', ['ᾅ'] = 'ᾇ',
	-- eta
	['ή'] = 'ῆ', ['ῄ'] = 'ῇ',
	['ἤ'] = 'ἦ', ['ᾔ'] = 'ᾖ',
	['ἥ'] = 'ἧ', ['ᾕ'] = 'ᾗ',
	-- iota
	['ί'] = 'ῖ',
	['ἴ'] = 'ἶ',
	['ἵ'] = 'ἷ',
	['ΐ'] = 'ῗ',
	-- upsilon
	['ύ'] = 'ῦ',
	['ὔ'] = 'ὖ',
	['ὕ'] = 'ὗ',
	-- omega
	['ώ'] = 'ῶ', ['ῴ'] = 'ῷ',
	['ὤ'] = 'ὦ', ['ᾤ'] = 'ᾦ',
	['ὥ'] = 'ὧ', ['ᾥ'] = 'ᾧ',
}

-- --         c) diphthongs and digraphs (2-vowel-sequences)              -- --

-- -- these are diphthongs = 2 vowels together as one
-- Plain list (array) of the unaccented two-vowel sequences treated as one unit.
export.digraphs = { 'αι', 'ει', 'οι', 'αυ', 'ευ', 'ηυ', 'ου' }
-- υι ?? is a diphthong, only in polytonic
-- modern synizeses: εια, ειο, υα (γυαλί),

-- -- Move accent backwards. This is called recessive accent.
--[=[
-- ?? Do i NEED? In polytonic we may have αΐ to άι OR αΐ to άϊ (with redundant, needless dialytics at second letter). BOTH exist.
-- at the moment do as in monotonic
]=]--
-- Lookup table: vowel sequence accented on its last vowel -> the same
-- sequence with the acute moved one vowel back (and the dialytics dropped).
export.digraphs_accent_back = {
	['αΐ'] = 'άι',
	['εΐ'] = 'έι',
	['οΐ'] = 'όι',
	['αΰ'] = 'άυ',
	['εΰ'] = 'έυ',
	['ουί'] = 'ούι',
}	-- ?? oυϊ with accent only in polytonic?

-- -- Convert modern greek diphthongs (pronounced as one syllable) to two separate vowels:
-- Lookup table: sequence accented on its first vowel -> unaccented sequence
-- whose second vowel carries dialytics.
export.digraphs_accented_to_unaccented = {
	['άι'] = 'αϊ',
	['έι'] = 'εϊ',
	['όι'] = 'οϊ',
	['άυ'] = 'αϋ',
	['έυ'] = 'εϋ',
	['όυ'] = 'οϋ',
}	-- ήυ ??
-- ['ύι'] = 'υϊ', not in nouns: it does not occur in nouns, only in the adjective δρύινος.
-- Conversely, it would put dialytics on βούισμα, βουΐσματα. Polytonic?

return export

--[=[
EXPLANATIONS
Conversions of greek characters unaccented <--> accented vowels or digraphs
i) for monotonic script: only one accent: oxia acute ⟨ ΄ ⟩
ii) for polytonic script:
The diacritics:
Accents:
	τόνος tonos (οξεία oxia, acute) ⟨ ´ ⟩ is now accepted as identical to the modern accent TONOS and the latin acute accent: ⟨ ´ ⟩.
		So, polytonic includes the functions of monotonic.
		CAREFUL: here, ALL tonos = oxia must NEVER be a VERTICAL line
		FORBID all font-families that present tonos with a little vertical line (like Verdana)
	περισπωμένη perispomeni ( ῀ ) similar but not identical to the latin circumflex ( ˆ )
	(The βαρεία, grave accent ( ˋ )  is used only in texts, not isolated words)
Breathings πνεύματα:
	ψιλή, psile, soft breathing ( ᾿ )
	δασεία, daseia, rough breathing ( ῾ )
διαίρεσις diaeresis or διαλυτικά dialytics: splits digraph-vowels
υπογεγραμμένη subscript ιώτα iota
For more, see https://en.wiktionary.org/wiki/Module:grc-utilities
Prosody is used visibly only for Ancient Greek (and Hellenistic Koine)
 * μακρόν (macron) or βραχύ (breve)

Ref
 * https://en.wiktionary.org/wiki/Module:grc-utilities/data
 * https://www.fileformat.info/info/unicode/block/greek_extended/list.htm
 * https://en.wikipedia.org/wiki/Greek_script_in_Unicode
 * https://en.wikipedia.org/wiki/Greek_alphabet#Greek_in_Unicode
]=]--