-- Module:User:Sarri.greek/grk-stems

-- 2024.03.04. wikt:en:User:Sarri.greek
--[=[
Module:User:Sarri.greek/grk-stems-test
CREATE STEMS for any Greek word, polytonic or monotonic (explanations at the bottom of the /data page).
All data at wikt:en:Module:User:Sarri.greek/grk-stems/data
Example: from the word δημοκρατία we extract the stem δημοκρατί
	we use wikt:en:Module:User:Sarri.greek/stems function word_minus_1end_i
	by writing require("Module:User:Sarri.greek/stems").word_minus_1end_i(lemma=δημοκρατία)
Create variations with the following functions:
To do this, we first NEED to KNOW whether there are 2-vowel sequences, called diphthongs (2 vowels together) or digraphs (2 vowels not pronounced together),
and whether there is a synizesis for digraphs (this is not for Ancient Greek).
Check
 * stem = the stem as it appears at PAGENAME or lemma= of our choice. δημοκρατί
 * stem0 = the stem with NO accents = δημοκρατι
 * stem1 = the stem0 with accent on the 1st syllable from the end (the last syllable ultima) δημοκρατί
 * stem2 = the stem0 with accent on the 2nd syllable from the end (penultima) δημοκράτι -- does not occur
 * stem3 = the stem0 with accent on the 3rd syllable from the end δημόκρατι -- does not occur
 * ox = oxia (acute) e.g. stem1ox
 * per = perispomeni (circumflex) e.g. stem1per
 * wikt:en:Module:grc-utilities & wikt:en:Module:grc-utilities/data
 * also see wikt:en:Module:typing-aids/data/grc
 * wikt:el:Module:grk-stems & wikt:el:Module:grk-stems/data
 * check Module:grc-utilities/data & tables at wikt:en:Wiktionary:About_Ancient_Greek
 * Unicode Block “Greek and Coptic” = https://www.compart.com/en/unicode/block/U+0370
 * Unicode Block “Combining Diacritical Marks” = https://www.compart.com/en/unicode/block/U+0300
 * Unicode Block “Greek Extended” (polytonic) = https://www.compart.com/en/unicode/block/U+1F00

CONTENTS a) FUNCTIONS		1. removeaccent has zero accent_0 		2. accent_1 = put accent on the ultima (last syllable)		3. accent_2 = put accent on the penultima (2nd syllable before last) without synizesis		4. accent_2syn = put accent on the penultima, asking editor for synizesis number 		5. Convert perispomene_to_oxia (circumflex to acute)		6. Convert oxia_to_perispomene (acute to circumflex)	b) export functions word0 word1 word2syn word2 word_ox Get stems with wikt:en:Module:User:Sarri.greek/stems c) export functions for stems (with minus= letters from end)	stem, stem0, stem1, stem2syn, stem2, stem_ox, stem_p ]=]--

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

-- module_path is the page name of this module; m_data is loaded from its /data subpage
-- and holds the character-conversion tables used by the functions below.
local module_path = 'Module:User:Sarri.greek/grk-stems' local m_data = require(module_path .."/data") -- all character conversions & explanations

-- --                          a) FUNCTIONS                               -- --

-- ===================== remove accent = accent_0 for stem0 - see b) EXPORTs
--- Remove the accent from a word.
-- Two cases, in this order:
--   1. The word contains one of the accented digraphs listed in
--      m_data.digraphs_accented_to_unaccented (e.g. 'άι' -> 'αϊ', 'όι' -> 'οϊ'):
--      the first occurrence is replaced (πράυνσ- -> πραϋνσ-, ρόιδο -> ροϊδο)
--      and the word is returned without any further conversion.
--   2. Otherwise every character that has an entry in
--      m_data.accented_to_unaccented (e.g. 'ά' -> 'α', 'Ά' -> 'Α', 'ᾴ' -> 'ᾳ')
--      is replaced by that entry; all other characters are copied unchanged.
-- NOTE (from the original author): capitals with diacritics such as Ἶσις were
-- reported as not being converted -- presumably missing from the /data table; verify there.
-- @param word string to convert
-- @return the converted string (exactly one return value)
function removeaccent(word)
	-- The digraph test does not depend on the current character, so it runs
	-- once up front instead of once per character.
	for doubleaccented, newvalue in pairs(m_data.digraphs_accented_to_unaccented) do
		if mw.ustring.find(word, doubleaccented) ~= nil then
			-- gsub returns (string, count). The parentheses keep only the string;
			-- without them the count was returned too and showed up as a stray
			-- "1" after the word (πραϋνση1).
			return (mw.ustring.gsub(word, doubleaccented, newvalue, 1)) -- replace once
		end
	end

	-- No accented digraph: convert character by character.
	local wordproduced = ''
	for codepoint in mw.ustring.gcodepoint(word) do
		local mychar = mw.ustring.char(codepoint)
		local convertedchar = m_data.accented_to_unaccented[mychar]
		if convertedchar ~= nil then
			-- accented character with a known conversion
			wordproduced = wordproduced .. convertedchar
		else
			-- anything else is kept as it is
			wordproduced = wordproduced .. mychar
		end
	end
	return wordproduced
end

-- ===================== accent_ultima, place accent OXIA on the last vowel
-- for Template:stem1 -- apply on stem0 to get oxia at the last vowel
-- previously at el.wiktionary by User:Flyax
--- Put an oxia on the last convertible character of a word.
-- The word is scanned from its last character backwards. The first character
-- that has an entry in m_data.unaccented_to_oxia is replaced by that entry and
-- the scan stops. The input is expected to carry no accent already.
-- @param word string without accents (e.g. the result of removeaccent)
-- @return the word with one character replaced, or the word unchanged when no
--         character has an entry in the table
function accent_1(word)
	-- All working variables are local: previously they leaked into the global
	-- environment and could clash with the other functions of this module.
	local stempoint = mw.ustring.len(word)
	local wordproduced = ""
	while stempoint > 0 do
		local mychar = mw.ustring.sub(word, stempoint, stempoint)
		local newchar = m_data.unaccented_to_oxia[mychar]
		if newchar ~= nil then
			-- sub(word, 1, 0) is the empty string, so no special case is needed
			-- when the accented character is the first one.
			return mw.ustring.sub(word, 1, stempoint - 1) .. newchar .. wordproduced
		end
		wordproduced = mychar .. wordproduced
		stempoint = stempoint - 1
	end
	return wordproduced
end

-- ===================== accent_2 to penultima, no synizesis
--- Put an oxia on the second convertible character counted from the end.
-- The word is scanned backwards. The first character that has an entry in
-- m_data.unaccented_to_oxia is only noted; the second such character is
-- replaced by its entry. Every such character counts on its own: digraphs and
-- synizesis are not taken into account here.
-- @param stem string without accents
-- @return the string with one character replaced, or the string unchanged when
--         it contains fewer than two convertible characters
function accent_2(stem)
	-- Locals instead of the former accidental globals.
	local stempoint = mw.ustring.len(stem)
	local wordproduced = ""
	local profound = false -- true once the last convertible character has been passed
	while stempoint > 0 do
		local mychar = mw.ustring.sub(stem, stempoint, stempoint)
		local newchar = m_data.unaccented_to_oxia[mychar]
		if newchar ~= nil then
			if profound then
				return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. wordproduced
			end
			profound = true
		end
		wordproduced = mychar .. wordproduced
		stempoint = stempoint - 1
	end
	return wordproduced
end

-- ===================== accent_2syn to penultima + check synizesis
-- previously at el.wiktionary by Flyax, 2013
--- Move the accent of a word one position towards the beginning, optionally
-- skipping characters that form a synizesis (Modern Greek).
-- Steps:
--   1. If the word contains an accented digraph listed in
--      m_data.digraphs_accented_to_unaccented ('άι' -> 'αϊ', 'έι' -> 'εϊ', ...),
--      its first occurrence is replaced and the result is returned at once.
--   2. Otherwise the word is scanned backwards for the first character that has
--      an entry in m_data.accented_to_unaccented; that character loses its accent.
--   3. If syniz is given, that many characters before the former accent are
--      skipped. Examples from the original author: αιφνιδιάζω (syniz=nil),
--      μεριάζω (syniz=1), αδειάζω (syniz=2).
--   4. If the character before the resulting position plus the unaccented
--      character is listed in m_data.digraphs, that character is skipped too.
--   5. accent_1 puts the accent on what is left in front.
-- @param word  accented word
-- @param syniz number of characters to skip (number or numeric string);
--              nil, '' or a non-numeric value means no synizesis
-- @return the converted word (exactly one return value); the word is returned
--         unchanged when it contains no accented character
function accent_2syn(word, syniz)
	for doubleaccented, newvalue in pairs(m_data.digraphs_accented_to_unaccented) do
		if mw.ustring.find(word, doubleaccented) ~= nil then
			-- Parentheses drop gsub's second result (the replacement count).
			return (mw.ustring.gsub(word, doubleaccented, newvalue, 1)) -- replace once
		end
	end

	-- No accented digraph: look for the accent, starting from the end.
	local wordproduced = ""
	local stempoint = mw.ustring.len(word)
	local newchar = nil
	-- The loop is bounded by stempoint: the original looped forever when the
	-- word contained no accented character.
	while stempoint > 0 and newchar == nil do
		local mychar = mw.ustring.sub(word, stempoint, stempoint)
		newchar = m_data.accented_to_unaccented[mychar]
		wordproduced = (newchar or mychar) .. wordproduced
		stempoint = stempoint - 1
	end
	if newchar == nil then
		-- nothing to move
		return word
	end

	-- Synizesis: for modern VERBS the editor MUST define it.
	local count = nil
	if syniz ~= nil then
		count = tonumber(syniz) -- nil for '' or non-numeric input
	end
	if count ~= nil then
		-- Keep the count inside the part of the word that is still in front.
		if count > stempoint then count = stempoint end
		if count < 0 then count = 0 end
		wordproduced = mw.ustring.sub(word, stempoint + 1 - count, stempoint) .. wordproduced
		stempoint = stempoint - count
	end

	-- Check whether the accent was on the second letter of a digraph.
	local previous = mw.ustring.sub(word, stempoint, stempoint)
	local twoletters = previous .. newchar
	for _, v in pairs(m_data.digraphs) do
		if v == twoletters then
			wordproduced = previous .. wordproduced
			stempoint = stempoint - 1
			break -- one match is enough
		end
	end

	return accent_1(mw.ustring.sub(word, 1, stempoint)) .. wordproduced
end

-- ===================== Convert perispomeni (circumflex) to oxia (= acute = tonos) -- EXPORTED
-- previously at el.wiktionary by User:Flyax
--- Change a perispomeni to an oxia.
-- The string is scanned from the end; the first character that has an entry in
-- m_data.perispomeni_to_oxia is replaced by that entry.
-- @param stem string to convert
-- @return the converted string, or the string unchanged when no character has
--         an entry in the table
function export.perispomeni_to_oxia(stem)
	-- Locals instead of the former accidental globals.
	local stempoint = mw.ustring.len(stem)
	local wordproduced = ""
	while stempoint > 0 do
		local mychar = mw.ustring.sub(stem, stempoint, stempoint)
		local newchar = m_data.perispomeni_to_oxia[mychar]
		if newchar ~= nil then
			-- sub(stem, 1, 0) is '', which covers a match on the first character
			return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. wordproduced
		end
		wordproduced = mychar .. wordproduced
		stempoint = stempoint - 1
	end
	return wordproduced
end

-- ===================== Convert oxia (acute) to perispomeni (circumflex) -- EXPORTED
--- Change an oxia to a perispomeni.
-- The string is scanned from the end; the first character that has an entry in
-- m_data.oxia_to_perispomeni is replaced by that entry.
-- @param stem string to convert
-- @return the converted string, or the string unchanged when no character has
--         an entry in the table
function export.oxia_to_perispomeni(stem)
	-- Locals instead of the former accidental globals.
	local stempoint = mw.ustring.len(stem)
	local wordproduced = ""
	while stempoint > 0 do
		local mychar = mw.ustring.sub(stem, stempoint, stempoint)
		local newchar = m_data.oxia_to_perispomeni[mychar]
		if newchar ~= nil then
			-- sub(stem, 1, 0) is '', which covers a match on the first character
			return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. wordproduced
		end
		wordproduced = mychar .. wordproduced
		stempoint = stempoint - 1
	end
	return wordproduced
end

--- Change an oxia to a perispomeni (non-exported twin of
-- export.oxia_to_perispomeni, used by the word_p functions).
-- The string is scanned from the end; the first character that has an entry in
-- m_data.oxia_to_perispomeni is replaced by that entry.
-- @param stem string to convert
-- @return the converted string, or the string unchanged when no character has
--         an entry in the table
function oxia_to_perispomeni_invoke(stem)
	-- Locals instead of the former accidental globals.
	local stempoint = mw.ustring.len(stem)
	local wordproduced = ""
	while stempoint > 0 do
		local mychar = mw.ustring.sub(stem, stempoint, stempoint)
		local newchar = m_data.oxia_to_perispomeni[mychar]
		if newchar ~= nil then
			-- sub(stem, 1, 0) is '', which covers a match on the first character
			return mw.ustring.sub(stem, 1, stempoint - 1) .. newchar .. wordproduced
		end
		wordproduced = mychar .. wordproduced
		stempoint = stempoint - 1
	end
	return wordproduced
end

-- --                  b) EXPORT the 'word' functions                     -- -- -- AVOID, DO NOT create Templates. Invoke directly.

-- ============= word =============== -- -- For word = word as it appears at PAGENAMEs or lemma= of our choice -- get it from Module:User:Sarri.greek/stems, function: cut off any number of letters is needed

-- ============= word0: no accent, for Templates OR called from another MODULE =============== --
--- Remove the accent from a word.
-- @param arg either a frame object (recognised by its getParent method), in
--            which case the word is the parameter lemma= of the parent
--            (template) frame, or the word itself as a string
-- @return removeaccent applied to the word; when the word is empty or missing,
--         the trimmed title of the current page is used instead
function export.word0(arg)
	local term
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		-- getParent is a method and must be called: frame:getParent().args
		term = arg:getParent().args['lemma'] or ''
	else
		term = arg or '' -- a missing argument counts as empty
	end

	local lemma = term
	if lemma == '' then
		-- fall back to the page title, trimmed of surrounding whitespace
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return removeaccent(lemma)
end

-- ============= word0_i =============== --
--- Remove the accent from a word; for direct #invoke from pages (reads
-- frame.args, not the parent frame's arguments).
-- @param frame frame object; frame.args['lemma'] is the word
-- @return removeaccent applied to the word; when lemma= is empty or missing,
--         the trimmed title of the current page is used instead
function export.word0_i(frame)
	local lemma = frame.args['lemma'] or ''
	if lemma == '' then
		-- The original stored this fallback in args['lemma'] only and went on
		-- with the empty local, so the result was always empty.
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return removeaccent(lemma)
end

-- ============= word1: oxia on the 1st syllable from the end =============== --
--- Put the accent (oxia) on the ultima; for Templates or calls from another module.
-- The word is first stripped of its accent with removeaccent, then accent_1 is applied.
-- @param arg either a frame object (recognised by its getParent method), in
--            which case the word is the parameter lemma= of the parent
--            (template) frame, or the word itself as a string
-- @return the re-accented word; when the word is empty or missing, the trimmed
--         title of the current page is used instead
function export.word1(arg)
	local term
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		-- getParent is a method and must be called: frame:getParent().args
		term = arg:getParent().args['lemma'] or ''
	else
		term = arg or '' -- a missing argument counts as empty
	end

	local lemma = term
	if lemma == '' then
		-- fall back to the page title, trimmed of surrounding whitespace
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return accent_1(removeaccent(lemma))
end

-- ============= word1_i =============== --
--- Put the accent (oxia) on the ultima; for direct #invoke from pages (reads
-- frame.args, not the parent frame's arguments).
-- The word is first stripped of its accent with removeaccent, then accent_1 is applied.
-- @param frame frame object; frame.args['lemma'] is the word
-- @return the re-accented word; when lemma= is empty or missing, the trimmed
--         title of the current page is used instead
function export.word1_i(frame)
	local lemma = frame.args['lemma'] or ''
	if lemma == '' then
		-- The original stored this fallback in args['lemma'] only and went on
		-- with the empty local, so the result was always empty.
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return accent_1(removeaccent(lemma))
end

-- (author's open question: is word2 WITHOUT the synizesis check needed at all?)
-- ============= word2: oxia on the 2nd syllable from the end, WITHOUT synizesis =============== --
--- Put the accent on the penultima without checking for synizesis; for
-- Templates or calls from another module.
-- The word is first stripped of its accent with removeaccent, then accent_2 is applied.
-- @param arg either a frame object (recognised by its getParent method), in
--            which case the word is the parameter lemma= of the parent
--            (template) frame, or the word itself as a string
-- @return the re-accented word; when the word is empty or missing, the trimmed
--         title of the current page is used instead
function export.word2(arg)
	local term
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		-- getParent is a method and must be called: frame:getParent().args
		term = arg:getParent().args['lemma'] or ''
	else
		term = arg or '' -- a missing argument counts as empty
	end

	local lemma = term
	if lemma == '' then
		-- fall back to the page title, trimmed of surrounding whitespace
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return accent_2(removeaccent(lemma))
end

-- (author's open question: is word2 WITHOUT the synizesis check needed at all?)
-- ============= word2_i =============== --
--- Put the accent on the penultima without checking for synizesis; for direct
-- #invoke from pages (reads frame.args, not the parent frame's arguments).
-- The word is first stripped of its accent with removeaccent, then accent_2 is applied.
-- @param frame frame object; frame.args['lemma'] is the word
-- @return the re-accented word; when lemma= is empty or missing, the trimmed
--         title of the current page is used instead
function export.word2_i(frame)
	local lemma = frame.args['lemma'] or ''
	if lemma == '' then
		-- The original stored this fallback in args['lemma'] only and went on
		-- with the empty local, so the result was always empty.
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return accent_2(removeaccent(lemma))
end

-- ============= word_p = oxia to perispomeni, for Templates =============== --
--- Change the oxia of a word to a perispomeni; for Templates or calls from
-- another module.
-- @param arg either a frame object (recognised by its getParent method), in
--            which case the word is the parameter lemma= of the parent
--            (template) frame, or the word itself as a string
-- @return oxia_to_perispomeni_invoke applied to the word; when the word is
--         empty or missing, the trimmed title of the current page is used instead
function export.word_p(arg)
	local term
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		-- getParent is a method and must be called: frame:getParent().args
		term = arg:getParent().args['lemma'] or ''
	else
		term = arg or '' -- a missing argument counts as empty
	end

	local lemma = term
	if lemma == '' then
		-- fall back to the page title, trimmed of surrounding whitespace
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return oxia_to_perispomeni_invoke(lemma)
end

-- ============= word_p_i =============== --
--- Change the oxia of a word to a perispomeni; for direct #invoke from pages
-- (reads frame.args, not the parent frame's arguments).
-- @param frame frame object; frame.args['lemma'] is the word
-- @return oxia_to_perispomeni_invoke applied to the word; when lemma= is empty
--         or missing, the trimmed title of the current page is used instead
function export.word_p_i(frame)
	local lemma = frame.args['lemma'] or ''
	if lemma == '' then
		-- Assign the fallback to the local that is actually used below.
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return oxia_to_perispomeni_invoke(lemma)
end

-- --             SYNIZESIS   partially used, semiCHECKED                 -- --

-- ============= synizesis =============== --
--- Return the synizesis value supplied by the editor.
-- @param arg either a frame object (recognised by its getParent method), in
--            which case the parameter syniz= of the parent (template) frame is
--            returned ('' when it is missing), or any other value, which is
--            returned as it is
-- @return the synizesis value
-- The original also wrote the value to a global variable `syniz`; nothing reads
-- that global in a meaningful way, so it is no longer set.
function export.synizesis(arg)
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		-- getParent is a method and must be called: frame:getParent().args
		return arg:getParent().args['syniz'] or ''
	end
	return arg
end

-- ============= word2syn =============== --
-- examples@el.wikt: Module:el-nouns-decl/functions
--   ΔΙΑΛΥΤΙΚΑ DIALYTICS at κλίση #δύναμη (καταπράυνση), at κλίση #όνομα (θρόισμα, ράισμα, χάιδεμα)
-- example@el.wikt: ancient greek verb δανείζω used at Module:grc-verbs-cong/1 #καταφέρω
--- Put the accent on the penultima, taking synizesis into account; for
-- Templates or calls from another module.
-- @param arg   either a frame object (recognised by its getParent method), in
--              which case lemma= and syniz= are read from the parent (template)
--              frame, or the word itself as a string
-- @param syniz optional synizesis count, used only when arg is a string
--              (new, backward-compatible parameter)
-- @return accent_2syn applied to the word; when the word is empty or missing,
--         the trimmed title of the current page is used instead
function export.word2syn(arg, syniz)
	local term
	if type(arg) == 'table' and type(arg.getParent) == 'function' then
		local args = arg:getParent().args
		term = args['lemma'] or ''
		-- The original passed an undefined global `synizesis` (always nil) to
		-- accent_2syn, so the editor's syniz= was never used.
		syniz = args['syniz']
	else
		term = arg or '' -- a missing argument counts as empty
	end

	-- '' or a non-numeric value means "no synizesis"
	if syniz ~= nil then
		syniz = tonumber(syniz)
	end

	local lemma = term
	if lemma == '' then
		-- fall back to the page title, trimmed of surrounding whitespace
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end
	return accent_2syn(lemma, syniz)
end

-- ============= word2syn_i =============== --
--- Put the accent on the penultima, taking synizesis into account; for direct
-- #invoke from pages (reads frame.args, not the parent frame's arguments).
-- @param frame frame object; frame.args['lemma'] is the word and
--              frame.args['syniz'] the optional synizesis count
-- @return accent_2syn applied to the word; when lemma= is empty or missing,
--         the trimmed title of the current page is used instead
function export.word2syn_i(frame)
	local args = frame.args

	local lemma = args['lemma'] or ''
	if lemma == '' then
		-- Assign the fallback to the local that is actually used below.
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end

	-- Local instead of the former global `syniz`. '' or a non-numeric value
	-- becomes nil (no synizesis) instead of failing later in arithmetic.
	local syniz = args['syniz']
	if syniz ~= nil then
		syniz = tonumber(syniz)
	end
	return accent_2syn(lemma, syniz)
end

-- I DO NOT NEED THIS -- ============= word_ox =============== -- -- make a persipomeni to oxia on any word -- the result is precisely word_ox --[=[ function export.word_ox(frame) --	local args = frame:getParent.args	-- for Templates local args = frame.args				-- invoke -- lemma local lemma = args['lemma'] or '' PAGENAME = mw.title.getCurrentTitle.text if lemma == '' or lemma == nil then args['lemma'] = PAGENAME:match( "^%s*(.-)%s*$" ) -- trim empty spaces ( "^%s*(.-)%s*$" ) from beginning and end. Found at all Modules. else lemma = args['lemma'] end return export.perispomeni_to_oxia(word) end ]=]--

-- --                  c) EXPORT the 'stem' functions                     -- -- -- like 'word' functions but + parameter minus= for editor to cut off last letters. -- to use at inflection template etc

-- ================= stem for Templates =============== --
-- THIS is for any language
--- Stem extracted from a page title or from a word of the editor's choice, by
-- cutting letters off its end. For Templates (reads the parent frame's arguments).
-- @param frame frame object; the parent frame supplies
--              lemma= the word (default: trimmed title of the current page)
--              minus= number of letters to cut off the end (default: 0)
-- @return the word without its last `minus` letters,
--         e.g. minus=2 keeps the letters from the 1st up to the 3rd from the end
export['stem'] = function(frame)
	local args = frame:getParent().args

	local lemma = args['lemma'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end

	-- An explicit conversion avoids "attempt to perform arithmetic on field
	-- 'minus' (a string value)": empty, missing or non-numeric input means 0.
	local minus = tonumber(args['minus'] or '') or 0
	if minus < 0 then
		minus = 0 -- a negative value would otherwise return an empty string
	end
	return mw.ustring.sub(lemma, 1, -(math.floor(minus) + 1))
end

-- ================= stemi to invoke =============== --
-- (stem_i would be a more correct name; export['stem-i'] did not work for the author)
--- Stem extracted from a page title or from a word of the editor's choice, by
-- cutting letters off its end. For direct #invoke from pages (reads frame.args,
-- not the parent frame's arguments).
-- @param frame frame object; frame.args supplies
--              lemma= the word (default: trimmed title of the current page)
--              minus= number of letters to cut off the end (default: 0)
-- @return the word without its last `minus` letters,
--         e.g. minus=2 keeps the letters from the 1st up to the 3rd from the end
function export.stemi(frame)
	local args = frame.args

	local lemma = args['lemma'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end

	-- Empty, missing or non-numeric input means 0.
	local minus = tonumber(args['minus'] or '') or 0
	if minus < 0 then
		minus = 0 -- a negative value would otherwise return an empty string
	end
	return mw.ustring.sub(lemma, 1, -(math.floor(minus) + 1))
end

-- --- Suppose, you have a GREEK stem. You can -

-- ================= stem0 for Templates =============== --
--- REMOVE ACCENTS from a word, stem or page title, then cut letters off its
-- end. For Templates (reads the parent frame's arguments).
-- TESTWORDS: κορόιδο Άλπεις
-- @param frame frame object; the parent frame supplies
--              lemma= the word (default: trimmed title of the current page)
--              minus= number of letters to cut off the end (default: 0)
-- @return the unaccented word without its last `minus` letters,
--         e.g. minus=2 keeps the letters from the 1st up to the 3rd from the end
export['stem0'] = function(frame)
	local args = frame:getParent().args

	local lemma = args['lemma'] or ''
	if lemma == '' then
		lemma = mw.title.getCurrentTitle().text:match("^%s*(.-)%s*$")
	end

	-- The original called an undefined global `word0` here; removeaccent is the
	-- function that export.word0 applies to a plain string.
	local unaccented = removeaccent(lemma)

	-- Empty, missing or non-numeric input means 0.
	local minus = tonumber(args['minus'] or '') or 0
	if minus < 0 then
		minus = 0 -- a negative value would otherwise return an empty string
	end
	return mw.ustring.sub(unaccented, 1, -(math.floor(minus) + 1))
end

-- Return the table of exported functions as the value of this module.
return export