Module:User:Sarri.greek/grk-pronunciation

-- 2024.04.24. wikt:en:User:Sarri.greek my notes with !!, todo with ??
-- this is Module:grc-pronunciation
-- Now, default is med1+med2. Also view period=cla (all periods)
-- TESTS raw at Template:User:Sarri.greek/grk-IPA-mod
-- TEST for med at Template:User:Sarri.greek/gkm-IPA
--[=[!!?? STRUCTURE: This module produces at the moment consecutive lines for different language codes with pronunciations. Each line has 3 things / 'elements': 1) the period-label (linked) e.g. period-label-cla, period-label-koi1... (example: 5th c. BCE Attic) 2) the IPA-label+key (linked IPAkey): e.g.  IPA-label-cla, IPA-label-koi1... 3) the actual IPA e.g. IPA-cla, IPA-koi1.... At the moment these three are unbreakable. There are no args for the editor. One cannot call only one element. ONLY 3) or ONLY 1). One cannot omit an element e.g. with?? Module:IPA has something like split_output "raw" One cannot override IPA. One cannot add a second IPA. One cannot add inline notes before or after.

NEED for structure Need args for editor. tested: cla-only, el-only etc.	el: if override cannot be done, then we CANNOT have modern Greek IPA, because it is impossible to predict some pronunciations. The same goes for some med2 (with synizesis): it may or may not have synizesis depending on register. -- now done with tables at templates Then, editor can make a Template with any combination of the above Make a line for ONE period: inline (no break lines) period-label-cla .. IPA-label-cla .. IPA-cla in one line (no break lines) add parameters for all elements Combinations: functions for one period only Period-titles as cla-only, el-only, from Module:User:Sarri.greek/accent qualifier/data Combinations of many lines: if a next line follows, but But now, we need the same width column for Period-titles Better do it at a template Function: IPA only (for inflectional tables. It could be placed under the forms or under transliterations)
 * 1 Detach elements. Especially, detach the IPAs as named parameters
 * 2 OVERRIDE must be possible by editor, especially for med2, el loanwords or minor corrections
 * 3 second ipa (for optional second pronunciation)
 * -- NOTES for every period line

PROBLEMS at IPA = > > > > specific letters or clusters problems at DATA page. see /data page for /ks ps zm/ and others

(not Polyt? But it works equally well for monotonic ell too) or a message: Do you wish only el... ]=]--
-- ?? if input initial ΆάΈέΉήΊίΌόΎύΏώ  / ΑαΕεΗηΙιΟοΥυΩω / Αί αί Εί εί Οί οί Ού ού  / Αύ αύ Εύ εύ Ηύ ηύ  then, show warning for period=el

local export = {} local m_data = mw.loadData("Module:User:Sarri.greek/grk-pronunciation/data") --!! mw.loadData("Module:grc-pronunciation/data")

--required local mark_implied_length = require('Module:grc-accent').mark_implied_length local strip_accent = require('Module:grc-accent').strip_accent

local m_general_utils = require("Module:utilities")

-- Module:grc-utilities converts sequences of diacritics to the order required by this module, -- then replaces combining macrons and breves with spacing ones. local m_utils = require("Module:grc-utilities") local m_utils_data = require("Module:grc-utilities/data") local full_link = m_utils.link local tag_text = m_utils.tag local diacritics = m_utils_data.diacritics local rearrangeDiacritics = m_utils.pronunciationOrder

local m_IPA = require("Module:IPA") local m_a = require("Module:User:Sarri.greek/accent qualifier") --!! require("Module:accent qualifier") --!! put these under periods, according to each language local lang = require("Module:languages").getByCode("grc") --!!this works for all but not Links to Wikipedias local sc = require("Module:scripts").getByCode("Polyt") --!!this works fine for polytonic and monotonic

-- Pronunciation periods in chronological order. convert_term() emits every
-- period from the requested starting period onwards.
-- was {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
local periods = {'cla', 'koi1', 'koi2', 'med1', 'med2', 'el'}

-- Periods that make_table() puts on the condensed one-line summary.
--!! was = {'cla', 'koi2', 'byz2'}
local inlinePeriods = {'med1', 'med2'}

--!! add params for inline notes?

--!! there are commands, not some made up names
-- mw.title.getCurrentTitle is a function: it has to be called to obtain the
-- title object whose fields are read below.
local title = mw.title.getCurrentTitle()
local pagename = title.text		-- default term in export.create
local namespace = title.nsText	-- decode() writes debug logs on Module/Template pages

-- Short aliases for the Scribunto string helpers used throughout the module.
local rsplit = mw.text.split
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower	-- called by export.create, so it must be defined
local U = mw.ustring.char		-- builds the combining IPA diacritics

--- Return the i-th UTF-8 character of `s`.
-- A single character is fetched so often in this module that it gets its own
-- helper. An out-of-bounds (or zero) index yields the empty string.
-- @param s string to read from
-- @param i number, or a string convertible to a number (1-based position)
-- @return the character at position `i`, or '' when there is none
local function fetch(s, i)
	local remaining = tonumber(i)
	if type(remaining) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	if remaining == 0 then
		return ""
	end

	-- One lead byte followed by any continuation bytes is one character.
	for character in string.gmatch(s, "[\1-\127\194-\244][\128-\191]*") do
		remaining = remaining - 1
		if remaining == 0 then
			return character
		end
	end

	return ""
end

--!! these are IPA things.
-- Combining diacritics are tricky, so each one is built from its code point
-- and given a name.
local tie = U(0x35C)					-- tie bar
local nonsyllabic = U(0x32F)			-- combining inverted breve below
local high = U(0x341)					-- combining acute tone mark
local low = U(0x340)					-- combining grave tone mark
local rising = U(0x30C)					-- combining caron
local falling = diacritics.Latin_circum	-- combining circumflex
local midHigh = U(0x1DC4)				-- mid–high pitch
local midLow = U(0x1DC6)				-- mid–low pitch
local highMid = U(0x1DC7)				-- high–mid pitch
local voiceless = U(0x325)				-- combining ring below
local aspirated = 'ʰ'
local macron = '¯'
local breve = '˘'

--- Test whether `text` belongs to the character class named `X`.
-- The classes live in m_data.chars. Two of them ("frontDiphth" and
-- "Greekdiacritic") are stored as complete patterns and are only anchored;
-- every other class is a bare list of characters and is wrapped in [...]
-- before being anchored.
--!! e.g. ['frontDiphth'] = "[αο]ι",		['Greekdiacritic'] = m_utils_data.all,
-- @param text string to test (normally a single character); may be nil
-- @param X name of a class in m_data.chars; may be nil
-- @return false when either argument is missing; otherwise whatever
--         mw.ustring.find returns (truthy on a match, nil on none)
local function is(text, X)
	if not text or not X then
		return false
	end

	-- Kept local: a bare `pattern` would leak into the global table.
	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		pattern = "^" .. pattern .. "$"
	else
		pattern = "^[" .. pattern .. "]$"
	end

	return mw.ustring.find(text, pattern)
end

--!! env = environment
--!! MAKE diphthongs αι ει?? οι, αυ ευ ηυ but not when iota has dialytics (diaeresis)
--!! ['frontVowel'] = "ιηευ",	['frontDiphth'] = "[αο]ι",	['iDiaer'] = "ϊΐῒῗ",
-- Environment tests that decode() can invoke through the "~" operator.
-- Each function receives the term and a character index, and inspects the
-- character(s) FOLLOWING that index.
local env_functions = {
	-- Truthy when the next character is a front vowel, or the next two form
	-- a front diphthong whose second letter carries no diaeresis.
	preFront = function(term, index)
		local letter1, letter2 = fetch(term, index + 1), fetch(term, index + 2)
		return is(strip_accent(letter1), "frontVowel")
			or (is(strip_accent(letter1 .. letter2), "frontDiphth") and not is(letter2, "iDiaer"))
	end,

	-- True when the next character is an iota without diaeresis.
	isIDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		return strip_accent(letter) == 'ι' and not m_data[letter].diaer
	end,

	-- True when the next character is an upsilon without diaeresis.
	isUDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		return strip_accent(letter) == 'υ' and not m_data[letter].diaer
	end,

	-- True when the next character is a spacing macron or breve.
	hasMacronBreve = function(term, index)
		local letter = fetch(term, index + 1)	-- fetched once instead of twice
		return letter == macron or letter == breve
	end,
}

--[==[
	Evaluate a condition string from the data module for the character at
	position x of term.

	"If" and "and" statements. The condition is split at its FIRST "+" ("and")
	or "/" ("or"); the left part is decoded on its own and the remainder is
	decoded recursively, so operators group to the right and have no
	precedence over one another.

	In the simple (operator-free) statements:
	* A number represents the character under consideration: -1 is the
	  previous character, 0 is the current, and 1 is the next.
	* Equals sign (=) checks to see if the character under consideration is
	  equal to a character.
	* Period (.) plus a word sends the module to the corresponding entry in
	  the letter's data table.
	* Tilde (~) calls a function on the character under consideration, if the
	  function exists (see env_functions).

	Returns the truth value of the condition; nil when the condition contains
	none of the recognised operators.
]==]
local function decode(condition, x, term)
	if mw.ustring.find(condition, '[+/]') then
		-- Slash or plus sign preceded by something else, and followed by
		-- anything (including another sequence of operators and operands).
		local subcondition1, sep, subcondition2 = mw.ustring.match(condition, "^([^/+]-)([/+])(.*)$")
		if not (subcondition1 or subcondition2) then
			error('Condition "' .. tostring(condition) ..
				'" is improperly formed')
		end

		if sep == '/' then		-- logical operator: or
			return decode(subcondition1, x, term) or decode(subcondition2, x, term)
		elseif sep == '+' then	-- logical operator: and
			return decode(subcondition1, x, term) and decode(subcondition2, x, term)
		end
	elseif mw.ustring.find(condition, '=') then		-- check character identity
		local offset, char = unpack(mw.text.split(condition, "="))
		-- Debug trace, only on Module and Template pages.
		if namespace == "Module" or namespace == "Template" then
			mw.log(term, offset, char, x + offset, fetch(term, x + offset), char == fetch(term, x + offset))
		end
		return char == fetch(term, x + offset) -- out of bounds fetch gives ''
	elseif mw.ustring.find(condition, '%.') then	-- check character quality
		local offset, quality = unpack(mw.text.split(condition, "%."))
		local character = fetch(term, x + offset)
		return m_data[character] and m_data[character][quality] or false
	elseif mw.ustring.find(condition, '~') then		-- check character(s) using function
		local offset, func = unpack(mw.text.split(condition, "~"))
		return env_functions[func] and env_functions[func](term, x + offset) or false
	end
end

--- Resolve a (possibly conditional) data entry to a concrete value.
-- @param p a string or number (returned as is), or a sequential table whose
--          entries are tried in order. An entry is either a string/number
--          (returned immediately) or a pair {condition, result}; the result
--          is used when decode(condition, x, term) is truthy and may itself
--          be a nested entry.
-- @param x index of the character under consideration in `term`
-- @param term the term being converted
-- @return the resolved value, or nil when no entry of a table applies
local function check(p, x, term)
	if type(p) == 'string' or type(p) == 'number' then
		return p
	elseif type(p) == 'table' then
		-- This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' or type(possP) == 'number' then
				return possP
			elseif type(possP) == 'table' then
				-- This table is paired, with two values: a condition and a
				-- result. Both are kept local so they do not leak as globals.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					return (type(rawResult) == 'string') and rawResult or check(rawResult, x, term)
				end
			end
		end
	else
		error('"p" is of unrecongized type ' .. type(p))
	end
end

--?? handle lines/periods separately too?
--!! add notes NEED somewhere here
--- Convert a prepared Greek term into raw (unsyllabified) IPA per period.
-- @param term the term, already normalised by the caller
-- @param periodstart first period to output; nil or '' means all periods.
--        Periods listed before it in `periods` are skipped.
-- @return IPAs, a table period -> { IPA = string }
-- @return outPeriods, the list of periods present in IPAs, in order
local function convert_term(term, periodstart)
	if not term then
		error('The variable "term" in the function "convert_term" is nil.')
	end

	local IPAs = {}
	local outPeriods = {}

	-- Without a starting period every period is collected from the beginning.
	local start = not (periodstart and periodstart ~= "")
	for _, period in ipairs(periods) do
		if period == periodstart then
			start = true
		end
		if start then
			IPAs[period] = {}
			table.insert(outPeriods, period)
		end
	end

	local length, x = mw.ustring.len(term), 1
	while x <= length do
		local letter = fetch(term, x)
		local data = m_data[letter]
		if data then
			-- check to see if a multicharacter search is warranted
			local advance = data.pre and check(data.pre, x, term) or 0
			local p = data.p
			if advance ~= 0 then
				local sequence = mw.ustring.sub(term, x, x + advance)
				-- Fail with a readable message rather than a nil-index error.
				local sequenceData = m_data[sequence]
					or error('No data for the sequence "' .. sequence .. '".')
				p = sequenceData.p or data.p
			end
			for _, period in ipairs(outPeriods) do
				table.insert(IPAs[period], check(p[period], x, term))
			end
			x = x + advance
		end
		-- Characters without data are skipped (explicit pass).
		x = x + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], '') }
	end

	return IPAs, outPeriods
end

--- Find the index of the last character of the current syllable.
-- @param word IPA string being syllabified
-- @param nVowel index of the next vowel (or stress mark) in `word`
-- @param wordEnd truthy when there is no further vowel in `word`
-- @return index at which the syllable ends
--
--!! unbreakable or special consonants for el, ...
--!! ks ps & zm at el, med2, (med1?), I don't know about koi and grc
--!! check example λοκσι λοξι
--!! consDouble (ks ps if they represent ξ ψ but not κσ)
--!! if el τσ τζ = with ties t͡s d͡z
--!! if med, el αυφ αυβ do not repeat aff avv Same for ευ ηυ
-- TODO: a draft branch for the /zm/ cluster in el/med1/med2 used to sit
-- after the wordEnd test. It read a variable `period` that is not defined
-- anywhere (so it never ran) and called mw.ustring.match with the pattern
-- and start position swapped. It was taken out; to implement it, pass the
-- period in as an extra parameter and compare fetch(word, nVowel - k)
-- with the expected consonants.
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then
		error('The variable "word" in the function "find_syllable_break" is nil.')
	end

	if wordEnd then
		return mw.ustring.len(word)
	end

	local prev1 = fetch(word, nVowel - 1)	-- character right before the next vowel
	local prev2 = fetch(word, nVowel - 2)

	if is(prev1, "liquid") then
		if is(prev2, "obst") then
			return nVowel - 3
		elseif prev2 == aspirated and is(fetch(word, nVowel - 3), "obst") then
			return nVowel - 4
		else
			return nVowel - 2
		end
	elseif is(prev1, "cons") then
		return nVowel - 2
	elseif prev1 == aspirated and is(prev2, "obst") then
		return nVowel - 3
	elseif prev1 == voiceless and prev2 == 'r' then
		return nVowel - 3
	else
		return nVowel - 1
	end
end

--- Split one IPA word into syllables.
-- Syllables are joined with '.', and a stress mark 'ˈ' is moved to the front
-- of its syllable (where it replaces the '.'); on a monosyllable it is
-- removed altogether.
-- @param word IPA string of a single word
-- @return the syllabified word, or nil for an empty word
local function syllabify_word(word)
	local syllables = {}
	-- cVowel means "current vowel", nVowel "next vowel",
	-- sBreak "syllable break".
	local cVowel, nVowel, sBreak, stress, wordEnd, searching

	while word ~= '' do
		cVowel, nVowel, sBreak, stress, wordEnd = false, false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		local cVowelFound = false
		while not cVowel do
			local letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic)
					or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			elseif letter == '' then
				-- Ran past the end without meeting a vowel. Stop here so
				-- that the remainder becomes one syllable; without this
				-- the search would never terminate.
				cVowel = searching - 1
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			local letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--?? keep ψ = ps and ξ = ks together .ps. .ks. at med1 med2 el?
		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end

		table.insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil
	if #syllables > 0 then
		out = table.concat(syllables, '.')
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end

--- Syllabify the IPA of every requested period, in place.
-- @param IPAs table period -> { IPA = string }, as built by convert_term
-- @param periods list of the periods present in IPAs
-- @return the same IPAs table, with each IPA string split into syllables
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local words = {}
		for _, word in ipairs(rsplit(IPAs[period].IPA, ' ')) do
			local word_ipa = syllabify_word(word)
			-- Empty words (e.g. from doubled spaces) yield nil and are dropped.
			if word_ipa then
				table.insert(words, word_ipa)
			end
		end
		IPAs[period].IPA = table.concat(words, ' ')
	end
	return IPAs
end

--??TODO is everything automatically brachy for koi1 koi2 med1 med2?
--- Build the editor note about vowels of ambiguous length.
-- @param ambig list of the ambiguous vowels found in the term; may be nil
-- @param ambig_letter_list accepted for the callers' sake, currently unused
-- @return '' when nothing is ambiguous; otherwise the note followed by the
--         tracking category
local function make_ambig_note(ambig, ambig_letter_list)
	-- The table ambig is filled with all the ambiguous vowels that have been
	-- found in the term.
	if not ambig or #ambig == 0 then
		return ''
	end

	-- Number agreement: plural ending and pronoun for several vowels.
	local agr = (#ambig > 1) and { 's ', 'each one' } or { ' ', 'it' }

	return '\n Mark the vowel length for 5th century Attic of the ambiguous vowel' .. agr[1]
		.. mw.text.listToText(ambig)
		.. ' by adding a macron after ' .. agr[2]
		.. ' if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked.'
		.. ' [This message shows only in preview mode.] '
		--??TODO Take this Category off? no, but it should only apply to cla.
		.. m_general_utils.format_categories(
			{ 'Ancient Greek terms with incomplete pronunciation' }, lang)
		.. ' \n'
end

--- Assemble the final wikitext: a one-line summary of the inline periods
-- followed by one bulleted line per requested period.
-- @param IPAs table period -> { IPA = string }
-- @param ambig list of ambiguous vowels (may be nil), for the editor note
-- @param periods list of the periods present in IPAs
-- @param ambig_letter_list passed through to make_ambig_note
-- @return wikitext string
local function make_table(IPAs, ambig, periods, ambig_letter_list)
	--Final format
	local inlineProns = {}
	local fullProns = {}
	local periods2 = {}	-- set of the periods actually requested

	--[=[ Draft for per-period notes, kept for reference (not valid code yet):

	--!! add notes
	--?? need periodnotes td next to each periodline
	local inline_period_notes = {}
	local listOfNotes = {}
	for _, period in ipairs(periods) do
		--!! I change grc to grk at -- m_a.show({'grc-' .. period})
		table.insert(fullProns, '* ' .. m_a.show({'grk-' .. period}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items =, } .. notes_full)
		periods2[period] = true
	end
	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = '/' .. IPAs[period].IPA .. '/'
			table.insert(inlineProns, {pron = pron})
			table.insert(listOfProns, pron)
		end
	end
	for _, period in ipairs(periods) do
		if periods2[period] then
			local inline_notes = ' || ' .. {period .. '-note'}
			table.insert(inlineNotes, {inline_notes = inline_notes})
			table.insert(listOfNotes, inline_notes)
		end
	end
	]=]--

	for _, period in ipairs(periods) do
		--!! I change grc to grk at -- m_a.show({'grc-' .. period})
		-- NOTE(review): the value of `items` was missing in the source; it
		-- is rebuilt here in the same {pron = ...} shape that the inline
		-- list below uses -- confirm against Module:IPA.
		local pron = '/' .. IPAs[period].IPA .. '/'
		table.insert(fullProns,
			'* ' .. m_a.show({'grk-' .. period}) .. ' '
			.. m_IPA.format_IPA_full { lang = lang, items = { { pron = pron } } })
		periods2[period] = true
	end

	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			table.insert(inlineProns, { pron = '/' .. IPAs[period].IPA .. '/' })
		end
	end

	--?? is this the length of IPA or the titles or both? I fixed the titles, small and balanced.
	--?? THE IPA length it is too big. Need autofit? and after it a note for eachline?
	--?? and manual like med1=xyz
	-- The former `inlineIPAlength` estimate is gone: nothing used its value,
	-- and it indexed IPAs.med2 unconditionally, which raised an error
	-- whenever med2 was not among the requested periods (e.g. period=el).

	-- Summary line; left out when none of the inline periods was requested.
	local inline = ''
	if #inlineProns > 0 then
		inline = '\n \n* ' .. m_IPA.format_IPA_full { lang = lang, items = inlineProns, separator = ' → ' } .. ' '
	end

	--!! ambiguous for cla
	local full = '\n \n' .. table.concat(fullProns, '\n') .. make_ambig_note(ambig, ambig_letter_list) .. ' '

	--!! I do not want More/Less hide/show
	--!! take off switcher -- <div class="vsSwitcher"
	--?? And I need a note/per line. And a manual med2=xyz What is toggle... float right??
	return ' ' .. inline .. full .. ' '
end

--!! make period default = med1 at Template:gkm-IPA, not here
--- Entry point: build the pronunciation section for one term.
-- Can be invoked from a template (arguments are read from the parent frame)
-- or called from Lua with a plain table of arguments, as export.example does.
-- Arguments: [1] the term (defaults to the page name); period = first period
-- to show (defaults to "cla", i.e. all periods).
-- @param frame Scribunto frame object, or a plain table of arguments
-- @return wikitext string
function export.create(frame)
	--?? if ["period"] == 'med1' then default = "med1" else {default = "med1"} end},
	local params = {
		[1] = {default = pagename},
		["period"] = {default = "cla"},
	}

	--!! change "grc-pronunciation", "create")
	-- getParent is a method and has to be called to reach the template's
	-- arguments. A plain table has no getParent and is used as the arguments.
	local args = require("Module:parameters").process(
		frame.getParent and frame:getParent().args or frame,
		params, nil, "User:Sarri.greek/gkm-pronunciation", "create")

	-- mw.ustring.lower is called directly, so this function does not depend
	-- on a file-level alias being defined.
	local term = mw.ustring.lower(args[1])
	local old = term
	term = m_utils.standardDiacritics(term)
	term = mark_implied_length(term)

	-- Debug trace of what the normalisation changed.
	if mw.ustring.toNFD(old) ~= term then
		mw.log(old .. " > " .. term)
	end

	local decomposed = mw.ustring.toNFD(term)
	if rfind(decomposed, "[εοηω]" .. m_utils_data.diacritic .. "*["
		.. diacritics.spacing_macron .. diacritics.spacing_breve
		.. diacritics.breve .. diacritics.macron .. "]") then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	-- Ambiguous vowel length is only looked for when output starts at "cla".
	local ambig, ambig_letter_list
	if args.period == "cla" then
		ambig, ambig_letter_list = m_utils.findAmbig(term)
	end

	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)

	local IPAs, periods = convert_term(term, args.period)
	IPAs = syllabify(IPAs, periods)

	return make_table(IPAs, ambig, periods, ambig_letter_list)
end

--- Entry point: render a wikitable of example terms with their pronunciation.
-- Argument [1] is a list of terms separated by a comma plus whitespace; a
-- term may be followed by "(period=xxx)" to choose its starting period
-- (default "cla").
-- @param frame Scribunto frame object of the invoking template
-- @return wikitext of the table
function export.example(frame)
	--!! adding little title without bullet
	-- Not emitted yet.
	--?? why cannot i put here '|' .. little_title?
	-- NOTE(review): in wikitext a table caption is written as a "|+" row
	-- after the opening line -- confirm before wiring this in.
	local little_title = 'An approximation of non-dialectal pronunciation. (? = uncertain or debated) \n'

	local output = { '{| class="wikitable"' }

	local params = {
		[1] = {}
	}

	--!! changed "grc-pronunciation", "example")
	--!! it works WHATEVER i write
	-- getParent is a method and has to be called before reading .args.
	local args = require("Module:parameters").process(
		frame:getParent().args, params, nil,
		"User:Sarri.greek/grk-pronunciation", "example")

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs keeps the rows in the order in which the terms were given.
	for _, term in ipairs(terms) do
		--?? cla? I am not changing this, as everything works fine with it.
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Text before " (", or the whole term when there is no parenthesis.
		local entry = rmatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = export.create{ entry, ["period"] = period }
		--??	local periodnotes = ''
		table.insert(output, "\n|-\n| " .. link .. " || " .. IPA) --?? " || " .. periodnotes
	end

	table.insert(output, "\n|}")

	return table.concat(output)
end

return export --Things we still need: --Voicing of sigma around (after?) voiced stops. --Proper alerts for editors, especially on ambiguous vowels.

--?? that IPA is too long. Put notes at end like qual
--?? present lines/periods independently too
--?? el (perhaps med1 too) must have manual option for override (for loanwords)
--?? allow manuals e.g. med2=xxxx med2-note=as in Cretan dialect