-- Module:grc-pronunciation/sandbox

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

-- Dependencies. Each statement sits on its own line so that the explanatory
-- comment below cannot swallow the declarations that follow it.
local strip_accent = require('Module:grc-accent').strip_accent

-- Module:grc-utilities converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local m_utils = require("Module:grc-utilities")
local rearrangeDiacritics = m_utils.pronunciationOrder
local m_utils_data = require("Module:grc-utilities/data")
local chars = m_utils_data.named

-- Per-letter pronunciation data and character classes (read-only via mw.loadData).
local m_data = mw.loadData("Module:grc-pronunciation/sandbox/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local lang = require("Module:languages").getByCode("grc")
local sc = require("Module:scripts").getByCode("polytonic")

-- Link- and tag-formatting helpers borrowed from Module:grc-utilities.
local full_link, tag_text = m_utils.link, m_utils.tag

-- Every period code this module can produce output for, in iteration order.
local periods = {
	'cla',
	'koi1',
	'koi2',
	'byz1',
	'byz2',
}

-- The periods that appear in the condensed inline pronunciation line.
local inlinePeriods = {
	'cla',
	'koi2',
	'byz2',
}

-- Short aliases for the Scribunto text helpers used throughout this module.
local rsplit = mw.text.split
local rfind, rmatch, rsubn = mw.ustring.find, mw.ustring.match, mw.ustring.gsub
local usub, ulen = mw.ustring.sub, mw.ustring.len
local ulower, U = mw.ustring.lower, mw.ustring.char

--[==[
	Return the single character of `s` at position `i`.
	This module fetches one character at a time very often, hence the helper.
	An out-of-bounds fetch gives ''.
	NOTE(review): negative indices presumably count from the end of the
	string, as with string.sub, rather than giving '' -- confirm.
]==]
local function fetch(s, i)
	return usub(s, i, i)
end

-- Combining diacritics are tricky, so they are built from code points
-- instead of being typed literally. Each declaration is on its own line so
-- that this comment does not turn the declarations themselves into comment text.
local tie = U(0x35C)				-- tie bar
local nonsyllabic = U(0x32F)		-- combining inverted breve below
local high = U(0x341)				-- combining acute tone mark
local low = U(0x340)				-- combining grave tone mark
local rising = U(0x30C)				-- combining caron
local falling = chars.Latin_circum	-- combining circumflex
local midHigh = U(0x1DC4)			-- mid–high pitch
local midLow = U(0x1DC6)			-- mid–low pitch
local highMid = U(0x1DC7)			-- high–mid pitch
local voiceless = U(0x325)			-- combining ring below
local aspirated = 'ʰ'
local macron = '¯'
local breve = '˘'

--[==[
	Test whether `text` matches the character class named `X` in
	m_data.chars.

	text: the string to test (normally a single character).
	X:    key into m_data.chars.

	Returns false when either argument is missing; otherwise returns the
	result of the anchored pattern search (truthy on a match, nil if none).
	Raises an error, attributed to the caller, when m_data.chars has no
	entry for X.
]==]
local function is(text, X)
	if not text or not X then
		return false
	end

	-- Declared local: the original assigned to a global named "pattern".
	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)

	if X == "frontDiphth" or X == "Greekdiacritic" then
		-- These two entries are used as complete patterns, so they are
		-- only anchored.
		pattern = "^" .. pattern .. "$"
	else
		-- Every other entry is treated as the contents of a character set.
		pattern = "^[" .. pattern .. "]$"
	end

	return rfind(text, pattern)
end

--[==[
	Shared test behind isIDiphth and isUDiphth: the character right after
	`index` reduces to `base` once strip_accent is applied, and its entry
	in m_data does not have the `diaer` field set.
]==]
local function next_is_undiaeresized(term, index, base)
	local following = fetch(term, index + 1)
	return strip_accent(following) == base and not m_data[following].diaer
end

--[==[
	Functions that conditions in the data module can invoke by name using
	the "~" syntax (see decode). Each one receives the term and the index
	of the character under consideration.
]==]
local env_functions = {
	-- Truthy when what follows `index` is a front vowel, or a front
	-- diphthong whose second character is not in the "iDiaer" class.
	preFront = function(term, index)
		local first = fetch(term, index + 1)
		local second = fetch(term, index + 2)
		return is(strip_accent(first), "frontVowel")
			or (is(strip_accent(first .. second), "frontDiphth") and not is(second, "iDiaer"))
	end,

	-- The next character is an iota that does not carry a diaeresis.
	isIDiphth = function(term, index)
		return next_is_undiaeresized(term, index, 'ι')
	end,

	-- The next character is an upsilon that does not carry a diaeresis.
	isUDiphth = function(term, index)
		return next_is_undiaeresized(term, index, 'υ')
	end,

	-- The next character is a spacing macron or breve.
	hasMacronBreve = function(term, index)
		local following = fetch(term, index + 1)
		return following == macron or following == breve
	end,
}

--[==[
	Convert the offset part of a condition into a number. The offset is
	taken from a string, so it is converted explicitly; malformed data
	produces a message that names the offending condition.
]==]
local function condition_offset(offset, condition)
	return tonumber(offset)
		or error('Condition "' .. tostring(condition) .. '" has a non-numeric offset')
end

--[==[
	Evaluate a condition string against the character at index `x` of `term`.

	"If" and "and" statements. The condition is split at its FIRST
	operator: everything before it is one subcondition and everything
	after it (possibly containing more operators) is the other. Operators
	therefore group to the right and have no relative precedence:
	"a+b/c" means a and (b or c). The function calls itself until plain
	if-statements are reached.

	In if-statements:
	* A number represents the character under consideration:
		-1 is the previous character, 0 is the current, and 1 is the next.
	* Equals sign (=) checks to see if the character under consideration
		is equal to a character.
	* Period (.) plus a word sends the module to the corresponding entry
		in the letter's data table.
	* Tilde (~) calls a function on the character under consideration,
		if the function exists.

	Returns a truthy or falsy value. A condition that matches none of the
	forms above yields no value.
]==]
local function decode(condition, x, term)
	if rfind(condition, '[+/]') then
		-- Find slash or plus sign preceded by something else, and followed by anything
		-- (including another sequence of slash or plus sign and something else).
		local subcondition1, sep, subcondition2 = rmatch(condition, "^([^/+]-)([/+])(.*)$")
		if not sep then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end

		if sep == '/' then		-- logical operator: or
			return decode(subcondition1, x, term) or decode(subcondition2, x, term)
		else					-- logical operator: and
			return decode(subcondition1, x, term) and decode(subcondition2, x, term)
		end
	elseif rfind(condition, '=') then				-- check character identity
		local offset, char = unpack(rsplit(condition, "="))
		return char == fetch(term, x + condition_offset(offset, condition))
	elseif rfind(condition, '%.') then				-- check character quality
		local offset, quality = unpack(rsplit(condition, "%."))
		local character = fetch(term, x + condition_offset(offset, condition))
		return m_data[character] and m_data[character][quality] or false
	elseif rfind(condition, '~') then				-- check character(s) using function
		local offset, func = unpack(rsplit(condition, "~"))
		local handler = env_functions[func]
		-- The offset is only converted when the named function exists.
		return handler and handler(term, x + condition_offset(offset, condition)) or false
	end
end

--[==[
	Resolve a data entry `p` to its final value for the character at
	index `x` of `term`.

	p may be:
	* a string or a number, which is returned unchanged;
	* a sequential table whose entries are tried in order. A string or
		number entry is returned immediately. A table entry is a pair
		{ condition, result }: when decode() accepts the condition, the
		result is returned if it is a string, and resolved recursively
		otherwise.

	If no entry of a table applies, the function falls off the end and
	returns no value at all. Any other type for `p` raises an error.
]==]
local function check(p, x, term)
	if type(p) == 'string' or type(p) == 'number' then
		return p
	elseif type(p) == 'table' then
		-- This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' or type(possP) == 'number' then
				return possP
			elseif type(possP) == 'table' then
				-- This table is paired, with two values: a condition and a result.
				-- Both are locals; the original wrote them to globals.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					return (type(rawResult) == 'string') and rawResult or check(rawResult, x, term)
				end
			end
		end
	else
		error('"p" is of unrecongized type ' .. type(p))
	end
end

--[==[
	Convert `term` to an unsyllabified IPA string per period.

	term:        the text to convert; an error is raised when it is nil.
	periodstart: optional period code. When it is nil or "", every entry
	             of `periods` is produced; otherwise output starts at the
	             first period equal to it and covers all later ones.

	Returns two values: a table mapping each produced period to
	{ IPA = <string> }, and the list of produced periods in order.
]==]
local function convert_term(term, periodstart)
	if not term then
		error('The variable "term" in the function "convert_term" is nil.')
	end

	local IPAs, outPeriods = {}, {}

	-- Collect from the very beginning unless a starting period was named.
	local collecting = not (periodstart and periodstart ~= "")
	for _, period in ipairs(periods) do
		collecting = collecting or period == periodstart
		if collecting then
			IPAs[period] = {}
			outPeriods[#outPeriods + 1] = period
		end
	end

	local length = ulen(term)
	local pos = 1
	while pos <= length do
		local data = m_data[fetch(term, pos)]
		-- Characters without a data entry are skipped.
		if data then
			-- check to see if a multicharacter search is warranted
			local advance = data.pre and check(data.pre, pos, term) or 0
			local p = (advance ~= 0) and m_data[usub(term, pos, pos + advance)].p or data.p
			for _, period in ipairs(outPeriods) do
				-- check() is kept as the direct final argument on purpose:
				-- it may return no value at all.
				table.insert(IPAs[period], check(p[period], pos, term))
			end
			pos = pos + advance
		end
		pos = pos + 1
	end

	-- Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], '') }
	end

	return IPAs, outPeriods
end

--[==[
	Find the index of the last character belonging to the current syllable.

	word:    the word being syllabified; an error is raised when it is nil.
	nVowel:  index of the next vowel (or stress mark) in `word`.
	wordEnd: truthy when no further vowel exists, in which case the whole
	         word is the syllable and nVowel is not consulted.

	The decision looks at the characters immediately before nVowel:
	liquids, consonants, obstruents, the aspiration mark, and a voiceless
	mark following 'r'.
]==]
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then
		error('The variable "word" in the function "find_syllable_break" is nil.')
	end

	if wordEnd then
		return ulen(word)
	end

	-- Character `n` positions before the next vowel.
	local function before(n)
		return fetch(word, nVowel - n)
	end

	if is(before(1), "liquid") then
		if is(before(2), "obst") then
			return nVowel - 3
		end
		if before(2) == aspirated and is(before(3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end

	if is(before(1), "cons") then
		return nVowel - 2
	end

	if before(1) == aspirated and is(before(2), "obst") then
		return nVowel - 3
	end

	if before(1) == voiceless and before(2) == 'r' then
		return nVowel - 3
	end

	return nVowel - 1
end

--[==[
	Split one word of IPA into syllables separated by '.'.

	A stress mark (ˈ) found inside a syllable is moved to the front of
	that syllable, where it replaces the '.' separator; in a monosyllabic
	word it is removed altogether.

	A word that contains no vowel at all is returned as a single syllable
	(the original searched past the end of such a word forever).

	Returns the syllabified word, or nil when `word` is empty.
]==]
local function syllabify_word(word)
	local syllables = {}

	-- cVowel means "current vowel", nVowel "next vowel",
	-- sBreak "syllable break".
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	-- These two were accidental globals in the original.
	local cVowelFound, letter

	while word ~= '' do
		cVowel, nVowel, sBreak, stress = false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		cVowelFound = false
		while not cVowel do
			letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic)
					or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie joins this vowel to what follows, so keep
					-- looking for the vowel that ends the nucleus.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				elseif letter == '' then
					-- Ran off the end without meeting a vowel: treat the
					-- remainder as the final syllable instead of looping.
					cVowel = searching - 1
					wordEnd = true
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end

		table.insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil
	if #syllables > 0 then
		out = table.concat(syllables, '.')
		-- A stress mark already marks the boundary, so drop the dot before it.
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end

--[==[
	Syllabify the IPA string of every listed period, in place.

	IPAs:    table mapping period -> { IPA = <string> }.
	periods: list of the periods to process.

	Each IPA string is split on single spaces, every word is passed
	through syllabify_word, words for which that returns nothing are
	dropped, and the remaining words are joined with spaces again.
	Returns the same IPAs table.
]==]
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local entry = IPAs[period]
		local words = {}
		for _, word in ipairs(rsplit(entry.IPA, ' ')) do
			local syllabified = syllabify_word(word)
			if syllabified then
				words[#words + 1] = syllabified
			end
		end
		entry.IPA = table.concat(words, ' ')
	end
	return IPAs
end

--[==[
	Build the editor-facing note about vowels of ambiguous length.

	ambig: list filled with all the ambiguous vowels that have been found
	       in the term.

	Returns '' when the list is empty; otherwise a message naming the
	vowels, worded for one vowel or for several.
]==]
local function make_ambig_note(ambig)
	if not (#ambig > 0) then
		return ''
	end

	-- Number agreement: noun suffix (with trailing space) and pronoun.
	local suffix, pronoun
	if #ambig > 1 then
		suffix, pronoun = 's ', 'each one'
	else
		suffix, pronoun = ' ', 'it'
	end

	return '\n Mark the vowel length of the ambiguous vowel' .. suffix
		.. mw.text.listToText(ambig)
		.. ' by adding a macron after ' .. pronoun
		.. ' if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked.'
		.. ' [This message shows only in preview mode.] \n'
end

--[==[
	Assemble the final wikitext for the pronunciation section.

	IPAs:    table mapping period -> { IPA = <string> }.
	ambig:   list of ambiguous vowels, forwarded to make_ambig_note.
	periods: list of the periods that were actually generated.

	The result has two parts: an inline line containing the phonemic
	transcriptions of those `inlinePeriods` that were generated, joined
	by " → ", followed by one qualified bullet per generated period and
	the ambiguity note.

	Changes from the original text of this block:
	* the per-period `items` argument was syntactically incomplete; it is
	  now a one-element list of the same { pron = '/…/' } shape that the
	  inline list uses;
	* the inline-length computation was never read (and carried an
	  unreachable `or ""`), so it and its helper list are gone.
]==]
local function make_table(IPAs, ambig, periods)
	--Final format
	local inlineProns = {}
	local fullProns = {}
	local generated = {}	-- set of the periods present in `periods`

	for _, period in ipairs(periods) do
		table.insert(fullProns, '* ' .. m_a.format_qualifiers(lang, {"grc-" .. period}) .. ' ' ..
			m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = '/' .. IPAs[period].IPA .. '/'}},
			})
		generated[period] = true
	end

	for _, period in ipairs(inlinePeriods) do
		if generated[period] then
			table.insert(inlineProns, {pron = '/' .. IPAs[period].IPA .. '/'})
		end
	end

	local inline = ' \n* ' ..
		m_IPA.format_IPA_full {
			lang = lang,
			items = inlineProns,
			separator = " → ",
		} .. " "

	local full = ' \n' .. table.concat(fullProns, '\n') .. make_ambig_note(ambig) .. ' '

	return ' ' .. inline .. full .. ' '
end

--[==[
	Entry point: generate the pronunciation section for a term.

	frame: either a Scribunto frame (arguments are then read from the
	       parent frame) or a plain table of arguments, which is how
	       export.example calls this function.

	Parameters:
	* 1 / w  : the term; defaults to the current page title.
	* period : first period to show; defaults to "cla".

	Returns the wikitext produced by make_table.
]==]
function export.create(frame)
	local params = {
		[1] = {alias_of = 'w'},
		-- getCurrentTitle is a function and has to be called to get the title object.
		["w"] = {default = mw.title.getCurrentTitle().text},
		["period"] = {default = "cla"},
	}

	-- A real frame has getParent; a plain argument table does not.
	local raw_args = frame.getParent and frame:getParent().args or frame
	local args = require("Module:parameters").process(raw_args, params)

	local term = ulower(args.w)

	-- Ambiguous vowels are only looked up when output starts at "cla".
	local ambig = {}
	if args.period == "cla" then
		ambig = m_utils.findAmbig(term)
	end

	-- Normalise final sigma and rho with smooth breathing to the plain letters.
	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)

	-- Named outPeriods so the module-level `periods` list is not shadowed.
	local IPAs, outPeriods = convert_term(term, args.period)
	IPAs = syllabify(IPAs, outPeriods)

	return make_table(IPAs, ambig, outPeriods)
end

--[==[
	Entry point: render a wikitable of example terms with their
	pronunciations.

	The first parameter of the parent frame is a list of terms separated
	by a comma plus whitespace. A term may be followed by "(period=XXX)"
	to choose the starting period; otherwise "cla" is used.

	Returns the wikitext of the table, one row per term in the order given.
]==]
function export.example(frame)
	local output = { '{| class="wikitable"' }

	local params = {
		[1] = {}
	}

	-- getParent is a method and has to be called to obtain the parent frame.
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs rather than pairs: rows must come out in the order given.
	for _, term in ipairs(terms) do
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Text before " (" when a parenthesised part exists, else the whole term.
		local entry = rmatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = export.create{ entry, ["period"] = period }
		table.insert(output, "\n|-\n| " .. link .. " || " .. IPA)
	end

	table.insert(output, "\n|}")

	return table.concat(output)
end

return export

-- Things we still need:
-- * Voicing of sigma around (after?) voiced stops.
-- * Proper alerts for editors, especially on ambiguous vowels.