Module:User:Theknightwho/en-pron

local concat = table.concat local gsub = string.gsub local insert = table.insert local match = string.match local max = math.max local remove = table.remove local split = mw.text.split local sub = string.sub local umatch = mw.ustring.match

-- Read-only rule/word-list data shared by the whole module.
local m_data = mw.loadData("Module:User:Theknightwho/en-pron/data")
local rules, list = m_data.rules, m_data.list

-- Forward declarations: both are assigned further down, but are referenced
-- as upvalues by functions defined before that point.
local phonemes, Word

--- Base class for phonemes.
-- Every entry of the `phonemes` table is an instance of this class and acts
-- as the prototype (metatable) of the phonemes found in a word.
local Phoneme = {}
Phoneme.__index = Phoneme

--- A phoneme stringifies to its `type` field.
function Phoneme:__tostring()
	return self.type
end

--- Creates an object whose metatable is the receiver.
-- @param data optional table of fields; it is modified in place and returned.
function Phoneme:new(data)
	return setmetatable(data or {}, self)
end

--- Turns this phoneme into another one by swapping its prototype.
-- Fields set directly on the instance (e.g. `level`) are kept.
-- @param phoneme key into the `phonemes` table.
function Phoneme:change(phoneme)
	local target = phonemes[phoneme]
	if target == nil then
		-- setmetatable(self, nil) would silently strip the prototype and
		-- only fail much later, far from the actual mistake.
		error("unknown phoneme: " .. tostring(phoneme), 2)
	end
	setmetatable(self, target)
end

-- Not yet implemented.
function Phoneme:is_pause()
end

-- Not yet implemented.
function Phoneme:is_pause2()
end

--- True if this phoneme is the first element of its parent.
function Phoneme:is_word_start()
	return self == self.parent[1]
end

--- True if this phoneme is the last element of its parent.
function Phoneme:is_word_end()
	return self == self.parent[#self.parent]
end

--- Returns true if any phoneme of the parent before index `i` has level 4;
-- returns nothing otherwise.
-- @param i index in the parent; the scan starts at `i - 1` and runs backwards.
function Phoneme:is_after_stress(i)
	for j = i - 1, 1, -1 do
		if self.parent[j].level == 4 then
			return true
		end
	end
end

--- Truthy when a level is set and is 3 or more.
function Phoneme:is_stressed()
	return self.level and self.level >= 3
end

--- Truthy when a level is set and is 1 or less.
function Phoneme:is_unstressed()
	return self.level and self.level <= 1
end

--- True when the level is exactly 0.
function Phoneme:is_diminished()
	return self.level == 0
end

--- True when the level is exactly 4.
function Phoneme:is_max_stress()
	return self.level == 4
end

--- Nil-safe attribute lookup.
-- Returns `phoneme` itself when it is nil or false, otherwise the value
-- stored under `attr`.
local function is(phoneme, attr)
	if not phoneme then
		return phoneme
	end
	return phoneme[attr]
end

-- Shared `process` behaviours. Many phonemes run exactly the same check
-- against the following phoneme, so these are defined once and reused.
-- The lookups go through the parent's `next` method, which is defined
-- outside this block; its arguments are passed through unchanged.

-- Appends a syllable boundary ("|") when `parent:next(1, true)` is a vowel.
local function hiatus(self)
	if is(self.parent:next(1, true), "vowel") then
		self.parent:append("|")
	end
end

-- Appends "r" when `parent:next(1)` is a vowel.
local function linking_r(self)
	if is(self.parent:next(1), "vowel") then
		self.parent:append("r")
	end
end

-- Appends ";" when `parent:next(1)` is a vowel.
local function glide(self)
	if is(self.parent:next(1), "vowel") then
		self.parent:append(";")
	end
end

-- Process function: always become `target`, then run `after` (if given).
local function becomes(target, after)
	return function(self)
		self:change(target)
		if after then
			after(self)
		end
	end
end

-- Process function: become `reduced` when diminished, then run `after`.
local function reduces_to(reduced, after)
	return function(self)
		if self:is_diminished() then
			self:change(reduced)
		end
		after(self)
	end
end

-- Process function: become `full` unless unstressed, otherwise become
-- `reduced` (when one is given); then run the hiatus check.
local function full_or_reduced(full, reduced)
	return function(self)
		if not self:is_unstressed() then
			self:change(full)
		elseif reduced then
			self:change(reduced)
		end
		hiatus(self)
	end
end

-- Process function: become `strong` when any of the next `lookahead`
-- phonemes (default 1) is stressed, otherwise become `weak`.
-- A missing following phoneme counts as not stressed rather than raising
-- an "attempt to index a nil value" error.
local function by_following_stress(strong, weak, lookahead)
	return function(self)
		for offset = 1, lookahead or 1 do
			local nxt = self.parent:next(offset, true)
			if nxt and nxt:is_stressed() then
				self:change(strong)
				return
			end
		end
		self:change(weak)
	end
end

-- Process function: become `voiced` after a vowel or voiced phoneme,
-- otherwise become `voiceless`.
local function by_preceding_voicing(voiced, voiceless)
	return function(self)
		if (
			is(self.parent:next(-1), "vowel") or
			is(self.parent:next(-1), "voiced")
		) then
			self:change(voiced)
		else
			self:change(voiceless)
		end
	end
end

phonemes = {
	-- Stress markers.
	["%%"] = Phoneme:new{ -- Elided.
		stress = true,
		level = -1,
	},
	-- Stress level 0 is used internally to mean diminished stress
	-- (see Phoneme:is_diminished).
	["%"] = Phoneme:new{ -- Unstressed.
		stress = true,
		level = 1,
	},
	-- Stress level 2 is used internally to mean no explicit stress.
	[","] = Phoneme:new{ -- Secondary.
		stress = true,
		level = 3,
		ipa = "ˌ",
		enPR = "′",
	},
	["'"] = Phoneme:new{ -- Primary.
		stress = true,
		level = 4,
		ipa = "ˈ",
		enPR = "′",
	},
	["''"] = Phoneme:new{ -- Primary (with priority).
		stress = true,
		level = 5,
	},
	["="] = Phoneme:new{ -- Stress on the previous syllable.
		stress = true,
	},
	["|"] = Phoneme:new{ -- Syllable boundary.
		enPR = "-",
		ipa = ".",
	},
	["_"] = Phoneme:new{
		pause = true,
	},

	-- Vowels.
	["a"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "ă", ipa = "æ",
		process = reduces_to("a#", hiatus),
	},
	["a2"] = Phoneme:new{
		vowel = true,
		process = becomes("a", hiatus),
	},
	["a#"] = Phoneme:new{
		vowel = true, unstressed = true,
		enPR = "ə", ipa = "ə",
		process = full_or_reduced("a"),
	},
	["a#2"] = Phoneme:new{
		vowel = true,
		process = function(self)
			if is(self.parent:next(2), "vowel") then
				self:change("a#")
			else
				self:change("a")
			end
			hiatus(self)
		end,
	},
	["aa"] = Phoneme:new{
		vowel = true,
		enPR = "ä", ipa = "ɑː",
		process = becomes("A:", linking_r),
	},
	["aI"] = Phoneme:new{
		vowel = true,
		enPR = "ī", ipa = "aɪ",
		process = function(self)
			if is(self.parent:next(1), "class") == "a" then
				self.parent:append(";")
			elseif is(self.parent:next(1, true), "vowel") then
				self.parent:append("|")
			end
		end,
	},
	["aI@"] = Phoneme:new{
		vowel = true,
		enPR = "ī-ə", ipa = "aɪ.ə",
		process = linking_r,
	},
	["aI3"] = Phoneme:new{
		vowel = true,
		process = becomes("aI@", linking_r),
	},
	["aU"] = Phoneme:new{
		vowel = true,
		enPR = "ou", ipa = "aʊ",
		process = hiatus,
	},
	["A:"] = Phoneme:new{
		vowel = true,
		enPR = "ä", ipa = "ɑː",
		process = linking_r,
	},
	["A@"] = Phoneme:new{
		vowel = true,
		process = becomes("A:", linking_r),
	},
	["A#"] = Phoneme:new{
		vowel = true,
		process = becomes("a", hiatus),
	},
	["A~"] = Phoneme:new{
		vowel = true,
		enPR = "äɴ", ipa = "ɑ̃",
		process = hiatus,
	},
	["@"] = Phoneme:new{
		vowel = true, unstressed = true,
		enPR = "ə", ipa = "ə",
		process = linking_r,
	},
	["@-"] = Phoneme:new{
		vowel = true, unstressed = true, nonsyllabic = true,
		enPR = "ə", ipa = "(ə)",
		process = hiatus,
	},
	["@2"] = Phoneme:new{
		vowel = true, unstressed = true,
		process = function(self)
			if is(self.parent:next(1), "vowel") then
				self:change("I")
				self.parent:append(";")
			else
				self:change("@")
			end
		end,
	},
	["@5"] = Phoneme:new{
		vowel = true, unstressed = true,
		process = function(self)
			if (
				is(self.parent:next(1), "vowel") or
				is(self.parent:next(1), "pause")
			) then
				self:change("U")
			else
				self:change("@")
			end
		end,
	},
	["@L"] = Phoneme:new{
		vowel = true, unstressed = true,
		enPR = "əl", ipa = "əl",
		process = hiatus,
	},
	["3"] = Phoneme:new{
		vowel = true,
		enPR = "ə", ipa = "ə",
		process = linking_r,
	},
	["3:"] = Phoneme:new{
		vowel = true,
		enPR = "ûr", ipa = "ɜː",
		process = reduces_to("@", linking_r),
	},
	["e#"] = Phoneme:new{
		vowel = true,
		process = becomes("E", hiatus),
	},
	["e@"] = Phoneme:new{
		vowel = true,
		enPR = "âr", ipa = "ɛə",
		process = linking_r,
	},
	["eI"] = Phoneme:new{
		vowel = true,
		enPR = "ā", ipa = "eɪ",
		process = hiatus,
	},
	["E"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "ĕ", ipa = "ɛ",
		process = function(self)
			if self:is_diminished() then
				if is(self.parent:next(1, true), "type") == "n" then
					self:change("@")
				else
					self:change("I") -- I2
				end
			end
			hiatus(self)
		end,
	},
	["E#"] = Phoneme:new{
		vowel = true,
		process = full_or_reduced("E", "@"),
	},
	["E/"] = Phoneme:new{
		vowel = true,
		process = function(self)
			local nxt = self.parent:next(1, nil, "vowel")
			if nxt and nxt:is_stressed() and self:is_unstressed() then
				self:change("@")
			else
				self:change("E")
			end
			hiatus(self)
		end,
	},
	["E2"] = Phoneme:new{
		vowel = true,
		process = becomes("E", hiatus),
	},
	["i"] = Phoneme:new{
		vowel = true, unstressed = true,
		enPR = "(ē)", ipa = "i",
		process = function(self)
			if is(self.parent:next(1), "vowel") then
				self.parent:append(";")
			elseif self.parent:nth(self, "vowel", true) ~= -1 then
				self:change("I")
			elseif self:is_stressed() then
				self:change("i:")
			end
		end,
	},
	["i:"] = Phoneme:new{
		vowel = true,
		enPR = "ē", ipa = "iː",
		process = glide,
	},
	["i@"] = Phoneme:new{
		vowel = true,
		enPR = "ē-ə", ipa = "i.ə",
		process = linking_r,
	},
	["i@3"] = Phoneme:new{
		vowel = true,
		enPR = "îr", ipa = "ɪə",
		process = linking_r,
	},
	["I"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "ĭ", ipa = "ɪ",
		process = glide,
	},
	["I2"] = Phoneme:new{
		vowel = true, unstressed = true,
		process = becomes("I", glide),
	},
	["I#"] = Phoneme:new{
		vowel = true, unstressed = true,
		enPR = "(ĭ)",
		process = becomes("I", glide),
	},
	["I2#"] = Phoneme:new{
		vowel = true, unstressed = true,
		process = becomes("I", glide),
	},
	["IR"] = Phoneme:new{
		vowel = true,
--		RP = phonemes["3:"].RP,
		enPR = "ûr",
	},
	["o@"] = Phoneme:new{
		vowel = true,
		enPR = "ôr", ipa = "ɔː",
		process = linking_r,
	},
	["oU"] = Phoneme:new{
		vowel = true,
		enPR = "ō", ipa = "əʊ",
		process = hiatus,
	},
	["oU#"] = Phoneme:new{
		vowel = true,
		process = function(self)
			if self:is_stressed() then
				self:change("0")
			elseif self:is_diminished() then
				self:change("@")
			else
				self:change("oU")
			end
			hiatus(self)
		end,
	},
	["0"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "ŏ", ipa = "ɒ",
		process = reduces_to("@", hiatus),
	},
	["0#"] = Phoneme:new{
		vowel = true,
		process = full_or_reduced("0", "@"),
	},
	["02"] = Phoneme:new{
		vowel = true,
		process = becomes("0", hiatus),
	},
	["O"] = Phoneme:new{
		vowel = true,
		enPR = "ô", ipa = "ɔː",
		process = reduces_to("@", hiatus),
	},
	["O:"] = Phoneme:new{
		vowel = true,
		enPR = "ô", ipa = "ɔː",
		process = reduces_to("@", hiatus),
	},
	["O2"] = Phoneme:new{
		vowel = true,
		process = becomes("0", hiatus),
	},
	["O@"] = Phoneme:new{
		vowel = true,
		enPR = "ôr", ipa = "ɔː",
		process = linking_r,
	},
	["OI"] = Phoneme:new{
		vowel = true,
		enPR = "oi", ipa = "ɔɪ",
		process = hiatus,
	},
	["O~"] = Phoneme:new{
		vowel = true,
		enPR = "ôɴ", ipa = "ɔ̃",
		process = hiatus,
	},
	["8"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "o͝o", ipa = "ʊ",
		process = hiatus,
	},
	["8@"] = Phoneme:new{
		vowel = true,
		enPR = "o͝or", ipa = "ʊə",
		process = linking_r,
	},
	["u:"] = Phoneme:new{
		vowel = true,
		enPR = "o͞o", ipa = "uː",
		process = hiatus,
	},
	["U"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "o͝o", ipa = "ʊ",
		process = function(self)
			if is(self.parent:next(-1, true), "type") == "j" then
				self:change("8")
			end
			hiatus(self)
		end,
	},
	["U@"] = Phoneme:new{
		vowel = true,
		enPR = "o͝or", ipa = "ʊə",
		process = function(self)
			if self:is_unstressed() then
				self:change("8@")
			end
			linking_r(self)
		end,
	},
	["V"] = Phoneme:new{
		vowel = true, short = true,
		enPR = "ŭ", ipa = "ʌ",
		process = reduces_to("@", hiatus),
	},
	["V#"] = Phoneme:new{
		vowel = true,
		process = full_or_reduced("V", "@"),
	},
	["VR"] = Phoneme:new{
		vowel = true,
--		RP = phonemes["3:"].RP,
		enPR = "ûr",
	},

	-- Syllabic consonants.
	["l-"] = Phoneme:new{
		vowel = true,
		enPR = "əl", ipa = "əl",
		process = hiatus,
	},
	["m-"] = Phoneme:new{
		vowel = true,
		process = hiatus,
	},
	["n-"] = Phoneme:new{
		vowel = true,
		process = hiatus,
	},
	["r-"] = Phoneme:new{
		vowel = true,
		process = hiatus,
	},

	-- Consonants.
	["b"] = Phoneme:new{
		voiced = true, bilabial = true, plosive = true,
	},
	["d"] = Phoneme:new{
		voiced = true, alveolar = true, plosive = true,
	},
	["d#"] = Phoneme:new{
		voiced = true, alveolar = true, plosive = true,
		process = by_preceding_voicing("d", "t"),
	},
	["d/dZ"] = Phoneme:new{
		voiced = true, alveolar = true, plosive = true,
		process = by_following_stress("d", "dZ"),
	},
	["d/dZ2"] = Phoneme:new{
		voiced = true, alveolar = true, plosive = true,
		process = by_following_stress("d", "dZ", 2),
	},
	["dZ"] = Phoneme:new{
		voiced = true, palatoalveolar = true, sibilant = true, affricate = true,
		enPR = "j", ipa = "d͡ʒ",
	},
	["D"] = Phoneme:new{
		voiced = true, dental = true, fricative = true,
		enPR = "th", ipa = "ð",
	},
	["f"] = Phoneme:new{
		voiceless = true, labiodental = true, fricative = true,
	},
	["g"] = Phoneme:new{
		voiced = true, velar = true, plosive = true,
		ipa = "ɡ",
	},
	["h"] = Phoneme:new{
		voiceless = true, glottal = true, fricative = true,
	},
	["j"] = Phoneme:new{
		voiced = true, palatal = true, approximant = true, liquid = true,
		enPR = "y",
	},
	[";"] = Phoneme:new{
		liquid = true, palatal = true,
		enPR = "-", ipa = ".",
	},
	["k"] = Phoneme:new{
		voiceless = true, velar = true, plosive = true,
	},
	["l"] = Phoneme:new{
		voiced = true, alveolar = true, lateral = true, approximant = true, liquid = true,
	},
	["l#"] = Phoneme:new{
		voiceless = true, alveolar = true, lateral = true, fricative = true,
		enPR = "l", ipa = "ɬ",
		process = becomes("l"),
	},
	["m"] = Phoneme:new{
		voiced = true, bilabial = true, nasal = true,
	},
	["n"] = Phoneme:new{
		voiced = true, alveolar = true, nasal = true,
		process = function(self)
			if is(self.parent:next(1, true), "velar") then
				self:change("N")
			end
		end,
	},
	["N"] = Phoneme:new{
		voiced = true, velar = true, nasal = true,
		enPR = "ng", ipa = "ŋ",
	},
	["p"] = Phoneme:new{
		voiceless = true, bilabial = true, plosive = true,
	},
	["r"] = Phoneme:new{
		voiced = true, alveolar = true, approximant = true, rhotic = true,
		ipa = "ɹ",
	},
	["s"] = Phoneme:new{
		voiceless = true, alveolar = true, sibilant = true, fricative = true,
	},
	["s/S"] = Phoneme:new{
		voiceless = true, alveolar = true, sibilant = true, fricative = true,
		process = by_following_stress("s", "S"),
	},
	["s/S2"] = Phoneme:new{
		voiceless = true, alveolar = true, sibilant = true, fricative = true,
		process = by_following_stress("s", "S", 2),
	},
	["S"] = Phoneme:new{
		voiceless = true, palatoalveolar = true, sibilant = true, fricative = true,
		enPR = "sh", ipa = "ʃ",
	},
	["t"] = Phoneme:new{
		voiceless = true, alveolar = true, plosive = true,
	},
	["t2"] = Phoneme:new{
		voiceless = true, alveolar = true, plosive = true,
		process = becomes("t"),
	},
	["t/S"] = Phoneme:new{
		voiceless = true, alveolar = true, plosive = true,
		process = by_following_stress("t", "S"),
	},
	["t/tS"] = Phoneme:new{
		voiceless = true, alveolar = true, plosive = true,
		process = by_following_stress("t", "tS"),
	},
	["t/tS2"] = Phoneme:new{
		voiceless = true, alveolar = true, plosive = true,
		process = by_following_stress("t", "tS", 2),
	},
	["tS"] = Phoneme:new{
		voiceless = true, palatoalveolar = true, sibilant = true, affricate = true,
		enPR = "ch", ipa = "t͡ʃ",
	},
	["T"] = Phoneme:new{
		voiceless = true, dental = true, fricative = true,
		enPR = "th", ipa = "θ",
	},
	["v"] = Phoneme:new{
		voiced = true, labiodental = true, fricative = true,
	},
	["w"] = Phoneme:new{
		voiced = true, velar = true, protruded = true, approximant = true, liquid = true,
	},
	["w#"] = Phoneme:new{
		voiceless = true, velar = true, protruded = true, approximant = true,
		enPR = "hw", ipa = "ʍ",
		process = becomes("w"),
	},
	["x"] = Phoneme:new{
		voiceless = true, velar = true, fricative = true,
		enPR = "ᴋʜ",
	},
	["z"] = Phoneme:new{
		voiced = true, alveolar = true, sibilant = true, fricative = true,
	},
	["z/Z"] = Phoneme:new{
		voiced = true, alveolar = true, sibilant = true, fricative = true,
		process = by_following_stress("z", "Z"),
	},
	["z#"] = Phoneme:new{
		voiced = true, alveolar = true, sibilant = true, fricative = true,
		process = by_preceding_voicing("z", "s"),
	},
	["z/2"] = Phoneme:new{
		voiced = true, alveolar = true, sibilant = true, fricative = true,
		process = function(self)
			if is(self.parent:next(-1), "sibilant") then
				self.parent:prepend("I") -- I2
				self:change("z")
			elseif (
				is(self.parent:next(-1), "vowel") or
				is(self.parent:next(-1), "voiced")
			) then
				-- Voiced context gives "z" and anything else gives "s",
				-- matching "z#" and "d#"; the two outcomes were
				-- previously swapped.
				self:change("z")
			else
				self:change("s")
			end
		end,
	},
	["Z"] = Phoneme:new{
		voiced = true, palatoalveolar = true, sibilant = true, fricative = true,
		enPR = "zh", ipa = "ʒ",
	},
	["?"] = Phoneme:new{
		voiceless = true, glottal = true, plosive = true,
		ipa = "(ʔ)",
	},
	[":"] = Phoneme:new{
		ipa = "ː",
	},
}

-- Turn every entry into a usable prototype: instances created from it with
-- `:new` look fields up on it, stringify via Phoneme.__tostring, and report
-- the entry's key as their `type`.
for k, p in pairs(phonemes) do
	p.__index = p
	p.__tostring = Phoneme.__tostring
	p.type = k
end

--- Converts the spelling in `self.str` into phonemes by scoring rules.
local Phonemizer = {}
Phonemizer.__index = Phonemizer

--- Wraps `data` as a Phonemizer.
-- The metatable is always `Phonemizer`, even when called on an instance.
-- @param data optional state table (e.g. `{str = "word"}`); defaults to a
-- fresh table, consistent with Phoneme:new.
function Phonemizer:new(data)
	return setmetatable(data or {}, Phonemizer)
end

function Phonemizer:check_list()
	-- TODO
end

--- Scores the main part of a rule against the text at `self.head`.
-- @param rule literal string that must appear at `self.head`.
-- @return false when the text differs, otherwise 21 points per character
-- minus 20 (1 for one character, 22 for two, ...).
function Phonemizer:main_rule(rule)
	if sub(self.str, self.head, self.head + #rule - 1) ~= rule then
		return false
	end
	return #rule * 21 - 20
end

--- Caches the rule character under `self.rule_ptr` in `self.rule_char`
-- and returns it ("" once the pointer has left the rule).
function Phonemizer:get_rule_char()
	self.rule_char = sub(self.rule, self.rule_ptr, self.rule_ptr)
	return self.rule_char
end

--- Moves the rule pointer one step in the current scan direction.
function Phonemizer:advance_rule_ptr()
	self.rule_ptr = self.rule_ptr + self.dir
end

-- Handlers for the special characters that can appear in a rule's pre- and
-- post-context. Each handler receives the Phonemizer and `this`, the
-- character of the word under `look_ptr`, and returns a score (a number) on
-- success or a falsy value on failure. Handlers that do not consume a
-- character of the word step `look_ptr` back, because the caller advances it
-- after every rule character.
local group_rules = {}

-- Builds the handler for a plain letter group: succeeds when `this` is a
-- member of `m_data[name]`, scoring `base` minus the current distance.
local function letter_group(name, base)
	return function(self, this)
		return m_data[name][this] and base - self.distance
	end
end

group_rules["A"] = letter_group("A", 20)
group_rules["B"] = letter_group("B", 20)
group_rules["C"] = letter_group("C", 19)
group_rules["F"] = letter_group("F", 20)
group_rules["Y"] = letter_group("Y", 20)

-- Like the letter groups above, but worth one more point when scanning
-- backwards.
group_rules["D"] = function(self, this)
	local base = self.dir == -1 and 21 or 20
	return m_data.D[this] and base - self.distance
end

-- Fails on a member of group A, and also at the end of the word when
-- scanning forwards with `suffix_vowel` set.
group_rules["K"] = function(self, this)
	if m_data.A[this] then
		return false
	end
	if this == "" and self.dir == 1 and self.suffix_vowel then
		return false
	end
	return 20 - self.distance
end

-- TODO: self.suffix_removed
group_rules["N"] = function(self)
	if self.dir ~= 1 or self.suffix_removed then
		return
	end
	self.look_ptr = self.look_ptr - self.dir
	return 1
end

-- Prefix: the remainder of the rule is stored as `rule_prefix` and the rule
-- pointer is moved to the end of the rule.
group_rules["P"] = function(self)
	if self.dir ~= 1 or self.suffix_removed then
		return
	end
	self.rule_prefix = sub(self.rule, self.rule_ptr + 1)
	self.rule_ptr = #self.rule
	return 0
end

-- Suffix: the remainder of the rule is stored as `rule_suffix`.
-- Note: don't match if there are no previous vowels and no prefix has been
-- removed.
group_rules["S"] = function(self)
	if self.dir ~= 1 then
		return
	end
	if not (self.vowels > 0 or self.prefix_removed) then
		return
	end
	if self.suffix_removed then
		return
	end
	local flags = sub(self.rule, self.rule_ptr + 1)
	-- If the suffix starts with a vowel, add the "a" modifier to it.
	if m_data.A[sub(self.str, self.head, self.head)] then
		flags = flags .. "a"
	end
	self.rule_suffix = flags
	self.rule_ptr = #self.rule
	return 0
end

-- Backwards only: succeeds when the part of speech is "verb".
group_rules["V"] = function(self)
	if self.dir ~= -1 then
		return
	end
	self.look_ptr = self.look_ptr - self.dir
	return self.pos == "verb" and 1
end

-- Succeeds when no member of group Y occurs between here and the edge of
-- the word in the scan direction.
group_rules["X"] = function(self, this)
	local ptr = self.look_ptr
	while this ~= "" do
		if m_data.Y[this] then
			return false
		end
		ptr = ptr + self.dir
		this = sub(self.str, ptr, ptr)
	end
	if self.dir == -1 then
		return 3
	end
	return 19 - self.distance
end

-- Succeeds on anything that is not a single alphanumeric character.
-- NOTE(review): `this` is one byte of the word; confirm how mw.ustring.match
-- treats a lone byte of a multi-byte character.
group_rules["Z"] = function(self, this)
	return not umatch(this, "^%w$") and 21 - self.distance
end

-- Backwards only: succeeds when `first_upper` is set.
group_rules["!"] = function(self)
	if self.dir == -1 and self.first_upper then
		self.look_ptr = self.look_ptr - self.dir
		return 1
	end
end

-- Forwards only; scores nothing.
group_rules["#"] = function(self)
	return self.dir == 1 and 0
end

group_rules["$"] = function(self, this)
	if self.dir == 1 then
		-- TODO
	end
end

-- Succeeds when `this` equals the character one step back against the scan
-- direction.
group_rules["%"] = function(self, this)
	local before = self.look_ptr - self.dir
	return sub(self.str, before, before) == this and 21 - self.distance
end

-- Backwards only: succeeds when `self.stresses` is above zero.
group_rules["&"] = function(self)
	if self.dir == -1 and self.stresses > 0 then
		self.look_ptr = self.look_ptr - self.dir
		return 19
	end
end

-- Always succeeds without consuming a character: adds 20 points.
group_rules["+"] = function(self)
	self.look_ptr = self.look_ptr - self.dir
	return 20
end

-- TODO: self.hyphen & self.hyphen_after
group_rules["-"] = function(self, this)
	local at_hyphen = this == "-" or this == "" and (
		self.dir == -1 and self.hyphen or
		self.dir == 1 and self.hyphen_after
	)
	return at_hyphen and 22 - self.distance
end

-- Any character at all (fails only beyond the edge of the word).
group_rules["."] = function(self, this)
	return this ~= "" and 20 - self.distance
end

-- Always succeeds without consuming a character: subtracts 20 points.
group_rules["<"] = function(self)
	self.look_ptr = self.look_ptr - self.dir
	return -20
end

-- One "@" per required syllable: consecutive "@" characters in the rule are
-- consumed here, then runs of group-Y characters are counted from
-- `look_ptr` to the edge of the word.
group_rules["@"] = function(self)
	local wanted = 1
	while sub(self.rule, self.rule_ptr + self.dir, self.rule_ptr + self.dir) == "@" do
		wanted = wanted + 1
		self:advance_rule_ptr()
	end
	local ptr = self.look_ptr
	local found = 0
	local in_vowel_run = false
	local this = sub(self.str, ptr, ptr)
	while this ~= "" do
		if m_data.Y[this] then
			-- Only the first member of a run counts.
			if not in_vowel_run then
				found = found + 1
			end
			in_vowel_run = true
		else
			in_vowel_run = false
		end
		ptr = ptr + self.dir
		this = sub(self.str, ptr, ptr)
	end
	return found >= wanted and 18 + wanted - self.distance
end

-- Edge of the word.
group_rules["_"] = function(self, this)
	if this ~= "" then
		return false
	end
	if self.dir == -1 then
		return 4
	end
	return 21 - self.distance
end

--- Scores the current rule character against the word at `look_ptr`.
-- Special characters are delegated to the `group_rules` handlers; any other
-- character must equal the character of the word.
-- @return a score, or a falsy value when the rule character does not match.
function Phonemizer:group_rules()
	local this = sub(self.str, self.look_ptr, self.look_ptr)
	local handler = group_rules[self.rule_char]
	if handler then
		return handler(self, this)
	end
	if self.rule_char ~= this then
		return false
	end
	return 21 - self.distance
end

--- Scores one context part (pre- or post-rule) of a rule.
-- @param rule the context string.
-- @param look_ptr position in `self.str` where matching starts.
-- @param dir 1 to scan forwards, -1 to scan backwards (the rule is then read
-- from its last character).
-- @param distance_iter amount added to `self.distance` per rule element.
-- @param open_bracket character that opens a group of alternatives in this
-- scan direction.
-- @param close_bracket character that closes such a group.
-- @return the total score, or false as soon as an element fails.
function Phonemizer:check_rule(rule, look_ptr, dir, distance_iter, open_bracket, close_bracket)
	self.rule = rule
	self.rule_prefix = nil
	self.rule_suffix = nil
	self.look_ptr = look_ptr
	self.dir = dir
	self.rule_ptr = dir
	self.distance = -distance_iter

	local total = 0
	local char = self:get_rule_char()
	while char ~= "" do
		-- The distance grows with every element, but is capped at 19.
		local distance = self.distance + distance_iter
		if distance > 18 then
			distance = 19
		end
		self.distance = distance

		if char == open_bracket then
			-- Bracketed alternatives: the best-scoring one counts.
			local best = -1
			self:advance_rule_ptr()
			char = self:get_rule_char()
			while char ~= "" and char ~= close_bracket do
				local score = self:group_rules()
				if score and score > best then
					best = score
				end
				self:advance_rule_ptr()
				char = self:get_rule_char()
			end
			if best == -1 then
				return false
			end
			total = total + best
		else
			local score = self:group_rules()
			if not score then
				return false
			end
			total = total + score
		end

		self:advance_rule_ptr()
		char = self:get_rule_char()
		self.look_ptr = self.look_ptr + dir
	end
	return total
end

--- Scores the rule stored at `rules[i]` .. `rules[i + 4]` against the text at
-- `self.head` and records it when it beats the best rule found so far.
-- As read here: `rules[i]` is the pre-context, `rules[i + 1]` the main match,
-- `rules[i + 2]` the post-context, `rules[i + 3]` the entry recorded as
-- `best_rule`, and `rules[i + 4]` the conditional modifiers.
-- @param i index of the first field of the rule.
-- @return false when the rule does not apply; nothing otherwise.
function Phonemizer:check_rules(i)
	-- Temporary: fail if rule has conditional modifiers.
	if rules[i + 4] then
		return false
	end

	-- check_rule only resets these when it runs, so a rule without any
	-- context would otherwise inherit the prefix/suffix flags left behind
	-- by the previously checked rule.
	self.rule_prefix = nil
	self.rule_suffix = nil

	local points = self:main_rule(rules[i + 1])
	if not points then
		return false
	end

	local pre, post = rules[i], rules[i + 2]
	if pre then
		-- Scanned backwards from just before the main match; brackets are
		-- met in reverse order.
		local add = self:check_rule(pre, self.head - 1, -1, 2, "]", "[")
		if not add then
			return false
		end
		points = points + add
	end
	if post then
		-- Scanned forwards from just after the main match.
		local add = self:check_rule(post, self.head + #rules[i + 1], 1, 6, "[", "]")
		if not add then
			return false
		end
		points = points + add
	end

	if points > self.best_score then
		self.best_score = points
		self.best_rule = i + 3
		self.prefix = self.rule_prefix
		self.suffix = self.rule_suffix
	end
end

-- Handlers for the single-letter flags that follow the length digit of a
-- prefix rule. Each receives the Phonemizer, whose `prefix` and `stem`
-- tables have already been built.
local prefix_modifiers = {
	-- Sets the stem's `stress_override` flag.
	t = function(self)
		self.stem.stress_override = true
	end,

	-- A prefix ending in "i" has it replaced by "y".
	i = function(self)
		local prefix = self.prefix
		if sub(prefix.str, -1) == "i" then
			prefix.str = sub(prefix.str, 1, -2) .. "y"
		end
	end,
}

--- Splits the word into a prefix and a stem, phonemizes each with its own
-- Phonemizer, and joins the results in `self.phonemes`.
-- On entry `self.prefix` is the flag string of the matched rule: its first
-- character is used as the prefix length, the rest are modifier flags.
-- If the prefix contains no primary stress, or has a primary stress and the
-- "t" flag, then the stem's stress is calculated without the prefix. If it
-- has a primary stress and no "t" flag, then the stress is calculated with
-- the prefix.
function Phonemizer:handle_prefix()
	-- Get modifiers, and replace self.prefix with data table.
	local flags = self.prefix
	self.prefix_modifiers = sub(flags, 2)
	local prefix = {
		str = sub(self.str, 1, sub(flags, 1, 1)),
		suffix_removed = true, -- Stem treated as a "suffix".
	}
	self.prefix = prefix
	self.stem = {
		str = sub(self.str, #prefix.str + 1),
		prefix_removed = true,
		stress_override = self.max_stress < 4,
	}

	-- Handle any modifiers.
	for pos = 1, #self.prefix_modifiers do
		local flag = sub(self.prefix_modifiers, pos, pos)
		prefix_modifiers[flag](self)
	end

	-- Recalculate prefix phonemes.
	local combined = self:new(self.prefix):get_phonemes()
	self.phonemes = combined

	-- Calculate phonemes for the rest of the term, and add to phoneme table.
	for _, phoneme in ipairs(self:new(self.stem):get_phonemes()) do
		phoneme.parent = combined
		insert(combined, phoneme)
	end
end

-- Handlers for the single-letter flags that follow the length digit of a
-- suffix rule. Each receives the Phonemizer, whose `stem` table has already
-- been built.
local suffix_modifiers = {
	-- Marks the stem as being followed by a suffix that starts with a vowel.
	a = function(self)
		self.stem.suffix_vowel = true
	end,

	-- Records whether the last two characters of the stem are the same.
	d = function(self)
		local str = self.stem.str
		if sub(str, -2, -2) == sub(str, -1, -1) then
			self.stem.doubled_final_letter = true
		end
	end,

	-- Restores an "e" that was dropped from the stem before the suffix.
	e = function(self)
		local stem = self.stem
		local len = #stem.str
		local penultimate = sub(stem.str, len - 1, len - 1)
		-- A group-Y letter followed by a final group-B letter gets an "e",
		-- unless the stem has one of the listed exception endings.
		if m_data.Y[penultimate] and m_data.B[sub(stem.str, len, len)] then
			for exception, size in pairs(m_data.add_e_exceptions) do
				if sub(stem.str, -size) == exception then
					return
				end
			end
			stem.str = stem.str .. "e"
			stem.e_added = true
		end
		-- Endings that always get an "e".
		for addition, size in pairs(m_data.add_e_additions) do
			if sub(stem.str, -size) == addition then
				stem.str = stem.str .. "e"
				stem.e_added = true
			end
		end
	end,

	f = function(self)
		-- TODO
	end,

	-- A stem ending in "i" has it replaced by "y".
	i = function(self)
		local stem = self.stem
		if sub(stem.str, -1, -1) == "i" then
			stem.str = sub(stem.str, 1, #stem.str - 1) .. "y"
		end
	end,

	-- Clears the stem's `suffix_removed` flag.
	m = function(self)
		self.stem.suffix_removed = nil
	end,

	q = function(self)
		-- TODO
	end,

	-- Same behaviour as the prefix flag.
	t = prefix_modifiers["t"],

	-- Marks the stem as a verb.
	v = function(self)
		self.stem.pos = "verb"
	end,
}

--- Strips the matched suffix from the word and phonemizes the remaining stem
-- with its own Phonemizer; the result replaces `self.phonemes`.
-- On entry `self.suffix` is the flag string of the matched rule: its first
-- character is used as the suffix length, the rest are modifier flags.
function Phonemizer:handle_suffix()
	local flags = self.suffix
	local suffix_len = sub(flags, 1, 1)
	self.stem = {
		-- Everything except the last `suffix_len` characters.
		str = sub(self.str, 1, -suffix_len - 1),
		suffix_removed = true,
	}
	for pos = 2, #flags do
		local flag = sub(self.suffix, pos, pos)
		suffix_modifiers[flag](self)
	end
	self.phonemes = self:new(self.stem):get_phonemes()
end

--- Splits a phoneme string into phonemes, appends a fresh instance of each
-- to `self.phonemes`, and updates the running counters `max_stress`,
-- `vowels` and `stresses`.
-- @param new string of concatenated `phonemes` keys; at every position the
-- longest matching key wins.
-- NOTE(review): the new instances get no `parent` field here, although the
-- `process` functions read `self.parent` — confirm it is assigned elsewhere.
function Phonemizer:insert_phonemes(new)
	local i, unstressed = 1, false
	while i <= #new do
		-- Longest key that matches at position i.
		local best_match
		for k in pairs(phonemes) do
			if (
				(not best_match or #k > #best_match) and
				sub(new, i, i + #k - 1) == k
			) then
				best_match = k
			end
		end
		if not best_match then
			-- Previously this surfaced as "attempt to index a nil value".
			error(string.format("unrecognised phoneme at position %d of %q", i, new))
		end

		local proto = phonemes[best_match]
		insert(self.phonemes, proto:new())
		i = i + #best_match

		if proto.stress and proto.level then
			self.max_stress = max(self.max_stress, proto.level)
			-- Only markers below primary stress (level 4) make the next
			-- vowel count as unstressed. Before, every marker did, so a
			-- vowel explicitly given primary stress was never counted in
			-- `self.stresses`.
			if proto.level < 4 then
				unstressed = true
			end
		elseif proto.vowel then
			if not (unstressed or proto.unstressed) then
				self.stresses = self.stresses + 1
			end
			unstressed = false
			self.vowels = self.vowels + 1
		end
	end
end

--- Converts `self.str` into phonemes.
-- At every position the best-scoring rule is applied and the head moves past
-- its main match; a matched suffix or prefix hands part of the word to a
-- nested Phonemizer.
-- @return the phoneme list (a table with the `Word` metatable).
function Phonemizer:get_phonemes()
	-- Test the first character, not the first byte: one byte of a multi-byte
	-- character is not valid UTF-8 input for mw.ustring.
	if umatch(self.str, "^%u") then
		self.first_upper = true
	end

	-- NOTE(review): pairs() has no defined order; confirm the replacements
	-- cannot feed into one another.
	for k, v in pairs(m_data.replace) do
		self.str = gsub(self.str, k, v)
	end

	self.head = 1
	self.phonemes = setmetatable({}, Word)
	self.stresses = 0
	self.max_stress = 0
	self.vowels = 0

	for i = list.n, 1, -2 do
		if list[i] == self.str then
			-- TODO
		end
	end

	while self.head <= #self.str do
		self.best_rule = nil
		self.best_score = -1
		-- Each rule occupies five consecutive fields.
		for i = 1, rules.n, 5 do
			self:check_rules(i)
		end

		if self.best_rule then
			if self.suffix then
				self:handle_suffix()
			end
			if rules[self.best_rule] then
				self:insert_phonemes(rules[self.best_rule])
				-- If the post-rule contains #, replace the next "e" with "E".
				local post = rules[self.best_rule - 1]
				if post and match(post, "#") then
					-- gsub also returns a count; the parentheses drop it.
					local rest = (gsub(sub(self.str, self.head), "e", "E", 1))
					self.str = sub(self.str, 1, self.head - 1) .. rest
				end
			end
			-- Break after a prefix, since the remainder is handled by a
			-- recursive call.
			if self.prefix then
				self:handle_prefix()
				break
			end
			-- Advance by the length of the main match.
			self.head = self.head + #rules[self.best_rule - 2]
		else
			-- No rule applies here: skip the character.
			self.head = self.head + 1
		end
	end

	-- Parts split off by a prefix or suffix rule only get their own stress
	-- pass when `stress_override` is set.
	if (
		self.stress_override or
		not (self.prefix_removed or self.suffix_removed)
	) then
		self.phonemes:handle_stress()
	end
	return self.phonemes
end

--- A word: a list of phonemes that carries its own iteration state.
Word = {}
Word.__index = Word

--- Phonemizes a spelling.
-- @param word the spelling to convert.
-- @return the phoneme list produced by a new Phonemizer.
function Word:new(word)
	return (Phonemizer:new{str = word}:get_phonemes())
end

--- Iterator step: moves `self.i` to the next phoneme that has the attribute
-- `self.attr` (or simply to the next phoneme when no attribute is set) and
-- returns it; returns nil past the end of the list.
function Word:iter()
	local wanted = self.attr
	local pos = self.i
	local item
	repeat
		pos = pos + 1
		item = self[pos]
	until not item or not wanted or item[wanted]
	self.i = pos
	return item
end

--- Starts an iteration for use in a generic `for` loop.
-- The position is kept in `self.i`, so the loop body can remove the current
-- phoneme by stepping `self.i` back; only one iteration per word can be
-- active at a time.
-- @param attr optional attribute name; only phonemes having it are visited.
function Word:iterate(attr)
	self.attr = attr
	self.i = 0
	return self.iter, self
end

-- Resolves the stress level of every vowel in the word, consuming the stress
-- marker phonemes in the process.
--
-- Levels, as used in this file: -1 elided, 0 diminished, 1 unstressed,
-- 2 no explicit stress, 3 secondary, 4 primary, 5 primary with priority.
--
-- Runs four passes:
--   1. Remove stress markers and apply their level to the relevant vowel.
--   2. Remove (or restore) elided vowels.
--   3. Settle primary/secondary stress.
--   4. Split the remaining vowels into unstressed and diminished.
function Word:handle_stress()
	local max_stress, stress = 0
	for phoneme in self:iterate() do
		if phoneme.type == "=" then
			-- Stress on previous vowel.
			local prev_vowel = self:next(-1, true, "vowel")
			if prev_vowel and prev_vowel.level < 5 then
				prev_vowel.level = 4
				max_stress = max(max_stress, 4)
				-- Reduce any preceding primary stress phonemes.
				for j = self.i - 1, 1, -1 do
					if (
						self[j] ~= prev_vowel and
						self[j].level and
						self[j].level == 4
					) then
						self[j].level = 3
					end
				end
			end
			-- Drop the marker and step back so the iterator doesn't skip a phoneme.
			remove(self, self.i)
			self.i = self.i - 1
		elseif phoneme.stress then
			-- Marker for the following vowel: remember its level and drop it.
			stress = phoneme.level
			remove(self, self.i)
			self.i = self.i - 1
			if stress > max_stress then
				max_stress = stress
			elseif stress >= 4 then
				-- A primary stress that doesn't exceed one already seen is demoted.
				stress = max_stress - 1
			end
		elseif phoneme.vowel and not phoneme.nonsyllabic then
			if stress then
				phoneme.level = stress
			else
				phoneme.level = phoneme.unstressed and 1 or 2
				max_stress = max(max_stress, phoneme.level)
			end
			stress = nil
		end
	end
	-- Remove elided syllables.
	-- NOTE(review): this assumes an elided vowel is always followed by another
	-- vowel in the same word; otherwise the lookup below returns nil. Confirm.
	for phoneme in self:iterate("vowel") do
		if self[self.i].level == -1 then
			if self:next(1, true, "vowel").level < 3 then
				remove(self, self.i)
				self.i = self.i - 1
			else
				phoneme.level = 2
			end
		end
	end
	-- Handle stressed syllables:
	-- If there's a primary stress with priority, reduce any other primary stresses to secondary.
	-- Add secondary stress to every unstressed syllable that isn't explicitly unstressed/diminished or adjacent to a stressed syllable. If primary stress has not yet been added, then the first match gets primary stress instead.
	for phoneme in self:iterate("vowel") do
		if phoneme.level > 3 and max_stress == 5 then
			phoneme.level = phoneme.level - 1
		elseif phoneme.level == 2 then
			local prev, nxt = self:next(-1, true, "vowel"), self:next(1, true, "vowel")
			if not (prev and prev.level > 2 or nxt and nxt.level > 2) then
				phoneme.level = max_stress <= 3 and 4 or 3
				max_stress = 4
			end
		end
	end
	-- Handle unstressed and diminished syllables: first and last syllables can't be diminished, nor a penultimate syllable before an unstressed final syllable. Otherwise, an unstressed syllable is diminished.
	-- Note: this must be done after any explicit stresses have been added, since the stress of the following syllable is relevant.
	for phoneme in self:iterate("vowel") do
		if phoneme.level <= 2 then
			if self:nth(phoneme, "vowel") == 1 then
				phoneme.level = 1
			else
				local pos_from_end = self:nth(phoneme, "vowel", true)
				if (
					pos_from_end == -1 or
					pos_from_end == -2 and self:next(1, true, "vowel").level <= 2
				) then
					phoneme.level = 1
				else
					phoneme.level = 0
				end
			end
		end
	end
end

-- Inserts a stress marker phoneme before the syllable of every phoneme whose
-- level is 3 or above ("," for level 3, "'" otherwise).
--
-- The marker goes:
--   * at the very start of the word, if the phoneme is the word's first vowel;
--   * directly before the phoneme, if the preceding phoneme is a vowel or ";";
--   * otherwise before the preceding cluster: clusters of 3, 2, then 1
--     preceding phonemes are tried against `m_data.initials` (falling back to
--     the single preceding phoneme), and a cluster of more than one phoneme
--     gives up its first member when the phoneme before it is `short`.
function Word:handle_syllabification()
	local pos = 0
	while pos < #self do
		pos = pos + 1
		local current = self[pos]
		local level = current.level
		if level and level >= 3 then
			local marker = phonemes[level == 3 and "," or "'"]:new()
			local target
			if self:nth(current, "vowel") == 1 then
				target = 1
			else
				local onset = self[pos - 1]
				if onset.vowel or onset.type == ";" then
					target = pos
				else
					-- Try the longest candidate cluster first.
					for len = 3, 1, -1 do
						if pos > len then
							onset = {}
							for k = pos - len, pos - 1 do
								insert(onset, tostring(self[k]))
							end
							if m_data.initials[concat(onset)] then
								break
							end
						end
					end
					if not onset then
						target = pos
					else
						local before = self[pos - #onset - 1]
						if #onset > 1 and before.short then
							remove(onset, 1)
						end
						target = pos - #onset
					end
				end
			end
			insert(self, target, marker)
			-- The marker was inserted at or before `pos`, shifting the current
			-- phoneme one place to the right.
			pos = pos + 1
		end
	end
end

-- Inserts a new instance of the phoneme keyed `phoneme` immediately after the
-- cursor position `self.i`, with this word as its parent.
function Word:append(phoneme)
	local prototype = phonemes[phoneme]
	local created = prototype:new{parent = self}
	insert(self, self.i + 1, created)
end

-- Inserts a new instance of the phoneme keyed `phoneme` at the cursor position
-- `self.i` (pushing the current phoneme one place to the right), with this word
-- as its parent. The new phoneme becomes `self.phoneme` and is processed
-- immediately if it has a `process` method; the cursor is then advanced.
function Word:prepend(phoneme)
	local created = phonemes[phoneme]:new{parent = self}
	insert(self, self.i, created)
	self.phoneme = self[self.i]
	local current = self.phoneme
	if current.process then
		current:process()
	end
	self.i = self.i + 1
end

-- Returns the nth phoneme after the cursor (before it, if `n` is negative)
-- that has attribute `attr`; any phoneme counts if `attr` is omitted.
-- If `word` is truthy the search is confined to this word; otherwise it
-- continues into neighbouring words via `next_word`.
-- e.g. next(2, true, "vowel") returns the second vowel after the current phoneme in this word.
-- next(-1, false, "sibilant") returns the previous sibilant, looking into earlier words if need be.
-- next(0, false, attr) returns the current phoneme if it has `attr`.
function Word:next(n, word, attr)
	if word and not attr then
		-- No filtering within the word: plain offset from the cursor.
		return self[self.i + n]
	elseif n == 0 then
		local current = self.phoneme
		return (not attr or current[attr]) and current
	end
	local forward = n > 0
	local step = forward and 1 or -1
	local stop = forward and #self or 1
	local found = 0
	for idx = self.i + step, stop, step do
		local candidate = self[idx]
		if not attr or candidate[attr] then
			found = found + step
			if found == n then
				return candidate
			end
		end
	end
	if not word then
		return self:next_word(self.parent.i, n, attr, step, found)
	end
end

-- Continues a `Word:next` search in a neighbouring word.
--   i:     index, within `self.parent`, of the word searched last.
--   n:     the signed count being sought.
--   attr:  attribute a phoneme must have in order to count (optional).
--   inc:   search direction, 1 or -1.
--   count: signed number of matches found so far.
-- Returns the matching phoneme, or nothing once there are no more words.
function Word:next_word(i, n, attr, inc, count)
	i = i + inc
	-- Keep `word` local: it must not leak into the global environment.
	local word = self.parent[i]
	if not word then
		return
	end
	for j = n > 0 and 1 or #word, n > 0 and #word or 1, inc do
		local phoneme = word[j]
		if not attr or phoneme[attr] then
			count = count + inc
			if count == n then
				return phoneme
			end
		end
	end
	return word:next_word(i, n, attr, inc, count)
end

-- Counts the phonemes that have attribute `attr`, from the start of the word,
-- and returns the count reached at `phoneme`. With `from_end` set, counting
-- runs from the end and the result is negative. Returns nothing if `phoneme`
-- is not in the word or does not have `attr`.
-- e.g. If `p` is the second vowel, nth(p, "vowel") will return 2.
-- If `p` is the final vowel, nth(p, "vowel", true) will return -1.
function Word:nth(phoneme, attr, from_end)
	local first, last, step = 1, #self, 1
	if from_end then
		first, last, step = #self, 1, -1
	end
	local tally = 0
	for idx = first, last, step do
		local candidate = self[idx]
		if candidate[attr] then
			tally = tally + step
			if candidate == phoneme then
				return tally
			end
		end
	end
end

-- Finalises the word and returns its transcription as a string.
--   1. Drops any non-vowel phoneme whose type equals that of the phoneme
--      before it, and sets `parent` on every remaining phoneme.
--   2. Walks the word with the cursor `self.i`, calling each phoneme's
--      `process` method where one exists.
--   3. Inserts stress markers (`handle_syllabification`).
--   4. Concatenates each phoneme's `ipa` value, or its `type` if it has none.
function Word:process()
	-- Remove duplicate consonant phonemes.
	local pos = 1
	while pos <= #self do
		local cur, prev = self[pos], self[pos - 1]
		if pos > 1 and not cur.vowel and cur.type == prev.type then
			-- Stay on the same position: the next phoneme has moved into it.
			remove(self, pos)
		else
			cur.parent = self
			pos = pos + 1
		end
	end
	-- The length is re-read on every step, so phonemes added during
	-- processing are visited too.
	self.i = 0
	while self.i < #self do
		self.i = self.i + 1
		local current = self[self.i]
		self.phoneme = current
		if current.process then
			current:process()
		end
	end
	self:handle_syllabification()
	local pieces = {}
	for _, phoneme in ipairs(self) do
		insert(pieces, phoneme.ipa or phoneme.type)
	end
	return concat(pieces)
end

-- Table of functions made available to callers of this module.
local export = {}

-- Entry point. `frame` is either the text to transcribe or a table whose
-- `args[1]` holds it. The text is split on every character that is neither
-- alphanumeric nor an apostrophe; each piece is converted to a `Word` and
-- processed, and the results are joined with spaces.
function export.show(frame)
	if type(frame) == "table" then
		frame = frame.args[1]
	end
	local words = split(frame, "[^%w']")
	for i, word in ipairs(words) do
		word = Word:new(word)
		word.parent = words
		words[i] = word
	end
	-- Collect the output separately so that `words` keeps holding Word objects
	-- throughout: lookups across word boundaries (`Word:next_word`) index into
	-- it and must never encounter an already-rendered string.
	local output = {}
	for i, word in ipairs(words) do
		words.i = i
		words.word = word
		output[i] = word:process()
	end
	return concat(output, " ")
end

-- Hand the table of exported functions to whoever loads this module.
return export