Module:VL-translit

local la_utils = require("Module:la-utilities")

local export = {}

local vowel_patterns = {}

vowel_patterns["all"] = { {"ā", "a"}, {"ae", "ę"}, {"áé", "ę́"}, {"e", "ę"}, {"o", "ǫ"}, }

vowel_patterns["It-W"] = { {"ē", "ẹ"}, {"i", "ẹ"}, {"ī", "i"}, {"ō", "ọ"}, {"u", "ọ"}, {"ū", "u"}, }

vowel_patterns["Italo-Western"] = vowel_patterns["It-W"] vowel_patterns["Western"] = vowel_patterns["It-W"]

vowel_patterns["E"] = { {"ē", "ẹ"}, {"i", "ẹ"}, {"ī", "i"}, {"ō", "o"}, {"ū", "u"}, }

vowel_patterns["Eastern"] = vowel_patterns["E"] vowel_patterns["Romanian"] = vowel_patterns["E"]

vowel_patterns["S"] = { {"ē", "e"}, {"ẹ", "e"}, {"ī", "i"}, {"ō", "o"}, {"ọ", "o"}, {"ū", "u"}, }

vowel_patterns["Sardinian"] = vowel_patterns["S"]

local dictionary = { ["a"] = "a",["e"] = "e",["i"] = "i",["o"] = "o",["u"] = "u", ["ā"] = "ā",["ē"] = "ē",["ī"] = "ī",["ō"] = "ō",["ū"] = "ū", ["ae"] = "ae",["oe"] = "ē",["ai"] = "aị",["ei"] = "ėị",["au"] = "aụ",["eu"] = "ėụ", ["b"] = "b",["d"] = "d",["f"] = "f", ["c"] = "c",["g"] = "g",["v"] = "v",["x"] = "x", ["qu"] = "qŭ", ["'"] = "'" }

local vowels = { "a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū", "ae", "oe", "aị", "ėị", "aụ", "ėụ", "-" }

local onsets = { "b", "p", "d", "t", "g", "c", "cu", "qŭ", "f", "s", "z", "l", "m", "n", "r", "j", "v", "w", "bl", "pl", "br", "pr", "ps", "dr", "tr", "gl", "cl", "gr", "cr", "fl", "fr", "sp", "st", "sc", "scu", "sl", "sm", "sn", "su", "spr", "str", "scr", "spl", "scl" }

local codas = { "b", "p", "d", "t", "g", "c", "f", "s", "z", "l", "m", "n", "r", "j", "sp", "st", "sc",

"lp", "lt", "lc", "lb", "ld", "lg", "lf", "rp", "rt", "rc", "rb", "rd", "rg", "rf", "mp", "nt", "nc", "mb", "nd", "ng",

"lm", "rl", "rm", "rn", "ps", "ts", "cs", "x", "ls", "ns", "rs", "lcs", "ncs", "rcs", "lms", "rls", "rms", "rns" }

for i, val in ipairs(vowels) do	vowels[val] = true end

for i, val in ipairs(onsets) do	onsets[val] = true end

for i, val in ipairs(codas) do	codas[val] = true end

-- These phonetic rules apply to the whole word, not just a syllable local word_rules_start = { {"h", ""}, {"k", "c"}, --{"w", "v"}, {"y", "i"}, {"ȳ", "ī"}, {"x('?)s", "x%1"}, {"x('?)([ct])", "s%1%2"}, {"b([st])", "p%1"}, {"d([st])", "t%1"}, {"g([st])", "c%1"}, {"n([bp])", "m%1"}, --{"qu", "qv"}, --{"ngu([aeiouāēīōū])", "ngv%1"}, {"([aeiouāēīōū])('?)b([aeiouāēīōū])", "%1%2v%3"}, -- This is not a copypaste error, the pattern needs to run twice to catch the edge case of -ababa- -- (common in the VL conditional) {"([aeiouāēīōū])('?)b([aeiouāēīōū])", "%1%2v%3"}, {"um$", "u"} }

local stress_shift_rules = { ["qu"] = "'qu", ["ngu"] = "n'gu", ["gu"] = "'gu", ["v"] = "'v", ["bl"] = "'bl", ["pl"] = "'pl", ["br"] = "'br", ["pr"] = "'pr", ["dr"] = "'dr", ["tr"] = "'tr", ["gl"] = "'gl", ["cl"] = "'cl", ["gr"] = "'gr", ["cr"] = "'cr", ["fl"] = "'fl", ["fr"] = "'fr", ["ct"] = "c't", ["pt"] = "p't", ["gd"] = "g'd", ["sl"] = "s'l", ["sm"] = "s'm", ["sn"] = "s'n", ["su"] = "s'u", ["st"] = "s't", ["xt"] = "x't", ["spr"] = "s'pr", ["str"] = "s'tr", ["scr"] = "s'cr", ["spl"] = "s'pl", ["scl"] = "s'cl", ["nct"] = "nc't" }

local word_rules_end = { {"^ĭ", "j"}, -- {"g([ei])", "j%1"}, -- {"dĭ", "j"}, -- {"gĭ", "j"}, -- {"z", "j"}, {"ė", "e"}, {"ị", "i"}, {"ụ", "u"}, {"ĭ", "i"}, {"ŭ", "u"}, {"ei", "i"}, {"ii", "i"}, {"ee$", "ie"}, {"([aẹęeọǫou])(́?)e$", "%1%2i"}, }

local nasalized = {}

nasalized["It-W"] = { {"[ẹęeēi](́?)[nm]", "ẽ%1"}, {"[ī](́?)[nm]", "ĩ%1"}, {"[ū](́?)[nm]", "ũ%1"}, {"[ọǫoōu](́?)[nm]", "õ%1"}, }

nasalized["E"] = { {"[ẹęeēi](́?)[nm]", "ẽ%1"}, {"[ī](́?)[nm]", "ĩ%1"}, {"[uū](́?)[nm]", "ũ%1"}, {"[ọoō](́?)[nm]", "õ%1"}, }

nasalized["S"] = { {"[eē](́?)[nm]", "ẽ%1"}, {"[iī](́?)[nm]", "ĩ%1"}, {"[uū](́?)[nm]", "ũ%1"}, {"[oō](́?)[nm]", "õ%1"}, }

local function nasalize_vowels(word, family) word = mw.ustring.gsub(word, "[aā](́?)[nm]$", "ã%1") word = mw.ustring.gsub(word, "[aā][nm](́?)s", "ã%1s") for _, rule in ipairs(nasalized[family]) do word = mw.ustring.gsub(word, rule[1] .. "$", rule[2]) word = mw.ustring.gsub(word, rule[1] .. "(́?)s", rule[2] .. "%2s") end return word end

local function segment_word(word) local phonemes = {} while mw.ustring.len(word) > 0 do		local longestmatch = "" for letter, ipa in pairs(dictionary) do			if mw.ustring.len(letter) > mw.ustring.len(longestmatch) and mw.ustring.sub(word, 1, mw.ustring.len(letter)) == letter then longestmatch = letter end end if mw.ustring.len(longestmatch) > 0 then table.insert(phonemes, dictionary[longestmatch]) word = mw.ustring.sub(word, mw.ustring.len(longestmatch) + 1) else table.insert(phonemes, mw.ustring.sub(word, 1, 1)) word = mw.ustring.sub(word, 2) end end return phonemes end

local function get_onset(syll) local consonants = {} for i = 1, #syll do		if vowels[syll[i]] then break end if syll[i] ~= "'" then table.insert(consonants, syll[i]) end end return table.concat(consonants) end

local function get_coda(syll) local consonants = {} for i = #syll, 1, -1 do		if vowels[syll[i]] then break end table.insert(consonants, 1, syll[i]) end return table.concat(consonants) end

local function get_vowel(syll) for i = 1,#syll do		if vowels[syll[i]] then return syll[i] end end end

local function split_syllables(remainder) local syllables = {} local syll = {} while #remainder > 0 do		local phoneme = table.remove(remainder, 1) if phoneme == "'" then if #syll > 0 then table.insert(syllables, syll) end syll = {"'"} elseif vowels[phoneme] then table.insert(syll, phoneme) table.insert(syllables, syll) syll = {} else table.insert(syll, phoneme) end end -- If there are phonemes left, then the word ends in a consonant -- Add them to the last syllable for _, phoneme in ipairs(syll) do		table.insert(syllables[#syllables], phoneme) end -- Split consonant clusters between syllables for i, current in ipairs(syllables) do		if i > 1 then local previous = syllables[i-1] local onset = get_onset(current) -- Shift over consonants until the syllable onset is valid while not (onset == "" or onsets[onset]) do				table.insert(previous, table.remove(current, 1)) onset = get_onset(current) end -- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with gn, then shift it over if get_coda(previous) == "" and ((current[1] == "s" and not vowels[current[2]]) or (current[1] == "g" and current[2] == "n")) then table.insert(previous, table.remove(current, 1)) end -- If there is no vowel at all in this syllable if not get_vowel(current) then for j=1,#current do					table.insert(syllables[i-1], table.remove(current, 1)) end table.remove(syllables,i) end end end for i, syll in ipairs(syllables) do		local onset = get_onset(syll) local coda = get_coda(syll) if not (onset == "" or onsets[onset]) then require("Module:debug").track("VL-noun/bad onset") error("onset error:[" .. onset .. "]") end if not (coda == "" or codas[coda]) then require("Module:debug").track("VL-noun/bad coda") error("coda error:[" .. coda .. "]") end end return syllables end

local function detect_accent(syllables) -- Manual override for i = 1, #syllables do		for j = 1, #syllables[i] do			if syllables[i][j] == "'" then table.remove(syllables[i], j)				return i			end end end if #syllables > 2 then -- Does the penultimate syllable end in a single vowel? local penult = syllables[#syllables - 1] if mw.ustring.find(penult[#penult], "^[aeiou]$") then local ult = syllables[#syllables] if ult[2] and mw.ustring.find(ult[1] .. ult[2], "[bdg][lr]") then return #syllables - 1 else return #syllables - 2 end else return #syllables - 1 end elseif #syllables == 2 then return #syllables - 1 end return #syllables end

local function place_accent(syllable) -- Special case: i before a or o	new_syllable = mw.ustring.gsub(syllable, "i([aoāō])", "i%1́") if syllable == new_syllable then new_syllable = mw.ustring.gsub(syllable, "([aeẹęioọǫuāēīōūėịụ-])", "%1́") end return new_syllable end

local function convert_word(word, vowel_pattern) -- Prothetic i before s + consonant if vowel_pattern == "It-W" then word = mw.ustring.gsub(word, "^s([ptclmn])", "is%1") end -- do starting word-based rules for _, rule in ipairs(word_rules_start) do		word = mw.ustring.gsub(word, rule[1], rule[2]) end for k, v in pairs(stress_shift_rules) do word = mw.ustring.gsub(word, k .. "'", v)	end -- Double consonant stress shifts for _, v in ipairs({"b", "c", "d", "f", "g", "l", "m", "n", "p", "r", "s", "t"}) do word = mw.ustring.gsub(word, v .. v .. "'", v .. "'" .. "v") end local phonemes = segment_word(word) local syllables = split_syllables(phonemes) local accent = detect_accent(syllables) -- Check antepenult for e, i > j (written i)	--local antepenult = syllables[--

for i, syll in ipairs(syllables) do		if syllables[i + 1] then if mw.ustring.find(syll[#syll], "^[eēiī]$") and mw.ustring.find(syllables[i + 1][1], "^[aāoōuū]$") then syll[#syll] = "ĭ" if syllables[accent] == syll then accent = accent + 1 end end end end

for i, syll in ipairs(syllables) do		syll = table.concat(syll) for i, rule in ipairs(vowel_patterns["all"]) do			syll = mw.ustring.gsub(syll, rule[1], rule[2]) end for i, rule in ipairs(vowel_patterns[vowel_pattern]) do			syll = mw.ustring.gsub(syll, rule[1], rule[2]) end --if i ~= accent then			syll = mw.ustring.gsub(syll, "ẹ", "e")			syll = mw.ustring.gsub(syll, "ọ", "o")		end syllables[i] = (i == accent and place_accent(syll) or syll) end word = table.concat(syllables) for _, rule in ipairs(word_rules_end) do		word = mw.ustring.gsub(word, rule[1], rule[2]) end word = nasalize_vowels(word, vowel_pattern) return word end

function export.convert_words(words, vowel_pattern) word_table = {} for word in mw.ustring.gmatch(words, "%S+") do		table.insert(word_table, convert_word(word, vowel_pattern)) end return "*" .. table.concat(word_table, " ") end

return export