-- Module:mh-pronunc/sandbox

-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.

-- Table of public entry points returned at the end of the module.
local export = {}

-- Diacritic marks, referenced by name when building output strings.
local ASYLLABIC = "̯"
local BREVE = "̆"
local BREVE2 = "͝"
local CEDILLA = "̧"
local CENTRAL = "̈"
local DEVOICE = "̥"
local DEVOICE2 = "̊"
local LESSROUND = "̜"
local LESSROUND2 = "͑"
local MACRON = "̄"
local MOREROUND = "̹"
local MOREROUND2 = "͗"
local SYLLABIC = "̩"
local TIE = "͡"
local TIE2 = "͜"

-- Consonant cluster categories stored in the clusterTypes lookup table.
local EPENTH_CLUSTER = 0
local ASSIM_CLUSTER = 1
local STABLE_CLUSTER = 2

-- Forward-declare functions.
-- Each declaration sits on its own line so that the comment above
-- cannot swallow it and turn these names into accidental globals.
local addUnique
local assign
local fastTrim
local lerpF2
local mergedMidVowelsMap
local needClusterTypes
local needPhoneticMap
local needVowelCharts
local parse
local parseBoolean
local reverseString
local splitTokens
local string_gsub2
local string_gsubx
local toBender
local toMOD
local toPhonemic
local toPhonetic
local toPhoneticDialect
local toPhoneticRemainder

-- Forward-declare lookup tables.
-- These are filled in lazily by the functions that need them.
local benderMaps
local clusterTypes
local fromF1
local fromF2
local fromF2Conson
local parseC_CH_CWmap
local parsePseudoConsonMap
local parseRemainingMap
local phonemicMap
local phoneticMap
local toF1
local toF2
local toMODmap
local voicedPrimaries

-- Adds elements to a sequence as if it's a set (retains unique elements only).
-- seq:   sequence (array-like table) to append to; modified in place.
-- value: appended to the end of seq unless an element equal to it (==)
--        is already present, in which case nothing happens.
-- Returns nothing.
addUnique = function(seq, value)
	for index = 1, #seq do
		if seq[index] == value then
			return
		end
	end
	seq[#seq + 1] = value
end

-- Intended to work the same as JavaScript's Object.assign function.
-- target: table that receives the copied key/value pairs; modified in place.
-- ...:    any number of sources; values that are not tables are skipped.
-- Sources are applied strictly left to right, so a key present in a later
-- source overrides the same key from an earlier one.
-- Returns target.
assign = function(target, ...)
	-- select("#", ...) counts every argument, including nil ones,
	-- so the order of application never depends on table traversal order.
	for index = 1, select("#", ...) do
		local source = select(index, ...)
		if type(source) == "table" then
			for key, value in pairs(source) do
				target[key] = value
			end
		end
	end
	return target
end

-- Returns text with its leading and trailing whitespace (%s) removed.
fastTrim = function(text)
	local trimmed = string.match(text, "^%s*(.-)%s*$")
	return trimmed
end

-- Returns the F2 code halfway between two secondary articulations.
-- secondaryL, secondaryR: keys of the toF2 chart ("j", "G", "w", or
--                         "1".."5" / 1..5).
-- The two F2 ranks are averaged and mapped back through fromF2.
lerpF2 = function(secondaryL, secondaryR)
	-- The charts are built lazily; make sure they exist before indexing them.
	needVowelCharts()
	return fromF2[0.5 * (toF2[secondaryL] + toF2[secondaryR])]
end

-- Lazily builds the clusterTypes lookup table (does nothing once it exists).
-- The table is indexed as
--   clusterTypes[primaryL][primaryR][secondaryR]
-- and yields EPENTH_CLUSTER, ASSIM_CLUSTER or STABLE_CLUSTER.
needClusterTypes = function()
	if clusterTypes then return end
	-- Innermost level: the same cluster type for every secondary articulation.
	local EPENTH = {
		["j"] = EPENTH_CLUSTER,
		["G"] = EPENTH_CLUSTER,
		["w"] = EPENTH_CLUSTER
	}
	local ASSIM = {
		["j"] = ASSIM_CLUSTER,
		["G"] = ASSIM_CLUSTER,
		["w"] = ASSIM_CLUSTER
	}
	local STABLE = {
		["j"] = STABLE_CLUSTER,
		["G"] = STABLE_CLUSTER,
		["w"] = STABLE_CLUSTER
	}
	-- Default row: every right-hand primary forms an epenthetic cluster.
	local EMPTY = {
		["p"] = EPENTH,
		["t"] = EPENTH,
		["k"] = EPENTH,
		["m"] = EPENTH,
		["n"] = EPENTH,
		["N"] = EPENTH,
		["r"] = EPENTH,
		["l"] = EPENTH,
		["h"] = EPENTH,
		["y"] = EPENTH
	}
	clusterTypes = {
		["p"] = assign({}, EMPTY, {
			["p"] = STABLE, -- /pp/
			["m"] = ASSIM  -- /pm/ becomes [mm]
		}),
		["t"] = assign({}, EMPTY, {
			["t"] = STABLE -- /tt/
		}),
		["k"] = assign({}, EMPTY, {
			["k"] = STABLE, -- /kk/
			["N"] = ASSIM  -- /kŋ/ becomes [ŋŋ]
		}),
		["m"] = assign({}, EMPTY, {
			["p"] = STABLE, -- /mp/
			["m"] = STABLE -- /mm/
		}),
		["n"] = assign({}, EMPTY, {
			["t"] = STABLE, -- /nt/
			["n"] = STABLE, -- /nn/
			["r"] = STABLE, -- /nr/
			["l"] = STABLE -- /nl/
		}),
		["N"] = assign({}, EMPTY, {
			["k"] = STABLE, -- /ŋk/
			["N"] = STABLE -- /ŋŋ/
		}),
		["r"] = assign({}, EMPTY, {
			["n"] = ASSIM, -- /rn/ becomes [nn]
			["r"] = STABLE, -- /rr/
			["l"] = STABLE  -- /rl/
		}),
		["l"] = assign({}, EMPTY, {
			["t"] = assign({}, STABLE, {
				["j"] = EPENTH_CLUSTER -- /ltʲ/
			}),
			["n"] = ASSIM, -- /ln/ becomes [nn]
			["r"] = STABLE, -- /lr/
			["l"] = STABLE  -- /ll/
		}),
		["h"] = EMPTY
	}
end

-- Lazily builds phoneticMap, the table that converts pseudo-X-SAMPA tokens
-- to phonetic IPA strings (does nothing once the table exists).
-- The "if true" / "if false" / "false and ..." switches below are
-- hard-coded toggles carried over unchanged from the original code.
needPhoneticMap = function()

	if phoneticMap then return end

	-- fromF1 and fromF2 are used below to enumerate vowel keys.
	needVowelCharts()

	local map = {
		["p"] = "p", ["b"] = "b",
		["t"] = "t", ["d"] = "d",
		["k"] = "k", ["g"] = "ɡ",
		["m"] = "m", ["n"] = "n", ["N"] = "ŋ",
		["r"] = "r", ["l"] = "l",
		["Hj"] = "j", ["HG"] = "ɰ", ["Hw"] = "w",
		["_"] = "‿",
		["j"] = "ʲ", ["G"] = "ˠ", ["w"] = "ʷ",
		["a1"] = "æ", ["E1"] = "ɛ", ["e1"] = "e", ["i1"] = "i",
		["a2"] = "a", ["E2"] = "ɜ", ["e2"] = "ɘ", ["i2"] = "ɨ",
		["a3"] = "ɑ", ["E3"] = "ʌ", ["e3"] = "ɤ", ["i3"] = "ɯ",
		["a5"] = "ɒ", ["E5"] = "ɔ", ["e5"] = "o", ["i5"] = "u",
		["^"] = ASYLLABIC,
		["@"] = "",
		["("] = "(",
		[")"] = ")",
		[":"] = "ː",
		["="] = "",
		["\""] = "ˈ",
		["%"] = "ˌ",
		[","] = "",
		["\\"] = ""
	}
	-- Upper-case obstruents: the first table (devoiced symbols) is disabled,
	-- so they currently map to the same symbols as "p", "t" and "k".
	assign(map, false and {
		["P"] = "b̥",
		["T"] = "d̥",
		["K"] = "ɡ̊"
	} or {
		["P"] = map["p"],
		["T"] = map["t"],
		["K"] = map["k"]
	})
	-- Disabled: would drop the secondary mark from primary+"G" velars.
	if false then
		for primary in mw.text.gsplit("kKgN", "") do
			map[primary.."G"] = map[primary]
		end
	end
	-- Fill in semiconsonant keys that are not defined yet.
	map["Hj"] = map["Hj"] or map["i1^"] or (map["i1"]..map["^"])
	map["i1^"] = map["i1^"] or map["Hj"]
	map["yj"] = map["yj"] or map["i1^"]
	map["i3^"] = map["i3^"] or map["HG"]
	if true then
		assign(map, {
			["i3^"] = "ɰ",
			["e3^"] = "ʁ",
			["E3^"] = "ʁ",
			["a3^"] = "ʕ"
		})
	end
	if true then
		-- Every F1 + "5^" key defaults to the symbol of "Hw".
		for f1 in mw.text.gsplit("aEei", "") do
			local key = f1.."5^"
			map[key] = map[key] or map["Hw"]
		end
	end
	-- Primary + secondary articulation; primaries without an entry of their
	-- own (space, tab, newline) are kept as they are.
	for primary in mw.text.gsplit("pPbtTdkKgmnNrl_ \t\n", "") do
		for secondary in mw.text.gsplit("jGw", "") do
			local key = primary..secondary
			map[key] = map[key] or ((map[primary] or primary)..map[secondary])
		end
	end

	for f1 = 1, 4 do
		local vowelF1 = fromF1[f1]
		-- Derive F2 columns 2 and 4 from their neighbours when missing.
		local vowel = vowelF1.."2"
		map[vowel] = map[vowel] or (map[vowelF1.."1"]..CENTRAL)
		vowel = vowelF1.."4"
		map[vowel] = map[vowel] or (map[vowelF1.."5"]..LESSROUND2)
		-- Add the "=", "@" and "^" suffixed variants of every vowel.
		for f2 = 1, 5 do
			vowel = vowelF1..fromF2[f2]
			local semi = vowel.."="
			map[semi] = map[semi] or (map[vowel]..map["="])
			semi = vowel.."@"
			map[semi] = map[semi] or (map[vowel]..map["@"])
			semi = vowel.."^"
			map[semi] = map[semi] or (map[vowel]..map["^"])
		end
	end

	phoneticMap = map

end

-- Lazily builds the vowel chart lookup tables toF1, fromF1, toF2, fromF2
-- and fromF2Conson (does nothing once toF1 exists).
-- toF1 / toF2 turn a symbol into a numeric rank; the from* tables turn a
-- rank, or an already converted symbol, back into a symbol.
needVowelCharts = function()

	if toF1 then return end

	toF1 = {
		["a"] = 1, ["E"] = 2, ["e"] = 3, ["i"] = 4,
		[ 1 ] = 1, [ 2 ] = 2, [ 3 ] = 3, [ 4 ] = 4
	}
	fromF1 = {
		[ 1 ] = "a", [ 2 ] = "E", [ 3 ] = "e", [ 4 ] = "i",
		["a"] = "a", ["E"] = "E", ["e"] = "e", ["i"] = "i"
	}
	toF2 = {
		["j"] = 1,            ["G"] = 3,            ["w"] = 5,
		["1"] = 1, ["2"] = 2, ["3"] = 3, ["4"] = 4, ["5"] = 5,
		[ 1 ] = 1, [ 2 ] = 2, [ 3 ] = 3, [ 4 ] = 4, [ 5 ] = 5
	}
	fromF2 = {
		[ 1 ] = "1", [ 2 ] = "2", [ 3 ] = "3", [ 4 ] = "4", [ 5 ] = "5",
		["1"] = "1", ["2"] = "2", ["3"] = "3", ["4"] = "4", ["5"] = "5",
		["j"] = "1",              ["G"] = "3",              ["w"] = "5"
	}
	-- Only ranks 1, 3 and 5 have a consonant secondary articulation.
	fromF2Conson = {
		[ 1 ] = "j", [ 3 ] = "G", [ 5 ] = "w",
		["1"] = "j", ["3"] = "G", ["5"] = "w",
		["j"] = "j", ["G"] = "G", ["w"] = "w"
	}

end

-- Parses a comma-separated list of Bender-style spellings into a sequence
-- of unique pseudo-X-SAMPA strings.
-- code: string; whitespace runs are collapsed and the text is lower-cased
--       before it is split on commas.
-- Returns a sequence of converted strings, duplicates removed.
-- Raises an error (via error()) when an item contains unsupported
-- characters, misplaced underscores, a "g" outside "ng", or breaks one of
-- the phonotactic checks below.
parse = function(code)

	local outSeq = {}
	code = mw.ustring.gsub(code, "%s+", " ")
	code = string.lower(code)
	for text in mw.text.gsplit(code, " *,[ ,]*") do

		text = fastTrim(text)
		if text ~= "" then

			-- Anything left after deleting the supported characters is invalid.
			local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
			if temp ~= "" then
				error("'"..code.."' contains unsupported characters: "..temp)
			end

			-- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudoconsonants.
			parsePseudoConsonMap = parsePseudoConsonMap or {
				["y"] = "0",
				["h"] = "0h",
				["w"] = "0w"
			}
			text = string.gsub(text, "_*([hwy])_+", parsePseudoConsonMap)
			text = string.gsub(text, "_+([hwy])", parsePseudoConsonMap)
			if string.find(text, "_") then
				error("contains misplaced underscores: "..code)
			end

			-- a plain {i} protected from dialect-specific reflexes
			text = string.gsub(text, "'i", "I")

			-- "yi'y" and "'yiy" sequences
			-- Returning nothing from the callback leaves a plain "yiy" untouched.
			text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
				if aposA ~= "" then
					-- "dwelling upon" i
					return "Z"
				elseif aposB ~= "" then
					-- "passing over lightly" i
					return "z"
				end
			end)

			-- Convert multigraphs to pseudo-X-SAMPA format.
			parseC_CH_CWmap = parseC_CH_CWmap or {
				["k"]  = "kG",
				["kh"] = "kGh", -- N\A
				["kw"] = "kW",
				["l"]  = "lJ",
				["lh"] = "lG",
				["lw"] = "lW",
				["m"]  = "mJ",
				["mh"] = "mG",
				["mw"] = "mJw", -- N\A
				["n"]  = "nJ",
				["nh"] = "nG",
				["nw"] = "nW",
				["ng"] = "NG",
				["ngh"] = "NGh", -- N\A
				["ngw"] = "NW",
				["r"]  = "rG",
				["rh"] = "rGh", -- N\A
				["rw"] = "rW",
				["0"]  = "_J",
				["0h"] = "_G",
				["0w"] = "_W"
			}
			text = string.gsub(text, "[klmnr0]g?[hw]?", parseC_CH_CWmap)
			if string.find(text, "g") then
				error("contains g that is not part of ng: "..code)
			end

			-- Convert remaining sequences to pseudo-X-SAMPA format.
			parseRemainingMap = parseRemainingMap or {
				["b"] = "pG",
				["d"] = "rj",
				["e"] = "E",
				["&"] = "e",
				["h"] = "hG",
				["j"] = "tj",
				["J"] = "j",
				["p"] = "pj",
				["t"] = "tG",
				["w"] = "hw",
				["W"] = "w",
				["y"] = "hj",
				["z"] = "yj",
				["Z"] = "Yj",
				["'"] = ""
			}
			text = string.gsub(text, ".", parseRemainingMap)

			-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
			-- but allow VC, CV at affix boundaries
			-- where a vowel may link to another morpheme's consonant.
			temp = string.gsub(text, "[%s%-]+", "")
			if	string.find(temp, "_..[jGw]") or
				string.find(temp, ".[jGw]_.")
			then
				error("pseudoconsonants may not neighbor a consonant")
			end
			if string.find(temp, "[aEeIi]_.[aEeIi]") then
				-- The ": " separator was missing here, which glued the
				-- offending code directly onto the message text.
				error(
					"pseudoconsonants may only be at the beginning or end: "..code
				)
			end
			if string.find(temp, "[aEeIi][aEeIi]") then
				error("vowels must be separated by a consonant: "..code)
			end
			if string.find(temp, ".[jGw].[jGw]$") then
				error("may not end with a consonant cluster: "..code)
			end
			-- gsub is used only to run the callback over each candidate;
			-- its result is discarded.
			string.gsub(" "..temp, "[ jGw](.[jGw])(.[jGw][ptkmnNrlhyYjGw]*)",
				function(consonX, consonY)
					if consonX ~= consonY then
						error(
							"may not begin with a consonant cluster "..
							"unless it is a geminate: "..code
						)
					end
				end
			)

			if text ~= "" then
				addUnique(outSeq, text)
			end

		end

	end

	return outSeq

end

-- Interprets a template argument as a boolean.
-- Returns true only for a string that, once every character other than
-- ASCII letters and digits is removed, is neither empty, "0", nor "false"
-- (compared case-insensitively). Any other input yields false.
parseBoolean = function(text)
	if type(text) ~= "string" then
		return false
	end
	local cleaned = string.gsub(text, "[^0-9A-Za-z]", "")
	if cleaned == "" or cleaned == "0" then
		return false
	end
	return string.lower(cleaned) ~= "false"
end

-- Returns text with the order of its tokens reversed, where the tokens
-- are whatever splitTokens produces with its default pattern.
reverseString = function(text)
	local tokens = splitTokens(text)
	local count = #tokens
	local reversed = {}
	for position = 1, count do
		reversed[position] = tokens[count - position + 1]
	end
	return table.concat(reversed)
end

-- Splits text into a sequence of tokens.
-- text:    string to scan.
-- pattern: optional Lua pattern matching one token; the default matches a
--          lead byte followed by any number of bytes in \128-\191.
-- chars:   optional table to reuse as the output buffer; a new table is
--          created when omitted.
-- shorten: when truthy and the buffer held more entries than were written,
--          only the first leftover entry is removed, via table.remove.
--          NOTE(review): that mirrors the original code; confirm it is the
--          intended meaning of this flag.
-- Returns the buffer. Without shorten, every entry left over from a
-- previous use of the buffer is cleared.
splitTokens = function(text, pattern, chars, shorten)
	chars = chars or {}
	-- Measure the buffer before writing: taking # while entries are being
	-- set to nil would read the length of a table with holes, which Lua
	-- leaves unspecified.
	local staleLength = #chars
	local count = 0
	for ch in string.gmatch(
		text, pattern or "[%z\1-\127\194-\244][\128-\191]*"
	) do
		count = count + 1
		chars[count] = ch
	end
	if count < staleLength then
		if shorten then
			table.remove(chars, count + 1)
		else
			for index = count + 1, staleLength do
				chars[index] = nil
			end
		end
	end
	return chars
end

-- Applies string.gsub(text, pattern, subst) at most twice.
-- The second pass runs over the result of the first, and only when the
-- first pass changed something.
-- Presumably the second pass exists to catch matches that overlapped a
-- first-pass match — TODO confirm.
-- Returns the resulting string only (no replacement count).
string_gsub2 = function(text, pattern, subst)
	local result = string.gsub(text, pattern, subst)
	-- If it didn't change the first time, it won't change the second time.
	if result ~= text then
		result = string.gsub(result, pattern, subst)
	end
	return result
end

-- Applies string.gsub(text, pattern, subst) over and over until a pass
-- leaves the string unchanged, then returns that string.
string_gsubx = function(text, pattern, subst)
	local current = text
	while true do
		local replaced = string.gsub(current, pattern, subst)
		if replaced == current then
			return replaced
		end
		current = replaced
	end
end

-- Converts parsed pseudo-X-SAMPA strings to a Bender-style orthography.
-- inSeq: sequence of strings as produced by parse.
-- args:  optional table; args.version selects the spelling variant,
--        compared case-insensitively. Unknown or missing values fall back
--        to "default".
-- Returns a new sequence of converted strings, duplicates removed, in the
-- order of inSeq.
toBender = function(inSeq, args)
	-- "1968" is from "Marshallese Phonology" (1968 by Byron W. Bender).
	-- "med" is from the Marshallese-English Dictionary (1976).
	-- "mod" is from the Marshallese-English Online Dictionary.
	-- "default" is the same as "mod" but with cedillas.
	local version = args and args.version
	if not benderMaps then
		local map1968 = {
			["pj"] = "p", ["pG"] = "b",
			["tj"] = "j", ["tG"] = "t",
			["kG"] = "k", ["kw"] = "q",
			["mj"] = "m", ["mG"] = "ṁ",
			["nj"] = "n", ["nG"] = "ṅ", ["nw"] = "n̈",
			["NG"] = "g", ["Nw"] = "g̈",
			["rj"] = "d", ["rG"] = "r", ["rw"] = "r̈",
			["lj"] = "l", ["lG"] = "ł", ["lw"] = "l̈",
			["yj"] = "yi'y", ["Yj"] = "'yiy",
			["hj"] = "y", ["hG"] = "h", ["hw"] = "w",
			["_j"] = "", ["_G"] = "", ["_w"] = "",
			["a"] = "a", ["E"] = "e", ["e"] = "&", ["i"] = "i", ["I"] = "i"
		}
		-- Each later map starts as a copy of the previous one.
		local mapMED = assign({}, map1968, {
			["mG"] = "m̧",
			["nG"] = "ņ",
			["nw"] = "ņ°",
			["Nw"] = "g°",
			["rw"] = "r°",
			["lG"] = "ļ",
			["lw"] = "ļ°",
			["e"] = "ȩ"
		})
		local mapMOD = assign({}, mapMED, {
			["kw"] = "kʷ",
			["mG"] = "ṃ",
			["nG"] = "ṇ",
			["nw"] = "ṇʷ",
			["Nw"] = "gʷ",
			["rw"] = "rʷ",
			["lG"] = "ḷ",
			["lw"] = "ḷʷ",
			["e"] = "ẹ"
		})
		local mapDefault = assign({}, mapMOD, {
			["mG"] = "m̧",
			["nG"] = "ņ",
			["nw"] = "ņʷ",
			["lG"] = "ļ",
			["lw"] = "ļʷ",
			["e"] = "ȩ"
		})
		benderMaps = {
			["1968"]    = map1968,
			["med"]     = mapMED,
			["mod"]     = mapMOD,
			["default"] = mapDefault
		}
	end
	local map = benderMaps[
		type(version) == "string" and string.lower(version) or ""
	] or benderMaps["default"]
	local outSeq = {}
	-- ipairs keeps the output in the same order as the input sequence.
	for _, text in ipairs(inSeq) do
		text = string.gsub(text, ".[jGw]?", map)
		addUnique(outSeq, text)
	end
	return outSeq
end

-- Rewrites cedilla and macron letters in text to the replacements listed
-- in toMODmap (built on first use) and returns the converted string.
-- Characters without an entry in the map are left as they are.
toMOD = function(text)
	if not toMODmap then
		toMODmap = {
			["Ȩ"] = "Ẹ", ["ȩ"] = "ẹ",
			["Ļ"] = "Ḷ", ["ļ"] = "ḷ",
			["M̧"] = "Ṃ", ["m̧"] = "ṃ",
			["Ņ"] = "Ṇ", ["ņ"] = "ṇ",
			["N̄"] = "Ñ", ["n̄"] = "ñ",
			["O̧"] = "Ọ", ["o̧"] = "ọ"
		}
	end
	-- One character, optionally followed by a combining cedilla or macron.
	local letterPattern = ".["..CEDILLA..MACRON.."]?"
	local converted = mw.ustring.gsub(text, letterPattern, toMODmap)
	return converted
end

-- Converts parsed pseudo-X-SAMPA strings to phonemic IPA.
-- inSeq: table of strings as produced by parse.
-- Returns a new sequence of converted strings, duplicates removed.
toPhonemic = function(inSeq)
	if not phonemicMap then
		-- Built once, on first use.
		local ipa = {
			["pj"] = "pʲ", ["pG"] = "pˠ",
			["tj"] = "tʲ", ["tG"] = "tˠ",
			["kG"] = "kˠ", ["kw"] = "kʷ",
			["mj"] = "mʲ", ["mG"] = "mˠ",
			["nj"] = "nʲ", ["nG"] = "nˠ", ["nw"] = "nʷ",
			["NG"] = "ŋˠ", ["Nw"] = "ŋʷ",
			["rj"] = "rʲ", ["rG"] = "rˠ", ["rw"] = "rʷ",
			["lj"] = "lʲ", ["lG"] = "lˠ", ["lw"] = "lʷ",
			["hj"] = "j", ["hG"] = "ɰ", ["hw"] = "w",
			["_j"] = "", ["_G"] = "", ["_w"] = "",
			["a"] = "æ", ["E"] = "ɛ", ["e"] = "e", ["i"] = "i", ["I"] = "i"
		}
		phonemicMap = ipa
		-- Hard-coded toggle: an alternative vowel set, currently off.
		local useAlternativeVowels = false
		if useAlternativeVowels then
			assign(ipa, {
				["a"] = "ɐ",
				["E"] = "ə",
				["e"] = "ɘ",
				["i"] = "ɨ",
				["I"] = "ɨ"
			})
		end
		-- The two {yiy} variants are composed from the glide and {i} symbols.
		local glide = ipa.hj
		ipa["yj"] = glide..ipa.i..ASYLLABIC..glide
		ipa["Yj"] = glide..ipa.i.."ː"..glide
	end
	local outSeq = {}
	for _, item in pairs(inSeq) do
		local converted = string.gsub(item, ".[jGw]?", phonemicMap)
		addUnique(outSeq, converted)
	end
	return outSeq
end

-- Converts parsed pseudo-X-SAMPA strings to phonetic IPA.
-- inSeq: sequence of strings as produced by parse.
-- args:  optional table of options: dialect, enunciate, liaison, nohints
--        and voice (each described below).
-- Returns a sequence of unique pronunciations, in the order generated.
toPhonetic = function(inSeq, args)

	-- Recognize "ralik" for Rālik Chain (western dialect).
	-- Recognize "ratak" for Ratak Chain (eastern dialect).
	-- For other values, list both possible dialect reflexes where applicable.
	local dialect = args and args.dialect and
		mw.ustring.lower(mw.text.trim(args.dialect)) or ""
	if dialect == "rālik" then
		dialect = "ralik"
	end

	-- If enabled, break words at consonant cluster boundaries
	-- and enunciate the word fragments individually.
	-- This mode does not assimilate clusters or produce epenthetic vowels.
	local enunciate = not not (args and parseBoolean(args.enunciate))

	-- If enabled, display liaison joiners to mark
	-- spaces or hyphens in the input code that are not consonant clusters.
	local liaison = not not (args and parseBoolean(args.liaison))

	-- If enabled, do not display pseudoconsonant hints at all.
	local noHints = not not (args and parseBoolean(args.nohints))

	-- "false" will display all obstruent allophones as voiceless.
	-- "true" will display all obstruent allophones as voiced.
	-- Empty string or absent by default will display
	-- only medial obstruent allophones as semi-voiced.
	local voice = args and args.voice or ""
	if voice ~= "" then
		voice = parseBoolean(voice)
	end

	local outSeq = {}
	local config = {
		["outSeq"] = outSeq,
		["enunciate"] = enunciate,
		["liaison"] = liaison,
		["noHints"] = noHints,
		["voice"] = voice
	}

	-- ipairs keeps the output in the same order as the input sequence.
	for _, text in ipairs(inSeq) do
		text = string.gsub(text, "[%s%-]+", " ")
		text = fastTrim(text)
		local isRalik = dialect == "ralik"
		if isRalik or dialect == "ratak" then
			text = toPhoneticDialect(text, config, isRalik)
			toPhoneticRemainder(text, config)
		else
			local ralik = toPhoneticDialect(text, config, true)
			local ratak = toPhoneticDialect(text, config, false)
			-- If both dialect reflexes are the same, display only one of them.
			toPhoneticRemainder(ralik, config)
			if ralik ~= ratak then
				toPhoneticRemainder(ratak, config)
			end
		end
	end

	return outSeq

end

-- Applies the dialect-specific rewrites to one pseudo-X-SAMPA string.
-- text:    string to rewrite.
-- config:  accepted for symmetry with toPhoneticRemainder; not read here.
-- isRalik: truthy for the Rālik reflexes, falsy for the Ratak reflexes.
-- Returns the rewritten string, with protected {I} turned back into {i}.
toPhoneticDialect = function(text, config, isRalik)

	-- To streamline morpheme-initial regular expressions.
	text = "\t"..text

	-- Morphemes can begin with geminated consonants, but spoken words cannot.
	text = string.gsub(text, "([\tjGw] *)(.[jGw])( *)%2( *)([aEeIi])",
		function(prefix, conson, gapA, gapB, vowel)
			-- Pick the vowel that is copied in front of the geminate.
			local copyVowel = vowel
			if vowel == "I" then
				copyVowel = "i"
			elseif
				vowel == "a" and
				conson ~= "hG"
			then
				copyVowel = "E"
			end
			if isRalik then
				return prefix.."hj"..copyVowel..conson..gapA..conson..gapB..vowel
			elseif conson == "hw" then
				return prefix..conson..copyVowel..conson..gapA..conson..gapB..vowel
			else
				return prefix..conson..copyVowel..gapA..conson..gapB..vowel
			end
		end
	)

	-- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
	-- To block this in the template argument, use "'i" instead of "i".
	if isRalik then
		-- Rālik {wiwV-} becomes {yiwV-}.
		text = string.gsub(text, "([\tjGw] *h)w( *i *hw *[aEeIi])", "%1j%2")
	end
	-- {[yw]iwV-} becomes {[yw]iwwV-} in both dialects.
	text = string.gsub(text, "([\tjGw] *h[jw] *i *hw)( *[aEeIi])", "%1hw%2")
	-- {yiyV-} sequences
	text = string.gsub(text,
		"([\tjGw] *)hj( *)i( *)hj( *[aEeIi])",
		isRalik and "%1Yj%2%3%4" or "%1yj%2%3%4"
	)

	-- No longer need initial "\t".
	text = string.sub(text, 2)

	-- Don't need to protect {i} anymore.
	text = string.gsub(text, "I", "i")

	return text

end

-- Runs the dialect-independent part of the phonetic conversion on one
-- pseudo-X-SAMPA string and appends the resulting IPA pronunciations to
-- config.outSeq (unique entries only).
-- code:   string as returned by toPhoneticDialect.
-- config: table with the fields enunciate, liaison, noHints and outSeq.
--         config.voice is not read by this function.
-- Returns nothing.
-- The steps are a chain of string rewrites, and each relies on the markers
-- left behind by the earlier ones, so their order matters.
toPhoneticRemainder = function(code, config)

	-- "\n" bookends pronunciations of full terms.
	-- "\t" bookends prosodic breaks within pronunciations.
	local text = "\n\t"..code.."\t\n"
	local oldText

	-- Handle pseudoconsonants and phrases that begin or end with bare vowels.
	local hasLeftVowel = string.find(code, "^_")
	if not hasLeftVowel then
		hasLeftVowel = string.find(code, "^[aEei]")
		if hasLeftVowel then
			-- One copy of the term for each possible pseudoconsonant.
			text = string.gsub(
				text,
				"\n\t".."([aEei][^\t]*)".."\t\n",
				"\n\t".."_j%1".."\t\n"..
				"\n\t".."_G%1".."\t\n"..
				"\n\t".."_w%1".."\t\n"
			)
		end
	end
	local hasRightVowel = string.find(code, "_.$")
	if not hasRightVowel then
		hasRightVowel = string.find(code, "[aEei]$")
		if hasRightVowel then
			text = string.gsub(
				text,
				"\n\t".."([^\t]-[aEei])".."\t\n",
				"\n\t".."%1_j".."\t\n"..
				"\n\t".."%1_G".."\t\n"..
				"\n\t".."%1_w".."\t\n"
			)
		end
	end
	local hasEdgeVowel = hasLeftVowel or hasRightVowel
	if hasEdgeVowel then
		text = string.gsub(text, "/", "\t\t")
	end

	local enunciate = config.enunciate
	local liaison  = config.liaison
	local noHints  = config.noHints
	local outSeq   = config.outSeq

	-- Use liaison if we're enunciating.
	liaison = liaison or enunciate

	if enunciate then
		-- Create a prosodic break at consonant clusters.
		text = string.gsub(text, "([jGw]) *(.[jGw])", "%1".."\t\t".."%2")
	end
	-- Per the Marshallese Reference Grammar.
	if false then
		-- Non-phrase-initial {yi'y-} vocalizes to true {yiy}.
		text = string.gsub(text, "([^\t] *)yj", "%1hjihj")
	-- Experimental, to fix the iọkiọkwe problem.
	else
		-- Non-phrase-initial {yi'y-}
		-- vocalizes to true {yiy} at the beginning of a word,
		-- but not in a non-initial position within a word.
		text = string.gsub(text, " yj", " hjihj")
	end

	-- {'yiy} vocalizes contextually.
	do

		-- To {iyy} after a consonant.
		if not enunciate then
			text = string.gsub(text, "([jGw] *)Yj", "%1ihjhj")
		end

		-- To {yiyy} everywhere else.
		text = string.gsub(
			text, "Yj", enunciate and ("hjihj".."\t\t".."hj") or "hjihjhj"
		)

	end

	-- Mid-vowel harmony assimilation across semiconsonants.
	do

		-- Always {e-a}, never {ẹ-a}.
		text = string.gsub(text, "e([ hjGw]*a)", "E%1")

		-- Always {ẹ-i}, never {e-i}.
		text = string.gsub(text, "E([ hjGw]*i)", "e%1")

		-- Always {e-e} and {ẹ-ẹ}, never {e-ẹ} or {ẹ-e}.
		text = string.gsub(text, "[Ee][ hjGw]*[Ee][ hjGwEe]*",
			function(match)
				-- NOTE(review): the harmonizing vowel is taken from the
				-- last mid vowel of the whole working string (text), not
				-- of this match. Kept as in the original; confirm whether
				-- `match` was intended.
				local index = string.find(text, "[Ee][^Ee]*$")
				local vowel = string.sub(text, index, index)
				match = string.gsub(match, "[Ee]", vowel)
				return match
			end
		)

	end

	-- Detect and mark stressed syllables, but not if this term is an affix.
	if not hasEdgeVowel then

		-- Temporarily mark the end of the term's bookend as stressed.
		text = string.gsub(text, "(\t[\t\n])", "\"%1")

		-- Temporarily mark all natural syllables as unstressed.
		text = string.gsub(text, "(.[jGw] *[aEei])", ",%1")

		-- Recursively place stress before each CVC, CVCV and CVCCV sequence.
		text = string_gsubx(
			text,
			",("..
				".[jGw] *[aEei] *[ptkmnNrlh]?[jGw]? *"..
				",?"..
				".[jGw] *[aEei]? *"..
				"\"[^\t]*\t"..
			")",
			"\"%1"
		)
		-- Remove dangling syllable markers from the term's bookends.
		text = string.gsub(text, " *\"? *\t *,? *", "\t")
		-- Remove all unstressed syllable markers.
		text = string.gsub(text, ",", "")
		if not enunciate then
			-- Restore unstressed syllable markers
			-- only within consonant clusters that are not already stressed.
			-- These will be removed again later anyway.
			text = string.gsub(text, "([jGw] *)(.[jGw])", "%1,%2")
		end
		-- If there is more than one stressed syllable,
		-- then mark the penultimate stressed syllable as primarily stressed,
		-- and the others as secondarily stressed.
		if string.find(text, "\"[^\"\t]*\"[^\t]*\t") then
			text = string.gsub(text, "\"", "%%")
			text = string.gsub(text, "%%([^%%\t]*%%[^%%\t]*\t)", "\"%1")
		end

	end

	-- Mark full vowels as syllabic.
	text = string.gsub(text, "([aEei])", "%1=")

	-- The vowel charts (toF1, fromF1, toF2, fromF2, fromF2Conson) are read
	-- from here on, starting with the cluster callback below, so build
	-- them before that callback can run.
	needVowelCharts()

	if not enunciate then

		-- Tag consonant clusters for the next operation.
		oldText = text
		text = string.gsub(text, "(.[jGw])( *[\"%%,]?.[jGw])", "%1/%2")
		needClusterTypes()
		-- Process unstable and assimilating consonant clusters.
		if oldText ~= text then
			text = string_gsub2(
				text,
				"([aEei])(= *[\"%%,]?)(.)([jGw])/"..
				"( *[\"%%,]?)(.)([jGw])( *)([aEei])",
				function(
					vowelL, gapL, primaryL, secondaryL,
					gapM, primaryR, secondaryR, gapR, vowelR
				)
					local vowelE = ""
					local markE = ""
					local cluster = clusterTypes[primaryL][primaryR][secondaryR]
					if cluster == EPENTH_CLUSTER then
						-- An epenthetic vowel will be inserted.
						if primaryL == "h" then
							-- If the first consonant is a semiconsonant,
							-- then copy the vowel on the left.
							vowelE = vowelL
						elseif primaryR == "h" then
							-- If the first consonant is a full consonant
							-- but the second consonant is a semiconsonant,
							-- then copy the vowel on the right.
							vowelE = vowelR
						elseif primaryR == "y" then
							-- If the first consonant is a full consonant
							-- but the second consonant is {yi'y},
							-- then the epenthetic vowel is {i},
							-- and the second consonant becomes plain {y}.
							vowelE = "i"
							primaryR = "h"
						else
							-- If neither consonant is a semiconsonant,
							-- then the epenthetic vowel has an F1
							-- that is the maximum of
							-- the two neighboring vowels and {e}.
							vowelE = fromF1[math.max(
								toF1[vowelL],
								toF1[vowelR],
								toF1["E"]
							)]
						end
						markE = "@"
					else
						-- No epenthetic vowel.
						if cluster == ASSIM_CLUSTER then
							-- Regressive primary assimilation.
							primaryL = primaryR
						end
						if	secondaryL == "w" and
							primaryR ~= "t"
						then
							-- Progressive secondary assimilation.
							-- But there is no {tʷ} in Marshallese.
							secondaryR = secondaryL
						else
							-- Regressive secondary assimilation.
							secondaryL = secondaryR
						end
					end
					return (
						vowelL..gapL..primaryL..secondaryL..vowelE..markE..
						gapM..primaryR..secondaryR..gapR..vowelR
					)
				end
			)
		end

	end

	-- Give a default F2 to vowels,
	-- averaging the F2 of their two neighboring consonants.
	-- This can also create transitional vowels whose F2
	-- have no direct counterparts with consonant secondary articulation.
	text = string_gsub2(text, "([jGw])( *.)([=@] *[\"%%,]?.)([jGw])",
		function(secondaryL, vowelPart, tailPart, secondaryR)
			return (
				secondaryL..vowelPart..lerpF2(secondaryL, secondaryR)..
				tailPart..secondaryR
			)
		end
	)
	-- Unconditionally surface semiconsonants in complete isolation.
	oldText = text
	text = string.gsub(text, "\t *h(.) *\t", "\tH%1\t")
	-- If the term contains any other semiconsonants...
	if	oldText == text and
		string.find(text, "h")
	then
		local hasVG = false
		local hasGV  = false
		local hasVGV = false
		-- Give unsurfaced semiconsonants a surface F1
		-- matching the vowels on their left.
		text = string.gsub(text, "([aEei])(.[=@] *[\"%%,]?)h(.)",
			function(vowelF1, gap, secondary)
				hasVG = true
				return vowelF1..gap..vowelF1..fromF2[toF2[secondary]].."^"
			end
		)
		-- Adjust the F1 of surfaced semiconsonants
		-- according to the vowels on their right.
		-- To the maximum of the vowel if {y} or {w}.
		-- To the minimum of the vowel if {h}.
		if hasVG then
			text = string.gsub(text, "(.)(.)(%^ *)([aEei])",
				function(semiF1, semiF2, gap, vowelF1)
					hasGV = true
					hasVGV = true
					local fn = semiF2 == "3" and math.min or math.max
					return (
						fromF1[fn(toF1[semiF1], toF1[vowelF1])]..
						semiF2..gap..vowelF1
					)
				end
			)
		end
		-- Give remaining unsurfaced semiconsonants a surface F1
		-- matching the vowels on their right.
		text = string.gsub(text, "h(.)( *)([aEei])",
			function(secondary, gap, vowelF1)
				hasGV = true
				return vowelF1..fromF2[toF2[secondary]].."^"..gap..vowelF1
			end
		)
		if not enunciate then
			-- If a vowel comes before a semiconsonant of the same F1,
			-- then change the vowel's F2 to match the semiconsonant.
			if hasVG then
				text = string.gsub(
					text, "(.).([=@] *[\"%%,]?)%1(.)", "%1%3%2%1%3"
				)
			end
			-- If a non-open vowel comes after {y} of the same F1
			-- and before a velarized full consonant,
			-- then change the vowel's F2 to match the {y}.
			if hasGV then
				text = string.gsub(
					text,
					"([Eei])(1)(%^ *)%1.([=@] *[\"%%,]?[ptkmnNrl]G)",
					"%1%2%3%1%2%4"
				)
			end
			-- If a non-open vowel comes after {y} of the same F1
			-- and before a syllable stress boundary,
			-- then change the vowel's F2 to match the {y}.
			if hasGV then
				text = string.gsub(
					text, "([Eei])(1)(%^ *)%1.([=@] *[\"%%,])", "%1%2%3%1%2%4"
				)
			end
			-- If {a} comes after {y} of the same F1 after a stressed vowel,
			-- then change the vowel's F2 to match the {y}.
			if hasVGV then
				text = string.gsub(text, "(= *a)(1)(%^ *a).", "%1%2%3%2")
			end
			-- If a vowel comes after {w} of the same F1 after a stressed vowel,
			-- then change the vowel's F2 to match the {w}.
			if hasVGV then
				text = string_gsub2(
					text, "(= *)(.)(5)(%^ *)%2.", "%1%2%3%4%2%3"
				)
			end
			-- If a vowel comes after {h}...
			if hasGV then
				text = string.gsub(
					text, "(.)(3)(%^ *)(.).([=@] *[\"%%,]?.)([jw15])",
					function(semiF1, semiF2, gap, vowelF1, tailPart, secondary)
						local vowelF2
						if semiF1 == vowelF1 then
							-- If they have the same F1,
							-- then change the vowel's F2 to match the {h}.
							vowelF2 = semiF2
						else
							-- If they do not have the same F1,
							-- then reset the vowel's F2.
							vowelF2 = lerpF2(semiF2, secondary)
						end
						return (
							semiF1..semiF2..gap..vowelF1..vowelF2..
							tailPart..secondary
						)
					end
				)
			end
			-- If a vowel comes after {y} or {w}
			-- at the beginning of a prosodic unit
			-- and before a stress boundary
			-- before a semiconsonant and another vowel
			-- that have the same F2 as each other
			-- and both have the same F1 as the first vowel,
			-- then change the first vowel's F2 to match.
			if hasVGV then
				text = string.gsub(
					text,
					"\t *(.[15]%^ *)(.).([=@] *[\"%%,])%2(.)(%^ *)%2%4",
					"\t%1%2%4%3%2%4%5%2%4"
				)
			end
		end
		-- Unsurface {h} everywhere.
		text = string.gsub(text, ".3%^", "hG")
		-- Unsurface semiconsonants that can coalesce
		-- with either of their neighboring vowels,
		-- but not crossing syllable stress boundaries.
		if hasGV then
			text = string.gsub(text, "(.)(.)%^( *)%1%2",
				function(vowelF1, vowelF2, gap)
					return (
						"h"..fromF2Conson[toF2[vowelF2]]..
						gap..vowelF1..vowelF2
					)
				end
			)
		end
		if hasVG then
			text = string.gsub(text, "(.)(.)(= *)%1%2%^",
				function(vowelF1, vowelF2, gap)
					return (
						vowelF1..vowelF2..gap..
						"h"..fromF2Conson[toF2[vowelF2]]
					)
				end
			)
		end
		-- Adjust the F1 of remaining surfaced {y} and {w}.
		text = string.gsub(text, "(.)([15])%^",
			function(semiF1, semiF2)
				if semiF2 == "1" then
					if semiF1 == "a" then
						semiF1 = "E"
					end
				else
					-- semiF2 == "5"
					semiF1 = "i"
				end
				return semiF1..semiF2.."^"
			end
		)
		-- Delete remaining unsurfaced semiconsonants altogether.
		text = string.gsub(text, "h.", "")
		if hasVGV and not enunciate then
			-- Indicate certain long monophthongs as geminated.
			text = string.gsub(text, "([aEei].)[=@]( *)%1[=@]", "%1=%2:")
			text = string.gsub(
				text, "([aEei].)[=@]( *[\"%%,])%1[=@]([^:])", "%1=%2:%3"
			)
		end
		-- If a weakened semiconsonant falls on a stressed syllable
		-- before a vowel with the same F2,
		-- then shift forward the stress marker.
		-- Returning nothing from the callback leaves the match untouched.
		text = string.gsub(text, "([\"%%,])0(.)( *[aEei])(.)",
			function(stress, semiF2, gap, vowelF2)
				if toF2[semiF2] == toF2[vowelF2] then
					return "0"..semiF2..stress..gap..vowelF2
				end
			end
		)
	end
	-- Neutralize the difference between full and epenthetic vowels.
	text = string.gsub(text, "[=@]", "")
	-- Simplify secondary articulation of consonant clusters.
	text = string.gsub(text, "([jGw])( *[\"%%,]?.)%1", "%2%1")

	-- Partially voice obstruents before vowels at the beginning of a phrase or
	-- in consonant clusters after other obstruents or laterals.
	text = string.gsub(text, "([ptkl\t] *[\"%%,]?)([ptk])(. *[aEei])",
		function(before, primary, after)
			return before..string.upper(primary)..after
		end
	)
	voicedPrimaries = voicedPrimaries or {
		["p"] = "b", ["t"] = "d", ["k"] = "g"
	}
	-- Voice remaining obstruents before vowels.
	text = string.gsub(text, "([ptk])(. *%(?[aEei])", function(primary, after)
		return voicedPrimaries[primary]..after
	end)
	if hasEdgeVowel then
		if noHints then
			-- Strip pseudoglides.
			text = string.gsub(text, "_.", "")
		elseif hasLeftVowel then
			-- Reverse text of left pseudoglide.
			text = string.gsub(text, "\t *_(.)", "\t%1_")
		end
	end

	if liaison then

		-- Remove whitespace from bookends.
		text = string.gsub(text, " *\t *", "\t")

		-- Prepare liaisons.
		text = string.gsub(text, "[ _]+", "_")

	else

		-- Strip liaisons.
		text = string.gsub(text, " ", "")

	end

	if enunciate then
		-- Convert bookends to spaces.
		text = string.gsub(text, "\t+", " ")
	end

	needPhoneticMap()

	-- Convert pseudo-X-SAMPA to phonetic IPA.
	text = string.gsub(text, ".[jGw1-5]?%^?", phoneticMap)

	-- Output unique pronunciations.
	-- gsub is used only to visit every "\n"-delimited term.
	string.gsub(text, "\n[^\n]*\n", function(result)
		addUnique(outSeq, fastTrim(result))
		return ""
	end)

end

-- Publish the internal conversion functions under underscore-prefixed names.
for name, fn in pairs({
	_parse = parse,
	_toBender = toBender,
	_toMOD = toMOD,
	_toPhonemic = toPhonemic,
	_toPhonetic = toPhonetic
}) do
	export[name] = fn
end

-- Entry point: converts frame.args[1] to Bender-style orthography.
-- frame.args is passed to toBender as its options table, so that
-- frame.args.version can select the spelling variant.
-- Returns the converted spellings joined with ", ".
function export.bender(frame)
	-- The options table belongs to toBender. It used to be handed to parse,
	-- which takes a single argument, so the version was silently ignored.
	return table.concat(toBender(parse(frame.args[1]), frame.args), ", ")
end

-- Entry point: passes frame.args[1] through toMOD and returns the result.
function export.MOD(frame)
	local input = frame.args[1]
	return toMOD(input)
end

-- Entry point: parses frame.args[1] and returns the parsed items
-- joined with ", ".
function export.parse(frame)
	local parsed = parse(frame.args[1])
	return table.concat(parsed, ", ")
end

-- Entry point: parses frame.args[1], converts it with toPhonemic and
-- returns the results joined with ", ".
function export.phonemic(frame)
	local parsed = parse(frame.args[1])
	local phonemic = toPhonemic(parsed)
	return table.concat(phonemic, ", ")
end

-- Entry point: parses frame.args[1], converts it with toPhonetic
-- (frame.args is passed along as the options table) and returns the
-- results joined with ", ".
function export.phonetic(frame)
	local parsed = parse(frame.args[1])
	local phonetic = toPhonetic(parsed, frame.args)
	return table.concat(phonetic, ", ")
end

-- Hand the table of entry points back to whoever loads this module.
return export