-- Module:User:AmazingJus/mn

-- Table holding every function this module makes available to callers.
local export = {}

local find = mw.ustring.find local gmatch = mw.ustring.gmatch local gsubn = mw.ustring.gsub local lc = mw.ustring.lower local match = mw.ustring.match local strip = mw.text.trim local sub = mw.ustring.sub local u = require("Module:string/char")

local lang = require("Module:languages").getByCode("mn") local sc = require("Module:scripts").getByCode("Cyrl") local mn = require("Module:mn-common") local final_clusters = require("Module:mn/data").syll_final_cons

-- Tag `text` using the module-level language object (code "mn") and script
-- object (code "Cyrl"). `face` is forwarded unchanged to
-- Module:script utilities.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

-- Build a full link for `term` using the module-level language and script
-- objects. `face` is forwarded unchanged to Module:links.
function export.link(term, face)
	local link_data = {
		term = term,
		lang = lang,
		sc = sc,
	}
	return require("Module:links").full_link(link_data, face)
end

-- Wrapper around gsubn (mw.ustring.gsub) that yields only the resulting
-- string and drops the substitution count, so it can be used safely in
-- argument lists and return statements.
local function gsub(term, foo, bar, n)
	-- The extra parentheses truncate the call to exactly one value.
	return (gsubn(term, foo, bar, n))
end

--	Primarily sourced from The Phonology of Mongolian by Jan-Olof Svantesson (2005) --

--[[
	Character sets used to build pattern character classes.
	c, v and y hold Cyrillic letters; u holds IPA (Latin-script) symbols.
--]]
local chars = {
	-- Consonants
	c = "бвгджзклмнпрстфхцчшщ",
	-- All reducible vowels
	v = "аеёиоуэюяөү",
	-- All full vowels (IPA)
	u = "aeiɔoʊu",
	-- All vowels
	y = "аеёиыоуэюяөү",
}

--[[
	Lookup table from combining stress accents (as typed in the input) to
	the IPA stress mark that replaces them.
--]]
local stress = {
	-- Primary stress: combining acute accent -> ˈ
	[u(0x0301)] = u(0x02C8),
	-- Secondary stress: combining grave accent -> ˌ
	[u(0x0300)] = u(0x02CC),
}

--[[
	Individual IPA symbols referenced by the transcription rules.
--]]
-- Long vowel mark (ː)
local long = u(0x02D0)
-- Diphthong mark (combining inverted breve below)
local diphthong = u(0x032F)
-- Primary stress mark (ˈ)
local primary = u(0x02C8)
-- Secondary stress mark (ˌ)
local secondary = u(0x02CC)

--[[
	Lookup tables from letters (or intermediate symbols) to their
	respective representations.
--]]
local mapping = {
	-- Consonants (phonemic). Keys ending in an asterisk are the marked
	-- variants of г and н.
	cons = {
		["б"] = "p",
		["в"] = "w",
		["г"] = "ɢ",
		["г*"] = "ɡ",
		["д"] = "t",
		["ж"] = "t͡ɕ",
		["з"] = "t͡s",
		["й"] = "i̯",
		["к"] = "kʰ",
		["л"] = "ɮ",
		["м"] = "m",
		["н"] = "n",
		["н*"] = "ŋ",
		["п"] = "pʰ",
		["р"] = "r",
		["с"] = "s",
		["т"] = "tʰ",
		["ф"] = "f",
		["х"] = "x",
		["ц"] = "t͡sʰ",
		["ч"] = "t͡ɕʰ",
		["ш"] = "ʃ",
		["щ"] = "ʃt͡ɕ",
	},

	-- Vowels (phonemic), plus the hard sign (dropped) and the soft sign
	-- (turned into a palatalisation mark).
	vowels = {
		["а"] = "a",
		["е"] = "je",
		["ё"] = "jɔ",
		["и"] = "i",
		["о"] = "ɔ",
		["у"] = "ʊ",
		["э"] = "e",
		["ю"] = "jʊ",
		["я"] = "ja",
		["ө"] = "o",
		["ү"] = "u",
		["ы"] = "i",
		["ъ"] = "",
		["ь"] = "ʲ",
	},

	-- Double vowels (orthographic), kept as an ordered list.
	double = {
		"аа", "ее", "еи", "еө", "ёо", "ий", "оо",
		"уу", "ээ", "юу", "юү", "яа", "өө", "үү",
	},

	-- Consonant allophones (phonemic)
	alloph = {
		["w"] = "w̜",
		["ɡʲ"] = "ɟ",
		["xʲ"] = "ç",
		["x"] = "χ",
	},

	-- Diphthongs (phonetic)
	diph = {
		["ai"] = "æe",
		["ei"] = "e",
		["oi"] = "ɞe",
		["ui"] = "ɵe",
		["üi"] = "ue",
	},
}

--[[
	Look up the position of a vowel.
	Asks mn.vowelharmony about `vowel` and inspects the first entry of the
	result. When there is no first entry (presumably meaning the vowel does
	not take part in vowel harmony -- confirm against Module:mn-common),
	nil is returned; otherwise that entry's `position` field is returned.
--]]
local function get_position(vowel)
	local first_entry = mn.vowelharmony(vowel)[1]
	if not first_entry then
		return nil
	end
	return first_entry.position
end

--[[
	Check whether the two-segment cluster a .. b is valid.
	Any cluster ending in "j" is accepted outright. Otherwise every entry
	of every sub-list in final_clusters is tried, and the first hit makes
	the function return true. If nothing hits, false is returned.

	NOTE(review): the call is match(entry, a .. b), i.e. the table entry is
	the subject string and the cluster is the pattern. Whether that is the
	intended direction depends on the shape of the data in Module:mn/data,
	which is not visible here -- confirm before relying on this function.
--]]
local function is_valid_cluster(a, b)
	-- Any cluster ending in /j/ is valid.
	if b == "j" then
		return true
	end

	for _, group in ipairs(final_clusters) do
		for _, entry in ipairs(group) do
			if match(entry, a .. b) then
				return true
			end
		end
	end

	return false
end

--[[
	Respell the vowels of a single word according to syllable position.
	`word` is expected to be delimited by "#" on both sides (the caller
	marks word borders that way). The function:
	  1. marks palatalisation triggered by и,
	  2. normalises vowel spellings (word-initial э, "~" for a full
	     word-final vowel, orthographic double vowels -> long vowels,
	     word-final г/н marked with "*"),
	  3. splits the word into its initial syllable and the remainder, and
	     in the remainder deletes single reducible vowels and shortens
	     long vowels.
	Returns the respelled word (still in Cyrillic plus helper symbols).
--]]
local function respell_vowels(word)
	-- Handle substitutions to palatalise consonants.
	-- иа, ио, иу are long monophthongs which palatalise the preceding consonant.
	word = gsub(word, "([" .. chars.c .. "])и([аоу])", "%1j%2%2")
	-- Palatalise all other instances of и.
	-- (FIXME: need to consider when converting chars.v into Cyrillic)
	word = gsub(word, "и(й?)", "jи%1")
	-- Only certain consonants are palatalised before the inserted j; after
	-- any other consonant the j is simply dropped.
	word = gsub(word, "([" .. chars.c .. "])j", function(c)
		-- Subject first, pattern second: tests whether c belongs to the set.
		if match(c, "[бвгдлмнпртх]") then
			return c .. "ʲ"
		end
		return c
	end)

	-- Handle vowel respellings.
	-- э word-initially merges with и.
	word = gsub(word, "#э", "#и")
	-- A tilde signifies a vowel's full form word-finally.
	word = gsub(word, "([" .. chars.v .. "])~", "%1" .. long)
	-- Double vowels are long. The digraphs overlap (e.g. "аа" and "яа"), so
	-- they must be applied in list order; ipairs guarantees that order.
	for _, vowel in ipairs(mapping.double) do
		word = gsub(word, vowel, sub(vowel, 1, 1) .. long)
	end
	-- Mark word-final г and н with an asterisk (important for distinguishing
	-- between uvular and alveolar phonemes marked by silent vowels).
	word = gsub(word, "([гн])#", "%1*#")

	-- Handle non-initial syllables: match the initial syllable (up to and
	-- including its first run of vowels) and everything after it.
	local syllable_split = "(#[^" .. chars.y .. "]*[" .. chars.y .. "]+)(.*#)"
	return gsub(word, syllable_split, function(i, non_i)
		-- Remove single vowels (excluding diphthongs and long vowels) in
		-- non-initial syllables.
		non_i = gsub(non_i, "[" .. chars.v .. "]([^" .. long .. "й])", "%1")
		-- Double vowels are actually short in non-initial syllables.
		non_i = gsub(non_i, "([" .. chars.v .. "])" .. long, "%1")
		-- Rejoin the initial syllable with the processed remainder.
		return i .. non_i
	end)
end

--[[
	Convert the consonants of a word to IPA.
	Handles word-initial лх, spreads palatalisation leftwards through a
	consonant run, substitutes every consonant through mapping.cons, and
	finally reorders aspiration and palatalisation marks.
--]]
local function handle_consonants(word)
	-- Disabled rules, kept for reference (note that chars.b is not defined
	-- in the chars table):
	--
	-- Handle further substitutions for uvular consonants.
	--	word = gsub(word, "([" .. chars.b .. "]?)г([^* ])", function(b, c)
	--		if b ~= "" or match(chars.b, c) then
	--			return b .. "г*" .. c -- г is uvular when in contact with back vowels and non-final.
	--		end
	--	end)
	--
	-- н is uvular preceding a vowel or г, ш, х
	--	word = gsub(word, "н([" .. chars.v .. "ыгшх])", "н*%1")

	local any_consonant = "([" .. chars.c .. "])"

	-- Add a palatalisation mark after every consonant of a run.
	local function palatalise_run(run)
		return gsub(run, any_consonant, "%1ʲ")
	end

	-- лх word-initially is a voiceless alveolar lateral fricative (found
	-- initially in some Tibetan loanwords); an optional primary stress mark
	-- directly after the word border is preserved.
	word = gsub(word, "#(" .. primary .. "?)лх", "#%1ɬ")

	-- All consonants before a palatalised consonant are also palatalised.
	word = gsub(word, "([" .. chars.c .. "ə]+)ʲ", palatalise_run)

	-- Substitute consonants (optionally carrying a trailing asterisk) with
	-- their IPA representation.
	word = gsub(word, ".%*?", mapping.cons)

	-- Swap the palatal mark with the aspiration mark.
	return gsub(word, "ʰʲ", "ʲʰ")
end

--[[
	Determine primary stress for any given word.
	According to https://roa.rutgers.edu/files/172-0197/172-0197-WALKER-0-1.PDF,
	any rightmost non-final heavy syllable is stressed.
	If the final syllable is heavy but all previous syllables are non-heavy,
	that syllable is stressed. Otherwise, the first syllable is stressed.

	`word` is a "#"-delimited word whose vowels are already in IPA while its
	consonants are still Cyrillic. Returns the word with the primary stress
	mark inserted before the onset consonant of the stressed syllable.
--]]
local function add_stress(word)
	if match(word, u(0x0301)) then
		-- No need to determine stress if a primary stress mark is present:
		-- just convert the typed accents to IPA stress marks.
		word = gsub(word, ".", stress)
	else
		local full_vowel = "[" .. chars.u .. "]"
		-- Initial syllable (through its first run of full vowels plus an
		-- optional й or length mark), followed by the rest of the word.
		local syllable_split = "(#[^" .. chars.u .. "]*" .. full_vowel .. "+[й" .. long .. "]?)(.*#)"

		word = gsub(word, syllable_split, function(i, non_i)
			-- Count the number of full vowels outside the initial syllable.
			local _, non_i_count = gsubn(non_i, full_vowel, "")

			-- The initial syllable counts as heavy when it contains a length
			-- mark or й. The subject string goes first and the pattern
			-- second; with the arguments the other way round this test
			-- could never succeed.
			local initial_is_heavy = match(i, "[" .. long .. "й]")

			if non_i_count == 0 or (non_i_count == 1 and initial_is_heavy) then
				-- Stress the initial syllable if there is no full vowel
				-- after it, or if it is heavy and only one full vowel follows.
				return gsub(i, "#", "#" .. primary) .. non_i
			end

			-- Find the last instance of a full vowel and add primary stress
			-- before the vowel.
			non_i = gsub(non_i, "(.*)(" .. full_vowel .. ")", "%1" .. primary .. "%2")
			-- However, if there is a full vowel in a previous syllable,
			-- shift the stress there instead.
			non_i = gsub(non_i, "(.*)(" .. full_vowel .. ".*)" .. primary, "%1" .. primary .. "%2")
			return i .. non_i
		end)
	end

	-- Shift the stress before the consonant of the stressed syllable.
	return gsub(word, "([" .. chars.c .. "][^" .. chars.c .. "]*)" .. primary, primary .. "%1")
end

--[[
	Intended to add reduced vowels where there is no valid consonant
	cluster: look at consonant sequences of two or more characters, check
	whether they form a valid cluster, and insert a schwa after the first
	consonant when they do not.

	Not implemented yet: the word is currently handed back unchanged.
--]]
local function add_reduced(word)
	local unchanged = word
	return unchanged
end

--[[
	Transcribe the term phonemically into IPA.
	Every "#"-delimited word in `term` is run through the transcription
	pipeline; text outside the delimiters is left as it is.
--]]
local function pron_m(term)
	-- Pipeline applied to one word (including its surrounding # marks).
	local function transcribe_word(word)
		-- Handle each vowel based on its syllable position.
		local respelled = respell_vowels(word)
		-- Add reduced vowels based on Mongolian phonological rules.
		local with_reduced = add_reduced(respelled)
		-- Substitute vowels with their IPA representation.
		local with_ipa_vowels = gsub(with_reduced, ".", mapping.vowels)
		-- Add stress marks to the word.
		local stressed = add_stress(with_ipa_vowels)
		-- Handle consonants.
		local transcribed = handle_consonants(stressed)
		return transcribed
	end

	-- Match every word.
	return gsub(term, "(#[^#]*#)", transcribe_word)
end

--[[
	Main function for the module.
	Accepts either a string or a table carrying the string in args[1]
	(presumably a Scribunto frame -- confirm against callers) and returns
	the phonemic IPA transcription.
--]]
function export.toIPA(term)
	-- When given a table, take the first positional argument as the input.
	if type(term) == "table" then
		term = term.args[1]
	end

	-- Lowercase the text, then wrap every space-separated word in # marks.
	local marked = gsub(lc(term), "([^ ]+)", "#%1#")
	-- Get the phonemic transcription.
	local transcribed = pron_m(marked)
	-- Remove all instances of #.
	return gsub(transcribed, "#", "")
end

-- Hand the table of exported functions back to whoever loads this module.
return export