-- Module:sa-pronunc/sandbox2

-- Table of public functions returned at the end of the module.
local export = {}

-- Short aliases for the Unicode-aware string helpers used throughout.
local u = mw.ustring.char
local gsub = mw.ustring.gsub

-- Combining marks, built from their code points.
local HIGH      = u(0x0301) -- COMBINING ACUTE ACCENT
local RISING    = u(0x030C) -- COMBINING CARON
local LOW       = u(0x0300) -- COMBINING GRAVE ACCENT
local PEAKING   = u(0x1DC8) -- COMBINING GRAVE-ACUTE-GRAVE
local FALLING   = u(0x0302) -- COMBINING CIRCUMFLEX ACCENT
local COARTIC   = u(0x0361) -- COMBINING DOUBLE INVERTED BREVE
local DENTAL    = u(0x032A) -- COMBINING BRIDGE BELOW
local FLAP      = u(0x0306) -- COMBINING BREVE
local NORELEASE = u(0x031A) -- COMBINING LEFT ANGLE ABOVE
local SYLLABIC  = u(0x0329) -- COMBINING VERTICAL LINE BELOW
local NASAL     = u(0x0303) -- COMBINING TILDE

local m_IPA = require("Module:IPA") local lang = require("Module:languages").getByCode("sa") local m_a = require("Module:accent qualifier")

-- Devanagari consonant letters and the IPA string each one contributes.
local consonants = {
	-- velar row
	["क"] = "k", ["ख"] = "kʰ", ["ग"] = "ɡ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
	-- palatal row
	["च"] = "c", ["छ"] = "cʰ", ["ज"] = "ɟ", ["झ"] = "ɟʱ", ["ञ"] = "ɲ",
	-- retroflex row
	["ट"] = "ʈ", ["ठ"] = "ʈʰ", ["ड"] = "ɖ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
	-- dental row
	["त"] = "t̪", ["थ"] = "t̪ʰ", ["द"] = "d̪", ["ध"] = "d̪ʱ", ["न"] = "n̪",
	-- labial row
	["प"] = "p", ["फ"] = "pʰ", ["ब"] = "b", ["भ"] = "bʱ", ["म"] = "m",
	-- semivowels and liquids
	["य"] = "j", ["र"] = "ɽ", ["ल"] = "l̪", ["व"] = "w",
	["ळ"] = "ɭ̆", ["ळ्ह"] = "ɭ̆ʱ",
	-- sibilants and h
	["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s̪", ["ह"] = "ɦ",
}

-- Dependent vowel signs and the IPA vowel each one stands for.
-- The virama maps to the empty string: it contributes no sound.
local diacritics = {
	["ा"] = "ɑː",
	["ि"] = "i",
	["ी"] = "iː",
	["ु"] = "u",
	["ू"] = "uː",
	["ृ"] = "r̩",
	["ॄ"] = "r̩ː",
	["ॢ"] = "l̩",
	["ॣ"] = "l̩ː",
	["े"] = "ɐɪ",
	["ै"] = "ɑːɪ",
	["ो"] = "ɐʊ",
	["ौ"] = "ɑːʊ",
	["्"] = "",
}

-- Set of IPA strings that count as a syllable nucleus.
local vowel_list = {}
for _, vowel in ipairs({
	"ɐ", "ɑː",
	"i", "iː",
	"u", "uː",
	"r̩", "r̩ː",
	"l̩", "l̩ː",
	"ɐɪ", "ɑːɪ",
	"ɐʊ", "ɑːʊ",
}) do
	vowel_list[vowel] = true
end

-- Set of IPA strings treated as stops when splitting consonant clusters.
local stop_list = {}
for _, stop in ipairs({
	"k", "kʰ", "ɡ", "ɡʱ",
	"c", "cʰ", "ɟ", "ɟʱ",
	"t̪", "t̪ʰ", "d̪", "d̪ʱ",
	"ʈ", "ʈʰ", "ɖ", "ɖʱ",
	"p", "pʰ", "b", "bʱ",
}) do
	stop_list[stop] = true
end

-- Sonority rank of every consonant phoneme; a larger number means more sonorous.
local consonant_sonority = {
	-- voiceless stops and affricates
	["k"] = 1, ["kʰ"] = 1,
	["c"] = 1, ["cʰ"] = 1,
	["t̪"] = 1, ["t̪ʰ"] = 1,
	["ʈ"] = 1, ["ʈʰ"] = 1,
	["p"] = 1, ["pʰ"] = 1,

	-- voiceless fricatives
	["ɕ"] = 2, ["ʂ"] = 2, ["s̪"] = 2,
	["h"] = 2, ["x"] = 2, ["ɸ"] = 2,

	-- voiced stops and affricates
	["ɡ"] = 3, ["ɡʱ"] = 3,
	["ɟ"] = 3, ["ɟʱ"] = 3,
	["d̪"] = 3, ["d̪ʱ"] = 3,
	["ɖ"] = 3, ["ɖʱ"] = 3,
	["b"] = 3, ["bʱ"] = 3,

	-- voiced fricatives
	["ɦ"] = 4,

	-- nasals
	["ŋ"] = 5, ["ɲ"] = 5, ["n̪"] = 5, ["ɳ"] = 5,
	["m"] = 5, ["m̐"] = 5, ["ṃ"] = 5,

	-- flaps
	["ɽ"] = 6,

	-- laterals
	["l̪"] = 7, ["ɭ̆"] = 7, ["ɭ̆ʱ"] = 7,

	-- glides
	["j"] = 8, ["w"] = 8,
}

-- Independent vowel letters and other signs that are not consonants or
-- dependent vowel signs, with the IPA string each one contributes.
local tt = {
	-- vowels
	["अ"] = "ɐ",
	["आ"] = "ɑː",
	["इ"] = "i",
	["ई"] = "iː",
	["उ"] = "u",
	["ऊ"] = "uː",
	["ऋ"] = "r̩",
	["ॠ"] = "r̩ː",
	["ऌ"] = "l̩",
	["ॡ"] = "l̩ː",
	["ए"] = "ɐɪ",
	["ऐ"] = "ɑːɪ",
	["ओ"] = "ɐʊ",
	["औ"] = "ɑːʊ",

	-- visarga
	["ः"] = "h",

	-- chandrabindu
	["ँ"] = "m̐",

	-- anusvara
	["ं"] = "ṃ",

	-- avagraha (contributes no sound)
	["ऽ"] = "",

	-- Vedic extensions
	["ᳵ"] = "x",
	["ᳶ"] = "ɸ",
}

-- Each vowel with the RISING mark placed directly after its base letter
-- (so before any syllabicity mark, length mark or off-glide).
local rising_vowel = {
	["ɐ"]   = "ɐ" .. RISING,
	["ɑː"]  = "ɑ" .. RISING .. "ː",
	["i"]   = "i" .. RISING,
	["iː"]  = "i" .. RISING .. "ː",
	["u"]   = "u" .. RISING,
	["uː"]  = "u" .. RISING .. "ː",
	["r̩"]   = "r" .. RISING .. "̩",
	["r̩ː"]  = "r" .. RISING .. "̩ː",
	["l̩"]   = "l" .. RISING .. "̩",
	["l̩ː"]  = "l" .. RISING .. "̩ː",
	["ɐɪ"]  = "ɐ" .. RISING .. "ɪ",
	["ɑːɪ"] = "ɑ" .. RISING .. "ːɪ",
	["ɐʊ"]  = "ɐ" .. RISING .. "ʊ",
	["ɑːʊ"] = "ɑ" .. RISING .. "ːʊ",
}

-- Each vowel with the LOW mark placed directly after its base letter
-- (so before any syllabicity mark, length mark or off-glide).
local low_vowel = {
	["ɐ"]   = "ɐ" .. LOW,
	["ɑː"]  = "ɑ" .. LOW .. "ː",
	["i"]   = "i" .. LOW,
	["iː"]  = "i" .. LOW .. "ː",
	["u"]   = "u" .. LOW,
	["uː"]  = "u" .. LOW .. "ː",
	["r̩"]   = "r" .. LOW .. "̩",
	["r̩ː"]  = "r" .. LOW .. "̩ː",
	["l̩"]   = "l" .. LOW .. "̩",
	["l̩ː"]  = "l" .. LOW .. "̩ː",
	["ɐɪ"]  = "ɐ" .. LOW .. "ɪ",
	["ɑːɪ"] = "ɑ" .. LOW .. "ːɪ",
	["ɐʊ"]  = "ɐ" .. LOW .. "ʊ",
	["ɑːʊ"] = "ɑ" .. LOW .. "ːʊ",
}

-- Each vowel with the PEAKING mark placed directly after its base letter
-- (so before any syllabicity mark, length mark or off-glide).
local peaking_vowel = {
	["ɐ"]   = "ɐ" .. PEAKING,
	["ɑː"]  = "ɑ" .. PEAKING .. "ː",
	["i"]   = "i" .. PEAKING,
	["iː"]  = "i" .. PEAKING .. "ː",
	["u"]   = "u" .. PEAKING,
	["uː"]  = "u" .. PEAKING .. "ː",
	["r̩"]   = "r" .. PEAKING .. "̩",
	["r̩ː"]  = "r" .. PEAKING .. "̩ː",
	["l̩"]   = "l" .. PEAKING .. "̩",
	["l̩ː"]  = "l" .. PEAKING .. "̩ː",
	["ɐɪ"]  = "ɐ" .. PEAKING .. "ɪ",
	["ɑːɪ"] = "ɑ" .. PEAKING .. "ːɪ",
	["ɐʊ"]  = "ɐ" .. PEAKING .. "ʊ",
	["ɑːʊ"] = "ɑ" .. PEAKING .. "ːʊ",
}

--- Move leading consonants of each non-initial syllable into the coda of the
-- preceding syllable (Weerasinghe-Wasala-Gamage method, per the original note).
-- Each syllable is an array of phonemes ending in its vowel, so a syllable of
-- length n starts with n - 1 consonants.
-- @param syllables array of phoneme arrays; modified in place
-- @return the same `syllables` table
local function shift_to_codas(syllables)
	for idx, syll in ipairs(syllables) do
		local size = #syll
		local shift = 0

		-- The first syllable has nowhere to shift to, and a single onset
		-- consonant (size < 3) always stays where it is.
		if idx > 1 and size >= 3 then
			local prevocalic = syll[size - 1]
			local before_glide = prevocalic == "ɽ" or prevocalic == "j"

			if size == 3 then
				-- V.CCV => VC.CV
				shift = 1
			elseif size == 4 then
				-- V.CCrV or V.CCyV => VC.CrV or VC.CyV;
				-- also VC.CCV when the first two consonants are both stops;
				-- otherwise V.CCCV => VCC.CV
				if before_glide or (stop_list[syll[1]] and stop_list[syll[2]]) then
					shift = 1
				else
					shift = 2
				end
			elseif before_glide then
				-- four or more consonants ending in ɽ / j
				shift = size - 3
			else
				-- four or more consonants: scan from the vowel backwards for
				-- the consonant of least sonority and shift up to its index
				shift = size - 1
				local lowest = consonant_sonority[syll[size - 1]]
				for pos = size - 1, 1, -1 do
					local sonority = consonant_sonority[syll[pos]]
					if sonority < lowest then
						shift = pos
						lowest = sonority
					end
				end
			end
		end

		for _ = 1, shift do
			table.insert(syllables[idx - 1], table.remove(syll, 1))
		end
	end
	return syllables
end

-- Picks the syllable that receives the Classical stress mark.
-- Returns its index, or nil for words of fewer than two syllables.
local function stressed_syllable(syllables)
	-- A syllable is light when its last segment is a short vowel. The tone
	-- tables put the tone mark *before* the syllabicity mark, so the marks are
	-- accepted in any order rather than only as SYLLABIC-then-tone.
	local short_vowel_patt = "^[ɐiurl][" .. SYLLABIC .. RISING .. LOW .. PEAKING .. "]*$"
	local function is_light(idx)
		local syll = syllables[idx]
		return mw.ustring.match(syll[#syll], short_vowel_patt) ~= nil
	end

	local count = #syllables
	if count < 2 then
		return nil
	elseif count == 2 then
		return 1
	elseif count == 3 then
		-- heavy second syllable takes the stress, otherwise the first
		-- (the third from the end)
		if not is_light(2) then
			return 2
		end
		return 1
	end

	-- four or more syllables: the penult if heavy, else the antepenult if
	-- heavy, else the fourth syllable from the end
	if not is_light(count - 1) then
		return count - 1
	elseif not is_light(count - 2) then
		return count - 2
	end
	return count - 3
end

--- Group a phoneme sequence into syllables, apply pitch-accent marks and the
-- Classical stress mark, and join everything into one string.
-- @param remainder array of phoneme strings (read only; it is no longer
--        emptied as a side effect)
-- @param accent 1-based index of the accented syllable, or nil for none; a
--        value that does not name an existing syllable is ignored
-- @return syllables joined with "."; the stressed syllable starts with 'ˈ'
local function syllabify(remainder, accent)
	local syllables = {}
	local pending = {}
	for _, phoneme in ipairs(remainder) do
		table.insert(pending, phoneme)
		if vowel_list[phoneme] then
			-- a vowel closes the current syllable
			table.insert(syllables, pending)
			pending = {}
		end
	end
	-- whatever consonants remain after the last vowel
	local final_cons = pending

	-- Vedic pitch accent. Looking the syllable up (instead of only checking
	-- the upper bound) also rejects 0, negative and fractional indices.
	local accented = accent ~= nil and syllables[accent] or nil
	if accented then
		accented[#accented] = rising_vowel[accented[#accented]]
		-- sannatara on the preceding syllable
		local before = syllables[accent - 1]
		if before then
			before[#before] = low_vowel[before[#before]]
		end
		-- svarita on the following syllable
		local after = syllables[accent + 1]
		if after then
			after[#after] = peaking_vowel[after[#after]]
		end
	end

	syllables = shift_to_codas(syllables)

	-- Classical stress accent
	local stressed = stressed_syllable(syllables)
	if stressed then
		table.insert(syllables[stressed], 1, 'ˈ')
	end

	-- If there are phonemes left, the word ends in a consonant: add them to
	-- the last syllable. A word with no vowel at all has no syllable yet, so
	-- one is created to hold the consonants instead of indexing syllables[0].
	if #final_cons > 0 then
		if #syllables == 0 then
			syllables[1] = {}
		end
		local last = syllables[#syllables]
		for _, phoneme in ipairs(final_cons) do
			table.insert(last, phoneme)
		end
	end

	for i, syll in ipairs(syllables) do
		syllables[i] = table.concat(syll, "")
	end
	return table.concat(syllables, ".")
end

-- Nasal that replaces an anusvara, keyed by the consonant that follows it.
-- The two groups keep the "earlier" / "later" labels of the original source.
local anu_to_nasals = {
	-- earlier
	["s̪"] = "ŋ̊",
	["ɕ"] = "ŋ̊",
	["ʂ"] = "ŋ̊",
	["h"] = "ŋ̊",
	["ɦ"] = "ŋ",
	["ɽ"] = "ŋ",

	-- later
	["k"] = "ŋ", ["ɡ"] = "ŋ",
	["c"] = "ɲ", ["ɟ"] = "ɲ",
	["t̪"] = "n̪", ["d̪"] = "n̪",
	["ʈ"] = "ɳ", ["ɖ"] = "ɳ",
	["p"] = "m", ["b"] = "m",
}

--- Resolve every anusvara ("ṃ") in a transcription.
-- 1. At the very end of the text it becomes "m".
-- 2. Before one of the consonants listed in `anu_to_nasals` (optionally
--    across a space, syllable dot or stress mark) it becomes that
--    consonant's nasal.
-- 3. Any anusvara still left after a vowel is dropped and the vowel is
--    nasalised instead.
-- @param text transcription string
-- @return the processed string
local function anusvara(text)
	text = gsub(text, "ṃ$", "m")

	text = gsub(
		text,
		"ṃ([ %.ˈ]?)([kɡtdʈɖcɟpbsɕʂhɦɽ])(" .. DENTAL .. "?)",
		function(div, cons, mark)
			local nasal = anu_to_nasals[cons .. mark]
			if not nasal then
				-- No mapping for this consonant: returning nil makes gsub
				-- keep the match unchanged instead of raising on a nil
				-- concatenation.
				return nil
			end
			return nasal .. div .. cons .. mark
		end
	)

	-- The tone marks must form a character class. Written as a plain
	-- concatenation they demanded the literal sequence RISING+LOW, which
	-- never occurs, so this substitution could never match.
	-- SYLLABIC is accepted on either side of the tone mark because the tone
	-- tables place the tone directly after the base letter.
	local tone = "[" .. RISING .. LOW .. PEAKING .. "]"
	text = gsub(
		text,
		"([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. tone .. "?)(" .. SYLLABIC .. "?)(ː?)([ɪʊ]?)ṃ",
		"%1%2" .. NASAL .. "%3%4%5%6"
	)
	return text
end

--- Convert one Devanagari word to a syllabified transcription.
-- @param word   the word in Devanagari
-- @param accent 1-based index of the pitch-accented syllable, or nil
-- @return the transcription produced by `syllabify`, with chandrabindu
--         realised as nasalisation of the preceding vowel
local function convert_word(word, accent)
	-- split into code points
	local chars = {}
	gsub(word, ".", function(c) table.insert(chars, c) end)

	local phonemes = {}
	for i, c in ipairs(chars) do
		if consonants[c] then
			table.insert(phonemes, consonants[c])
			-- a consonant not followed by a vowel sign or virama carries the
			-- inherent vowel
			if not diacritics[chars[i + 1]] then
				table.insert(phonemes, "ɐ")
			end
		else
			local ipa = diacritics[c] or tt[c]
			-- Signs mapped to "" (virama, avagraha) are silent. Skipping them
			-- keeps empty strings out of the phoneme list, where they would
			-- be counted as consonants when clusters are split.
			if ipa and ipa ~= "" then
				table.insert(phonemes, ipa)
			end
		end
	end
	-- NOTE(review): the three-character key "ळ्ह" in `consonants` can never
	-- match here because lookup is one code point at a time - confirm whether
	-- it should be handled.

	word = syllabify(phonemes, accent)
	-- a stress mark replaces the syllable dot in front of it
	word = gsub(word, "%.ˈ", "ˈ")

	-- chandrabindu: nasalise the preceding vowel. The tone marks must form a
	-- character class; as a plain concatenation the pattern required the
	-- literal sequence RISING+LOW and never matched. SYLLABIC is accepted on
	-- either side of the tone mark, matching what the tone tables produce.
	local tone = "[" .. RISING .. LOW .. PEAKING .. "]"
	word = gsub(
		word,
		"([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. tone .. "?)(" .. SYLLABIC .. "?)(ː?)([ɪʊ]?)m̐",
		"%1%2" .. NASAL .. "%3%4%5%6"
	)
	return word
end

--- Convert a space-separated phrase word by word.
-- @param words   phrase in Devanagari, words separated by single spaces
-- @param accents table mapping word position to the accented syllable index
--                of that word; missing entries (or a nil table) mean no accent
-- @return the converted words joined with single spaces
local function convert_words(words, accents)
	accents = accents or {}
	local result = {}
	local word_num = 0
	for word in mw.text.gsplit(words, " ") do
		word_num = word_num + 1
		result[word_num] = convert_word(word, accents[word_num])
	end
	-- returned directly: the original stored this in an undeclared (global)
	-- `text` variable
	return table.concat(result, " ")
end

--- Run the phonological processes on a transcription.
-- Only the anusvara pass is applied at present.
-- @param text transcription string
-- @return the processed string
local function phon_procs(text)
	-- Anusvāra
	local processed = anusvara(text)
	return processed
end

--- Mark a stop as unreleased when it is directly followed by another stop,
-- optionally across a space, syllable dot or stress mark.
-- @param text transcription string
-- @return the string with NORELEASE inserted after each such stop
local function abhinidhana(text)
	local stop = "[kɡtdʈɖcɟpb]"
	local pattern = "(" .. stop .. ")(" .. DENTAL .. "?)([ %.ˈ]?)(" .. stop .. ")"
	local replacement = "%1%2" .. NORELEASE .. "%3%4"
	local marked = gsub(text, pattern, replacement)
	return marked
end

-- Superscript counterpart of each vowel letter.
local superscript = {
	["ɐ"] = "ᵄ",
	["ɑ"] = "ᵅ",
	["e"] = "ᵉ",
	["o"] = "ᵒ",
	["i"] = "ⁱ",
	["u"] = "ᵘ",
}

-- Inserts a superscript "ⁿ" after a stop (with its dental and aspiration
-- marks) or after ɦ when a nasal follows, optionally across a space,
-- syllable dot or stress mark.
local function add_nasal_yama(text)
	text = gsub(
		text,
		"([kɡtdʈɖcɟpb])(" .. DENTAL .. "?)([ʰʱ]?)([ %.ˈ]?)([nŋɲɳm])",
		"%1%2%3ⁿ%4%5"
	)
	text = gsub(text, "(ɦ)([ %.ˈ]?)([nɳm])", "%1ⁿ%2%3")
	return text
end

-- Appends a superscript copy of the vowel after an "h" that closes the text
-- or stands before a space.
local function add_visarga_echo(text)
	local vowel_h = "([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([ɪʊ]?)h"
	local function echo(vow, nas, length, glide)
		return vow .. nas .. length .. glide .. "h" .. superscript[vow]
	end
	text = gsub(text, vowel_h .. "$", echo)
	text = gsub(text, vowel_h .. " ", function(vow, nas, length, glide)
		return echo(vow, nas, length, glide) .. " "
	end)
	return text
end

--- Derive the Vedic and Classical transcriptions from the common one.
-- @param text transcription produced by convert_words / phon_procs
-- @return table with keys 'rig' and 'cla'; each value has the fields
--         `label`, `phonemic` and `phonetic`
local function make_dialects(text)
	local dialects = {}

	-- Rigvedic Sanskrit: no stress mark, so 'ˈ' turns back into a syllable
	-- dot (and disappears at the start of the text or of a word)
	local rig_phnm = text
	rig_phnm = gsub(rig_phnm, "^ˈ", "")
	rig_phnm = gsub(rig_phnm, "ˈ", ".")
	rig_phnm = gsub(rig_phnm, " %.", " ")

	local rig_phnt = abhinidhana(rig_phnm)
	-- visarga alternation
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([p])", "ɸ%1%2")
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([k])", "x%1%2")
	-- nasalized semivowels: the nasal is replaced by a nasalised copy of the
	-- following segment
	-- NOTE(review): the class lists ɪ/ʊ rather than j/w - confirm that this
	-- is intended.
	rig_phnt = gsub(
		rig_phnt,
		"([ŋɲnɳm])(" .. DENTAL .. "?)([ %.ˈ]?)([lɭɪʊ])([" .. DENTAL .. FLAP .. "]?)(ʱ?)",
		"%4%5" .. NASAL .. "%3%4%5%6"
	)
	-- nasalized yama
	rig_phnt = add_nasal_yama(rig_phnt)

	-- remove sannatara and svarita from phonemic
	rig_phnm = gsub(rig_phnm, "[" .. LOW .. PEAKING .. "]", "")
	rig_phnm = gsub(rig_phnm, RISING, HIGH)

	dialects['rig'] = {
		label = "Vedic",
		phonemic = rig_phnm,
		phonetic = rig_phnt,
	}

	-- Classical Sanskrit
	local cla_phnm = text
	-- Drop every tone mark. The previous pattern only removed a mark sitting
	-- directly after the base letter, so a mark that follows the nasalisation
	-- tilde survived.
	cla_phnm = gsub(cla_phnm, "[" .. RISING .. LOW .. PEAKING .. "]", "")
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)ɪ", "e%1ː")
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)ʊ", "o%1ː")
	cla_phnm = gsub(cla_phnm, "ɑ(" .. NASAL .. "?)ː([ɪʊ])", "ɑ%1%2")
	cla_phnm = gsub(cla_phnm, "w", "ʋ")

	local cla_phnt = abhinidhana(cla_phnm)
	-- disabled in the original source:
	-- cla_pron = gsub(cla_pron, "r̩(" .. NASAL .. "?)(" .. RISING .. "?)(ː?)", "ɽi%1%2%3")
	-- cla_pron = gsub(cla_pron, "l̩(" .. NASAL .. "?)(" .. RISING .. "?)(ː?)", "l̪i%1%2%3")
	-- nasalized yama
	cla_phnt = add_nasal_yama(cla_phnt)
	cla_phnt = add_visarga_echo(cla_phnt)

	dialects['cla'] = {
		label = "Classical Sanskrit",
		phonemic = cla_phnm,
		phonetic = cla_phnt,
	}
	return dialects
end

-- Builds the `items` list for m_IPA.format_IPA_full: the phonemic form, plus
-- the phonetic form in square brackets when it differs.
-- NOTE(review): the initialiser of this list was missing from the source
-- (`local IPA_args = if ...`). The slash-delimited phonemic item is
-- reconstructed by analogy with the bracketed phonetic one - confirm against
-- the live module.
local function dialect_items(dialect)
	local items = {{pron = '/' .. dialect.phonemic .. '/'}}
	if dialect.phonemic ~= dialect.phonetic then
		table.insert(items, {pron = '[' .. dialect.phonetic .. ']'})
	end
	return items
end

-- One bulleted line: the dialect's qualifier label followed by its IPA.
local function labelled_line(dialect)
	return table.concat{
		'\n* ',
		m_a.format_qualifiers(lang, {dialect.label}),
		' ',
		m_IPA.format_IPA_full { lang = lang, items = dialect_items(dialect) },
	}
end

--- Render the pronunciation section as wikitext.
-- @param dialects table returned by make_dialects (keys 'rig' and 'cla')
-- @param novedic  when truthy, the Vedic entry is left out and a single
--                 labelled line is returned
-- @return wikitext string
local function make_table(dialects, novedic)
	local dial_types = {'rig', 'cla'}
	if novedic then
		table.remove(dial_types, 1)
	end

	if #dial_types == 1 then
		return labelled_line(dialects[dial_types[1]])
	end

	-- NOTE(review): the whitespace-only string literals below are kept
	-- byte-for-byte; they look like wrappers whose markup was lost before
	-- this copy was made - confirm against the live module.
	local inline = table.concat{
		'\n* ',
		m_IPA.format_IPA_full { lang = lang, items = dialect_items(dialects.cla) },
	}

	local full = {}
	table.insert(full, '\n \n\n')
	for _, dial in ipairs(dial_types) do
		table.insert(full, labelled_line(dialects[dial]))
	end
	table.insert(full, ' ')

	return table.concat{
		' ',
		inline,
		table.concat(full, ""),
		' ',
	}
end

--- Template entry point.
-- Reads the parent frame's arguments:
--   1 / w   - text to transcribe (defaults to the current page title)
--   a       - list of numbers, holes allowed; passed to convert_words as
--             the per-word accent positions
--   novedic - boolean; passed to make_table
-- @param frame the Scribunto frame object
-- @return the wikitext produced by make_table
function export.show(frame)
	local params = {
		[1] = {alias_of = 'w'},
		-- getCurrentTitle is a function: it has to be called to get the
		-- title object
		w = {default = mw.title.getCurrentTitle().text},
		a = {list = true, allow_holes = true, type = 'number'},
		novedic = {type = 'boolean'},
	}
	-- likewise getParent is a method and needs its call parentheses
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local text = convert_words(args.w, args.a)
	text = phon_procs(text)
	local dialects = make_dialects(text)
	return make_table(dialects, args.novedic)
end

return export