-- Module:sa-translit

-- Table of public functions returned to callers of this module.
local export = {}

local m_str_utils = require("Module:string utilities")

-- Shorthand for the library helpers used throughout the module.
local gsub, toNFC, U = m_str_utils.gsub, mw.ustring.toNFC, m_str_utils.char

-- Combining marks written into the romanised output.
local grave = U(0x300)     -- output mark for svarita
local acute = U(0x301)     -- output mark for udatta
local diaeresis = U(0x308) -- placed on i/u standing in hiatus after a

-- Accent signs read from the Devanagari input.
local svar = U(0x951)      -- svarita sign
local anud = U(0x952)      -- anudatta sign
local d_svar = U(0x1CDA)   -- double svarita, sometimes used for long vowel with svarita

-- Devanagari consonant letters and their romanisation, without the inherent
-- vowel (the caller appends 'a' where needed).
local consonants = {
	-- velars
	['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ',
	-- palatals
	['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ',
	-- retroflexes
	['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ',
	-- dentals
	['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n',
	-- labials
	['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm',
	-- semivowels
	['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ',
	-- sibilants and h
	['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h',
}

-- Dependent vowel signs (matras) and their romanisation. The virama maps to
-- the empty string, i.e. it only suppresses the inherent vowel.
local diacritics = {
	['ा'] = 'ā',
	['ि'] = 'i',
	['ी'] = 'ī',
	['ु'] = 'u',
	['ू'] = 'ū',
	['ृ'] = 'ṛ',
	['ॄ'] = 'ṝ',
	['ॢ'] = 'ḷ',
	['ॣ'] = 'ḹ',
	['े'] = 'e',
	['ै'] = 'ai',
	['ो'] = 'o',
	['ौ'] = 'au',
	-- virama
	['्'] = '',
}

-- Single-character replacements applied in the final pass of export.tr:
-- independent vowels, nasalisation/visarga signs, digits and a few specials.
-- Every comment sits on its own line (or after the code it annotates) so that
-- it cannot swallow the entries that follow it.
local tt = {
	-- vowels
	['अ'] = 'a', ['आ'] = 'ā', ['इ'] = 'i', ['ई'] = 'ī', ['उ'] = 'u', ['ऊ'] = 'ū',
	['ऋ'] = 'ṛ', ['ॠ'] = 'ṝ', ['ऌ'] = 'ḷ', ['ॡ'] = 'ḹ',
	['ए'] = 'e', ['ऐ'] = 'ai', ['ओ'] = 'o', ['औ'] = 'au',
	-- chandrabindu
	['ँ'] = 'm̐', -- until a better method is found
	-- anusvara
	['ं'] = 'ṃ', -- until a better method is found
	['ꣳ'] = 'ṃ', -- candrabindu virama
	-- visarga
	['ः'] = 'ḥ',
	-- avagraha
	['ऽ'] = 'ʼ',
	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	-- punctuation: deliberately disabled here; export.tr replaces dandas
	-- (together with a preceding space) in a separate gsub.
	-- ['॥'] = '.', -- double danda
	-- ['।'] = '.', -- danda
	-- Vedic extensions
	['ᳵ'] = 'x', ['ᳶ'] = 'f',
	-- Om
	['ॐ'] = 'oṃ',
	-- reconstructed
	['*'] = '',
}

--- Decide the accent of a syllable from the sign that follows it.
-- Shared callback for the "initial", "after anudatta" and "string final"
-- substitutions, which all use the same three captures.
-- @param syllables  matched text up to and including the vowel to be accented
-- @param extra_sign optional visarga/anusvara/candrabindu after that vowel ("" if none)
-- @param following  the character after it ("" in the string-final pattern)
-- @return replacement string: a grave is inserted (and the svarita sign
--         dropped) when `following` is a svarita sign; otherwise an acute is
--         inserted and `following` is kept.
local function place_accent(syllables, extra_sign, following)
	if following == svar or following == d_svar then
		return syllables .. grave .. extra_sign -- (independent) svarita
	end
	return syllables .. acute .. extra_sign .. following -- udatta
end

--- Convert Vedic accent signs in Devanagari text to grave/acute marks and
-- romanise consonants and vowel signs. Independent vowels and other signs are
-- left for the shared final pass in export.tr.
-- @param text Devanagari string containing at least one accent sign
-- @return partially transliterated string
local function tr_vedic(text)
	-- insert 'a' after consonants without vowel diacritic or virama
	text = gsub(text, '([क-ह])([ा-्ॢॣ]?)', function(c, d)
		if d == "" then
			return c .. 'a'
		end
		return c .. d
	end)

	local vow_list = "aअ-औा-ौॠ-ॣ"
	local vow = "[" .. vow_list .. "]"
	local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
	local extra = "[" .. extra_list .. "]"
	local acc_list = grave .. acute .. svar .. anud .. d_svar
	-- local cons_list = "क-हᳵᳶऽ् \'" -- consonants + avagraha + virama + space + apostrophe (from e.g. bold formatting)
	-- Workaround: the consonants (plus a few other signs, see the commented-out
	-- 'local cons_list') are defined by negating the non-consonants, so as to
	-- include the munged versions of formatting characters (e.g. bold formatting)
	local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥ॐ]"

	-- Pattern fragments shared by several substitutions below.
	local syllable = cons .. "*" .. vow
	local opt_extra = "(" .. extra .. "?)"
	-- vowel carrying an anudatta sign, followed by the next syllable
	local after_anud = "(" .. vow .. extra .. "?" .. anud .. syllable .. ")" .. opt_extra
	-- vowel already marked as udatta, followed by the next syllable
	local after_udatta = "(" .. vow .. acute .. extra .. "?" .. syllable .. ")"
	-- one character that is neither anudatta, grave nor an extra sign
	local not_anud_grave = "([^" .. anud .. grave .. extra_list .. "])"

	-- independent svarita before udatta or other independent svarita
	-- (indicated by १/३ with both svarita and anudatta sign)
	text = gsub(text,
		opt_extra .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+("
			.. syllable .. ")" .. opt_extra .. "([" .. svar .. d_svar .. "]?)",
		function(a, b, c, d)
			if d ~= "" then
				return grave .. a .. b .. grave .. c -- 2 × independent svarita
			end
			return grave .. a .. b .. acute .. c -- independent svarita + udatta
		end)

	-- optional: a few non-Rigvedic ways to mark the independent svarita
	-- (but compatible with Rigvedic system)
	-- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
	-- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
	-- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
	-- note that the Rigvedic system doesn't distinguish between dependent vs.
	-- independent svarita after udatta (the latter would need manual addition of grave)
	text = gsub(text, opt_extra .. "[᳡᳖]", grave .. "%1")
	text = gsub(text, opt_extra .. "᳕(" .. syllable .. ")", grave .. "%1%2" .. acute)

	-- initial udatta/svarita
	text = gsub(text, "^(" .. syllable .. ")" .. opt_extra .. not_anud_grave, place_accent)
	-- the same, after (double) danda or 'om'
	text = gsub(text, "([।॥ॐ]" .. syllable .. ")" .. opt_extra .. not_anud_grave, place_accent)

	-- in case of anudatta sign not before other anudatta sign
	-- (nor before grave accent from १/३)
	text = gsub(text, after_anud .. not_anud_grave, place_accent)
	-- and again (excluding acute on next vowel), in case of overlapping patterns
	-- (if the third capture above happens to be another vowel with anudatta)
	text = gsub(text,
		after_anud .. "([^" .. anud .. grave .. acute .. extra_list .. "])",
		place_accent)
	-- the same, string final; the third capture is "" or a svarita sign here,
	-- so place_accent yields the acute form without appending anything
	text = gsub(text, after_anud .. "([" .. svar .. d_svar .. "]?)$", place_accent)

	-- unmarked vowel after udatta is also udatta. One pass cannot match
	-- overlapping syllables, so it is run three times: that covers up to four
	-- udattas in a row, which occurs in RV.1.164.39.
	local unmarked_next = after_udatta .. "(" .. extra .. "?[^" .. acc_list .. extra_list .. "])"
	for _ = 1, 3 do
		text = gsub(text, unmarked_next, "%1" .. acute .. "%2")
	end
	-- the same, string final
	text = gsub(text, after_udatta .. opt_extra .. "$", "%1" .. acute .. "%2")

	-- remove remaining anudatta and svarita signs
	text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")

	-- characters without a table entry are left unchanged by these lookups
	text = gsub(text, '.', consonants)
	text = gsub(text, '.', diacritics)
	return text
end

--- Romanise consonant clusters in text that carries no Vedic accent signs.
-- Each match is a consonant, an optional vowel sign/virama and an optional
-- directly following independent vowel.
-- @param text Devanagari string
-- @return partially transliterated string
local function tr_plain(text)
	return gsub(text, '([क-ह])' .. '([ािीुूृॄॢॣेैोौ्]?)' .. '([अ-औ]?)', function(c, d, e)
		-- The ranges above also contain code points with no table entry
		-- (e.g. ऩ ऱ ऴ, ऍ ऎ ऑ ऒ). Fall back to the raw character instead of
		-- concatenating nil, which would raise a Lua error; this matches the
		-- pass-through behaviour of the table lookups in the Vedic branch.
		local base = consonants[c] or c
		local indep = tt[e] or e
		if d == "" and e ~= "" then
			if indep == "i" or indep == "u" then
				-- i/u in hiatus after the inherent a gets a diaeresis
				return base .. 'a' .. indep .. diaeresis
			end
			return base .. 'a' .. indep
		elseif e ~= "" then
			return base .. diacritics[d] .. indep
		elseif d == "" then
			return base .. 'a'
		end
		return base .. diacritics[d]
	end)
end

--- Transliterate Sanskrit written in Devanagari into Latin script.
-- @param text string to transliterate
-- @param lang language code; not used by this function
-- @param sc   script code; anything other than "Deva" is rejected
-- @return the transliteration in NFC form, or nil when `sc` is not "Deva"
function export.tr(text, lang, sc)
	if sc ~= "Deva" then
		return nil
	end

	-- Vedic accent handling: plain substring search for any accent sign
	if text:find(anud, 1, true) or text:find(svar, 1, true) or text:find(d_svar, 1, true) then
		text = tr_vedic(text)
	else -- no Vedic accents
		text = tr_plain(text)
	end

	-- i/u in hiatus after (possibly accented) a gets a diaeresis
	text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
	-- independent vowels, anusvara/visarga, numerals and other single signs
	text = gsub(text, '.', tt)
	-- danda and double danda (with an optional preceding space) become a full stop
	text = gsub(text, " ?[।॥]", ".")
	text = toNFC(text)
	return text
end

return export