-- Module:User:Qnm/mnw-translit

-- Table of functions this module makes available to its callers.
local export = {}

-- Shorthand for the Scribunto ustring substitution function used throughout.
local gsub = mw.ustring.gsub

--- Transliterate Indic-script text to Latin, with options.
--
-- The lookup tables currently carry Myanmar-script data (including Mon and
-- Shan letters) plus Lanna vowel signs and marks.
--
-- NOTE(review): this function was reconstructed from a whitespace-collapsed
-- copy in which line comments had swallowed code.  Which table entries are
-- meant to be disabled (commented out) should be confirmed against the
-- maintained copy of the module.
--
-- @param text    string to transliterate
-- @param lang    language code; not used by this function
-- @param sc      script code; only 'Beng', 'Mymr', 'Lana', 'Khmr', 'Thai'
--                and 'Laoo' are accepted
-- @param options optional table; options.impl == 'no' writes a bare
--                consonant without the inherent 'a', options.impl == 'yes'
--                (or anything else) appends it
-- @return the transliteration, or nil when sc is not an accepted script.
--         A base letter with no table entry is rendered as 'X(letter)' and an
--         unknown vowel-sign sequence as 'NIL(sequence)'.
function export.trwo(text, lang, sc, options)
	if not (sc == 'Beng' or sc == 'Mymr' or sc == 'Lana' or sc == 'Khmr'
			or sc == 'Thai' or sc == 'Laoo') then
		return nil -- Not ready for use yet!
	end

	-- Use this to make marks legible. The name 'dc' means 'drop carrier'.
	-- The extra parentheses discard gsub's second result (the match count).
	local function dc(x)
		-- These are the letter ka in the 9 supported Indic scripts.
		return (gsub(x, "[𑀓कকකကกᨠកກ]", ""))
	end

	local consonants = { -- And parts 1 of NFC-multipart independent vowels!
		-- Myanmar
		['က']='k', ['ခ']='kh', ['ဂ']='g', ['ဃ']='gh', ['င']='ṅ',
		['စ']='c', ['ဆ']='ch', ['ဇ']='j', ['ဈ']='jh', ['ဉ']='ñ',
		['ဋ']='ṭ', ['ဌ']='ṭh', ['ဍ']='ḍ', ['ဎ']='ḍh', ['ဏ']='ṇ',
		['တ']='t', ['ထ']='th', ['ဒ']='d', ['ဓ']='dh', ['န']='n',
		['ပ']='p', ['ဖ']='ph', ['ဗ']='b', ['ဘ']='bh', ['မ']='m',
		['ယ']='y', ['ရ']='r', ['လ']='l', ['ဝ']='v', ['ဠ']='ḷ',
		['ၐ']='ś', ['ၑ']='ṣ', ['သ']='s', ['ဟ']='h', ['ည']='ññ', ['ဿ']='ss',
		['အ'] = 'ʼ',
		-- Subscript consonants: 103B..103E, 105E..1060
		[dc('ကျ')]='y', [dc('ကြ')]='r', [dc('ကွ')]='v', [dc('ကှ')]='h',
		[dc('ကၞ')]='n', [dc('ကၟ')]='m', [dc('ကၠ')]='l',
		-- Mon Pali consonants
		['ၚ']='ṅ', ['ၛ']='jh', ['ၜ']='ṗ', ['ၝ']='ḅ',
-- ?			['ဣ']='i', ['ဥ']='u',
	}

	local diacritics = {
		-- Myanmar
		[dc('ကာ')]='ā', [dc('ကါ')]='ā', [dc('ကိ')]='i', [dc('ကီ')]='ī', [dc('ကု')]='u', [dc('ကူ')]='ū',
		[dc('ကၖ')]='ṛ', [dc('ကၗ')]='ṝ', [dc('ကၘ')]='ḷ', [dc('ကၙ')]='ḹ',
		[dc('ကေ')]='e', [dc('ကဲ')]='ai',
		-- The following are multicharacter!
		[dc('ကော')]='o', [dc('ကော်')]='au', [dc('က်က္')]='',
		[dc('ကေါ')]='o', [dc('ကေါ်')]='au',
		-- Vowel killers: the consonant is written with no vowel at all.
		[dc('က္')]='', [dc('က်')]='',
		-- Mon - treatment of Sanskrit au is to be determined!
		[dc('ကဳ')]='ī',
--			[dc('ကို')]='iu',
		[dc('ကာံ')]='āṃ', [dc('ကုံ')]='uṃ', [dc('ကေံ')]='eṃ', [dc('ကောံ')]='oṃ',
		-- NOTE(review): the next key appears to repeat the Myanmar 'ī' key
		-- above; confirm which value is intended.
		[dc('ကီ')]='aṁ', [dc('ကီု')]='uṁ',
		[dc('ကာဲ')]='āai', [dc('ကုဲ')]='uai', [dc('ကေဲ')]='eai', [dc('ကောဲ')]='oai', [dc('ကိုဲ')]='iuai',
		[dc('ကဵု')]='uew',
		-- Shan
		[dc('ကႃ')]='ā', [dc('ကေႃ')]='o',
		-- Lanna
		[dc('ᨠᩣ')]='ā', [dc('ᨠᩤ')]='ā', [dc('ᨠᩥ')]='i', [dc('ᨠᩦ')]='ī', [dc('ᨠᩩ')]='u', [dc('ᨠᩪ')]='ū',
		['ᩂ']='ṛ', ['ᩄ']='ḷ', -- Syllabic consonants may be very wrong!
		[dc('ᨠᩮ')]='e', [dc('ᨠᩱ')]='ai', [dc('ᨠᩰ')]='o',
		-- The next two rows are multicharacter!
		[dc('ᨠᩮᩣ')]='o', [dc('ᨠᩮᩢᩣ')]='au', [dc('ᨠᩮᩫᩢᩣ')]='au', [dc('ᨠᩮᩫᩣ')] = 'au',
		[dc('ᨠᩮᩤ')]='o', [dc('ᨠᩮᩢᩤ')]='au', [dc('ᨠᩮᩫᩢᩤ')]='au', [dc('ᨠᩮᩫᩤ')] = 'au',
		[dc('ᨠ᩠')]='', [dc('ᨠ᩺')]='', [dc('ᨠ᩼')]='',
		-- Results of subscripts - for 2nd level special subscripts.
		['ṭ']='ṭ', ['n']='n', ['p']='p', ['m']='m', ['y']='y',
		['r']='r', ['l']='l', ['w']='w', ['s']='s', ['h']='h',
	}

	local tt = {
		-- Myanmar independent vowels
		['အ']='a', ['အာ']='ā', ['ဣ']='i', ['ဤ']='ī', ['ဥ']='u', ['ဦ']='ū',
		['ၒ']='ṛ', ['ၓ']='ṝ', ['ၔ']='ḷ', ['ၕ']='ḹ',
		['ဧ']='e', ['အဲ']='ai', ['ဩ']='o', ['ဪ']='au', -- 2 of these are multi-character keys!
		-- Mon
		['ဣဳ'] = 'ī', ['ဥု'] = 'ū', ['ဨ'] = 'e',
		-- Shan
		['ဢ'] = 'a', ['ဢႃ'] = 'ā', ['ဢိ'] = 'i', ['ဢီ'] = 'ī', ['ဢု'] = 'u', ['ဢူ'] = 'ū',
		['ဢေ'] = 'e', ['ဢေႃ'] = 'o', ['ဢဲ'] = 'ai', ['ဢော်'] = 'au',
		-- chandrabindu, anusvara, visarga & avagraha
--			[dc('က')]='m̐',
		[dc('ကံ')]='ṃ', ['း']='ḥ',
-- 		['']='’',
		--numerals
		['၀']='0', ['၁']='1', ['၂']='2', ['၃']='3', ['၄']='4',
		['၅']='5', ['၆']='6', ['၇']='7', ['၈']='8', ['၉']='9',
		-- chandrabindu, anusvara, visarga & avagraha
--			[dc('')]='m̐',
		[dc('ᨠᩴ')]='ṃ', ['ᩡ']='ḥ', [dc('ᨠᩘ')]='ṅ',
--			['']='’',
		-- All scripts
		--punctuation
		['॥']='.', ['။']='.', ['᪩']='.', ['᪫']='.', ['៕']='.', ['๚']='.', --double danda
		['।']='.', ['၊']='.', ['‌᪨']='.', ['᪪']='.', ['។']='.', ['ฯ']='.', ['ຯ']='.', --danda
		--Vedic extensions
		['ᳵ']='x', ['ᳶ']='f',
		--Om
-- 		['ॐ']='oṃ',
		--reconstructed
		['*'] = '',
	}

	-- Also handle subscript consonants encoded as marks.
	local S =	dc('ကျကြကွကှကၞကၟကၠ').. -- Myanmar subscripts
				dc('ᨠᩕᨠᩖᨠᩛᨠᩜᨠᩝᨠᩞ')    -- Lanna subscripts

	-- consonants and part 1 of NFC-multi-part independent vowels.
	-- The class is closed with the subscript marks so that the subscript
	-- entries of `consonants` can be reached.  The earlier optional suffix
	-- (Bengali nukta or ZWJ) is left out, as the line that added it had been
	-- commented out.
	-- NOTE(review): confirm that S is meant to be part of this class.
	local C =	'['..
				'ကခဂဃငစဆဇဈဉဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝဠၐၑသဟညဿအ'.. -- Myanmar Part 1
				'ၚၛၵၶၷꧠၸꧡၹꧢၺꩦꩧꩨꩩꧣၻꩪၼၽၿꧤꩮႁဢဣဥ'.. -- Myanmar Part 2 (Mon and Shan)
				'ၜၝ'..
				S..']'

	-- One character diacritics
	local dia = dc('[ကာကါကိကီကုကူကၖကၗကၘကၙကေကဲက္က်ကဳကႃ'.. -- Myanmar
				'ᨠᩣᨠᩤᨠᩥᨠᩦᨠᩩᨠᩪᩂᩄᨠᩮᨠᩱᨠᩰᨠ᩠ᨠ᩺ᨠ᩼'.. -- Lanna
				'ᨠᩫᩢ'..          -- Lanna diacritics in second or third place.
				']')

	-- One capture holding up to four consecutive diacritics (possibly none).
	local diax = '(' .. string.rep(dia .. '?', 4) .. ')'

	-- explicit == true: a bare consonant is written without the inherent 'a'.
	local impl = options and options.impl
	local explicit = nil
	if impl == 'yes' then
		explicit = false
	elseif impl == 'no' then
		explicit = true
	end

	if sc == 'Lana' then -- Disambiguate lanna combining loop below.
		local cl_search = dc('[ᨲᨻᨾ]ᨠᩛ')
		text = gsub(text, cl_search,
				{['ᨲᩛ']='ᨲ᩠ᨳ', ['ᨻᩛ']='ᨻ᩠ᨻ', ['ᨾᩛ']='ᨾ᩠ᨻ'})
	end

	if sc == 'Mymr' or sc == 'Lana' then
		-- A letter directly followed by a subscript mark carries no vowel:
		-- emit its Latin value now and keep the mark for the next pass.
		local function fn(c, d)
			local cn = consonants[c]
			if cn then
				return cn .. d
			end
			-- No entry for this letter: returning nothing leaves the match
			-- as it was, so the main pass below can report it.
		end
		local search = '('..C..')(['..S..'])'
		text = gsub(text, search, fn)
		text = gsub(text, search, fn) -- and again
	end

	text = gsub(text, '('..C..')'..diax,
		function(c, d)
			-- Whole-sequence entries (independent vowels) take priority.
			local val = tt[c..d]
			if val then return val end
			local cn = consonants[c]
			if not cn then return 'X('..c..')' end
			if d ~= "" then
				return cn .. (diacritics[d] or 'NIL('..d..')')
			elseif explicit then
				return cn
			else
				return cn .. 'a'
			end
		end
	)

	-- Whatever single characters remain (signs, digits, punctuation).
	text = gsub(text, '.', tt)
	return text
end

--- Transliterate `text` with no options set.
-- Forwards its arguments to export.trwo together with an empty options table.
-- @param text string to transliterate
-- @param lang language code, passed through unchanged
-- @param sc   script code, passed through unchanged
-- @return whatever export.trwo returns for these arguments
function export.tr(text, lang, sc)
	local no_options = {}
	return export.trwo(text, lang, sc, no_options)
end

-- Module result: the table holding the functions defined on `export` above.
return export