-- Module:hi-translit

-- Transliteration for Hindi (possibly other languages using Devanagari script,
-- except for Sanskrit).

-- Module table; the public entry point `tr` is attached to it further down
-- and the table is returned at the end of the file.
local export = {}

local m_str_utils = require("Module:string utilities")

-- Local shortcuts for the string helpers used throughout this module.
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local plain_gsub = m_str_utils.plain_gsub
local reverse = m_str_utils.reverse
local toNFC = mw.ustring.toNFC

-- Character-to-Latin lookup table. export.tr feeds every match of '.़?'
-- (one character plus an optional nukta) through this table, so consonants
-- that carry a nukta are keyed together with it.
local conv = {
	-- consonants
	['क'] = 'k', ['ख'] = 'kh', ['ग'] = 'g', ['घ'] = 'gh', ['ङ'] = 'ṅ',
	['च'] = 'c', ['छ'] = 'ch', ['ज'] = 'j', ['झ'] = 'jh', ['ञ'] = 'ñ',
	['ट'] = 'ṭ', ['ठ'] = 'ṭh', ['ड'] = 'ḍ', ['ढ'] = 'ḍh', ['ण'] = 'ṇ',
	['त'] = 't', ['थ'] = 'th', ['द'] = 'd', ['ध'] = 'dh', ['न'] = 'n',
	['प'] = 'p', ['फ'] = 'ph', ['ब'] = 'b', ['भ'] = 'bh', ['म'] = 'm',
	['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'v', ['ळ'] = 'ḷ',
	['श'] = 'ś', ['ष'] = 'ṣ', ['स'] = 's', ['ह'] = 'h',
	['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ġ', ['ऴ'] = 'ḻ',
	['ज़'] = 'z', ['ष़'] = 'ẓ', ['झ़'] = 'ź', ['ड़'] = 'ṛ', ['ढ़'] = 'ṛh',
	['फ़'] = 'f', ['ऩ'] = 'ṉ', ['ऱ'] = 'ṟ', ['य़'] = 'ẏ', ['व़'] = 'w',
	-- disabled entry, kept for reference:
	-- ['ज्ञ'] = 'jñ',

	-- vowel diacritics (dependent forms attached to a consonant)
	['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', ['ॊ'] = 'ǒ', ['ॆ'] = 'ě',
	['ा'] = 'ā', ['ी'] = 'ī', ['ू'] = 'ū',
	['ृ'] = 'ŕ',
	['ै'] = 'ai', ['ौ'] = 'au',
	['ॉ'] = 'ŏ',
	['ॅ'] = 'ĕ',

	-- vowel signs (independent vowel letters)
	['अ'] = 'a', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o',
	['आ'] = 'ā', ['ई'] = 'ī', ['ऊ'] = 'ū', ['ऎ'] = 'ě', ['ऒ'] = 'ǒ',
	['ऋ'] = 'ŕ',
	['ऐ'] = 'ai', ['औ'] = 'au',
	['ऑ'] = 'ŏ',
	['ऍ'] = 'ĕ',

	['ॐ'] = 'om',

	-- chandrabindu
	['ँ'] = '̃',
	-- anusvara
	['ं'] = '̃',
	-- visarga
	['ः'] = 'ḥ',
	-- virama
	['्'] = '',

	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',

	-- punctuation
	['।'] = '.', -- danda
	['॥'] = '.', -- double danda
	['+'] = '', -- compound separator
	-- abbreviation sign
	['॰'] = '.',
}

-- Anusvara resolution after a short vowel: key = the consonant that follows
-- the anusvara, value = what export.tr substitutes for the anusvara. A value
-- of chandrabindu (ँ) is later rendered by `conv` as a combining tilde.
local nasal_assim_short = {
	-- velar stops and nasal
	['क'] = 'ङ', ['ख'] = 'ङ', ['ग'] = 'ङ', ['घ'] = 'ङ', ['ङ'] = 'ङ',
	-- palatal
	['च'] = 'ञ', ['छ'] = 'ञ', ['ज'] = 'ञ', ['झ'] = 'ञ', ['ञ'] = 'ञ',
	-- retroflex
	['ट'] = 'ण', ['ठ'] = 'ण', ['ड'] = 'ण', ['ढ'] = 'ण', ['ण'] = 'ण',
	-- dental
	['त'] = 'न', ['थ'] = 'न', ['द'] = 'न', ['ध'] = 'न', ['न'] = 'न',
	-- labial
	['प'] = 'म', ['फ'] = 'म', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'म',
	-- semivowels and liquids
	['य'] = 'ञ', ['र'] = 'न', ['ल'] = 'न', ['व'] = 'म',
	-- sibilants and h
	['श'] = 'ञ', ['ष'] = 'ण', ['स'] = 'न', ['ह'] = 'ँ',
	-- consonants written with nukta
	['ज़'] = 'न', ['फ़'] = 'म',
	['क़'] = 'ङ', ['ख़'] = 'ङ', ['ग़'] = 'ङ',
	['ड़'] = 'ँ', ['ढ़'] = 'ँ',
}

-- Anusvara resolution after a long vowel: key = the consonant that follows
-- the anusvara, value = what export.tr substitutes for the anusvara. A value
-- of chandrabindu (ँ) is later rendered by `conv` as a combining tilde.
-- Consonants missing from the table fall back to a combining tilde in export.tr.
local nasal_assim_long = {
	-- velar
	['क'] = 'ँ', ['ख'] = 'ँ', ['ग'] = 'ङ', ['घ'] = 'ङ', ['ङ'] = 'ँ',
	-- palatal
	['च'] = 'ँ', ['छ'] = 'ँ', ['ज'] = 'ञ', ['झ'] = 'ञ', ['ञ'] = 'ँ',
	-- retroflex
	['ट'] = 'ँ', ['ठ'] = 'ँ', ['ड'] = 'ण', ['ढ'] = 'ण', ['ण'] = 'ँ',
	-- dental
	['त'] = 'ँ', ['थ'] = 'ँ', ['द'] = 'न', ['ध'] = 'न', ['न'] = 'ँ',
	-- labial
	['प'] = 'ँ', ['फ'] = 'ँ', ['ब'] = 'म', ['भ'] = 'म', ['म'] = 'ँ',
	-- h
	['ह'] = 'ँ',
	-- consonants written with nukta
	['ज़'] = 'न', ['फ़'] = 'म',
	['क़'] = 'ङ', ['ख़'] = 'ङ', ['ग़'] = 'ङ',
	['ड़'] = 'ँ', ['ढ़'] = 'ँ',
}

-- These clusters when occurring word-finally will not trigger a schwa added
-- after them even though the second consonant is in special_cons, which normally
-- causes the extra schwa to be added. NOTE: The clusters are reversed from their
-- ultimate effect, e.g. the first cluster is written 'ml' but actually applies
-- to words ending in 'lm' (export.tr looks them up on the reversed word).
-- The clusters below overall refer to the six clusters describable by
-- [rl][mnv], i.e. rm, rn, rv, lm, ln, lv (plus rṇ).
-- Plus some word-final geminates.
local perm_cl = {
	['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true,
	['म्र'] = true, ['व्र'] = true, ['न्र'] = true, ['ण्र'] = true,
	['न्न'] = true, ['म्म'] = true, ['ण्ण'] = true, ['ल्ल'] = true, ['र्र'] = true,
}

-- Character sets spliced into the patterns used by export.tr. Each string is
-- placed inside a [...] character class, so the order of characters is irrelevant.

-- all_cons: every base consonant letter; special_cons: the consonants that make
-- export.tr keep a word-final schwa after a consonant + virama cluster.
local all_cons, special_cons = 'कखगघङचछजझञटठडढणतथदधनपफबभमयरलवषशसह', 'यरलवहनमञण'

-- vowel: dependent vowel diacritics plus the working markers '*' and 'a';
-- vowel_sign: independent vowel letters. Both end with an apostrophe, in line
-- with short_vowel below and normal_vowel in export.tr.
local vowel, vowel_sign = "*aिुृेोाीूैौॉॅॆॊ'", "अइउएओआईऊऋऐऔऑऍ'"

-- Long and short vowels select which nasal-assimilation table export.tr
-- applies to a following anusvara.
local long_vowel, short_vowel = 'ाीूेैोौआईऊएऐओऔ', "*aिुृॆॊॅॉअइउऋऍऑऎऒ'"

-- Matches, on a reversed word, a schwa 'a' flanked by single consonants that
-- are themselves flanked by vowels; export.tr deletes that 'a' (captures 1-4
-- keep everything else).
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(़?[' .. all_cons .. '])a(़?[' .. all_cons .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])'

--- Transliterate Devanagari (Hindi) text into Latin script.
-- Stages: (1) write every inherent vowel explicitly as 'a'; (2) for each word,
-- drop the word-final schwa unless a rule keeps it, apply medial syncope, and
-- resolve anusvara/chandrabindu; (3) map the remaining characters through
-- `conv` and normalize with toNFC.
-- @param text  the string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated string, passed through toNFC
function export.tr(text, lang, sc)
	-- treat anusvara + nasal as geminate nasal after short vowels
	text = gsub(text, '([' .. short_vowel .. all_cons .. '])ं([नम])', '%1%2्%2')
	-- word-final apostrophe (e.g. from bold formatting) does not delete schwa
	text = gsub(text, '([' .. all_cons .. ']़?)(\'%A)', '%1a%2')
	text = gsub(text, '([' .. all_cons .. ']़?)(\')$', '%1a%2')
	-- every consonant (with optional nukta) that is not followed by a vowel
	-- mark or virama receives an explicit schwa 'a'
	text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(c, d)
		return c .. (d == "" and 'a' or d)
	end)

	-- The special_vowel category consists of ī/e/o/ai/au, these vowels are often
	-- written with anusvara for 'aesthetic' reasons as the vowel diacritic
	-- gets in the way of the candrabindu. For ī/e/o/ai/au, anusvara will trigger
	-- nasal_assim_long and candrabindu will force a nasal vowel. To force a nasal
	-- consonant before voiceless stops (mostly in loanwords), respell with
	-- homorganic nasal ङ/ञ/ण/न/म + virama ्
	-- Exception: vowel 'e' when written as standalone ए
	local special_vowel, normal_vowel = 'ीेैोौईऐओऔ', '*aिुाूृॆॅॊॉअइउआऊऋऎऍऒऑए\''

	-- Patterns and callbacks below do not depend on the current word, so they
	-- are built once here instead of on every loop iteration.
	local special_class = '[' .. special_cons .. ']'
	local final_schwa_pattern = '^a(़?)([' .. all_cons .. '])(.)(.?)'
	local special_chandrabindu = '([' .. special_vowel .. '])ँ(.़?)'
	local normal_anusvara = '([' .. normal_vowel .. '])ं([सशषवयकखटतथदडपचछ]़?)'
	local normal_chandrabindu = '([' .. normal_vowel .. '])ँ([सशषवयकखटतथदडपचछ]़?)'
	local short_anusvara = '([' .. short_vowel .. '])ं(.़?)'
	local long_anusvara = '([' .. long_vowel .. '])ं(.़?)'

	-- Decides, on the reversed word, whether the word-final schwa survives.
	-- first = final consonant, second/third = the characters written before it.
	local function final_schwa(opt, first, second, third)
		local keep = (match(first, special_class) and match(second, '्') and not perm_cl[first .. second .. third])
			or match(first .. second, 'य[ीिई]')
		return (keep and 'a' or "") .. opt .. first .. second .. third
	end

	-- Nasal mark becomes a combining tilde (nasalized vowel).
	local function nasal_vowel(prev, succ)
		return prev .. "̃" .. succ
	end

	-- Nasal mark becomes the nasal listed for the following consonant, or a
	-- combining tilde when the consonant is not listed.
	local function assimilate_short(prev, succ)
		return prev .. (nasal_assim_short[succ] or "̃") .. succ
	end

	local function assimilate_long(prev, succ)
		return prev .. (nasal_assim_long[succ] or "̃") .. succ
	end

	-- to detect words, include Unicode 0900-0963 and 0971-097F (plus 'a' and '*'),
	-- excluding e.g. danda and abbreviation dot ॰
	for word in gmatch(text, "[ऀ-ॣॱ-ॿa*]+") do
		local orig_word = word

		-- Schwa deletion runs on the reversed word so that the word-final
		-- position can be anchored with '^'.
		word = reverse(word)
		word = gsub(word, final_schwa_pattern, final_schwa)
		-- A single gsub pass cannot see overlapping matches; repeat until stable.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4')
		end
		word = reverse(word)

		word = gsub(word, special_chandrabindu, nasal_vowel)
		-- sometimes chandrabindu != anusvara
		word = gsub(word, normal_anusvara, assimilate_short)
		word = gsub(word, normal_chandrabindu, nasal_vowel)
		-- force chandrabindu to behave as anusvara
		word = gsub(word, 'ँ', 'ं')
		word = gsub(word, short_anusvara, assimilate_short)
		word = gsub(word, long_anusvara, assimilate_long)

		text = plain_gsub(text, orig_word, word)
	end

	-- Character-by-character conversion (a nukta is looked up together with
	-- the character that carries it).
	text = gsub(text, '.़?', conv)
	-- In 'ai'/'au' carrying a tilde, use a double tilde placed after the 'a'.
	text = gsub(text, 'a([iu])̃', 'a͠%1')
	text = gsub(text, 'ñz', 'nz')
	-- '*' is a working marker that is written out as 'a'.
	text = gsub(text, '%*', 'a')

	return toNFC(text)
end

-- Return the module table (carrying `tr`) to whoever loads this module.
return export