-- Module:bho-IPA2

-- Table of public functions handed back to the caller at the end of the file.
local export = {}

-- Unicode-aware pattern helpers from the Scribunto ustring library.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Character inventories. `vowel` and `vowel_sign` are declared a second time
-- further down, and it is those later declarations that the pattern-building
-- code sees.
-- NOTE(review): `consonants`, `lvowel` and `el` do not appear to be read
-- anywhere in the visible source — confirm before removing.
local consonants = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषहड़ढ़"
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱’"
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"
local lvowel = "आईऊएओाीूेो"
local el = "िु"

-- Lookup table from a Devanagari character (optionally followed by a nukta)
-- to its IPA string. export.tr applies it with gsub(text, '.़?', conv), so a
-- match that has no entry here is left unchanged.
local conv = {
	-- consonants
	['क'] = 'k', ['ख'] = 'kʰ', ['ग'] = 'ɡ', ['घ'] = 'ɡʱ', ['ङ'] = 'ŋ',
	['च'] = 'ʦ', ['छ'] = 'ʦʰ', ['ज'] = 'ʣ', ['झ'] = 'ʣʱ', ['ञ'] = 'ɲ',
	['ट'] = 'ʈ', ['ठ'] = 'ʈʰ', ['ड'] = 'ɖ', ['ढ'] = 'ɖʱ', ['ण'] = 'ɳ',
	['त'] = 't', ['थ'] = 'tʰ', ['द'] = 'd', ['ध'] = 'dʱ', ['न'] = 'n',
	['प'] = 'p', ['फ'] = 'pʰ', ['ब'] = 'b', ['भ'] = 'bʱ', ['म'] = 'm',
	['य'] = 'j', ['र'] = 'ɾ', ['ल'] = 'l', ['व'] = 'ʋ',
	['श'] = 's', ['ष'] = 's', ['स'] = 's', ['ह'] = 'ɦ',

	-- consonants written with a nukta, plus other extended letters
	['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ɣ', ['ऴ'] = 'ɭ', ['ळ'] = 'ɭ',
	['ज़'] = 'z', ['श़'] = 'ʒ', ['झ़'] = 'ʒ', ['ड़'] = 'ɽ', ['ढ़'] = 'ɽʱ',
	['फ़'] = 'f', ['थ़'] = 'θ', ['द़'] = 'ð', ['ऩ'] = 'n̪', ['ऱ'] = 'ɹ',
	['ॽ'] = "ʔ", ['ॹ'] = 'ʒ',

	-- vowel diacritics
	['ि'] = 'ɪ', ['ु'] = 'ʊ', ['े'] = 'eː', ['ॆ'] = 'e', ['ॊ'] = 'o',
	['ो'] = 'oː', ['ा'] = 'aː', ['ी'] = 'iː', ['ू'] = 'uː',
	['ृ'] = 'ri', ['ॄ'] = 'ri', ['ॢ'] = 'liɾi', ['ॣ'] = 'liɾi',
	['ै'] = 'əɪ', ['ौ'] = 'əʊ', ['ॉ'] = 'ɔ', ['ॅ'] = 'æ',
	['ꣿ'] = 'əɪ', ['ॏ'] = 'əʊ', ['ऺ'] = "ᵊ",

	-- vowel signs
	['अ'] = 'ə', ['इ'] = 'ɪ', ['उ'] = 'ʊ', ['ए'] = 'eː', ['ओ'] = 'oː',
	['आ'] = 'aː', ['ई'] = 'iː', ['ऊ'] = 'uː',
	['ऋ'] = 'ri', ['ॠ'] = 'ri', ['ऌ'] = 'liɾi', ['ॡ'] = 'liɾi',
	['ऐ'] = 'əɪ', ['औ'] = 'əʊ', ['ऑ'] = 'ɔ', ['ॲ'] = 'æ', ['ऍ'] = 'æ',
	['ऎ'] = 'e', ['ऒ'] = 'o', ['ꣾ'] = 'əĕ', ['ॵ'] = 'əŏ',
	['व़'] = 'ŏ',['य़'] = 'ĕ', ['ॴ'] = 'a',

	-- nasalised vowel diacritics (diacritic followed by anusvara/chandrabindu)
	-- NOTE(review): these keys are two code points long, while the lookup in
	-- export.tr matches one character plus an optional nukta, so they look
	-- unreachable from that call — confirm whether anything else reads them.
	['िं'] = 'ɪ̃', ['ुँ'] = 'ʊ̃', ['ें'] = 'ẽː', ['ॆं'] = 'ẽ', ['ॊं'] = 'õ',
	['ों'] = 'õː', ['ाँ'] = 'ãː', ['ीं'] = 'ĩː', ['ूँ'] = 'ũː',
	['ैं'] = 'ə̃ɪ̃', ['ौं'] = 'ə̃ʊ̃', ['ॉं'] = 'ɔ̃', ['ॅं'] = 'æ̃',
	['ꣿं'] = 'ə̃ɪ̃', ['ॏं'] = 'ə̃ʊ̃',

	-- rendered as a long schwa
	['꣱'] = "əː",
	-- chandrabindu
	['ँ'] = '̃',
	-- anusvara
	['ं'] = 'ṃ',
	-- visarga
	['ः'] = 'ʰ',
	-- virama
	['्'] = '',
	-- om
	['ॐ'] = 'oːm',
	-- avagraha
	['ऽ'] = 'əː',
	-- zero-width non joiner
	['‌'] = '',
	-- zero-width joiner
	['‍'] = 'ə',
	-- diphthong marker
	['ॱ'] = '̯',

	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',

	-- punctuation
	['।'] = '.', -- danda
	['॥'] = '.', -- double danda
	['+'] = '', -- compound separator

	-- abbreviation sign
	['॰'] = '.',
}

-- Maps a consonant to the nasal consonant that an adjacent anusvara is
-- rewritten to. Built from one row per nasal so the grouping is visible.
local nasal_assim = {}
do
	local rows = {
		{ "ङ", { "क", "ख", "ग", "घ" } },
		{ "ञ", { "च", "छ", "ज", "झ" } },
		{ "ण", { "ट", "ठ", "ड", "ढ" } },
		{ "म", { "प", "फ", "ब", "भ", "म" } },
		{ "न", { "त", "थ", "द", "ध", "न" } },
	}
	for _, row in ipairs(rows) do
		local nasal, members = row[1], row[2]
		for _, member in ipairs(members) do
			nasal_assim[member] = nasal
		end
	end
end

-- Three-character sequences (consonant, virama, consonant — in the reversed
-- order used while a word is processed) that are exempt from the
-- "keep the final schwa after a cluster" rule.
local perm_cl = {
	["म्ल"] = true,
	["व्ल"] = true,
	["न्ल"] = true,
}

-- Consonant letters without nukta forms.
local all_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह"
-- Consonants after which a word-final schwa is kept when they close a cluster
-- (see the final-schwa rule in export.tr).
local special_cons = "छकखगतसहयथडढठपदणधरषटलवब भडचनशम"

-- These two declarations shadow the identically named locals near the top of
-- the file; every use below this point sees these values.
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱꣱’"
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"

-- Pattern with four captures: vowel, consonant, a literal ə, consonant,
-- vowel (optionally preceded by anusvara/chandrabindu). Each consonant may be
-- preceded by a nukta because the pattern is run on reversed words.
local syncope_pattern
do
	local any_vowel = '[' .. vowel .. vowel_sign .. ']'
	local cons_capture = '(़?[' .. all_cons .. '])'
	syncope_pattern = '(' .. any_vowel .. ')'
		.. cons_capture
		.. 'ə'
		.. cons_capture
		.. '([ंँ]?' .. any_vowel .. ')'
end

--- Reverse a string one code point at a time.
-- Goes through mw.ustring so that multi-byte UTF-8 characters stay intact.
-- @param text  string to reverse
-- @return the reversed string
local function rev_string(text)
	local result = {}
	for i = mw.ustring.len(text), 1, -1 do
		result[#result + 1] = mw.ustring.sub(text, i, i)
	end
	return table.concat(result)
end

-- Word-final schwa seen from the end of the word (the word is reversed before
-- matching): ə, optional nukta, a consonant, then up to two more characters.
local final_schwa_pattern = '^ə(़?)([' .. all_cons .. '])(.)(.?)'

--- Apply the word-level rules to one Devanagari word whose inherent schwas
-- have already been written out as ə.
-- @param word  a run of Devanagari characters and ə
-- @return the word after final-schwa deletion, syncope marking and anusvara
--         resolution, in its original (non-reversed) order
local function process_word(word)
	-- The rules are right-to-left, so they are applied to the reversed word.
	local rev = rev_string(word)

	-- Final schwa: kept only when the last consonant is in special_cons and is
	-- preceded by a virama (unless that cluster is listed in perm_cl), or when
	-- the word ends in ी + य. Otherwise it is dropped.
	rev = gsub(rev, final_schwa_pattern, function(opt, first, second, third)
		local keep = (match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first .. second .. third])
			or match(first .. second, 'य[ी]')
		return (keep and 'ə' or "") .. opt .. first .. second .. third
	end)

	-- Medial schwa between two vowel-flanked consonants is reduced to ᵊ.
	-- Repeated because one pass cannot rewrite overlapping matches.
	while match(rev, syncope_pattern) do
		rev = gsub(rev, syncope_pattern, '%1%2ᵊ%3%4')
	end

	-- Anusvara. `succ` is the character that follows it in the original word
	-- and `prev` the one that precedes it (names refer to original order).
	rev = gsub(rev, "(.?)ं(.)", function(succ, prev)
		return succ ..
			(succ .. prev == "ə" and "्म" or (succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "̃")) ..
			prev
	end)

	return rev_string(rev)
end

--- Convert Bhojpuri text in Devanagari to an IPA string.
-- @param text  the text to convert
-- @param lang  accepted for interface compatibility; not read here
-- @param sc    accepted for interface compatibility; not read here
-- @return the converted text, normalised to NFC
function export.tr(text, lang, sc)
	-- Write the inherent vowel explicitly: a consonant (with optional nukta)
	-- that is not followed by a vowel diacritic or virama gets an ə.
	text = gsub(text, "([" .. all_cons .. "]़?)([" .. vowel .. "्]?)", function(c, d)
		return c .. (d == "" and "ə" or d)
	end)

	-- Rewrite every word in place. Each word is handled exactly once, so the
	-- result for one word can no longer be affected by substitutions made for
	-- another word, nor leak into a longer word that merely contains it.
	text = gsub(text, "[ऀ-ॿə]+", process_word)

	-- Text-wide substitutions that must see the already processed words.
	text = gsub(text, "ज्ञ", "gj")
	text = gsub(text, "इऺ", "ɪ̆")
	text = gsub(text, "उऺ", "ʊ̆")
	text = gsub(text, "ॳ", "ᵊ")
	text = gsub(text, "अ꣱", "əː")

	-- Character-by-character conversion (character plus optional nukta).
	text = gsub(text, '.़?', conv)
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")

	-- Put a ˑ after a long vowel that is followed by two further vowel nuclei
	-- before the end of the text (first call) or before a space (second call).
	-- The marked vowels are rewritten as short vowels further down.
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍṇɽtʈɖdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?))$", "%1ˑ%2%3%4%5%6%7%8%9")
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍʈɖṇɽtdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣshɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?)) ", "%1ˑ%2%3%4%5%6%7%8%9 ")

	-- Clean-up of the converted string; the order of these calls matters.
	text = gsub(text, 'ɦri', 'ri')
	-- Nasalisation mark goes before the length mark.
	text = gsub(text, 'ː̃', '̃ː')
	text = gsub(text, 'ː̃ː', '̃ː')
	text = gsub(text, 'ː̤ː', 'ː')
	-- NOTE(review): in the replacement below the '%' is followed by a combining
	-- tilde, not a digit, so it is not a capture reference. Left unchanged
	-- because the intended output is unclear — confirm and correct.
	text = gsub(text, "a([ɪʊ])̃", "a%̃1")
	text = gsub(text, "ʊʊ", "ʊ")
	-- Collapse reduced/duplicate schwas next to other vowels.
	text = gsub(text, "([iuɪʊïüaô])(ː?)ᵊ", "%1%2")
	text = gsub(text, "əᵊ", "ə")
	text = gsub(text, "ᵊə", "ə")
	text = gsub(text, "əə", "ə")
	text = gsub(text, "ᵊ([ɪʊ])", "ə%1")
	text = gsub(text, "ə([ɪʊ])", "ə%1")
	text = gsub(text, "([ɪʊ])̯̯", "%1")
	text = gsub(text, "ɪɪ", "ɪ")
	-- Affricate ligatures are spelled out with a tie bar.
	text = gsub(text, "ʦ", "t͡ɕ")
	text = gsub(text, "ʣ", "d͡ʑ")
	text = gsub(text, "ĕ", "ĕ")
	text = gsub(text, "ŏ", "ŏ")
	text = gsub(text, 'ːː', 'ː')
	-- Vowels marked with ˑ above become their short counterparts.
	text = gsub(text, "aːˑ", "ə")
	text = gsub(text, "iːˑ", "ɪ")
	text = gsub(text, "uːˑ", "ʊ")
	text = gsub(text, "eːˑ", "e")
	text = gsub(text, "oːˑ", "o")
	text = gsub(text, "aˑ", "ə")
	text = gsub(text, "iˑ", "ɪ")
	text = gsub(text, "uˑ", "ʊ")
	text = gsub(text, "eˑ", "e")
	text = gsub(text, "oˑ", "o")
	text = gsub(text, "ə̃əː", "ə̃ː")

	return mw.ustring.toNFC(text)
end

return export