-- Module:bn-translit

-- Transliteration for Bengali
-- `export` is the module table; the public entry point is attached to it as
-- `export.tr` and the table is returned at the end of the file.
local export = {}

local m_str_utils = require("Module:string utilities")

-- Local aliases for the string helpers this module uses.
-- All but `toNFC` come from Module:string utilities; `toNFC` comes from
-- mw.ustring.
local u = m_str_utils.char
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC

-- Built from code point 0x003F; spliced into patterns inside export.tr.
local QO = u(0x003F) -- question mark

-- Lookup table from a Bengali character (optionally followed by a nukta) to
-- its Latin transliteration. It is passed to gsub as the replacement table,
-- so every key must be exactly the text one pattern match can produce.
local char = {
	-- consonants
	["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ",
	["চ"] = "c", ["ছ"] = "ch", ["জ"] = "j", ["ঝ"] = "jh", ["ঞ"] = "ñ",
	["ট"] = "ṭ", ["ঠ"] = "ṭh", ["ড"] = "ḍ", ["ঢ"] = "ḍh", ["ণ"] = "ṇ",
	["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n",
	["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m",
	["য"] = "j", ["র"] = "r", ["ল"] = "l", ["শ"] = "ś", ["ষ"] = "ṣ",
	["স"] = "s", ["হ"] = "h", ["ড়"] = "ṛ", ["ঢ়"] = "ṛh", ["য়"] = "ẏ",

	-- vowel diacritics
	["ি"] = "i", ["ু"] = "u", ["ৃ"] = "ri", ["ে"] = "e", ["ো"] = "ō",
	["া"] = "a", ["ী"] = "i", ["ূ"] = "u", ["ৈ"] = "ōi", ["ৌ"] = "ōu",

	-- archaic vowel diacritics
	["ৄ"] = "ri", ["ৢ"] = "li", ["ৣ"] = "li",

	-- visarga
	["ঃ"] = "ḥ",

	-- vowel signs
	["অ"] = "o", ["ই"] = "i", ["উ"] = "u", ["ঋ"] = "ri", ["এ"] = "e",
	["ও"] = "ō", ["আ"] = "a", ["ঈ"] = "i", ["ঊ"] = "u", ["ঐ"] = "ōi",
	["ঔ"] = "ōu",

	-- archaic vowel signs
	["ৠ"] = "ri", ["ঌ"] = "li", ["ৡ"] = "li",

	-- virama (produces no output of its own)
	["্"] = "",

	-- chandrabindu (becomes a combining tilde on the preceding letter)
	["ঁ"] = "̃",

	-- avagraha
	['ঽ'] = '’',

	-- anusvara
	["ং"] = "ṅ",

	-- khanda ta
	["ৎ"] = "t",

	-- numerals
	["০"] = "0", ["১"] = "1", ["২"] = "2", ["৩"] = "3", ["৪"] = "4",
	["৫"] = "5", ["৬"] = "6", ["৭"] = "7", ["৮"] = "8", ["৯"] = "9",

	-- punctuation
	["।"] = ".", -- dãri
}

-- Character-class fragments used to build the patterns below and inside
-- export.tr. Each is the *inside* of a bracket class.
-- NOTE(review): the last range in `consonant` only works as a range if both
-- of its endpoints are single precomposed code points; confirm the literal
-- has not been decomposed into letter + nukta by normalisation.
local consonant = "ক-হড়-য়"
-- Latin "o" (the inserted inherent vowel), the dependent vowel marks and ’.
local vowel = "oা-ৌ’"
-- The independent vowel letters.
local vowel_sign = "অ-ঔ"

-- One consonant.
local c = "[" .. consonant .. "]"
-- A consonant optionally *preceded* by a nukta: this is the order found in
-- the reversed words that export.tr matches against syncope_pattern.
local cc = "়?" .. c
-- Any vowel: dependent mark, independent letter, or inherent "o".
local v = "[" .. vowel .. vowel_sign .. "o]"
-- V C V C + "o" + C (optional chandrabindu) V, applied to reversed words;
-- the "o" between the two captures is the one that gets dropped.
local syncope_pattern = table.concat({
	"(", v, cc, v, cc, ")o(", cc, "ঁ?", v, ")",
})

-- Consonants after which a following হ is kept apart with an apostrophe.
local deaspirate = "[কগচজটডতদপব]"

--- Reverse a string one character at a time.
-- Characters are whatever units the module-level `len` and `sub` helpers
-- count (presumably code points rather than bytes — confirm against
-- Module:string utilities).
-- @param text string to reverse
-- @return the reversed string ("" for an empty input)
local function rev_string(text)
	local length = len(text)
	-- Collect the pieces in a table and join once, instead of growing a
	-- string with `..` on every iteration.
	local reversed = {}
	for i = 1, length do
		local pos = length - i + 1
		reversed[i] = sub(text, pos, pos)
	end
	return table.concat(reversed)
end

--- Transliterate Bengali-script text into Latin script.
-- The text is rewritten by an ordered chain of substitutions: Bengali-side
-- preparation (inherent-vowel insertion and syncope), character-by-character
-- mapping through `char`, then a long list of Latin-side adjustments.
-- The order of the rules matters; later rules rely on the output of earlier
-- ones.
-- @param text     the Bengali text to transliterate
-- @param lang     not used by this function
-- @param sc       not used by this function
-- @param override not used by this function
-- @return the NFC-normalised transliteration, or nil when Bengali-block
--         characters are still present after all rules have run
function export.tr(text, lang, sc, override)
	-- Put a virama between a consonant and a following independent ও, then
	-- undo that when the consonant is at the very start of the text.
	text = gsub(text, "(" .. c .. ")ও", "%1্ও")
	text = gsub(text, "^(" .. c .. ")্ও", "%1ও")

	-- A final (or pre-space) cluster written with an explicit virama gets a
	-- trailing virama, so no inherent vowel is appended to it below.
	text = gsub(text, "(" .. c .. ")্‌(" .. c .. ")$", "%1্%2্")
	text = gsub(text, "(" .. c .. ")্‌(" .. c .. ") ", "%1্%2্ ")

	-- ঞ between two vowels becomes a combining tilde on the first vowel.
	text = gsub(text, "(" .. v .. ")ঞ(" .. v .. ")", "%1̃%2")

	-- Insert the inherent vowel "o" after every consonant (with optional
	-- nukta) that is not already followed by a vowel mark, ’ or virama.
	text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b)
		return a .. (b == "" and "o" or b)
	end)

	-- Work on each word reversed: drop a word-final inherent "o" that
	-- follows consonant + vowel, then repeatedly apply syncope_pattern.
	for word in gmatch(text, "[ঁ-৽o’]+") do
		local orig_word = word
		word = rev_string(word)
		word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. ")", "%1%2")
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, "%1%2")
		end
		text = gsub(text, orig_word, rev_string(word))
	end

	-- Keep an unaspirated stop + হ distinct from the aspirate digraphs.
	text = gsub(text, "(" .. deaspirate .. ")হ", "%1'h")

	-- Conjunct-final ম / য / ব get placeholder letters handled further down.
	text = gsub(text, "্ম", "ṃ")
	text = gsub(text, "্য", "y")
	text = gsub(text, "্ব", "v")

	-- Word endings that keep (or lose) a final vowel.
	text = gsub(text, "িত$", "ito")
	text = gsub(text, "িত ", "ito ")

	text = gsub(text, "ৃত$", "rito")
	text = gsub(text, "ৃত ", "rito ")

	text = gsub(text, "িব$", "ibo")
	text = gsub(text, "িব ", "ibo ")

	text = gsub(text, "র্চ$", "র্চ্‌")
	text = gsub(text, "র্চ ", "র্চ্‌ ")

	text = gsub(text, "ছিল$", "chilo")
	text = gsub(text, "ছিল ", "chilo ")

	text = gsub(text, "র([মফ])o", "রo%1")

	text = gsub(text, "(" .. cc .. ")o([অআ])", "%1%2")
	text = gsub(text, "(" .. cc .. ")ও", "%1oō")

	-- Map to Latin: first characters together with a following nukta or ’,
	-- then whatever single characters are left.
	text = gsub(text, ".[়’]?", char)
	text = gsub(text, ".", char)

	-- Latin-side classes: a vowel with optional combining tilde, any
	-- consonant letter, and the consonants that can take a following "h".
	local v_Latn = "[oaiueō]̃?"
	local c_Latn = "[bcdḍghjklmṃnṇprsśṣtṭvẇyẏ]"
	local consonants_no_h = "[bcdgjklmnpsśtṭḍ]"

	-- inherent vowel deletion
	text = gsub(text, "(" .. v_Latn .. ")bo([bdps])(" .. v_Latn .. ")", "%1b%2%3")
	text = gsub(text, "(" .. v_Latn .. ")cho([bpt])(" .. v_Latn .. ")", "%1ch%2%3")
	text = gsub(text, "(" .. v_Latn .. ")do([bp])(" .. v_Latn .. ")", "%1d%2%3")
	text = gsub(text, "(" .. v_Latn .. ")dho([bp])(" .. v_Latn .. ")", "%1dh%2%3")

	text = gsub(text, "(" .. v_Latn .. ")go([bpr])(" .. v_Latn .. ")", "%1g%2%3")

	text = gsub(text, "(" .. v_Latn .. ")jo([bpr])(" .. v_Latn .. ")", "%1j%2%3")
	text = gsub(text, "(" .. v_Latn .. ")ko([bmprsśtṭ])(" .. v_Latn .. ")", "%1k%2%3")
	text = gsub(text, "(" .. v_Latn .. ")kho([bmpt])(" .. v_Latn .. ")", "%1kh%2%3")
	text = gsub(text, "(" .. v_Latn .. ")lo([bdp]h?)(" .. v_Latn .. ")", "%1l%2%3")
	text = gsub(text, "(" .. v_Latn .. ")lo([dp]v)(" .. v_Latn .. ")", "%1l%2%3")

	text = gsub(text, "(" .. v_Latn .. ")mo([bckprṛ])(" .. v_Latn .. ")", "%1m%2%3")
	text = gsub(text, "(" .. v_Latn .. ")no([bcglpṭ]?)(" .. v_Latn .. ")", "%1n%2%3")
	text = gsub(text, "(" .. v_Latn .. ")ṅo([blmp]h?)(" .. v_Latn .. ")", "%1ṅ%2%3")
	text = gsub(text, "(" .. v_Latn .. ")po([bcp])(" .. v_Latn .. ")", "%1p%2%3")
	text = gsub(text, "(" .. v_Latn .. ")pho([bdjmtpz]?)(" .. v_Latn .. ")", "%1ph%2%3")

	text = gsub(text, "(" .. v_Latn .. ")ro([bcdghjklsṣś]h?)(" .. v_Latn .. ")", "%1r%2%3")
	text = gsub(text, "(" .. v_Latn .. ")ṣo([bjlmp])(" .. v_Latn .. ")", "%1ṣ%2%3")
	text = gsub(text, "(" .. v_Latn .. ")śo([bgjlmp])(" .. v_Latn .. ")", "%1ś%2%3")
	text = gsub(text, "(" .. v_Latn .. ")so([bjlmp])(" .. v_Latn .. ")", "%1s%2%3")
	text = gsub(text, "(" .. v_Latn .. ")ṭo([bgkp])(" .. v_Latn .. ")", "%1ṭ%2%3")

	text = gsub(text, "(" .. v_Latn .. ")ẏo([j])(" .. v_Latn .. ")", "%1ẏ%2%3")

	-- exceptional
	text = gsub(text, "([cr])ch$", "%1cho")
	text = gsub(text, "([cr])ch ", "%1cho ")
	text = gsub(text, "([cr])ch(" .. QO .. ")", "%1cho%2")

	text = gsub(text, "apon(" .. v_Latn .. ")", "apn%1")
	text = gsub(text, "arbi", "arobi")

	text = gsub(text, "goñjo$", "gonj")
	text = gsub(text, "goñjo ", "gonj ")
	text = gsub(text, "got", "goto")

	text = gsub(text, "hojjo", "hojj")

	text = gsub(text, "ikta$", "ikota")
	text = gsub(text, "ikta ", "ikota ")

	text = gsub(text, "iẏ$", "iẏo")
	text = gsub(text, "iẏ ", "iẏo ")

	text = gsub(text, "ken$", "keno")
	text = gsub(text, "ken ", "keno ")
	text = gsub(text, "ken(" .. QO .. ")", "keno%1")

	text = gsub(text, "korob", "korbo")

	text = gsub(text, "sṭo$", "sṭ")
	text = gsub(text, "sṭo ", "sṭ ")

	-- The captured vowel is written back with %1 (as in the "apon" rule);
	-- only the "o" between "j" and "n" is meant to disappear.
	text = gsub(text, "ajon(" .. v_Latn .. ")", "ajn%1")
	text = gsub(text, "(" .. v_Latn .. ")koṭr(" .. v_Latn .. ")", "%1kṭr%2")
	text = gsub(text, "(" .. v_Latn .. ")khost(" .. v_Latn .. ")", "%1khst%2")
	text = gsub(text, "(" .. v_Latn .. ")jost(" .. v_Latn .. ")", "%1jst%2")
	text = gsub(text, "(" .. v_Latn .. ")no(" .. c_Latn .. "h?)(" .. c_Latn .. "h?)(" .. v_Latn .. ")", "%1n%2%3%4")
	text = gsub(text, "(" .. v_Latn .. ")rkoṭ(" .. v_Latn .. ")", "%1rkṭ%2")
	text = gsub(text, "(" .. v_Latn .. ")ṣdh(" .. v_Latn .. ")", "%1ṣodh%2")
	text = gsub(text, "(" .. v_Latn .. ")sm(" .. v_Latn .. ")", "%1śom%2")

	text = gsub(text, "^up(" .. c_Latn .. ")", "upo%1")
	text = gsub(text, " up(" .. c_Latn .. ")", " upo%1")

	-- qualifiers
	text = gsub(text, "(" .. c_Latn .. ")oṭa$", "%1ṭa")
	text = gsub(text, "(" .. c_Latn .. ")oṭa ", "%1ṭa ")
	text = gsub(text, "(" .. c_Latn .. ")oṭi$", "%1ṭi")
	text = gsub(text, "(" .. c_Latn .. ")oṭi ", "%1ṭi ")

	-- Cv
	text = gsub(text, "([bgmr])v", "%1b")
	text = gsub(text, "udv", "udb")
	text = gsub(text, "ttv", "tt")
	text = gsub(text, "^sv", "ś") -- initial
	text = gsub(text, "([sś])v", "śś") -- medial

	text = gsub(text, "^(" .. consonants_no_h .. "h?)v", "%1") -- initial
	text = gsub(text, " (" .. consonants_no_h .. "h?)v", " %1") -- initial
	text = gsub(text, "([lṅ])(" .. consonants_no_h .. "h?)v", "%1%2")
	text = gsub(text, "(" .. consonants_no_h .. ")v", "%1%1") -- medial
	text = gsub(text, "(" .. consonants_no_h .. ")hv", "%1%1h") -- medial_h

	-- ahv, ihv
	text = gsub(text, "ahv", "aubh")
	text = gsub(text, "ihv", "iubh")

	text = gsub(text, "hv", "hb")

	-- kṣ
	text = gsub(text, "^kṣ", "kh") -- initial
	text = gsub(text, " kṣ", " kh") -- initial
	text = gsub(text, "ṅkṣ", "ṅkh") -- after_ṅ
	text = gsub(text, "kṣ", "kkh") -- medial
	text = gsub(text, "kkhṃ", "kkh") -- before_ṃ

	-- sm
	text = gsub(text, "^([ṣs])ṃ(" .. v_Latn .. ")", "ś%2̃") -- initial
	text = gsub(text, "([ṣs])ṃ(" .. v_Latn .. ")", "śś%2̃") -- medial

	-- tm
	text = gsub(text, "^tṃ", "t") -- initial
	text = gsub(text, "tṃ", "tt") -- medial

	text = gsub(text, "ṃ", "m")
	text = gsub(text, "ṣ", "ś")

	-- visarga deletion
	text = gsub(text, "ḥkh", "kkh")

	-- foreign conjuncts
	-- Both variants match the same cluster; one is anchored at the end of
	-- the text, the other before a space.
	text = gsub(text, "([ln])ḍo$", "%1ḍ")
	text = gsub(text, "([ln])ḍo ", "%1ḍ ")

	text = gsub(text, "rko$", "rk")
	text = gsub(text, "rko ", "rk ")

	text = gsub(text, "(" .. v_Latn .. ")h$", "%1ho")
	text = gsub(text, "(" .. v_Latn .. ")h ", "%1ho ")

	text = gsub(text, "([glś])aho$", "%1ah")
	text = gsub(text, "([glś])aho ", "%1ah ")

	text = gsub(text, "ṇn", "ṇon")
	text = gsub(text, "ṇ", "n")

	text = gsub(text, "^eya", "ê")
	text = gsub(text, " eya", " ê")
	text = gsub(text, "^oya", "ê")
	text = gsub(text, " oya", " ê")

	text = gsub(text, "^(" .. consonants_no_h .. "h?)ya", "%1ê") -- initial
	text = gsub(text, " (" .. consonants_no_h .. "h?)ya", " %1ê") -- initial
	text = gsub(text, "^(" .. consonants_no_h .. "h?)(" .. consonants_no_h .. "h?)ya", "%1%2ê") -- initial_double
	text = gsub(text, " (" .. consonants_no_h .. "h?)(" .. consonants_no_h .. "h?)ya", " %1%2ê") -- initial_double
	text = gsub(text, "^hya", "hê") -- h_initial
	text = gsub(text, "yal$", "êl") -- final_l

	text = gsub(text, "^jñan", "gên") -- jñan_initial
	text = gsub(text, " jñan", " gên") -- jñan_initial
	text = gsub(text, "jñan", "ggên") -- jñan_medial

	text = gsub(text, "ñ", "n")

	text = gsub(text, "yanḍ", "ênḍ")

	text = gsub(text, "^(" .. consonants_no_h .. "h?)yo", "%1ê") -- initial
	text = gsub(text, " (" .. consonants_no_h .. "h?)yo", " %1ê") -- initial

	-- Cy
	text = gsub(text, "^(" .. consonants_no_h .. "h?)y", "%1") -- initial
	text = gsub(text, "ṅ(" .. consonants_no_h .. "h?)y", "ṅ%1")
	text = gsub(text, "(" .. consonants_no_h .. ")y", "%1%1") -- medial
	text = gsub(text, "(" .. consonants_no_h .. ")hy", "%1%1h") -- medial_h

	-- hy
	text = gsub(text, "^hy", "h") -- initial
	text = gsub(text, " hy", " h") -- initial
	text = gsub(text, "hy", "jjh") -- medial

	-- ry
	text = gsub(text, "ry", "rj")

	text = gsub(text, "ẏo([gklmn])([aeiīōuū])", "ẏ%1%2")
	text = gsub(text, "ẏoō", "ẏō")
	text = gsub(text, "oō$", "ō")

	-- NOTE(review): `consonant` is the Bengali-script range, but the text is
	-- Latin by this point, so the first rule can only hit characters that
	-- `char` left unmapped; c_Latn may have been intended — confirm.
	text = gsub(text, "([ei])ẏ([" .. consonant .. "])", "%1ẏo%2")
	text = gsub(text, "([ei])ẏ$", "%1ẏo")

	-- rules for changing s to ś (applicable for native words only)
	-- "ŝ" is a temporary marker; it is turned into "ś" once the exceptions
	-- below have been restored to plain "s".
	text = gsub(text, "s(" .. v_Latn .. ")$", "ś%1") -- final
	text = gsub(text, "s(" .. v_Latn .. ") ", "ś%1 ") -- final
	text = gsub(text, "s(" .. v_Latn .. ")", "ŝ%1") -- medial

	text = gsub(text, "([ai])s$", "%1ś")
	text = gsub(text, "([ai])s ", "%1ś ")

	text = gsub(text, "os$", "oŝ")
	text = gsub(text, "os ", "oŝ ")

	text = gsub(text, "^(" .. c_Latn .. ")oŝ$", "%1os")
	text = gsub(text, " (" .. c_Latn .. ")oŝ$", " %1os")
	text = gsub(text, "^(" .. c_Latn .. ")oŝ ", "%1os ")

	-- "^" is an anchor only in the pattern; in a replacement string it is
	-- a literal caret, so the replacements carry none.
	text = gsub(text, "^ŝe(" .. c_Latn .. ")$", "se%1")
	text = gsub(text, " ŝe(" .. c_Latn .. ")$", " se%1")
	text = gsub(text, "^ŝe(" .. c_Latn .. ") ", "se%1 ")
	text = gsub(text, " ŝe(" .. c_Latn .. ") ", " se%1 ")

	text = gsub(text, "ŝalam", "salam")

	text = gsub(text, "ŝ", "ś")

	text = gsub(text, "śl", "sl")
	text = gsub(text, "śr", "sr")
	text = gsub(text, "sp", "śp")
	text = gsub(text, "^śp", "sp")
	text = gsub(text, " śp", " sp")

	text = gsub(text, "śṭh$", "śṭho")

	text = gsub(text, "^([kg]h?)([dḍtṭ])", "%1o%2")
	-- Consonant + vowel + final "b" gains a final "o"; both captures are
	-- written back, matching the "bh" rules just below.
	text = gsub(text, "^(" .. c_Latn .. ")([aou])b$", "%1%2bo")
	text = gsub(text, "^(" .. c_Latn .. ")([aou])b ", "%1%2bo ")

	text = gsub(text, "^([bcdḍghjkmṃnṇprsśṣtṭẇẏ])([aou])bh$", "%1%2bho")
	text = gsub(text, "^([bcdḍghjkmṃnṇprsśṣtṭẇẏ])([aou])bh ", "%1%2bho ")

	text = gsub(text, "lona$", "lna")
	text = gsub(text, "nola$", "nla")

	text = gsub(text, "ōẏ", "ōẇ")
	text = gsub(text, "ō̃ẏ", "ō̃ẇ")

	text = gsub(text, "ōẇ$", "ōẏ")
	text = gsub(text, "ōẇ ", "ōẏ ")

	-- Collapse doubled "o" left behind by the insertions above.
	text = gsub(text, "oo", "o")

	-- Any Bengali-block character still present means something was not
	-- transliterated: report failure with nil.
	-- NOTE(review): `mode` is neither a parameter nor a local here, so it is
	-- read as a global; unless a caller defines that global the "debug"
	-- escape hatch never triggers. Confirm whether it should be `override`.
	if match(text, "[ঁ-৽]") and mode ~= "debug" then
		return nil
	else
		return toNFC(text)
	end
end

-- Hand the module table (carrying `tr`) back to whoever loads this module.
return export