-- Module:abq-translit

local m_str_utils = require("Module:string utilities")

-- Local aliases for the string helpers this module uses.
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char

-- Combining marks obtained from `u` for code points U+0300, U+0301, U+0302,
-- U+030C and U+0323. export.tr uses them to recognise a diacritic following a
-- base letter after the text has been passed through toNFD.
local GRAVE = u(0x300)
local ACUTE = u(0x301)
local CIRC = u(0x302)
local CARON = u(0x30C)
local DOTBELOW = u(0x323)

-- Table of exported functions; it is returned at the end of the module.
local export = {}

-- Single-character Cyrillic-to-Latin mapping. export.tr applies it with
-- gsub(text, ".", tt) after the contextual rules and the digraph tables,
-- so characters without an entry are left untouched.
local tt = {
	-- Lowercase
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e",
	["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k",
	["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r",
	["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ц"] = "c",
	["ч"] = "ć", ["ш"] = "š", ["щ"] = "ś", ["ъ"] = "ʔ", ["ы"] = "ə", ["ь"] = "ʲ",
	["э"] = "e", ["ю"] = "ju", ["я"] = "ja",

	-- Uppercase
	["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E",
	["Ё"] = "Jo", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
	["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R",
	["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C",
	["Ч"] = "Ć", ["Ш"] = "Š", ["Щ"] = "Ś", ["Ъ"] = "Ɂ", ["Ы"] = "Ə", ["Ь"] = "ʲ",
	["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja",
}

-- Two-character sequences that transliterate as a unit. export.tr replaces
-- them before the single-character table is applied. Keys use the lowercase
-- palochka only, because export.tr converts every palochka variant to it
-- first.
local digraphs = {
	-- Lowercase initial
	["гъ"] = "ɣ", ["гӏ"] = "ʻ", ["жь"] = "ź", ["къ"] = "qʼ", ["кӏ"] = "kʼ",
	["пӏ"] = "pʼ", ["тл"] = "tˡ", ["тӏ"] = "tʼ", ["фӏ"] = "fʼ", ["хъ"] = "q",
	["хӏ"] = "ḥ", ["цӏ"] = "cʼ", ["чӏ"] = "ćʼ", ["шӏ"] = "čʼ",

	-- Uppercase initial
	["Гъ"] = "Ɣ", ["Гӏ"] = "ʻ", ["Жь"] = "Ź", ["Къ"] = "Qʼ", ["Кӏ"] = "Kʼ",
	["Пӏ"] = "Pʼ", ["Тл"] = "Tˡ", ["Тӏ"] = "Tʼ", ["Фӏ"] = "Fʼ", ["Хъ"] = "Q",
	["Хӏ"] = "Ḥ", ["Цӏ"] = "Cʼ", ["Чӏ"] = "Ćʼ", ["Шӏ"] = "Čʼ",
}

-- Prevents overlapping substitutions: these are applied in a second pass,
-- after `digraphs`, so that the "л" of "тл" has already been consumed before
-- "ль"/"лӏ" are looked for.
local digraphs2 = {
	["ль"] = "lᶻ", ["лӏ"] = "lˢʼ",
	["Ль"] = "Lᶻ", ["Лӏ"] = "Lˢʼ",
}

--- Transliterate Cyrillic text into Latin script.
-- The steps run in this order: palochka normalisation, contextual j/w/ʷ
-- insertion, digraph replacement, single-character replacement, apostrophe
-- repositioning, doubled-letter simplification, and removal of redundant
-- ʲ/ʷ next to j/w.
-- @param text string to transliterate
-- @param lang accepted but not used by this function
-- @param sc accepted but not used by this function
-- @return the transliterated string, passed through toNFC
function export.tr(text, lang, sc)
	-- Convert uppercase palochka to lowercase, along with any "false"
	-- palochkas (entered as Latin "I" or "l", or Cyrillic "І"). Lowercase
	-- palochka is found in tables above.
	text = gsub(text, "[IlІӀ]", "ӏ")

	-- Contextual substitution of "j" before "е", "w" for "у" and ʷ for "в".
	text = gsub(text, "^е", "jе")
	text = gsub(text, "^Е", "Jе")
	text = gsub(text, "([аеёиоуыэюяАЕЁИОУЫЭЮЯ%s%p])е", "%1jе")
	text = gsub(text, "([%s%p])Е", "%1Jе")
	text = gsub(text, "у([аиоуыэ])", "w%1")
	text = gsub(text, "У([аиоуыэ])", "W%1")
	text = gsub(text, "([аеёиоуыэюяАЕЁИОУЫЭЮЯ])у", "%1w")
	text = gsub(text, "([бгджзклмнпрстфхцчшщъьӏБГДЖЗКЛМНПРСТФХЦЧШЩЪЬӀ])в", "%1ʷ")

	-- Digraphs first, then the second-pass digraphs, then single characters.
	-- pairs() order is unspecified, but within each table no key ends with a
	-- character that another key of the same table starts with.
	for digraph, replacement in pairs(digraphs) do
		text = gsub(text, digraph, replacement)
	end
	for digraph, replacement in pairs(digraphs2) do
		text = gsub(text, digraph, replacement)
	end
	text = gsub(text, ".", tt)

	-- Reposition apostrophes then decompose.
	text = toNFD(gsub(gsub(text, "ʼʲ", "ʲʼ"), "ʼʷ", "ʷʼ"))

	-- When double letters both have a modifier letter and/or an apostrophe,
	-- only show on the second for readability purposes.
	-- Capture 1 is an optional combining diacritic, capture 2 the optional
	-- modifier letters plus apostrophe; both fragments are loop-invariant.
	local diacritic = "([" .. GRAVE .. ACUTE .. CIRC .. CARON .. DOTBELOW .. "]?)"
	local modifiers = "([ʲˡˢʷᶻ]?[ʲʷ]?ʼ?)"
	for letter in gmatch("abcdefghijklmnopqrstuvxzəɣʔABCDEFGHIJKLMNOPQRSTUVXZƏƔɁ", ".") do
		-- The second letter must carry the same diacritic and modifiers (%1%2).
		local second = lower(letter) .. "%1%2"
		text = gsub(text, letter .. diacritic .. modifiers .. second, letter .. "%1" .. second)
	end

	-- Remove consecutive j/ʲ and w/ʷ then recompose.
	return toNFC(gsub(gsub(text, "ʲ?([Jj])ʲ?", "%1"), "ʷ?([Ww])ʷ?", "%1"))
end

return export