-- Module:User:Sameerhameedy/fa-IPA/romanize

local export = {}

-- Transliteration must be classical.
local lang = require("Module:languages").getByCode("fa-cls")

-- Shortcuts to the Scribunto string helpers used throughout this module.
local U = mw.ustring.char
local rsplit = mw.text.split
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len

-- Character inventories; these are spliced into "[...]" pattern sets below
-- and in the romanisation functions.
local consonants = "bptTṭjčhxdDðḍrzžsšʔʾğGfqkglmnŋhɦwvy'"
local vowels = "aiuāīūüēō"

-- Ready-made single-character pattern classes.
-- `consonant` is a negated set: anything that is not a vowel, ".", " " or "-".
local vowel = "[" .. vowels .. "]"
local consonant = "[^" .. vowels .. ". -]"

-- Individual characters built from their code points.
local pitchaccent = U(0x301) -- combining acute accent
local devoice = U(0x325) -- combining ring below
local dtack = U(0x31E) -- combining down tack below
local dental = U(0x32A) -- combining bridge below
local hyphen = U(0x02D) -- "-"
local gstop = U(0x027) -- "'"

--- Transliterate text with the Classical Persian transliteration module.
-- @param text value handed unchanged to `Module:fa-cls-translit`'s `tr`
-- @param sc accepted for call-site symmetry; not used by this function
-- @return whatever `tr` returns for `text`
local function transliterate(text, sc)
	local translit = require("Module:fa-cls-translit")
	return translit.tr(text)
end

--- Shared pre-processing applied by every regional romaniser in this module.
--
-- Steps, in order: unpack frame-style input, transliterate Arabic-script
-- text, mark "xw" clusters, insert "#" boundary markers around words and
-- commas, turn stress marks into a pitch accent on the following vowel,
-- strip a few phonetic diacritics, and normalise several letters.
--
-- @param text either a string, or a table with an `args` field (in which
--   case `args[1]` is the text and `args[2]` the script code)
-- @param sc optional script code; when absent it is detected from the text
-- @param options not used by this function (reset to `{}` for table input)
-- @return the normalised string, still carrying "#" boundary markers
local function fix_romanization(text, sc, options)
	-- common fixes
	if type(text) == "table" then
		options = {}
		-- The second argument is the script code. It used to be assigned to
		-- an undeclared name `script`, which leaked a global and was never
		-- read; it now feeds the `sc` parameter as the signature suggests.
		text, sc = text.args[1], text.args[2]
	end
	-- xwV clusters: shield "xw" that is already in the input so that only
	-- "xw" produced by transliteration is turned into "xʷ" below.
	text = rsubn(text, "xw", "xW")
	if not sc then
		-- `getCode` is a method and must be called; without the parentheses
		-- this statement is a syntax error.
		sc = require("Module:languages").getByCode("fa"):findBestScript(text):getCode()
	end
	if sc == "fa-Arab" then
		text = transliterate(text, sc)
	end
	text = rsubn(text, "xw", "xʷ")
	text = rsubn(text, "W", "w")
	-- Insert "#" boundary markers around separators and at both ends.
	text = rsubn(text, " | ", "# | #")
	text = rsubn(text, "[,]" .. " ", ",")
	text = rsubn(text, "[,]", "#,#")
	text = rsubn(text, " ", "# #")
	text = rsubn(text, "^", "#")
	text = rsubn(text, "$", "#")
	-- Stress: "ˈ" becomes "`", which is then consumed and replaced by a
	-- pitch accent placed after the next vowel.
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, "`([" .. consonants .. "])([ʷ]?)([" .. vowels .. "])", "%1%2%3" .. pitchaccent .. "")
	text = rsubn(text, "`([" .. vowels .. "])", "%1" .. pitchaccent .. "")
	-- Drop the dental, devoicing, down-tack and aspiration marks.
	text = rsubn(text, "([" .. dental .. devoice .. dtack .. "ʰ])", "")
	-- Letter normalisation.
	text = rsubn(text, "([ɴŋ])", "n")
	text = rsubn(text, "e", "ē")
	text = rsubn(text, "o", "ō")
	text = rsubn(text, "G", "ğ")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	text = rsubn(text, "v", "w")
	return text
end

--- Romanise text for the `fa_cls` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_fa_cls(text, sc, options)
	text = fix_romanization(text)
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- d and b after a vowel (for b, optionally across a hyphen) are written
	-- with a macron below; the "ḏd"/"ḇb" rules extend that to doubled letters.
	text = rsubn(text, "([" .. vowels .. "])([dḍ])", "%1ḏ")
	text = rsubn(text, "([" .. vowels .. "](%-?))b", "%1ḇ")
	-- NOTE(review): the two literals below look identical as displayed;
	-- presumably they differ in Unicode normalisation form - verify.
	text = rsubn(text, "ḏ", "ḏ")
	text = rsubn(text, "ḏd", "ḏḏ")
	text = rsubn(text, "ḇb", "ḇḇ")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- Drop an apostrophe directly after a boundary marker, then the
	-- markers themselves.
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

--- Romanise text for the `prs` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_prs(text, sc, options)
	text = fix_romanization(text)
	-- i/u (optionally accented) directly before "'" or "h" become e/o.
	text = rsubn(text, "i((" .. pitchaccent .. "?)['h])", "e%1")
	text = rsubn(text, "u((" .. pitchaccent .. "?)['h])", "o%1")
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	text = rsubn(text, "a%-yi", "e")
	text = rsubn(text, "a%-i", "e")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	-- remove unnecessary marks
	text = rsubn(text, "#", "")
	return text
end

--- Romanise text for the `ira` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_ira(text, sc, options)
	text = fix_romanization(text)
	-- Replace xw clusters
	text = rsubn(text, "ʷ", "w")
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	-- w becomes v before a vowel, after most vowels, and word-finally
	-- after a consonant.
	text = rsubn(text, "w(" .. vowel .. ")", "v%1")
	text = rsubn(text, "w(" .. consonant .. ")", "w%1")
	text = rsubn(text, "([iuāīūüēō])w", "%1v")
	text = rsubn(text, "v%(w", "v(v")
	text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
	text = rsubn(text, "wv", "vv")
	text = rsubn(text, "wæ", "væ")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "iy", "īy")
	-- Shield "ayy" from the diphthong rule; "A" is restored to "a" below.
	text = rsubn(text, "ayy", "Ayy")
	-- Replace diphthong: aw/ay become uw/iy (later lowered to ow/ey by the
	-- u->o and i->e rules) unless the semivowel is followed by a vowel.
	--
	-- The callback previously took a `position` argument that the pattern
	-- never supplied (no "()" capture) and matched a local string against
	-- itself, so the guard was effectively always true - or spuriously
	-- false when the text contained pattern magic characters. The position
	-- capture is restored and the following character is now inspected.
	-- The guard is written as "not a vowel" rather than with the `consonant`
	-- class so that output before "-" stays as it was.
	-- NOTE(review): this changes output for "ay" directly before a vowel.
	text = rsubn(
		text,
		"a([wy])()",
		function(semivowel, position)
			-- `text` is still the unmodified subject here, so `position`
			-- indexes the character right after the matched semivowel.
			local following = mw.ustring.sub(text, position, position)
			if following == "" or not mw.ustring.find(following, vowel) then
				if semivowel == "w" then
					return "uw"
				else
					return "iy"
				end
			end
			-- Returning nothing keeps the original match.
		end
	)
	text = rsubn(text, "A", "a")
	text = rsubn(text, "q", "ğ")
	text = rsubn(text, "ā", "â")
	text = rsubn(text, "u", "o")
	text = rsubn(text, "i", "e")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	-- IP does not have vowel length
	text = rsubn(text, "([ēī])", "i")
	text = rsubn(text, "([ūō])", "u")
	-- terminal w is only possible in a diphthong
	-- NOTE(review): this pattern requires a literal "0" after "o" and the
	-- replacement drops the capture; it looks like a typo - confirm intent.
	text = rsubn(text, "([o]0)w#", "v#")
	text = rsubn(text, "a#", "e#")
	text = rsubn(text, "a%-", "e-")
	text = rsubn(text, "æ", "a")
	-- Drop an apostrophe directly after a boundary marker, then the
	-- markers themselves.
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

--- Romanise text for the `tg` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_tg(text, sc, options)
	text = fix_romanization(text)
	-- i/u directly before "'" or "h" become ē/ō.
	text = rsubn(text, "i(['h])", "ē%1")
	text = rsubn(text, "u(['h])", "ō%1")
	text = rsubn(text, "w", "v")
	text = rsubn(text, "ʷ", "v")
	-- Replace xw clusters
	text = rsubn(text, "xv([āē])", "x%1")
	text = rsubn(text, "xva", "xu")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "(['])", "ʾ")
	text = rsubn(text, "ğ", "ġ")
	text = rsubn(text, "ē", "e")
	text = rsubn(text, "ō", "ü")
	text = rsubn(text, "ā", "o")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	-- Tajik does not have vowel length
	text = rsubn(text, "([iī])", "i")
	-- A word-final i after a consonant still marked with "`" is written ī.
	text = rsubn(text, "`([" .. consonants .. "])i#", "%1ī#")
	text = rsubn(text, "([ūu])", "u")
	-- Remove leftover stress marks.
	text = rsubn(text, "`", "")
	text = rsubn(text, "ˈ", "")
	-- Drop "ʾ" directly after a boundary marker, then the markers.
	text = rsubn(text, "#([ʾ])", "")
	text = rsubn(text, "#", "")
	-- A hyphenated "-i" after a vowel gets a y glide; otherwise the hyphen
	-- is simply dropped.
	-- NOTE(review): by this point ē/ā have become e/o, which are not in
	-- `vowels`, so the glide rule cannot fire after them - confirm intent.
	text = rsubn(text, "([" .. vowels .. "])%-i", "%1yi")
	text = rsubn(text, "%-i", "i")
	return text
end

--- regional/colloquial varieties

--- Remove glottal consonants ("GC": h, ɦ, ') from romanised text.
-- Only for regional dialects.
--
-- Works in stages: collapse identical or similar vowels around a GC, replace
-- a GC between unlike vowels with a glide, double a consonant that precedes
-- a GC, lengthen a vowel that precedes a GC, and finally delete every
-- remaining "h" and "'".
--
-- @param text string that still carries "#" boundary markers
-- @return the string with all "h" and "'" removed
local function remove_glottal_c(text)
	--completely delete GC if both vowels are either the same or similar
	-- NOTE(review): these three rules drop the captured pitch accent and
	-- emit a "`" in front of the consonant; nothing in this function
	-- removes that mark again - confirm callers expect it.
	text = rsubn(text, "([" .. consonants .. "])([uū])(" .. pitchaccent .. "?)([h'])([uū])", "`%1ū")
	text = rsubn(text, "([" .. consonants .. "])([iī])(" .. pitchaccent .. "?)([h'])([iī])", "`%1ī")
	text = rsubn(text, "([" .. consonants .. "])([aā])(" .. pitchaccent .. "?)([h'])([aā])", "`%1ā")
	-- remove glottal consonants with appropriate glide
	-- (both hyphen rules consume the trailing "#" without re-emitting it)
	text = rsubn(text, "([" .. vowels .. "](" .. pitchaccent .. "?))%-([īēi])#", "%1-y%3")
	text = rsubn(text, "([" .. vowels .. "](" .. pitchaccent .. "?))%-([uūō])#", "%1-y%3")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "(" .. consonant .. ")%-(" .. vowel .. ")", "%1'%2")
	text = rsubn(text, "([aā](" .. pitchaccent .. "?))([hɦ'])([uū])", "%1w%4")
	text = rsubn(text, "([iī])(" .. pitchaccent .. "?)([hɦ'])([auāēōū])", "i%2y%4")
	text = rsubn(text, "([auāēōū](" .. pitchaccent .. "?))([hɦ'])([iīē])", "%1y%4")
	text = rsubn(text, "([ē](" .. pitchaccent .. "?))([hɦ'])([auāēōū])", "%1y%4")
	text = rsubn(text, "([uū])(" .. pitchaccent .. "?)([hɦ'])([aāiīēō])", "u%2w%4")
	text = rsubn(text, "([ō](" .. pitchaccent .. "?))([hɦ'])([aāiīēō])", "%1w%4")
	-- A consonant directly before "h" or "'" is doubled in its place.
	text = rsubn(text, "([" .. consonants .. "])([h\'])", "%1%1")
	--Else, turn GC into majhul long vowels
	text = rsubn(text, "([aā])((%.?)[h'])", "ā")
	text = rsubn(text, "([iī])((%.?)['])", "ē")
	text = rsubn(text, "([uū])((%.?)['])", "ō")

	-- lastly, remove all remaining GC
	text = rsubn(text, "(['h])", "")
	return text
end

--- Romanise text for the `haz` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_haz(text, sc, options)
	text = fix_romanization(text)
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	text = rsubn(text, "(" .. vowel .. ")%-([īē])", "%1-y%2")
	-- "I" is a temporary stand-in for an i that must survive the blanket
	-- i -> ī rule further down; it is turned back into "i" at the end.
	text = rsubn(text, "a%-yi", "I")
	text = rsubn(text, "a%-i", "I")
	text = rsubn(text, "%-i#", "-I#")
	text = rsubn(text, "%-i%-", "I-")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
	text = rsubn(text, "D", "ḍ")
	text = rsubn(text, "T", "ṭ")
	text = rsubn(text, "ɖ", "ḍ")
	text = rsubn(text, "ʈ", "ṭ")
	text = rsubn(text, "ḏ", "z")
	--Vowel Harmony: a vowel is rewritten according to the vowel that
	-- follows it across one (or, in the second rule, two) characters.
	text = rsubn(text, "ē(" .. pitchaccent .. "?)(" .. consonant .. ")([ūiī])", "%3%1%2%3")
	text = rsubn(text, "ē(" .. pitchaccent .. "?)(" .. consonant .. ")(" .. consonant .. ")([ī])", "%4%1%2%3%4")
	text = rsubn(text, "i(" .. pitchaccent .. "?)(" .. consonant .. ")([ouū])", "%3%1%2%3")
	text = rsubn(text, "ī(" .. pitchaccent .. "?)(" .. consonant .. ")([ēōuūiī])", "%3%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)(" .. consonant .. ")([uū])", "%3%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)(" .. consonant .. ")([i])", "u%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)(" .. consonant .. ")([ī])", "ū%1%2%3")
	text = remove_glottal_c(text)
	text = rsubn(text, "i", "ī")
	text = rsubn(text, "u", "ū")
	text = rsubn(text, "I", "i")
	-- remove unnecessary marks
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

--- Romanise text for the `kbl` variety.
-- @param text string, or frame-style table (handled by `fix_romanization`)
-- @param sc accepted but not used by this function
-- @param options accepted but not used by this function
-- @return the romanised string with all "#" boundary markers removed
function export.romanize_kbl(text, sc, options)
	text = fix_romanization(text)
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	text = rsubn(text, "(" .. vowel .. ")%-ī", "%1-yī")
	text = rsubn(text, "a%-yi", "i")
	text = rsubn(text, "a%-i", "i")
	-- "I" is a temporary stand-in for a word-final i so that the glottal
	-- rules below do not touch it; it is turned back into "i" at the end.
	-- NOTE(review): the optional pitch accent is captured but not re-emitted,
	-- so it is dropped here - confirm that is intended.
	text = rsubn(text, "i(" .. pitchaccent .. "?)#", "I#")
	text = rsubn(text, "i(" .. pitchaccent .. "?)%-#", "I-#")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	text = rsubn(text, "ḏ", "z")
	text = remove_glottal_c(text)
	text = rsubn(text, "I", "i")
	-- remove unnecessary marks: every other romaniser in this module strips
	-- the "#" boundary markers before returning; this one previously leaked
	-- them into its result.
	text = rsubn(text, "#", "")
	return text
end

return export