-- Module:fa-IPA

--[=[

FIXME:

1. (ir) q and ğ should both be ɣ intervocalically (āqā should give ɒːɣɒː)
2. (prs) disable auto lowering of long vowels before /h/ & /ʔ/ (causes too many issues)
3. (tg, prs) change /q/ to /ɢ/ before a voiced consonant
4. (cls) prevent the appearance of β after a final consonant (e.g. -atb should not give β)
5. (cls) fix geminated β and ð; they should be a normal b and d
6. FIXED // (ir) محوطه gives [mo.ɦav.væ.t̪ʰé] instead of [mo.ɦæv.væ.t̪ʰé]
7. (all except cls) final geminates such as خط should transcribe as [xat(ː)]
8. (ir) add support for Shirazi dialect

]=]

local export = {}

local m_str_utils = require("Module:string utilities")

-- Shorthand for m_str_utils.char; used below to build single characters from code points.
local U = m_str_utils.char
-- Language object for Persian ("fa"), passed to the IPA formatter in export.show.
local lang = require("Module:languages").getByCode("fa")
local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local m_qual = require("Module:qualifier")
-- Every romanisation letter that counts as a consonant. The string is spliced
-- into pattern character classes ("[" .. all_consonants .. "]").
local all_consonants = "bptTjčhxdDðrzžsšʔʾğGfqkglmnŋhɦwvy'" --needed for syllables
-- Stop/affricate subset and its complement, used by syllabify for cluster splitting.
local stop_cons = "bptTjčdDðqkg"
local non_stopc = "hxrzžsšʔğGflmhɦwvy'"

-- String helpers. rsubn returns two values (new string, replacement count),
-- like string.gsub.
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local ulen = m_str_utils.len
local usub = m_str_utils.sub
-- Diacritics and marks referenced by the transcription rules.
local pitchaccent = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local devoice = U(0x325) -- U+0325 COMBINING RING BELOW
local dtack = U(0x31E) -- U+031E COMBINING DOWN TACK BELOW
local gstop = U(0x027) -- U+0027 APOSTROPHE
local dental = U(0x32A) -- U+032A COMBINING BRIDGE BELOW

-- Single-result variant of rsubn: performs the same substitution but hands
-- back only the resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate rsubn's results to the first one.
	return (rsubn(term, foo, bar))
end

-- Style codes accepted by this module, in display order.
export.all_styles = {"cls", "prs", "kbl", "haz", "fa", "teh", "tg"}

-- Named groups of styles. A group parameter in export.show sets the input for
-- every style listed in that group.
export.all_style_groups = {
	all = export.all_styles,
	cls = {"cls"},
	dari = {"prs", "kbl", "haz"},
	ir = {"fa", "teh"},
	tg = {"tg"}
}

-- Human-readable description of each style, keyed by style code.
export.all_style_descs = {
	cls = "Classical Persian",
	prs = "Dari Persian",
	-- The style code used everywhere else is "kbl"; without this key a lookup
	-- by style code finds no description for Kabuli.
	kbl = "Kabuli",
	-- Legacy key, retained so that any existing lookup by "kabul" keeps working.
	kabul = "Kabuli",
	haz = "Hazaragi",
	fa = "Iranian Persian",
	teh = "Tehrani",
	tg = "Tajik"
}

-- Map `fun` over the sequence `items` and concatenate the resulting sequences.
-- `fun` must return a sequence (possibly empty) for every item; the results
-- are appended in order to one flat list, which is returned.
local function flatmap(items, fun)
	local merged, count = {}, 0
	for _, item in ipairs(items) do
		for _, produced in ipairs(fun(item)) do
			count = count + 1
			merged[count] = produced
		end
	end
	return merged
end

-- Romanisation letter -> IPA symbol, shared by every dialect. Each *_IPA
-- function applies it with rsubn(text, ".", common_consonants), so characters
-- without an entry are left untouched.
local common_consonants = {
	["j"] = "d͡ʒ",
	["'"] = "ʔ",
	["ḍ"] = "z",
	["D"] = "d", --these are here for Hazaragi
	["T"] = "t", --they are retroflexes in haz
	["ğ"] = "ɣ",
	["G"] = "ɣ",
	["ḥ"] = "h",
	["r"] = "ɾ",
	["ṣ"] = "s",
	["š"] = "ʃ",
	["ṯ"] = "s",
	["ṭ"] = "t",
	["y"] = "j",
	["ž"] = "ʒ",
	["ẓ"] = "z",
	["č"] = "t͡ʃ",
	["g"] = "ɡ",
	-- The backtick is the internal stress marker; it becomes the IPA stress mark.
	["`"] = "ˈ"
}

-- Iranian Persian: short vowels of the romanisation -> IPA.
local iranian_persian_short_vowels = {["a"] = "æ", ["i"] = "e", ["u"] = "o"}

-- Iranian Persian: long vowels -> IPA. ō and ē map to the same values as ū and ī.
local iranian_persian_long_vowels = {
	["ā"] = "ɒː",
	["ī"] = "iː",
	["ū"] = "uː",
	["ō"] = "uː",
	["ē"] = "iː"
}

-- Iranian Persian consonant overrides, applied in export.fa_IPA before
-- common_consonants. k/g become c/ɟ here; fa_IPA rewrites those again later.
local iranian_persian_consonants = {["ḏ"] = "z", ["q"] = "ɢ", ["ğ"] = "ɢ", ["k"] = "c", ["g"] = "ɟ"}

-- Dari: short vowels of the romanisation -> IPA.
local dari_persian_short_vowels = {["a"] = "ä", ["i"] = "ɪ", ["u"] = "ʊ"}

-- Dari: long vowels -> IPA. Unlike the Iranian table, ō and ē keep their own values.
local dari_persian_long_vowels = {
	["ā"] = "ɑː",
	["ī"] = "iː",
	["ū"] = "uː",
	["ō"] = "oː",
	["ē"] = "eː"
}

-- Dari consonant overrides (used by the prs, kbl and haz converters).
local dari_persian_consonants = {["ḏ"] = "z", ["v"] = "w"}

-- Tajik: short vowels of the romanisation -> IPA.
local tajik_short_vowels = {["a"] = "ä", ["i"] = "i", ["u"] = "u"}

-- Tajik: long vowels -> IPA; the outputs carry no length mark.
local tajik_long_vowels = {
	["ā"] = "ɔ",
	["ī"] = "i",
	["ū"] = "u",
	["ō"] = "ɵ",
	["ē"] = "e"
}

-- Vowel characters spliced into negated character classes in export.tg_IPA.
local tajik_vowels = "aieuɵɔ"

-- Tajik consonant overrides. export.tg_IPA applies this after common_consonants,
-- so the "ɣ" key matches the output of that earlier mapping.
local tajik_consonants = {["ḏ"] = "z", ["ɣ"] = "ʁ", ["x"] = "χ"}

-- Classical Persian: short vowels map to themselves. export.prs_haz_IPA also
-- uses this table.
local classical_persian_short_vowels = {["a"] = "a", ["i"] = "i", ["u"] = "u"}

-- Classical Persian: long vowels -> IPA.
local classical_persian_long_vowels = {
	["ā"] = "ɑː",
	["ī"] = "iː",
	["ū"] = "uː",
	["ō"] = "oː",
	["ē"] = "eː"
}

-- Classical Persian consonant overrides.
local classical_persian_consonants = {["ḏ"] = "ð", ["v"] = "w"}

-- Vowel inventory of the romanisation, without "a".
local vowels_minus_a = "iuāīūüēōːʷ"
local vowels = "aiuāīūüēōːʷ" --ʷ and ː are counted as vowels to prevent them from being put in the next syllable
-- One "consonant": any character that is not a vowel, a dot, a space or a hyphen.
-- Note that this also matches the "#" word-boundary marker.
local consonant = "[^" .. vowels .. ". -]"
-- One vowel character.
local vowel = "[" .. vowels .. "]"
-- Vowel, one consonant, an optional second consonant, vowel: the context in
-- which syllabify inserts a syllable break.
local syllabify_pattern = "(" .. vowel .. ")(" .. consonant .. ")(" .. consonant .. "?)(" .. vowel .. ")"

-- Insert "." syllable breaks into a romanised string and give vowel-initial
-- syllables a glottal-stop onset.
-- @param text romanised text; word boundaries are expected to be marked with "#"
-- @return the text with syllable breaks and onsets added
local function syllabify(text)
	-- Decides where the break goes inside a V-C(C)-V match: a lone consonant
	-- opens the next syllable, a pair is split down the middle.
	local function place_break(first_vowel, cons1, cons2, second_vowel)
		if cons2 == "" and cons1 ~= "" then
			return first_vowel .. "." .. cons1 .. second_vowel
		end
		return first_vowel .. cons1 .. "." .. cons2 .. second_vowel
	end

	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Two hyphen-delimited consonants: keep them, separated by a break.
		{"%-(" .. consonant .. ")%-(" .. consonant .. ")", "%1.%2"},
		-- A stress marker always starts a new syllable.
		{"([" .. all_consonants .. vowels .. "])`", "%1.`"},
		-- Add syllable breaks. Run twice; presumably so that matches which
		-- share a vowel with the previous match are caught on the second pass.
		{syllabify_pattern, place_break},
		{syllabify_pattern, place_break},
		-- Syllable-boundary consonants.
		{
			"([" .. non_stopc .. "])([" .. stop_cons .. "])([" .. non_stopc .. "]+ʷ?)([" .. all_consonants .. vowels .. "])",
			"%1%2.%3%4"
		},
		-- Three consonants in a row: break after the second.
		{"([" .. all_consonants .. "])([" .. all_consonants .. "])([" .. all_consonants .. "])", "%1%2.%3"},
		-- ALL syllables are CV-, so vowels NEED an initial consonant (ʔ).
		-- Labelled "exclude izafa/ezafe"; NOTE(review): the replacement
		-- reproduces the match unchanged, so this rule has no effect.
		{"([" .. all_consonants .. "])([.])i#", "%1%2i#"},
		{"([.])([" .. vowels .. "])", "%1ʔ%2"},
		{"#([" .. vowels .. "])", "#ʔ%1"},
	}

	return text
end

-- Remove the glottal consonants (h, ɦ, ') from a romanised string, replacing
-- them with a glide or a lengthened vowel where a rule applies. Only for the
-- regional dialects (called by the Kabuli and Hazaragi converters).
-- @param text romanised text
-- @return the text with no h or ' left in it
local function remove_glottal_c(text)
	local glottal = "([hɦ'])"
	local rules = {
		-- Between two vowels: swap the glottal consonant for a matching glide.
		{"([aā](%.?))" .. glottal .. "([uū])", "%1w%4"},
		{"([iī])(%.?)" .. glottal .. "([auāēōū])", "i%2y%4"},
		{"([auāēōū](%.?))" .. glottal .. "([iīē])", "%1y%4"},
		{"([ē](%.?))" .. glottal .. "([auāēōū])", "%1y%4"},
		{"([uū])(%.?)" .. glottal .. "([aāiīēō])", "u%2w%4"},
		{"([ō](%.?))" .. glottal .. "([aāiīēō])", "%1w%4"},
		-- Same or similar vowel on both sides of a stressed glottal consonant:
		-- drop the consonant and merge into one long vowel.
		{"([" .. all_consonants .. "])([uū])`([h'])([uū])", "`%1ū"},
		{"([" .. all_consonants .. "])([iī])`([h'])([iī])", "`%1ī"},
		{"([" .. all_consonants .. "])([aā])`([h'])([aā])", "`%1ā"},
		-- Otherwise vowel + glottal consonant collapses into a majhul long vowel.
		{"([aā])((%.?)[h'])", "ā"},
		{"([iī])((%.?)['])", "ē"},
		{"([uū])((%.?)['])", "ō"},
		-- Lastly, delete every remaining glottal consonant.
		{"(['h])", ""},
	}

	for _, rule in ipairs(rules) do
		text = rsubn(text, rule[1], rule[2])
	end
	return text
end

-- Consonant assimilation and placement rules shared by the non-Classical
-- converters. DON'T USE THIS ON CLASSICAL.
-- @param text partially converted transcription
-- @return the text with assimilated/allophonic consonants
local function con_assimilation(text)
	-- Optional syllable break followed by one consonant of the given set,
	-- captured as a whole.
	local function before(set)
		return "((%.?)[" .. set .. "])"
	end

	local rules = {
		-- Retroflexes are only in Hazaragi.
		{"l" .. before("ʈɖ"), "ɭ%1"},
		-- n/l before t/d are marked dental ...
		{"([nl])" .. before("td"), "%1" .. dental .. "%2"},
		{"n" .. before("ʈɖ"), "ɳ%1"},
		-- ... and so is every t/d.
		{"([td])", "%1" .. dental},
		-- n takes the place of articulation of the following consonant.
		{"n" .. before("kg"), "ŋ%1"},
		{"n" .. before("cɟy"), "ɲ%1"},
		{"n" .. before("bp"), "m%1"},
		{"n" .. before("qɢ"), "ɴ%1"},
		{"([nm])" .. before("fv"), "ɱ%2"},
		-- h directly after a vowel (with optional diacritics, length mark and
		-- syllable break) becomes ɦ.
		{
			"([āʌɑɒäæeēīioɔōuūʊɪ](%" .. dtack .. "?)(%" .. pitchaccent .. "?)(%ː?)(%.?))([h])",
			"%1ɦ"
		},
		{"r([tdszšlž])", "ɹ%1"},
		-- Formally, f only assimilates in the same syllable.
		{"f([bjdžğ])", "v%1"},
		{"ɾ(%.?)ɾ", "#r%1r"},
		{"ä(" .. pitchaccent .. ")", "æ%1"},
		-- Word-final ɦ / h turn into the superscript modifiers.
		{"([ɦ])#", "ʱ#"},
		{"([h])#", "ʰ#"},
	}

	for _, rule in ipairs(rules) do
		text = rsubn(text, rule[1], rule[2])
	end
	return text
end

-- Convert a romanised Persian term into an Iranian Persian IPA transcription.
-- The rules are order-dependent: normalisation, v/w handling, diphthongs,
-- syllabification, stress, vowels, consonants, then final clean-up.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.fa_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		{"a[-]([" .. all_consonants .. "])[-]", "a%1"},
		{"a%-", "e-"},
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"%-i#", "i#"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		-- Any remaining hyphen is a syllable break.
		{"[-]", "."},
		{"v", "w"},
		-- Replace xwa with xu, xwā with xā, xwē with xē.
		{"xwa", "xu"},
		{"xwā", "xā"},
		{"xwē", "xē"},
		-- Decide between v and w from the surrounding sounds.
		{"w(" .. vowel .. ")", "v%1"},
		{"w(" .. consonant .. ")", "w%1"},
		{"([" .. vowels_minus_a .. "])w", "%1v"},
		{"(" .. consonant .. ")w#", "%1v#"},
		{"v%(w", "v(v"},
		-- Replace diphthongs: aw -> uw, ay -> ey.
		-- The previous callback received a `position` argument that the
		-- pattern never captured, and then matched a local string against
		-- itself, so its guard was always true when it did not raise a
		-- malformed-pattern error (e.g. on text containing "("). A plain
		-- lookup keeps the effective behaviour, in line with romanize_ira,
		-- which also rewrites ay/aw unconditionally.
		{"a([wy])", {w = "uw", y = "ey"}},
	}

	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Move the stress marker onto the vowel as a combining accent.
		{"`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent},
		-- Shield i/ī before y from the vowel tables ("E" is restored below).
		{"([iī])(" .. pitchaccent .. "?)(%.?)y", "E%2%3y"},
		-- Aspirate p, t, k, č before a vowel.
		{"([ptkč](%" .. dental .. "?))([" .. vowels .. "])", "%1ʰ%3"},
		{"([" .. vowels .. "](%.?))q", "%1ʁ"},
		-- Replace final a with e (can be overridden by entering æ).
		{"([a])(%" .. pitchaccent .. "?)#", "e%2#"},
		-- Replace short vowels, then long vowels.
		{".", iranian_persian_short_vowels},
		{".", iranian_persian_long_vowels},
		{"E(" .. pitchaccent .. "?)(%.?)y", "i%1%2y"},
		-- Replace jj with dj and čč with tč.
		{"jj", "dj"},
		{"čč", "tč"},
		-- Replace owv- with avv-.
		{"owv", "ævv"},
		{"ow.v", "æv.v"},
		-- Allophones: devoice word-final g, b, d, j, l.
		{"([gbdjl](%" .. dental .. "?))#", "%1" .. devoice .. "#"},
		-- Replace consonants.
		{".", iranian_persian_consonants},
		-- Before back vowels the palatal stops revert to velars.
		{"cʰ([ɒuo])", "kʰ%1"},
		{"ɟ(%" .. devoice .. "?)([ɒuo])", "g%1%2"},
	}

	text = con_assimilation(text)

	apply {
		{"#(g)", "%1" .. devoice},
		{".", common_consonants},
		{"ɾ", "ɹ"},
		-- Fix the pitch accent on long vowels: accent first, then length mark.
		{"([ɒiu])ː" .. pitchaccent, "%1" .. pitchaccent .. "ː"},
		{"([ɢʁ])(%.?)t", "x%2t"},
		{"([ʁɢ])#", "ɢ" .. devoice .. "#"},
		{"#([ʁɢ])", "#q"},
		{"ʁɢ", "ɢɢ"},
		{"#g", "#k"},
		-- Remaining palatal stops are written as palatalised velars.
		{"c", "kʲ"},
		{"ɟ", "ɡʲ"},
		{"ʲʰ", "ʰʲ"},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- Convert a romanised Persian term into a Dari IPA transcription.
-- Based on formal speech of URBAN Kabul.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.prs_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"%-i#", "i#"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		{"[-]", "."},
		{"v", "w"},
		-- Replace xwa with xu, xwā with xā, xwē with xē.
		{"xwa", "xu"},
		{"xwā", "xā"},
		{"xwē", "xē"},
		{"xwē", "xē"}, -- second pass; nothing is left for it to match
		{"([iī](%`?))(%.?)y", "i%2y"},
		-- Iranian and Classical dictionaries list -iyy,
		-- but Tajik and Dari ones don't.
		{"([iī]y(%`?))(%.?)y", "i%2y"},
	}

	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Pitch accent mark: move the stress marker onto the vowel.
		{"`([" .. all_consonants .. "])([ʷ]?)([" .. vowels .. "])", "%1%2%3" .. pitchaccent},
		-- Aspirate p, t, k, č before a vowel.
		{"([ptkč])([" .. vowels .. "])", "%1ʰ%2"},
		-- Lower i/ī before h, ɦ or ' ...
		{"i((%.?)[ɦh'])", "e" .. dtack .. "%1"},
		{"ī((%.?)[ɦh'])", "ē%1"},
		-- ... and likewise u/ū.
		{"u((%.?)[hɦ'])", "o" .. dtack .. "%1"},
		{"ū((%.?)[hɦ'])", "ō%1"},
		-- Replace short vowels, then long vowels.
		{".", dari_persian_short_vowels},
		{".", dari_persian_long_vowels},
		-- Replace jj with dj and čč with tč.
		{"jj", "dj"},
		{"čč", "tč"},
		-- Fix the pitch accent on long vowels: accent first, then length mark.
		{"([ʌɑeiou])ː" .. pitchaccent, "%1" .. pitchaccent .. "ː"},
	}

	-- Allophones.
	text = con_assimilation(text)

	apply {
		-- Replace consonants.
		{".", common_consonants},
		{"f([bjdžğ])", "v%1"},
		{".", dari_persian_consonants},
		{"ɾ(%.?)ɾ", "r%1r"},
		{"ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j"},
		{"#ɾ", "#r"},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- Convert a romanised Persian term into an IPA transcription of the
-- colloquial dialect of Kabul.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.prs_kbl_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"%-i#", "i#"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		{"[-]", "."},
		{"v", "w"},
		-- Replace xwa with xu, xwā with xā, xwē with xē.
		{"xwa", "xu"},
		{"xwā", "xā"},
		{"xwē", "xē"},
		{"xwē", "xē"}, -- second pass; nothing is left for it to match
		{"([iī](%`?))(%.?)y", "i%2y"},
		-- Iranian and Classical dictionaries list -iyy,
		-- but Tajik and Dari ones don't.
		{"([iī]y(%`?))(%.?)y", "i%2y"},
		-- Pitch accent mark: move the stress marker onto the vowel.
		{"`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent},
	}

	-- Remove glottal consonants for some dialects.
	text = remove_glottal_c(text)
	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Universal aspiration.
		{"([ptkč])([" .. vowels .. "])", "%1ʰ%2"},
		-- Replace short vowels, then long vowels.
		{".", dari_persian_short_vowels},
		{".", dari_persian_long_vowels},
		-- Replace jj with dj.
		{"jj", "dj"},
		{"āw", "aw"}, -- lost colloquially
		-- Replace čč with tč.
		{"čč", "tč"},
	}

	-- Allophones.
	text = con_assimilation(text)

	apply {
		-- Replace consonants.
		{".", dari_persian_consonants},
		{"f((%.?)[bjdžğ])", "v%1"},
		{".", common_consonants},
		-- Fix the pitch accent on long vowels: accent first, then length mark.
		{"([ʌɑeiou])ː" .. pitchaccent, "%1" .. pitchaccent .. "ː"},
		{"ɾ(%.?)ɾ", "#r%1r"},
		{"ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j"},
		{"#ɾ", "#r"},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- Convert a romanised Persian term into a Hazaragi IPA transcription.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.prs_haz_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	-- Pattern prefix shared by the vowel-harmony rules: the given vowel, an
	-- optional accent (capture 1) and one consonant (capture 2).
	local function harmony(vowel_letter)
		return vowel_letter .. "(" .. pitchaccent .. "?)([" .. all_consonants .. "])"
	end

	apply {
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"([iī](%`?))(%.?)y", "i%2y"},
		-- Iranian and Classical dictionaries list -iyy,
		-- but Tajik and Dari ones don't.
		{"([iī]y(%`?))(%.?)y", "i%2y"},
		{"%-i#", "i#"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		{"[-]", "."},
		-- These conversions need to happen BEFORE EVERYTHING ELSE.
		-- Pitch accent mark: move the stress marker onto the vowel.
		{"`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent},
		-- Vowel harmony: the first vowel takes on the quality of the next one.
		{harmony("ē") .. "([ūiī])", "%3%1%2%3"},
		{harmony("ē") .. "([" .. all_consonants .. "])([ī])", "%4%1%2%3%4"},
		{harmony("i") .. "([ouū])", "%3%1%2%3"},
		{harmony("ī") .. "([ēōuūiī])", "%3%1%2%3"},
		{harmony("ō") .. "([uū])", "%3%1%2%3"},
		{harmony("ō") .. "([i])", "u%1%2%3"},
		{harmony("ō") .. "([ī])", "ū%1%2%3"},
		-- Replace xwa with xu, xwā with xā, xwē with xē.
		{"xwa", "xu"},
		{"xwā", "xā"},
		{"xwē", "xē"},
		{"xwē", "xē"}, -- second pass; nothing is left for it to match
		{"v", "w"},
	}

	-- Remove glottal consonants for some dialects.
	text = remove_glottal_c(text)
	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Universal aspiration.
		{"([ptkč])([" .. vowels .. "])", "%1ʰ%2"},
		-- Delete certain consonant clusters and diphthongs.
		{"āy", "ay"},
		{"āw", "aw"},
		-- Retroflex consonants.
		{"D", "ɖ"},
		{"T", "ʈ"},
		-- Replace short vowels; there is no consistent vowel length for i or u.
		{"ī", "i"},
		{"ū", "u"},
		{".", classical_persian_short_vowels},
		-- Approximate vowels.
		{"ā", "ɔː"},
		{"ō", "ʊː"},
		{"a", "ä"},
		-- Replace long vowels.
		{".", dari_persian_long_vowels},
		-- Fix the pitch accent on long vowels: accent first, then length mark.
		{"([ɔeiʊu])ː" .. pitchaccent, "%1" .. pitchaccent .. "ː"},
		-- Replace jj with dj and čč with tč.
		{"jj", "dj"},
		{"čč", "tč"},
	}

	-- Allophones.
	text = con_assimilation(text)

	apply {
		{"([gbdjl](%" .. dental .. "?))#", "%1" .. devoice .. "#"},
		-- Terminal voicing is not phonetic.
		{"([gbdj])#", "%1" .. devoice .. "#"},
		-- Replace consonants.
		{".", dari_persian_consonants},
		{"f((%.?)[bjdžğ])", "v%1"},
		{".", common_consonants},
		{"ɾ(%.?)ɾ", "#r%1r"},
		{"#ɾ", "#r"},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- Convert a romanised Persian term into a Tajik IPA transcription.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.tg_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"%-i#", "i#"},
		-- Iranian and Classical dictionaries list -iyy,
		-- but Tajik and Dari ones don't.
		{"([iī]y(%`?))(%.?)y", "i%2y"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		{"[-]", "."},
		{"v", "w"},
		-- Replace xwa with xu, xwā with xā, xwē with xē.
		{"xwa", "xu"},
		{"xwā", "xā"},
		{"xwē", "xē"},
		{"xwē", "xē"}, -- second pass; nothing is left for it to match
	}

	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Pitch accent mark: move the stress marker onto the vowel.
		{"`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent},
		-- Replace jj with dj and čč with tč.
		{"jj", "dj"},
		{"čč", "tč"},
		{"w([" .. vowels .. "])", "v%1"},
		-- Universal aspiration.
		{"([ptkč])([" .. vowels .. "])", "%1ʰ%2"},
		-- i/ī before h, ', ʔ or ɦ become e unless a Tajik vowel follows ...
		{"([iī])(%" .. pitchaccent .. "?)([h'ʔɦ])([^" .. tajik_vowels .. "])", "e%2%3%4"},
		-- ... and u/ū in the same position become ɵ.
		{"([uū])(%" .. pitchaccent .. "?)([hʔ'ɦ])([^" .. tajik_vowels .. "])", "ɵ%2%3%4"},
		-- Aspiration (variant that allows a dental mark after the stop).
		{"([ptkč](%" .. dental .. "?))([" .. vowels .. "])", "%1ʰ%3"},
		-- Replace short vowels, then long vowels.
		{".", tajik_short_vowels},
		{".", tajik_long_vowels},
	}

	-- Allophones.
	text = con_assimilation(text)

	apply {
		-- Replace consonants.
		{".", common_consonants},
		{".", tajik_consonants},
		{"([aä])(" .. pitchaccent .. ")", "æ%2"},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- Convert a romanised Persian term into a Classical Persian IPA transcription.
-- @param text romanised input (spaces separate words, "ˈ" or "`" marks stress)
-- @return the IPA string, NFC-normalised, without enclosing brackets
function export.fa_cls_IPA(text)
	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Internally the stress marker is a backtick.
		{"ˈ", "`"},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"([iī](%`?))(%.?)y", "i%2y"},
		{"%-i#", "i#"},
		{"[-]([" .. vowels .. all_consonants .. "])[-]", "%1"},
		{"[-]", "."},
		{"v", "w"},
		-- Replace xwa/xwā/xwē with the labialised xʷa/xʷā/xʷē.
		{"xwa", "xʷa"},
		{"xwā", "xʷā"},
		{"xwē", "xʷē"},
	}

	-- Automatically denote syllables.
	text = syllabify(text)

	apply {
		-- Replace short vowels.
		{".", classical_persian_short_vowels},
		-- Replace d with ḏ (and b with β) after vowels.
		-- NOTE(review): the ".?" is an unescaped dot, so it accepts ANY single
		-- character between the vowel and the d/b, not only a syllable break;
		-- this may be why a consonant + b sequence also yields β. Confirm the
		-- intended contexts before tightening it to "%.?".
		{"([" .. vowels .. "]+.?)(%`?)([d])", "%1%2ḏ"},
		{"([" .. vowels .. "]+.?)(%`?)([b])", "%1%2β"},
		-- Replace long vowels.
		{".", classical_persian_long_vowels},
		-- Replace jj with dj and čč with tč.
		{"jj", "dj"},
		{"čč", "tč"},
		-- Replace consonants.
		{".", common_consonants},
		{".", classical_persian_consonants},
		-- Drop the word-boundary markers.
		{"#", ""},
	}

	return toNFC(text)
end

-- ROMANIZATIONS

-- Produce the Classical Persian romanisation from the pronunciation respelling.
-- @param text the respelling, or a table carrying an `args` field (presumably
--   a frame), in which case args[1] and args[2] are used as text and script
-- @param script not used by the rules below
-- @param options not used by the rules below
-- @return the romanised string
function export.romanize_fa_cls(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end

	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Stress markers do not appear in the romanisation.
		{"`", ""},
		{"ˈ", ""},
		{"[,]", ", "},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"([iī])(%.?)y", "iy"},
		-- Kill incorrect characters.
		{"([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", ""},
		{"([ɴŋ])", "n"},
		-- Remove v.
		{"v", "w"},
		-- Ensure vowels are paired to a consonant.
		{"([.])([" .. vowels .. "])", "%1'%2"},
		{"([.])", ""},
		-- Post-vocalic d/ḍ and b are written ḏ and ḇ.
		{"([" .. vowels .. "])([dḍ])", "%1ḏ"},
		{"([" .. vowels .. "](%-?))b", "%1ḇ"},
		{"ḏ", "ḏ"},
		{"ḏd", "ḏḏ"},
		{"ḇb", "ḇḇ"},
		{"G", "ğ"},
		{"ḍ", "z"},
		{"ṭ", "t"},
		{"ṯ", "s"},
		{"ṣ", "s"},
		{"ḥ", "h"},
		-- Remove Hazaragi retroflexes.
		{"D", "d"},
		{"T", "t"},
		{"ɖ", "d"},
		{"ʈ", "t"},
		-- Drop a word-initial glottal stop, then the boundary markers.
		{"#'", "#"},
		{"#", ""},
	}

	return text
end

-- Produce the Dari romanisation from the pronunciation respelling.
-- @param text the respelling, or a table carrying an `args` field (presumably
--   a frame), in which case args[1] and args[2] are used as text and script
-- @param script not used by the rules below
-- @param options not used by the rules below
-- @return the romanised string
function export.romanize_prs(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end

	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Stress markers do not appear in the romanisation.
		{"`", ""},
		{"ˈ", ""},
		{"[,]", ", "},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		{"([iī](%.?))y", "i%2y"},
		{"([iī]y(%.?))y", "i%2y"},
		-- Short i/u are lowered before ' or h.
		{"i(['h])", "e%1"},
		{"u(['h])", "o%1"},
		-- Kill incorrect characters.
		{"([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", ""},
		{"([ɴŋ])", "n"},
		{"v", "w"},
		-- Replace xw clusters.
		{"xw([āē])", "x%1"},
		{"xwa", "xu"},
		-- For rare exceptions.
		{"ʷ", "w"},
		-- Ensure vowels are paired to a consonant.
		{"([.])([" .. vowels .. "])", "%1'%2"},
		{"([.])", ""},
		{"ḍ", "z"},
		{"ḏ", "z"},
		{"ṯ", "s"},
		{"ṭ", "t"},
		{"G", "ğ"},
		{"ṣ", "s"},
		{"ḥ", "h"},
		-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES;
		-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY.
		{"D", "ḍ"},
		{"T", "ṭ"},
		{"ɖ", "ḍ"},
		{"ʈ", "ṭ"},
		{"#'", "#"},
		-- Remove unnecessary marks.
		{"#", ""},
	}

	return text
end

-- Produce the Iranian Persian romanisation from the pronunciation respelling.
-- @param text the respelling, or a table carrying an `args` field (presumably
--   a frame), in which case args[1] and args[2] are used as text and script
-- @param script not used by the rules below
-- @param options not used by the rules below
-- @return the romanised string
function export.romanize_ira(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end

	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		-- Stress markers do not appear in the romanisation.
		{"`", ""},
		{"ˈ", ""},
		{"[,]", ", "},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		-- Kill incorrect characters.
		{"([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", ""},
		{"([ɴŋ])", "n"},
		{"v", "w"},
		-- Replace xw clusters.
		{"xw([āē])", "x%1"},
		{"xwa", "xu"},
		{"ʷ", ""},
		-- Decide between v and w from the surrounding sounds.
		{"w(" .. vowel .. ")", "v%1"},
		{"w(" .. consonant .. ")", "w%1"},
		{"([" .. vowels_minus_a .. "])w", "%1v"},
		{"v%(w", "v(v"},
		{"(" .. consonant .. ")w#", "%1v#"},
		{"wv", "vv"},
		{"wæ", "væ"},
		-- Ensure vowels are paired to a consonant.
		{"([.])([" .. vowels .. "])", "%1'%2"},
		{"([.])", ""},
		{"iy", "īy"},
		-- Diphthongs.
		{"ay", "ey"},
		{"aw", "ow"},
		{"ḍ", "z"},
		{"ḏ", "z"},
		{"ṭ", "t"},
		{"G", "ğ"},
		{"q", "ğ"},
		{"ṯ", "s"},
		{"ṣ", "s"},
		{"ḥ", "h"},
		{"ā", "â"},
		{"u", "o"},
		{"i", "e"},
		-- Remove Hazaragi retroflexes.
		{"D", "d"},
		{"T", "t"},
		{"ɖ", "d"},
		{"ʈ", "t"},
		-- Merge ē into i and ō into u. (The source comment here reads "Tajik
		-- does not have vowel length", although this is the Iranian romaniser.)
		{"([ēī])", "i"},
		{"([ūō])", "u"},
		-- Terminal w is only possible in a diphthong.
		-- NOTE(review): as written this only matches a literal "o0w#"; the "0"
		-- looks like a typo — confirm the intended pattern.
		{"([o]0)w#", "v#"},
		{"a#", "e#"},
		{"a%-", "e-"},
		-- An explicit æ is the override for a plain "a".
		{"æ", "a"},
		-- Drop a word-initial glottal stop, then the boundary markers.
		{"#'", "#"},
		{"#", ""},
	}

	return text
end

-- Produce the Tajik romanisation from the pronunciation respelling.
-- @param text the respelling, or a table carrying an `args` field (presumably
--   a frame), in which case args[1] and args[2] are used as text and script
-- @param script not used by the rules below
-- @param options not used by the rules below
-- @return the romanised string
function export.romanize_tg(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end

	-- Applies {pattern, replacement} pairs to `text`, in order.
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = rsubn(text, rule[1], rule[2])
		end
	end

	apply {
		{"[,]", ", "},
		{" | ", "# | #"},
	}
	-- "#" marks word boundaries for the rules below.
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	apply {
		-- Short i/u before ' or h are rewritten as ē/ō (turned into e/ü below).
		{"i(['h])", "ē%1"},
		{"u(['h])", "ō%1"},
		{"([iī]y(%.?))y", "i%2y"},
		-- The stress marker is kept for now: it is needed by the final-ī rule.
		{"ˈ", "`"},
		{"([iī]y`y)", "i`y"},
		-- Kill incorrect characters.
		{"([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", ""},
		{"([ɴŋ])", "n"},
		{"w", "v"},
		-- Replace xw clusters.
		{"xv([āē])", "x%1"},
		{"xva", "xu"},
		{"ʷ", ""},
		-- Ensure vowels are paired to a consonant.
		{"([.])([" .. vowels .. "])", "%1'%2"},
		{"([.])", ""},
		{"ḍ", "z"},
		{"ḏ", "z"},
		{"ṯ", "s"},
		{"ṭ", "t"},
		{"(['])", "ʾ"},
		{"ṣ", "s"},
		{"ḥ", "h"},
		{"G", "ġ"},
		{"ğ", "ġ"},
		{"ē", "e"},
		{"ō", "ü"},
		{"ā", "o"},
		-- Remove Hazaragi retroflexes.
		{"D", "d"},
		{"T", "t"},
		{"ɖ", "d"},
		{"ʈ", "t"},
		-- Tajik does not have vowel length.
		{"([iī])", "i"},
		-- A word-final i in a syllable that carries the stress marker is written ī.
		{"`([" .. all_consonants .. "])i#", "%1ī#"},
		{"([ūu])", "u"},
		{"`", ""},
		-- Drop a word-initial ʾ together with its boundary marker, then the
		-- remaining boundary markers.
		{"#([ʾ])", ""},
		{"#", ""},
	}

	return text
end

-- Transcribe one term with the converter that belongs to `style`.
-- @param text romanised term
-- @param style one of "cls", "prs", "kbl", "haz", "fa", "teh", "tg"
-- @return the IPA string; `text` is returned unchanged for any other style
local function one_term_ipa(text, style)
	-- Built on each call so the exported converters are looked up at call time.
	local converters = {
		cls = export.fa_cls_IPA,
		prs = export.prs_IPA,
		kbl = function(term)
			-- Should ignore conversion if specified: fall back to formal Dari.
			return export.prs_kbl_IPA(term) or export.prs_IPA(term)
		end,
		haz = export.prs_haz_IPA,
		-- Tehrani currently shares the Iranian Persian converter.
		fa = export.fa_IPA,
		teh = export.fa_IPA,
		tg = export.tg_IPA,
	}

	local convert = converters[style]
	if convert then
		text = convert(text)
	end

	return text
end

-- Transcribe `text` in the given style.
--
-- style == one of the following:
-- "cls": Classical Persian
-- "prs": Dari Persian
-- "kbl": Kabuli
-- "haz": Hazaragi
-- "fa": Iranian Persian
-- "teh": Tehrani
-- "tg": Tajik
--
-- @param text romanised term
-- @param style style code from the list above
-- @return a list of pronunciation tables, each with a `phonemic` field holding
--   the IPA string (currently one entry per input term)
function export.IPA(text, style)
	local variants = {text}

	-- Two never-called `apply_sub` helpers used to live here. They referred to
	-- an undefined global `rfind` and to a non-existent `item.phonetic` field,
	-- so they could not have run; they were removed, together with the two
	-- extra arguments (`false` and the undefined global `err`) that were
	-- passed to one_term_ipa, which accepts only (text, style).
	local function call_one_term_ipa(variant)
		return {
			{
				phonemic = one_term_ipa(variant, style)
			}
		}
	end

	return flatmap(variants, call_one_term_ipa)
end

-- Compute the pronunciations for every requested style and arrange them into
-- display groups.
-- @param inputs table mapping style code -> list of romanised terms
-- @param args_style optional style filter: comma-separated alternatives, each
--   a "+"-joined list of style codes that must all match; a leading "-" on a
--   code negates it. When nil, every available style is expressed.
-- @return list of groups {tag = ..., styles = {...}}; every entry of `styles`
--   is {tag, represented_styles, pronuns, indent}
function export.express_styles(inputs, args_style)
	local pronuns_by_style = {}
	local expressed_styles = {}

	-- Transcribe every input term of one style and collect the results.
	local function dostyle(style)
		pronuns_by_style[style] = {}
		for _, val in ipairs(inputs[style]) do
			local pronuns = export.IPA(val, style)
			for _, pronun in ipairs(pronuns) do
				table.insert(pronuns_by_style[style], pronun)
			end
		end
	end

	-- Keep only the styles for which pronunciations were computed.
	local function all_available(styles)
		local available_styles = {}
		for _, style in ipairs(styles) do
			if pronuns_by_style[style] then
				table.insert(available_styles, style)
			end
		end
		return available_styles
	end

	-- Add one display line. Lines that share `hidden_tag` end up in the same
	-- group; `tag` is the label of the line itself.
	local function express_style(hidden_tag, tag, styles, indent)
		indent = indent or 1
		if hidden_tag == true then
			hidden_tag = tag
		end
		if type(styles) == "string" then
			styles = {styles}
		end
		styles = all_available(styles)
		if #styles == 0 then
			return
		end
		local style = styles[1]

		-- If style specified, make sure it matches the requested style.
		local style_matches
		if not args_style then
			style_matches = true
		else
			local or_styles = rsplit(args_style, "%s*,%s*")
			for _, or_style in ipairs(or_styles) do
				local and_styles = rsplit(or_style, "%s*%+%s*")
				local and_matches = true
				for _, and_style in ipairs(and_styles) do
					local negate
					if and_style:find("^%-") then
						and_style = and_style:gsub("^%-", "")
						negate = true
					end
					local this_style_matches = false
					for _, part in ipairs(styles) do
						if part == and_style then
							this_style_matches = true
							break
						end
					end
					if negate then
						this_style_matches = not this_style_matches
					end
					if not this_style_matches then
						and_matches = false
					end
				end
				if and_matches then
					style_matches = true
					break
				end
			end
		end
		if not style_matches then
			return
		end

		local new_style = {
			tag = tag,
			represented_styles = styles,
			pronuns = pronuns_by_style[style],
			indent = indent
		}
		-- Append to an existing group with the same hidden tag, if there is one.
		for _, hidden_tag_style in ipairs(expressed_styles) do
			if hidden_tag_style.tag == hidden_tag then
				table.insert(hidden_tag_style.styles, new_style)
				return
			end
		end
		table.insert(
			expressed_styles,
			{
				tag = hidden_tag,
				styles = {new_style}
			}
		)
	end

	for style, _ in pairs(inputs) do
		dostyle(style)
	end

	-- True when the two styles have different pronunciations, or when either
	-- of them was not computed at all.
	local function diff(style1, style2)
		if not pronuns_by_style[style1] or not pronuns_by_style[style2] then
			return true
		end
		return not m_table.deepEquals(pronuns_by_style[style1], pronuns_by_style[style2])
	end

	-- Only the fa/teh comparison is consulted below. The prs/kbl and prs/haz
	-- comparisons were computed but never read, so they are no longer made.
	local fa_teh_different = diff("fa", "teh")

	-- Classical Persian
	express_style("Classical Persian", "Classical Persian", "cls")

	-- Dari Persian
	express_style("Dari, formal", "Dari, formal", "prs")
	express_style("Dari, formal", "Kabuli", "kbl", 2)
	express_style("Dari, formal", "Hazaragi", "haz", 2)

	-- Iranian Persian
	express_style("Iran, formal", "Iran, formal", "fa")
	if fa_teh_different then
		express_style("Iran, formal", "Tehrani", "teh", 2)
	end

	-- Tajik
	express_style("Tajik, formal", "Tajik, formal", "tg")

	return expressed_styles
end

-- Template entry point: read the template arguments, transcribe every
-- requested style and return the wikitext for the pronunciation lines.
-- @param frame the Scribunto frame; arguments are read from its parent frame
-- @return wikitext string
function export.show(frame)
	-- Create parameter specs
	local params = {
		[1] = {}, -- this replaces style group 'all'
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["style"] = {},
		["bullets"] = {type = "number", default = 1}
	}
	for group, _ in pairs(export.all_style_groups) do
		if group ~= "all" then
			params[group] = {}
		end
	end
	for _, style in ipairs(export.all_styles) do
		params[style] = {}
	end

	-- Parse arguments
	-- getParent is a method and must be called; `frame:getParent.args` does
	-- not even parse.
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	-- Set inputs
	local inputs = {}
	-- If 1= specified, do all styles.
	if args[1] then
		for _, style in ipairs(export.all_styles) do
			inputs[style] = args[1]
		end
	end
	-- Then do remaining style groups other than 'all', overriding 1= if given.
	for group, styles in pairs(export.all_style_groups) do
		if group ~= "all" and args[group] then
			for _, style in ipairs(styles) do
				inputs[style] = args[group]
			end
		end
	end
	-- Then do individual style settings.
	for _, style in ipairs(export.all_styles) do
		if args[style] then
			inputs[style] = args[style]
		end
	end
	-- If no inputs given, set all styles based on current pagename.
	if not next(inputs) then
		-- getCurrentTitle is a function returning the title object; it has to
		-- be called before `.text` can be read.
		local text = mw.title.getCurrentTitle().text
		for _, style in ipairs(export.all_styles) do
			inputs[style] = text
		end
	end

	-- Each input may hold several comma-separated terms.
	for style, input in pairs(inputs) do
		inputs[style] = rsplit(input, ",")
	end
	local expressed_styles = export.express_styles(inputs, args.style)

	local lines = {}

	-- Format one line. Returns the wikitext and a markup-free approximation of
	-- it that is only used for measuring its length.
	local function format_style(tag, expressed_style, is_first)
		local pronunciations = {}
		local formatted_pronuns = {}
		for _, pronun in ipairs(expressed_style.pronuns) do
			table.insert(
				pronunciations,
				{
					pron = "[" .. pronun.phonemic .. "]",
					qualifiers = pronun.qualifiers
				}
			)
			local formatted_phonemic = "[" .. pronun.phonemic .. "]"
			if pronun.qualifiers then
				formatted_phonemic = "(" .. table.concat(pronun.qualifiers, ", ") .. ") " .. formatted_phonemic
			end
			table.insert(formatted_pronuns, formatted_phonemic)
		end
		-- Number of bullets: When indent = 1, we want the number of bullets given by `args.bullets`,
		-- and when indent = 2, we want `args.bullets + 1`, hence we subtract 1.
		local bullet = string.rep("*", args.bullets + expressed_style.indent - 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
		-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
		-- the toggle box with the "more" button on the right.
		local pre = is_first and args.pre and args.pre .. " " or ""
		local pre_for_len = pre .. (tag and "(" .. tag .. ") " or "")
		pre = pre .. (tag and m_qual.format_qualifier(tag) .. " " or "")
		local post = is_first and (args.ref or "") .. (args.post and " " .. args.post or "") or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations } .. post
		-- Use the plain-text prefix here: `pre_for_len` was computed for this
		-- purpose but the marked-up `pre` was concatenated instead.
		local formatted_for_len = bullet .. pre_for_len .. "IPA(key): " .. table.concat(formatted_pronuns, ", ") .. post
		return formatted, formatted_for_len
	end

	for i, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			style_group.formatted, style_group.formatted_for_len =
				format_style(style_group.styles[1].tag, style_group.styles[1], i == 1)
		else
			style_group.formatted, style_group.formatted_for_len =
				format_style(style_group.tag, style_group.styles[1], i == 1)
			for j, style in ipairs(style_group.styles) do
				style.formatted, style.formatted_for_len =
					format_style(style.tag, style, i == 1 and j == 1)
			end
		end
	end

	-- Length of `text` in characters once anything that looks like a tag is removed.
	local function textual_len(text)
		text = rsub(text, "<.->", "")
		return ulen(text)
	end

	local maxlen = 0
	for i, style_group in ipairs(expressed_styles) do
		local this_len = textual_len(style_group.formatted_for_len)
		if #style_group.styles > 1 then
			for _, style in ipairs(style_group.styles) do
				this_len = math.max(this_len, textual_len(style.formatted_for_len))
			end
		end
		maxlen = math.max(maxlen, this_len)
	end

	for i, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			table.insert(lines, " \n" .. style_group.formatted .. " ")
		else
			local inline = '\n \n' .. style_group.formatted .. " "
			local full_prons = {}
			for _, style in ipairs(style_group.styles) do
				table.insert(full_prons, style.formatted)
			end
			local full = '\n \n' .. table.concat(full_prons, "\n") .. " "
			-- NOTE(review): em_length is not referenced by the markup emitted
			-- below; presumably it was meant to size a wrapper element. Kept
			-- so that the wrapper can be restored without recomputing it.
			local em_length = math.floor(maxlen * 0.68) -- from Module:grc-pronunciation
			table.insert(
				lines,
				' ' ..
					inline .. full .. " "
			)
		end
	end

	-- major hack to get bullets working on the next line
	return table.concat(lines, "\n") .. "\n "
end

return export