-- Module:fa-IPA/sandbox

-- Table of everything this module exports.
local export = {}

-- Scribunto / Wiktionary dependencies.
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("fa")
local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local m_qual = require("Module:qualifier")

-- Consonant inventories (as character-class bodies) used when building patterns.
local all_consonants = "bptTjčhxdDðrzžsšʔğGfqkglmnhɦwvy\'" --needed for syllables
local stop_cons = "bptTjčdDðqkg"
local non_stopc = "hxrzžsšʔğGflmnhɦwvy'"

-- Short aliases for the Unicode-aware string helpers.
local rsplit = mw.text.split
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len

-- Combining diacritics and marks inserted into / stripped from transcriptions.
local pitchaccent = U(0x301)
local devoice = U(0x325)
local dtack = U(0x31E)
local gstop = U(0x027)
local dental = U(0x32A)

-- version of rsubn that discards all but the first return value
-- (i.e. returns only the substituted string, not the replacement count)
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the multiple results to exactly one.
	return (rsubn(term, foo, bar))
end

-- Every style code this module can transcribe.
export.all_styles = {
	"cls",
	"prs",
	"kbl",
	"haz",
	"fa",
	"teh",
	"tg",
}

-- Named groups of style codes; `all` is the full list above.
export.all_style_groups = {
	all = export.all_styles,
	cls = { "cls" },
	dari = { "prs", "kbl", "haz" },
	ir = { "fa", "teh" },
	tg = { "tg" },
}

-- Human-readable description for each style code in export.all_styles.
export.all_style_descs = {
	cls = "Classical Persian",
	prs = "Dari Persian",
	-- The style code used everywhere else in this module is "kbl".
	kbl = "Kabuli",
	-- Legacy key retained so any existing lookup by "kabul" keeps working.
	kabul = "Kabuli",
	haz = "Hazaragi",
	fa = "Iranian Persian",
	teh = "Tehrani",
	tg = "Tajik",
}

-- Apply `fun` to each element of the sequence `items`; `fun` must return a
-- sequence. All returned sequences are concatenated, in order, into one new
-- sequence which is returned.
local function flatmap(items, fun)
	local flattened = {}
	for _, element in ipairs(items) do
		for _, produced in ipairs(fun(element)) do
			table.insert(flattened, produced)
		end
	end
	return flattened
end

-- Consonant replacements shared by every dialect: transliteration character
-- on the left, IPA output on the right. Applied character-by-character via
-- rsubn(text, ".", common_consonants).
local common_consonants = {
	['j'] = 'd͡ʒ',
	['\''] = 'ʔ',
	['ḍ'] = 'z',
	['D'] = 'd', --these are here for Hazaragi
	['T'] = 't', --they are retroflexes in haz
	['ğ'] = 'ɣ',
	['G'] = 'ɣ',
	['ḥ'] = 'h',
	['r'] = 'ɾ',
	['ṣ'] = 's',
	['š'] = 'ʃ',
	['ṯ'] = 's',
	['ṭ'] = 't',
	['y'] = 'j',
	['ž'] = 'ʒ',
	['ẓ'] = 'z',
	['č'] = 't͡ʃ',
	['g'] = 'ɡ',
	['`'] = 'ˈ'
}

-- Iranian Persian: short-vowel replacements.
local iranian_persian_short_vowels = {
	a = 'æ',
	i = 'e',
	u = 'o',
}

-- Iranian Persian: long-vowel replacements (ō and ē share the outputs of ū and ī).
local iranian_persian_long_vowels = {
	['ā'] = 'ɒː',
	['ī'] = 'iː',
	['ū'] = 'uː',
	['ō'] = 'uː',
	['ē'] = 'iː',
}

-- Iranian Persian: dialect-specific consonant replacements.
local iranian_persian_consonants = {
	['ḏ'] = 'z',
	q = 'ɢ',
	['ğ'] = 'ɢ',
	k = 'c',
	g = 'ɟ',
}

-- Dari: short-vowel replacements.
local dari_persian_short_vowels = {
	a = 'ä',
	i = 'ɪ',
	u = 'ʊ',
}

-- Dari: long-vowel replacements.
local dari_persian_long_vowels = {
	['ā'] = 'ɑː',
	['ī'] = 'iː',
	['ū'] = 'uː',
	['ō'] = 'oː',
	['ē'] = 'eː',
}

-- Dari: dialect-specific consonant replacements.
local dari_persian_consonants = {
	['ḏ'] = 'z',
	v = 'w',
}

-- Tajik: short-vowel replacements.
local tajik_short_vowels = {
	a = 'ä',
	i = 'i',
	u = 'u',
}

-- Tajik: long-vowel replacements (outputs carry no length mark).
local tajik_long_vowels = {
	['ā'] = 'ɔ',
	['ī'] = 'i',
	['ū'] = 'u',
	['ō'] = 'ɵ',
	['ē'] = 'e',
}

-- Character-class body used by tg_IPA to test "not followed by a vowel".
local tajik_vowels = "aieuɵɔ"

-- Tajik: dialect-specific consonant replacements.
local tajik_consonants = {
	['ḏ'] = 'z',
	['ɣ'] = 'ʁ',
	x = 'χ',
}

-- Classical Persian: short vowels map to themselves.
local classical_persian_short_vowels = {
	a = 'a',
	i = 'i',
	u = 'u',
}

-- Classical Persian: long-vowel replacements.
local classical_persian_long_vowels = {
	['ā'] = 'ɑː',
	['ī'] = 'iː',
	['ū'] = 'uː',
	['ō'] = 'oː',
	['ē'] = 'eː',
}

-- Classical Persian: dialect-specific consonant replacements.
local classical_persian_consonants = {
	['ḏ'] = 'ð',
	v = 'w',
}

--ʷ and ː are counted as vowels to prevent them from being put in the next syllable
local vowels = "aiuāīūēōːʷ"
-- A "consonant" is anything that is not a vowel, a dot, a space or a hyphen.
local consonant = "[^" .. vowels .. ". -]"
local vowel = "[" .. vowels .. "]"
-- vowel, consonant, optional second consonant, vowel (four captures).
local syllabify_pattern = "(" .. vowel .. ")(" .. consonant .. ")(" .. consonant .. "?)(" .. vowel .. ")"

-- Insert "." syllable breaks into a transliterated string.
-- `text` is expected to already carry the "#" word-boundary markers added by
-- the *_IPA functions. Returns the text with breaks inserted.
local function syllabify(text)
	-- "-C-C" becomes "C.C"
	text = rsubn(text, "%-(" .. consonant .. ")%-(" .. consonant .. ")", "%1.%2")
	-- put a break in front of a stress mark (`) that directly follows a letter
	text = rsubn(text, "(["..all_consonants..vowels.."])`", "%1.`")

	-- Add syllable breaks.
	-- Run twice; presumably so that a vowel consumed as the end of one match
	-- can begin the next one.
	for _ = 1, 2 do
		text = rsubn(
			text,
			syllabify_pattern,
			function(a, b, c, d)
				-- With a single intervocalic consonant the break goes before
				-- it (V.CV); with two it goes between them (VC.CV).
				if c == "" and b ~= "" then
					c, b = b, ""
				end

				return a .. b .. "." .. c .. d
			end
		)
	end
	-- syllable boundry consonants
	text = rsubn(text, "([" .. non_stopc .. "])([" .. stop_cons .. "])([" .. non_stopc .. "]+ʷ?)(["..all_consonants..vowels.."])", "%1.%2ᵊ.%3%4")
	text = rsubn(text, "([" .. stop_cons .. "])([" .. non_stopc .. "]+ʷ?)(["..all_consonants..vowels.."])", "%1ᵊ.%2%3")
	-- ALL syllables are CV- so vowels NEED an intial consonant (ʔ)
	text = rsubn(text, "([.])(["..vowels.."])", "%1ʔ%2")
	text = rsubn(text, "#([" .. vowels .. "])", "#ʔ%1")

	return text
end

-- Remove the glottal consonants h, ɦ and ' from `text`, either replacing them
-- with a glide or lengthening the preceding vowel. Returns the rewritten text.
local function remove_glottal_c(text) --only for regional dialects
	-- remove glottal consonants with appropriate glide
	text = rsubn(text, "([aā](%.?))([hɦ'])([uū])", "%1w%4")
	text = rsubn(text, "([iī])(%.?)([hɦ'])([auāēōū])", "i%2y%4")
	text = rsubn(text, "([auāēōū](%.?))([hɦ'])([iīē])", "%1y%4")
	text = rsubn(text, "([ē](%.?))([hɦ'])([auāēōū])", "%1y%4")
	text = rsubn(text, "([uū])(%.?)([hɦ'])([aāiīēō])", "u%2w%4")
	text = rsubn(text, "([ō](%.?))([hɦ'])([aāiīēō])", "%1w%4")
	--completely delete GC if both vowels are the either the same or similar
	text = rsubn(text, "(["..all_consonants.."])([uū])`([h'])([uū])", "`%1ū")
	text = rsubn(text, "(["..all_consonants.."])([iī])`([h'])([iī])", "`%1ī")
	text = rsubn(text, "(["..all_consonants.."])([aā])`([h'])([aā])", "`%1ā")
	--Else, turn GC into majhul long vowels
	text = rsubn(text, "([aā])((%.?)[h'])", "ā")
	text = rsubn(text, "([iī])((%.?)['])", "ē")
	text = rsubn(text, "([uū])((%.?)['])", "ō")
	text = rsubn(text, "(['h])", "") -- lastly, remove all remaning GC
	return text
end

-- Apply consonant assimilation and allophone rules to `text` and return it.
-- The rules are order-dependent: later substitutions see the output of
-- earlier ones.
local function con_assimilation(text) --DONT USE THIS ON CLASSICAL
	-- assimilation/placement of certain consonants
	text = rsubn(text, "l((%.?)[ʈɖ])", 'ɭ%1') --retroflexes are only in hazaragi
	text = rsubn(text, "([nl])((%.?)[td])", '%1'..dental..'%2')
	text = rsubn(text, "n((%.?)[ʈɖ])", 'ɳ%1')
	text = rsubn(text, "([td])", '%1'..dental..'')
	text = rsubn(text, "n((%.?)[kg])", 'ŋ%1')
	text = rsubn(text, "n((%.?)[cɟy])", 'ɲ%1')
	text = rsubn(text, "n((%.?)[mbpw])", 'm%1')
	text = rsubn(text, "n((%.?)[qɢ])", 'ɴ%1')
	text = rsubn(text, "([nm])((%.?)[fv])", 'ɱ%2')
	-- h after a vowel (optionally followed by down-tack, accent, length mark
	-- and syllable break) becomes ɦ
	text = rsubn(text, "([āʌɑɒäæeēīioɔōuūʊɪ](%"..dtack.."?)(%"..pitchaccent.."?)(%ː?)(%.?))([h])", '%1ɦ')
	text = rsubn(text, "r([tdszšlž])", 'ɹ%1')
	-- formally, f only assimiates in the same syllable
	text = rsubn(text, "f([bjdžğ])", 'v%1')
	text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
	text = rsubn(text, "ä("..pitchaccent..")", "æ%1")
	-- word-final ɦ / h
	text = rsubn(text, "([ɦ])#", "ʱ#")
	text = rsubn(text, "([h])#", "ʰ#")
	return text
end

-- Convert a transliterated string to an Iranian Persian IPA string.
-- Words are delimited internally with "#" markers, which are stripped again
-- before returning.
function export.fa_IPA(text)
	text = rsubn(text, "a%-", "e-")
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")
	--automatically denote syllables
	text = syllabify(text)
	-- then do pitch accent mark
	text = rsubn(text, "`(["..all_consonants.."])(["..vowels.."])", "%1%2"..pitchaccent.."")
	-- Replace diphthong
	-- NOTE(review): the pattern has a single capture, so `position` is always
	-- nil here, and the local `consonant` shadows the module-level pattern of
	-- the same name, so the check compares the string with itself. In effect
	-- every "aw"/"ay" is rewritten. Left unchanged because altering it would
	-- change the output — confirm the intended condition.
	text = rsubn(text, "a([wy])", function(semivowel, position)
		local consonant = mw.ustring.sub(text, position, position)
		if consonant == "" or consonant:find(consonant) then
			if semivowel == "w" then
				return "uw"
			else
				return "ey"
			end
		end
	end)
	-- Replace iy with Ey (temp change)
	text = rsubn(text, "iy", "Ey")
	-- Replace w with v
	text = rsubn(text, "w([" .. vowels .. "])", "v%1")
	-- Replace final w with v
	text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
	text = rsubn(text, "([ptkč](%"..dental.."?))(["..vowels.."])", "%1ʰ%3")
	text = rsubn(text, "(["..vowels.."](%.?))q", "%1ʁ")
	-- Replace final a with e (can be overwritten by entering æ)
	text = rsubn(text, "([a])(%"..pitchaccent.."?)#", "e%2#")
	-- Replace short vowels
	text = rsubn(text, ".", iranian_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", iranian_persian_long_vowels)
	-- Replace Ey with iy
	text = rsubn(text, "Ey", "iy")
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Allephones
	text = rsubn(text, "([gbdjl](%"..dental.."?))#", "%1"..devoice.."#")
	-- Replace consonants
	text = rsubn(text, ".", iranian_persian_consonants)
	text = rsubn(text, "cʰ([ɒuo])", "kʰ%1")
	text = rsubn(text, "ɟ(%"..devoice.."?)([ɒuo])", "g%1%2")
	text = con_assimilation(text)
	text = rsubn(text, "#(g)", "%1"..devoice.."")
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, "ɾ", "ɹ")
	--fix the pitch accent on long vowels
	text = rsubn(text, "([ɒiu])ː"..pitchaccent.."", "%1"..pitchaccent.."ː")
	text = rsubn(text, "([ɢʁ])(%.?)t", "x%2t")
	text = rsubn(text, "([ʁɢ])#", "q#")
	text = rsubn(text, "w#", "v#")
	text = rsubn(text, "#([ʁɢ])", "#q")
	text = rsubn(text, "#g", "#k")
	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- Convert a transliterated string to a Dari IPA string.
-- Words are delimited internally with "#" markers, which are stripped again
-- before returning.
function export.prs_IPA(text) --based on formal speech URBAN kabul
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")
	--automatically denote syllables
	text = syllabify(text)
	--pitch accent mark
	text = rsubn(text, "`(["..all_consonants.."])([ʷ]?)(["..vowels.."])", "%1%2%3"..pitchaccent.."")
	-- Replace ih, īh, i\', ī\' by ēh, ē\'
	text = rsubn(text, "i((%.?)[ɦh'])", "e"..dtack.."%1")
	text = rsubn(text, "ī((%.?)[ɦh'])", "ē%1")
	-- Replace uh, ūh, u\', ū\' by ɵh, ɵ\'
	text = rsubn(text, "u((%.?)[hɦ'])", "o"..dtack.."%1")
	text = rsubn(text, "ū((%.?)[hɦ'])", "ō%1")
	-- aspirate p, t, k, č before a vowel
	text = rsubn(text, "([ptkč])(["..vowels.."])", "%1ʰ%2")
	-- Replace short vowels
	text = rsubn(text, ".", dari_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", dari_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	--fix the pitch accent on long vowels
	text = rsubn(text, "([ʌɑeiou])ː"..pitchaccent.."", "%1"..pitchaccent.."ː")
	-- Allephones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, "f([bjdžğ])", 'v%1')
	text = rsubn(text, ".", dari_persian_consonants)
	text = rsubn(text, "ɾ(%.?)ɾ", "r%1r")
	text = rsubn(text, "ɪ(%.?)j", "i%1j")
	text = rsubn(text, "#ɾ", "#r")

	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- Convert a transliterated string to a Kabuli IPA string.
-- Unlike prs_IPA, glottal consonants are removed (remove_glottal_c) before
-- syllabification. "#" word-boundary markers are used internally and stripped
-- before returning.
function export.prs_kbl_IPA(text) --Colloquial dialect of Kabul
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")
	--pitch accent mark
	text = rsubn(text, "`(["..all_consonants.."])(["..vowels.."])", "%1%2"..pitchaccent.."")
	-- remove glottal consonants for some dialects
	text = remove_glottal_c(text)
	--automatically denote syllables
	text = syllabify(text)
	-- universal aspiration
	text = rsubn(text, "([ptkč])(["..vowels.."])", "%1ʰ%2")
	-- Replace short vowels
	text = rsubn(text, ".", dari_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", dari_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	text = rsubn(text, "āw", "aw") -- lost colloquially
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Allephones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", dari_persian_consonants)
	text = rsubn(text, "f((%.?)[bjdžğ])", 'v%1')
	text = rsubn(text, ".", common_consonants)
	--fix the pitch accent on long vowels
	text = rsubn(text, "([ʌɑeiou])ː"..pitchaccent.."", "%1"..pitchaccent.."ː")
	text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
	text = rsubn(text, "ɪ(%.?)j", "i%1j")
	text = rsubn(text, "#ɾ", "#r")

	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- Convert a transliterated string to a Hazaragi IPA string.
-- Vowel harmony is applied first, on the raw transliteration; "D"/"T" become
-- retroflexes. "#" word-boundary markers are used internally and stripped
-- before returning.
function export.prs_haz_IPA(text) --Hazaragi
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	--these conversions need to happen BEFORE EVERYTHING ELSE
	--pitch accent mark
	text = rsubn(text, "`(["..all_consonants.."])(["..vowels.."])", "%1%2"..pitchaccent.."")
	--Vowel Harmony
	text = rsubn(text, "ē("..pitchaccent.."?)(["..all_consonants.."])([ūiī])", '%3%1%2%3')
	text = rsubn(text, "ē("..pitchaccent.."?)(["..all_consonants.."])(["..all_consonants.."])([ī])", '%4%1%2%3%4')
	text = rsubn(text, "i("..pitchaccent.."?)(["..all_consonants.."])([ouū])", '%3%1%2%3')
	text = rsubn(text, "ī("..pitchaccent.."?)(["..all_consonants.."])([ēōuūiī])", '%3%1%2%3')
	text = rsubn(text, "ō("..pitchaccent.."?)(["..all_consonants.."])([uū])", '%3%1%2%3')
	text = rsubn(text, "ō("..pitchaccent.."?)(["..all_consonants.."])([i])", 'u%1%2%3')
	text = rsubn(text, "ō("..pitchaccent.."?)(["..all_consonants.."])([ī])", 'ū%1%2%3')
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "v", "w")
	-- remove glottal consonants for some dialects
	text = remove_glottal_c(text)
	--automatically denote syllables
	text = syllabify(text)
	-- universal aspiration
	text = rsubn(text, "([ptkč])(["..vowels.."])", "%1ʰ%2")
	--delete certain consonant clusters and dipthongs
	text = rsubn(text, "āy", "ay")
	text = rsubn(text, "āw", "aw")
	--retroflex consonants
	text = rsubn(text, "D", 'ɖ')
	text = rsubn(text, "T", 'ʈ')
	-- Replace short vowels
	text = rsubn(text, ".", classical_persian_short_vowels)
	--approximate vowels
	text = rsubn(text, "ā", 'ɔː')
	text = rsubn(text, "ō", 'ʊː')
	text = rsubn(text, "a", 'ä')
	-- Replace long vowels
	text = rsubn(text, ".", dari_persian_long_vowels)
	--fix the pitch accent on long vowels
	text = rsubn(text, "([ɔeiʊu])ː"..pitchaccent.."", "%1"..pitchaccent.."ː")
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	--allophones
	text = con_assimilation(text)
	text = rsubn(text, "([gbdjl](%"..dental.."?))#", "%1"..devoice.."#")
	--Terminal voicing in not phonetic
	text = rsubn(text, "([gbdj])#", "%1"..devoice.."#")
	-- Replace consonants
	text = rsubn(text, ".", dari_persian_consonants)
	text = rsubn(text, "f((%.?)[bjdžğ])", 'v%1')
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
	text = rsubn(text, "#ɾ", "#r")

	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- Convert a transliterated string to a Tajik IPA string.
-- "#" word-boundary markers are used internally and stripped before
-- returning.
function export.tg_IPA(text)
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")
	--automatically denote syllables
	text = syllabify(text)

	--pitch accent mark
	text = rsubn(text, "`(["..all_consonants.."])(["..vowels.."])", "%1%2"..pitchaccent.."")
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	text = rsubn(text, "w([" .. vowels .. "])", "v%1")
	-- universal aspiration
	text = rsubn(text, "([ptkč])(["..vowels.."])", "%1ʰ%2")
	-- Replace ih, īh, i\', ī\' by ēh, ē\'
	text = rsubn(text, "([iī])(%"..pitchaccent.."?)([h'ʔɦ])([^" .. tajik_vowels .. "])", "e%2%3%4")
	-- Replace uh, ūh, u\', ū\' by ɵh, ɵ\'
	text = rsubn(text, "([uū])(%"..pitchaccent.."?)([hʔ'ɦ])([^" .. tajik_vowels .. "])", "ɵ%2%3%4")
	--aspiration
	text = rsubn(text, "([ptkč](%"..dental.."?))(["..vowels.."])", "%1ʰ%3")
	-- Replace short vowels
	text = rsubn(text, ".", tajik_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", tajik_long_vowels)
	--allophones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, ".", tajik_consonants)
	text = rsubn(text, "([aä])("..pitchaccent..")", "æ%2")

	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- Convert a transliterated string to a Classical Persian IPA string.
-- con_assimilation is deliberately not applied here. "#" word-boundary
-- markers are used internally and stripped before returning.
function export.fa_cls_IPA(text)
	text = rsubn(text, "[-]", ".")

	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	text = rsubn(text, "v", "w")
	-- Replace xwa with xʷa
	text = rsubn(text, "xwa", "xʷa")
	-- Replace xwā with xʷā
	text = rsubn(text, "xwā", "xʷā")
	-- Replace xwē with xʷē
	text = rsubn(text, "xwē", "xʷē")
	--automatically denote syllables
	text = syllabify(text)
	-- Replace short vowels
	text = rsubn(text, ".", classical_persian_short_vowels)
	-- Replace d with ḏ after vowels
	text = rsubn(text, "([" .. vowels .. "]+.?)(%`?)([d])", "%1%2ḏ")
	-- likewise b with β after vowels
	text = rsubn(text, "([" .. vowels .. "]+.?)(%`?)([b])", "%1%2β")
	-- Replace long vowels
	text = rsubn(text, ".", classical_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, ".", classical_persian_consonants)
	-- drop the word-boundary markers
	text = rsubn(text, "#", "")

	return text
end

-- ROMANIZATIONS

-- Produce the Classical Persian romanization of `text`.
-- May be called either with a string, or with a frame-like table whose
-- `args[1]` / `args[2]` supply `text` / `script`. `script` and `options` are
-- accepted but not used by the body.
function export.romanize_fa_cls(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	text = rsubn(text, "`", "")
	text = rsubn(text, "[,]", ", ")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	--kill incorrect characters
	text = rsubn(text, "(["..dental..pitchaccent..devoice..dtack.."ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	--remove v
	text = rsubn(text, "v", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])(["..vowels.."])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- post-vocalic d/ḍ and b are written ð and β
	text = rsubn(text, "([" .. vowels .. "])([dḍ])", "%1ð")
	text = rsubn(text, "([" .. vowels .. "](%-?))b", "%1β")
	text = rsubn(text, "ðd", "ðð")
	text = rsubn(text, "βb", "ββ")
	text = rsubn(text, "G", "ğ")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- drop a word-initial apostrophe, then the word-boundary markers
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Dari romanization of `text`.
-- May be called either with a string, or with a frame-like table whose
-- `args[1]` / `args[2]` supply `text` / `script`. `script` and `options` are
-- accepted but not used by the body.
function export.romanize_prs(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	text = rsubn(text, "`", "")
	text = rsubn(text, "[,]", ", ")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	--kill incorrect characters
	text = rsubn(text, "(["..dental..pitchaccent..devoice..dtack.."ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	text = rsubn(text, "v", "w")
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])(["..vowels.."])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	text = rsubn(text, "ɖ", "ḍ")
	text = rsubn(text, "ʈ", "ṭ")

	-- remove unnecessary marks
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Iranian Persian romanization of `text`.
-- May be called either with a string, or with a frame-like table whose
-- `args[1]` / `args[2]` supply `text` / `script`. `script` and `options` are
-- accepted but not used by the body.
function export.romanize_ira(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	text = rsubn(text, "`", "")
	text = rsubn(text, "[,]", ", ")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	--kill incorrect characters
	text = rsubn(text, "(["..dental..pitchaccent..devoice..dtack.."ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	-- normalise to w first, then write v before a vowel
	text = rsubn(text, "v", "w")
	text = rsubn(text, "w(["..vowels.."])", "v%1")
	-- Replace xw clusters
	text = rsubn(text, "xv([āē])", "x%1")
	text = rsubn(text, "xva", "xu")
	text = rsubn(text, "ʷ", "")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])(["..vowels.."])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "iy", "īy")
	text = rsubn(text, "ay", "ey")
	text = rsubn(text, "aw", "ow")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- q after a vowel is written ğ, except word-finally
	text = rsubn(text, "(["..vowels.."])q", "%1ğ")
	text = rsubn(text, "âq", "âğ")
	text = rsubn(text, "ğ#", "q#")
	text = rsubn(text, "ğq", "ğğ")
	text = rsubn(text, "ā", "â")
	text = rsubn(text, "u", "o")
	text = rsubn(text, "i", "e")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- collapse ē/ī to i and ū/ō to u (done after the short i/u were rewritten
	-- to e/o above, so only the long vowels are affected)
	text = rsubn(text, "([ēī])", "i")
	text = rsubn(text, "([ūō])", "u")
	text = rsubn(text, "w#", "v#")
	text = rsubn(text, "a#", "e#")
	text = rsubn(text, "a%-", "e-")
	text = rsubn(text, "æ", "a")
	-- drop a word-initial apostrophe, then the word-boundary markers
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Tajik romanization of `text`.
-- May be called either with a string, or with a frame-like table whose
-- `args[1]` / `args[2]` supply `text` / `script`. `script` and `options` are
-- accepted but not used by the body.
function export.romanize_tg(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	text = rsubn(text, "[,]", ", ")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "w", "v")
	-- Replace xw clusters
	text = rsubn(text, "xv([āē])", "x%1")
	text = rsubn(text, "xva", "xu")
	text = rsubn(text, "ʷ", "")
	--ensure vowels are paired to a consonant
	text = rsubn(text, "([.])(["..vowels.."])", "%1'%2")
	text = rsubn(text, "([.])", "")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "(['])", "ʾ")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	text = rsubn(text, "ğ", "ġ")
	text = rsubn(text, "ē", "e")
	text = rsubn(text, "ō", "ü")
	text = rsubn(text, "ā", "o")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- Tajik does not have vowel length
	text = rsubn(text, "([iī])", "i")
	-- ...except that a stress-marked (`) consonant + i is written with ī
	text = rsubn(text, "`(["..all_consonants.."])i", "%1ī")
	text = rsubn(text, "([ūu])", "u")
	text = rsubn(text, "`", "")
	text = rsubn(text, "#(['])", "")
	text = rsubn(text, "#", "")
	return text
end

-- Convert one transliterated term to IPA using the converter for `style`.
-- `style` is one of the codes in export.all_styles; for any other value the
-- text is returned unchanged.
local function one_term_ipa(text, style)

	if style == "cls" then
		text = export.fa_cls_IPA(text)
	elseif style == "prs" then
		text = export.prs_IPA(text)
	elseif style == "kbl" then
		text = export.prs_kbl_IPA(text) or export.prs_IPA(text) --should ignore conversion if specified
	elseif style == "haz" then
		text = export.prs_haz_IPA(text)
	elseif style == "fa" then
		text = export.fa_IPA(text)
	elseif style == "teh" then
		-- Tehrani currently shares the Iranian Persian converter.
		text = export.fa_IPA(text)
	elseif style == "tg" then
		text = export.tg_IPA(text)
	end

	return text
end

-- style == one of the following:
-- "cls": Classical Persian
-- "prs": Dari Persian
-- "kbl": Kabuli
-- "haz": Hazaragi
-- "fa": Iranian Persian
-- "teh": Tehrani
-- "tg": Tajik
--
-- Returns a sequence of pronunciation records for `text`. Each record is a
-- table with a `phonemic` string (and optionally `qualifiers`), which is the
-- shape export.show reads when formatting.
function export.IPA(text, style)

	local variants = {text}

	-- Convert one variant and wrap it in a one-element list of records, as
	-- flatmap expects a sequence back from its callback.
	local function call_one_term_ipa(variant)
		local result = {
			{
				phonemic = one_term_ipa(variant, style),
			},
		}

		return result
	end

	return flatmap(variants, call_one_term_ipa)
end

-- Build the list of style groups to display.
--
-- `inputs` maps a style code to a sequence of transliterations for that
-- style. `args_style`, if given, is a filter: comma-separated alternatives,
-- each a "+"-joined list of style codes that must all match, where a leading
-- "-" negates a code.
--
-- Returns a sequence of groups `{tag = ..., styles = {...}}`, where each entry
-- of `styles` has `tag`, `represented_styles`, `pronuns` and `indent`.
function export.express_styles(inputs, args_style)
	local pronuns_by_style = {}
	local expressed_styles = {}

	-- Compute and store all pronunciations for one style.
	local function dostyle(style)
		pronuns_by_style[style] = {}
		for _, val in ipairs(inputs[style]) do
			local pronuns = export.IPA(val, style)
			for _, pronun in ipairs(pronuns) do
				table.insert(pronuns_by_style[style], pronun)
			end
		end
	end

	-- Keep only those styles for which pronunciations were computed.
	local function all_available(styles)
		local available_styles = {}
		for _, style in ipairs(styles) do
			if pronuns_by_style[style] then
				table.insert(available_styles, style)
			end
		end
		return available_styles
	end

	-- Register `styles` under the group `hidden_tag` with display label `tag`,
	-- unless none of them is available or the `args_style` filter rejects them.
	local function express_style(hidden_tag, tag, styles, indent)
		indent = indent or 1
		if hidden_tag == true then
			hidden_tag = tag
		end
		if type(styles) == "string" then
			styles = {styles}
		end
		styles = all_available(styles)
		if #styles == 0 then
			return
		end
		local style = styles[1]

		-- If style specified, make sure it matches the requested style.
		local style_matches
		if not args_style then
			style_matches = true
		else
			local or_styles = rsplit(args_style, "%s*,%s*")
			for _, or_style in ipairs(or_styles) do
				local and_styles = rsplit(or_style, "%s*%+%s*")
				local and_matches = true
				for _, and_style in ipairs(and_styles) do
					local negate
					if and_style:find("^%-") then
						and_style = and_style:gsub("^%-", "")
						negate = true
					end
					local this_style_matches = false
					for _, part in ipairs(styles) do
						if part == and_style then
							this_style_matches = true
							break
						end
					end
					if negate then
						this_style_matches = not this_style_matches
					end
					if not this_style_matches then
						and_matches = false
					end
				end
				if and_matches then
					style_matches = true
					break
				end
			end
		end
		if not style_matches then
			return
		end

		local new_style = {
			tag = tag,
			represented_styles = styles,
			pronuns = pronuns_by_style[style],
			indent = indent,
		}
		-- Append to an existing group with the same tag, if there is one.
		for _, hidden_tag_style in ipairs(expressed_styles) do
			if hidden_tag_style.tag == hidden_tag then
				table.insert(hidden_tag_style.styles, new_style)
				return
			end
		end
		table.insert(expressed_styles, {
			tag = hidden_tag,
			styles = {new_style},
		})
	end

	for style, _ in pairs(inputs) do
		dostyle(style)
	end

	-- True if either style is missing or their pronunciations differ.
	local function diff(style1, style2)
		if not pronuns_by_style[style1] or not pronuns_by_style[style2] then
			return true
		end
		return not m_table.deepEquals(pronuns_by_style[style1], pronuns_by_style[style2])
	end

	local fa_teh_different = diff("fa", "teh")
	local prs_kbl_different = diff("prs", "kbl")
	local prs_haz_different = diff("prs", "haz")

	-- Classical Persian
	express_style("Classical Persian", "Classical Persian", "cls")

	-- Dari Persian
	express_style("Dari, formal/KBL", "Dari, formal/KBL", "prs")
	if prs_kbl_different then
		express_style("Dari, formal/KBL", "Kabuli", "kbl", 2)
	end
	if prs_haz_different then
		express_style("Dari, formal/KBL", "Hazaragi/approx.", "haz", 2)
	end
	--"approx" because there are still a few small kinks to be worked out

	-- Iranian Persian
	express_style("Iran, formal/TEH", "Iran, formal/TEH", "fa")
	if fa_teh_different then
		express_style("Iran, formal/TEH", "Tehrani", "teh", 2)
	end

	-- Tajik
	express_style("Tajik, formal/DUS", "Tajik, formal/DUS", "tg")

	return expressed_styles
end

-- Template entry point. Reads the parent frame's arguments, computes the
-- pronunciations for every requested style and returns the formatted
-- wikitext as a string.
function export.show(frame)
	-- Create parameter specs
	local params = {
		[1] = {}, -- this replaces style group 'all'
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["style"] = {},
		["bullets"] = {type = "number", default = 1},
	}
	for group, _ in pairs(export.all_style_groups) do
		if group ~= "all" then
			params[group] = {}
		end
	end
	for _, style in ipairs(export.all_styles) do
		params[style] = {}
	end

	-- Parse arguments
	-- getParent is a method and must be called to obtain the parent frame.
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	-- Set inputs
	local inputs = {}
	-- If 1= specified, do all styles.
	if args[1] then
		for _, style in ipairs(export.all_styles) do
			inputs[style] = args[1]
		end
	end
	-- Then do remaining style groups other than 'all', overriding 1= if given.
	for group, styles in pairs(export.all_style_groups) do
		if group ~= "all" and args[group] then
			for _, style in ipairs(styles) do
				inputs[style] = args[group]
			end
		end
	end
	-- Then do individual style settings.
	for _, style in ipairs(export.all_styles) do
		if args[style] then
			inputs[style] = args[style]
		end
	end
	-- If no inputs given, set all styles based on current pagename.
	if not next(inputs) then
		-- getCurrentTitle is a function returning the title object.
		local text = mw.title.getCurrentTitle().text
		for _, style in ipairs(export.all_styles) do
			inputs[style] = text
		end
	end

	-- Each input may list several comma-separated transliterations.
	for style, input in pairs(inputs) do
		inputs[style] = rsplit(input, ",")
	end
	local expressed_styles = export.express_styles(inputs, args.style)

	local lines = {}

	-- Format one expressed style as a bulleted line. Returns the formatted
	-- wikitext and a plainer variant used only for estimating its length.
	local function format_style(tag, expressed_style, is_first)
		local pronunciations = {}
		local formatted_pronuns = {}
		for _, pronun in ipairs(expressed_style.pronuns) do
			table.insert(pronunciations, {
				pron = "[" .. pronun.phonemic .. "]",
				qualifiers = pronun.qualifiers,
			})
			local formatted_phonemic = "[" .. pronun.phonemic .. "]"
			if pronun.qualifiers then
				formatted_phonemic = "(" .. table.concat(pronun.qualifiers, ", ") .. ") " .. formatted_phonemic
			end
			table.insert(formatted_pronuns, formatted_phonemic)
		end
		-- Number of bullets: When indent = 1, we want the number of bullets given by `args.bullets`,
		-- and when indent = 2, we want `args.bullets + 1`, hence we subtract 1.
		local bullet = string.rep("*", args.bullets + expressed_style.indent - 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
		-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
		-- the toggle box with the "more" button on the right.
		local pre = is_first and args.pre and args.pre .. " " or ""
		-- NOTE(review): pre_for_len is computed but `formatted_for_len` below
		-- uses `pre`; confirm which was intended.
		local pre_for_len = pre .. (tag and "(" .. tag .. ") " or "")
		pre = pre .. (tag and m_qual.format_qualifier(tag) .. " " or "")
		local post = is_first and (args.ref or "") .. (args.post and " " .. args.post or "") or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations } .. post
		local formatted_for_len = bullet .. pre .. "IPA(key): " .. table.concat(formatted_pronuns, ", ") .. post
		return formatted, formatted_for_len
	end

	for i, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			style_group.formatted, style_group.formatted_for_len =
				format_style(style_group.styles[1].tag, style_group.styles[1], i == 1)
		else
			-- Collapsed view shows the first style under the group tag; the
			-- expanded view shows every style under its own tag.
			style_group.formatted, style_group.formatted_for_len =
				format_style(style_group.tag, style_group.styles[1], i == 1)
			for j, style in ipairs(style_group.styles) do
				style.formatted, style.formatted_for_len =
					format_style(style.tag, style, i == 1 and j == 1)
			end
		end
	end

	-- Length of `text` in characters after removing anything in <...>.
	local function textual_len(text)
		text = rsub(text, "<.->", "")
		return ulen(text)
	end

	local maxlen = 0
	for i, style_group in ipairs(expressed_styles) do
		local this_len = textual_len(style_group.formatted_for_len)
		if #style_group.styles > 1 then
			for _, style in ipairs(style_group.styles) do
				this_len = math.max(this_len, textual_len(style.formatted_for_len))
			end
		end
		maxlen = math.max(maxlen, this_len)
	end

	for i, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			table.insert(lines, " \n" .. style_group.formatted .. " ")
		else
			local inline = '\n \n' .. style_group.formatted .. " "
			local full_prons = {}
			for _, style in ipairs(style_group.styles) do
				table.insert(full_prons, style.formatted)
			end
			local full = '\n \n' .. table.concat(full_prons, "\n") .. " "
			-- NOTE(review): em_length is computed but not used in the strings
			-- below; presumably it sized a wrapper element — confirm.
			local em_length = math.floor(maxlen * 0.68) -- from Module:grc-pronunciation
			table.insert(lines, ' ' .. inline .. full .. " ")
		end
	end

	-- major hack to get bullets working on the next line
	return table.concat(lines, "\n") .. "\n "
end

return export