-- Module:hi-IPA

-- Table holding this module's public functions; it is returned at the end of the file.
local export = {}

local lang = require("Module:languages").getByCode("hi") local sc = require("Module:scripts").getByCode("Deva") local m_IPA = require("Module:IPA") local m_a = require("Module:accent qualifier")

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find local gcodepoint = m_str_utils.gcodepoint local gmatch = m_str_utils.gmatch local gsub = m_str_utils.gsub local u = m_str_utils.char

-- Lookup table from a single transliteration character to its IPA output.
-- export.toIPA applies it one character at a time via gsub(translit, ".", correspondences).
-- Identity entries for the characters in `identical` are added further down in the file.
-- First group: consonants.
local correspondences = { ["ṅ"] = "ŋ", ["g"] = "ɡ", ["c"] = "t͡ʃ", ["j"] = "d͡ʒ", ["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ", ["t"] = "t̪", ["d"] = "d̪", ["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ", ["ś"] = "ʃ", ["ṣ"] = "ʂ", ["ź"] = "ʒ", ["ž"] = "ʒ", ["h"] = "ɦ", ["ṛ"] = "ɽ", ["ẓ"] = "ʒ", ["ḷ"] = "l", ["ḻ"] = "l", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n", ["ṟ"] = "ɾ",
-- Second group: plain vowels.
["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ", ["ī"] = "iː", ["o"] = "oː", ["e"] = "eː", ["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",
-- Third group: vowels written with a tilde.
["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː", ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",
-- Fourth group: the ॐ sign, ḥ and the apostrophe (the latter two map to optional, parenthesised sounds).
["ॐ"] = "oːm", ["ḥ"] = "(ɦ)", ["'"] = "(ʔ)", }

-- Replacement table used by export.toIPA when style == "nonpersianized":
-- each listed transliteration character is rewritten to the value given here.
local perso_arabic = { ["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ź"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "", }

-- Replacement table used by export.toIPA when style == "desanskritize" (applied to ṣ and ṇ).
local urdu = { ["ṣ"] = "ʃ", ["ṇ"] = "n", }

-- Replacement table used by export.toIPA when style == "dakhini" (applied to q).
local deccani = { ["q"] = "x", }

-- Short vowel -> long vowel in transliteration.
-- NOTE(review): not referenced anywhere in the visible part of this file — confirm whether it is still needed.
local lengthen = { ["a"] = "ā", ["i"] = "ī", ["u"] = "ū", }

-- Pattern fragments shared by syllabify and export.toIPA:
--   vowels            - contents of a character class: the vowel letters, the combining tilde and the length mark "ː"
--   vowel             - pattern for one vowel character optionally followed by "ː"
--   weak_h            - captures one of g j d ḍ b ṛ n m that is directly followed by "h" (export.toIPA rewrites the "h" as ʱ)
--   aspirate          - captures one of k c t ṭ p (export.toIPA appends "h" to this pattern and rewrites the "h" as ʰ)
--   syllabify_pattern - three captures: a vowel (plus optional tilde), a run of characters that are neither
--                       vowels nor "." nor "-", and a following vowel (plus optional tilde); used by syllabify
local vowels = "aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː" local vowel = "[aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?" local weak_h = "([gjdḍbṛnm])h" local aspirate = "([kctṭp])" local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.%-]+)([" .. vowels .. "]̃?)"

-- Splits `text` into a list of units: a single consonant letter, or one of
-- k g c j ṭ ḍ t d p b ṛ followed by "h", is kept together as one unit.
-- Whenever the next character cannot extend the unit being built, that unit
-- is appended to the result and a new unit starts with the character.
-- Returns the list of units as a sequence of strings.
local function find_consonants(text)
	local single_consonant = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉḥ]$"
	local with_h = "^[kgcjṭḍtdpbṛ]h$"

	local units = {}
	local pending = ""
	-- The appended space acts as a sentinel that flushes the last unit;
	-- the space itself is never stored in the result.
	for codepoint in gcodepoint(text .. " ") do
		local ch = u(codepoint)
		local candidate = pending .. ch
		if find(candidate, single_consonant) or find(candidate, with_h) then
			pending = candidate
		else
			units[#units + 1] = pending
			pending = ch
		end
	end
	return units
end

-- Inserts "." syllable boundaries into a transliterated string.
--
-- For every vowel + consonant run + vowel match, the consonant run is split
-- into units by find_consonants and a "." is placed before the last unit of a
-- two-unit run (position 2) or before the only unit (position 1). Adjacent
-- vowels are then separated by ".", and finally a set of corrections moves
-- the boundary for specific consonant clusters.
--
-- @param text transliterated string (spaces are expected to have been rewritten as "..")
-- @return the string with "." syllable separators inserted
local function syllabify(text)
	-- Two passes, because a vowel consumed as the end of one match cannot
	-- also start the next match within a single gsub call.
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- `local` keeps this scratch table out of the global environment
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
		-- NOTE(review): "(?=" is regex look-ahead syntax, which Lua patterns do not
		-- provide; presumably this only matches a literal "?=" — confirm. The loop
		-- below performs the vowel-vowel split without look-ahead.
		text = gsub(text, "(" .. vowel .. ")(?=" .. vowel .. ")", "%1.")
	end
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end

	-- syllabification corrections
	-- ([^.]) is added in front, just in case one of the (unlikely) clusters
	-- would occur after a blank space (temporarily reformatted as '..')
	text = gsub(text, '([^.])%.([kqgcjṭḍtdpb])(h?)([kqgcjṭḍtdpbxġfnɳmsśzź])', '%1%2%3.%4')
	text = gsub(text, '([^.])%.([qgcjṭḍtdpb])(h?)ṣ', '%1%2%3.ṣ')
	text = gsub(text, '([^.])%.khṣ', '%1kh.ṣ') -- not kṣ/क्ष
	text = gsub(text, '([^.])%.([xġfnɳmzźyrlv])([kqgcjṭḍtdpbxġfnɳmsśṣzźh])', '%1%2.%3')
	text = gsub(text, '([^.])%.([sśṣ])([gjḍdbġsśṣzźh])', '%1%2.%3')
	return text
end

-- Characters whose IPA symbol equals their transliteration: each one is
-- registered in `correspondences` as a mapping onto itself.
local identical = "knlsfzθ"
for same in gmatch(identical, ".") do
	correspondences[same] = same
end

-- Transliterates `text` with the module-level language object and returns
-- only the first value that lang:transliterate produces.
local function transliterate(text)
	local first_result = lang:transliterate(text)
	return first_result
end

-- Builds a link to `term` by passing it, together with the module-level
-- language and script objects, to full_link from Module:links.
function export.link(term)
	local m_links = require("Module:links")
	local link_data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(link_data)
end

-- Converts a term to a broad IPA transcription string.
--
-- @param text  the term; it is transliterated first when its best-matching
--              script reports that it is transliterated, otherwise it is
--              used as the transliteration directly
-- @param style selects optional rewrites: "nonpersianized" applies
--              `perso_arabic`, "dakhini" applies `deccani`, "desanskritize"
--              applies `urdu` and rewrites a final ə; any other value
--              applies none of them
-- @return the IPA string, without enclosing slashes
-- Raises an error when the transliteration step yields no result.
--
-- The substitutions below depend on running in exactly this order.
function export.toIPA(text, style)
	text = gsub(text, '॰', '-')
	local translit = text
	-- isTransliterated is a method and has to be called; a bare `obj:name`
	-- without arguments is not valid Lua
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end

	if style == "nonpersianized" then
		translit = gsub(translit, "[xġqźzf']", perso_arabic)
	end

	if style == "dakhini" then
		translit = gsub(translit, "[q]", deccani)
	end

	-- force final schwa for Hindi
	translit = gsub(translit, "a~$", "ə")

	if style == "desanskritize" then
		translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
		translit = gsub(translit, "[ṣṇ]", urdu)
	end

	-- vowels
	translit = gsub(translit, "͠", "̃")
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- strip a hyphen at either edge of the term
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	-- ŕ: plain r at the end or before a vowel, otherwise "ri"
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
	translit = gsub(translit, "ŕ", "ri")
	translit = gsub(translit, 'jñ', 'gy')
	translit = gsub(translit, ",", "")
	-- spaces become ".." while syllabifying; they are turned back further down
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- keep the length mark and the tilde attached to the preceding vowel
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")

	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	-- character-by-character conversion to IPA
	local result = gsub(translit, ".", correspondences)

	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	result = gsub(result, "(...)ə$", "%1ᵊ")
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")

	-- formatting
	result = gsub(result, "%.?%-", ".")
	result = gsub(result, "%.%.", " ")
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")

	-- ñ
	result = gsub(result, "ñ", "n")

	-- i and u lengthening
	result = gsub(result, "ʊ(̃?)(ɦ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ɦ?)$", "i%1ː%2")

	-- deaffricate first affricate in geminates
	result = gsub(result, "t͡ʃ(%.?)t͡ʃ", "t̪%1t͡ʃ")
	result = gsub(result, "d͡ʒ(%.?)d͡ʒ", "d̪%1d͡ʒ")

	-- silent h in 'lh-', 'vh-' (Ohala 1983, p.45)
	result = gsub(result, "^([lʋ])ɦ", "%1")
	result = gsub(result, "([ .])([lʋ])ɦ", "%1%2")

	-- ɛː before j and ɔː before ʋ are rewritten as ə̯i / ə̯u, keeping any
	-- syllable dot that stood between them
	result = gsub(result, "ɛː(%.?)j", function(a)
		local res = "ə̯i"
		res = res .. a .. "j"
		return res
	end)
	result = gsub(result, "ɔː(%.?)ʋ", function(a)
		local res = "ə̯u"
		res = res .. a .. "ʋ"
		return res
	end)

	return result
end

-- Rewrites a broad IPA string (as produced by export.toIPA) into a narrower
-- transcription by applying a fixed sequence of substitutions.
--
-- @param ipa broad IPA string, without enclosing slashes
-- @return the narrowed IPA string, with spaces replaced by "‿"
--
-- The rules feed into each other (for example the ə -> ɐ rewrite precedes the
-- rules that look for ɐ), so their order must not be changed.
function export.narrow_IPA(ipa)
	-- what /ɑ/ and /ə/ really are
	ipa = gsub(ipa, 'ɑ', 'ä')
	ipa = gsub(ipa, 'ə', 'ɐ')

	-- uvular /x/, /ɣ/ ??
	-- ipa = gsub(ipa, 'x', 'χ')
	-- ipa = gsub(ipa, 'ɣ', 'ʁ')

	-- retroflex s rules
	ipa = gsub(ipa, 'ʂ(%.?)([^ʈɖ.])', 'ʃ%1%2')
	ipa = gsub(ipa, 'ʂ$', 'ʃ')

	-- nasal allophones
	ipa = gsub(ipa, 'ŋ(%.?)([qχʁ])', 'ɴ%1%2')
	ipa = gsub(ipa, 'n%.j', 'ɲ.j')
	ipa = gsub(ipa, '[nɳ](%.?)ʃ', 'ɲ%1ʃ') -- this nasal is likely more front than before /j/, but not doing a too narrow transcription seems preferable
	ipa = gsub(ipa, 'n(%.?)([td])̪', 'n̪%1%2̪')
	ipa = gsub(ipa, 'm(%.?)f', 'ɱ%1f')

	-- nasals induce nasalization
	ipa = gsub(ipa, '([ɐäɪiʊueɛoɔæ])(ː?)([nɳɲŋɴmɱ])', '%1̃%2%3')

	-- cc, jj
	ipa = gsub(ipa, 't̪(%.?)t͡ʃ', 't̚%1t͡ʃ')
	ipa = gsub(ipa, 'd̪(%.?)d͡ʒ', 'd̚%1d͡ʒ')

	-- syllable boundary consonants
	ipa = gsub(ipa, '([kɡ])%.([kɡ])', '%1̚.%2')
	ipa = gsub(ipa, '([ʈɖ])%.([ʈɖ])', '%1̚.%2')
	ipa = gsub(ipa, '([td]̪?)%.([tdn])', '%1̚.%2')
	ipa = gsub(ipa, '([pb])%.([pb])', '%1̚.%2')

	-- aspiration rules
	ipa = gsub(ipa, 'ɐɦ([%. ])', 'ɛɦ%1')
	ipa = gsub(ipa, 'ɐɦ$', 'ɛɦ')
	ipa = gsub(ipa, 'ɐ%.ɦɐ', 'ɛ.ɦɛ')
	ipa = gsub(ipa, 'ɐ%(ɦ%)', 'ɛ(ɦ)')
	ipa = gsub(ipa, 'ʊɦ%.', 'ɔɦ.')
	ipa = gsub(ipa, 'ʊ%.ɦɐ', 'ɔ.ɦɔ')
	ipa = gsub(ipa, 'ɐ%.ɦʊ', 'ɔ.ɦɔ')
	ipa = gsub(ipa, '([ɐäɪiʊueɛoɔæ])(̃?)(ː?)ɦ', '%1%2%3ʱ')

	-- v/w
	ipa = gsub(ipa, '([kɡŋtdɲʈɖɳnpbm]̪?%.?)ʋ', '%1w')

	-- geminate /ɾ/ is trill
	ipa = gsub(ipa, "ɾ%.ɾ", "r.r")
	-- for onomatopeic words ending on -र्र
	ipa = gsub(ipa, "ɾɾ", "rː")

	-- final geminates often pronounced as singletons
	ipa = gsub(ipa, "kk", "k(ː)")
	ipa = gsub(ipa, "ɡɡ", "ɡ(ː)")
	ipa = gsub(ipa, "ʈʈ", "ʈ(ː)")
	ipa = gsub(ipa, "ɖɖ", "ɖ(ː)")
	ipa = gsub(ipa, "ɳɳ", "ɳ(ː)")
	ipa = gsub(ipa, "t̪t̪", "t̪(ː)")
	ipa = gsub(ipa, "d̪d̪", "d̪(ː)")
	ipa = gsub(ipa, "nn", "n(ː)")
	ipa = gsub(ipa, "pp", "p(ː)")
	ipa = gsub(ipa, "bb", "b(ː)")
	ipa = gsub(ipa, "mm", "m(ː)")
	ipa = gsub(ipa, "ll", "l(ː)")

	-- final cc, jj
	ipa = gsub(ipa, "t̚t͡ʃ", "(t̚)t͡ʃ")
	ipa = gsub(ipa, "d̚d͡ʒ", "(d̚)d͡ʒ")

	ipa = gsub(ipa, "ɪ%.j", "i.j")
	-- join words with a tie bar instead of a space
	ipa = gsub(ipa, " ", "‿")
	return ipa
end

-- Template entry point for Hindi pronunciations.
--
-- Reads the positional arguments of the parent frame; when none is given the
-- current page title is used instead. For each term it emits the broad
-- "persianized" transcription, a narrow transcription when that differs, and
-- — when the "nonpersianized" transcription differs from the persianized
-- one — the same pair for the nonpersianized form.
--
-- @param frame the Scribunto frame object of the #invoke call
-- @return the "Delhi" qualifier followed by the formatted IPA list
function export.make(frame)
	-- getParent and getCurrentTitle are functions and must be called before
	-- their results are indexed
	local args = frame:getParent().args

	local p, results = {}, {}
	if args[1] then
		for _, item in ipairs(args) do
			-- empty arguments are skipped
			if item ~= "" then
				table.insert(p, item)
			end
		end
	else
		p = { mw.title.getCurrentTitle().text }
	end

	-- Appends the broad /…/ form and, when it differs, the narrow […] form.
	local function add_pron(broad)
		table.insert(results, { pron = "/" .. broad .. "/" })
		local narrow = export.narrow_IPA(broad)
		if narrow ~= broad then
			table.insert(results, { pron = "[" .. narrow .. "]" })
		end
	end

	for _, Hindi in ipairs(p) do
		local persianized = export.toIPA(Hindi, "persianized")
		local nonpersianized = export.toIPA(Hindi, "nonpersianized")
		add_pron(persianized)
		if persianized ~= nonpersianized then
			add_pron(nonpersianized)
		end
	end

	return m_a.format_qualifiers(lang, {"Delhi"}) .. " " .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Urdu pronunciations.
--
-- Every non-empty positional argument of the parent frame is converted with
-- export.toIPA using the "desanskritize" style and emitted as a broad /…/
-- transcription.
--
-- @param frame the Scribunto frame object of the #invoke call
-- @return the "ur" qualifier followed by the formatted IPA list
-- Raises an error when no positional argument is given.
function export.make_ur(frame)
	-- getParent is a method and must be called before its result is indexed
	local args = frame:getParent().args
	-- shadows the module-level language object for the formatting calls below;
	-- export.toIPA itself still uses the module-level one
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local p, results = {}, {}
	for _, item in ipairs(args) do
		-- empty arguments are skipped
		if item ~= "" then
			table.insert(p, item)
		end
	end

	for _, Urdu in ipairs(p) do
		local desanskritize = export.toIPA(Urdu, "desanskritize")
		table.insert(results, { pron = "/" .. desanskritize .. "/" })
	end

	return m_a.format_qualifiers(lang, {"ur"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Template entry point for Deccani pronunciations.
--
-- Every non-empty positional argument of the parent frame is converted with
-- export.toIPA using the "dakhini" style and emitted as a broad /…/
-- transcription.
--
-- @param frame the Scribunto frame object of the #invoke call
-- @return the "Deccani" qualifier followed by the formatted IPA list
-- Raises an error when no positional argument is given.
function export.make_deccani(frame)
	-- getParent is a method and must be called before its result is indexed
	local args = frame:getParent().args
	-- shadows the module-level language object for the formatting calls below;
	-- export.toIPA itself still uses the module-level one
	local lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local p, results = {}, {}
	for _, item in ipairs(args) do
		-- empty arguments are skipped
		if item ~= "" then
			table.insert(p, item)
		end
	end

	for _, Urdu in ipairs(p) do
		local dakhini = export.toIPA(Urdu, "dakhini")
		table.insert(results, { pron = "/" .. dakhini .. "/" })
	end

	return m_a.format_qualifiers(lang, {"Deccani"}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Hand the table of public functions back to whoever loads this module.
return export