-- Module:User:AmazingJus/af

-- Table that collects this module's public functions; it is returned at the end of the file.
local export = {}

local lang = require("Module:languages").getByCode("af") local sc = require("Module:scripts").getByCode("Latn") local hyph = require("Module:hyphenation") local str = require("Module:string") local tbl = require("Module:table")

--- Tag a piece of text using this module's language (code "af") and script (code "Latn").
-- @param text the text to tag
-- @param face forwarded unchanged to Module:script utilities' tag_text
-- @return whatever Module:script utilities' tag_text returns
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

--- Build a link to a term using this module's language (code "af") and script (code "Latn").
-- @param term the term to link to
-- @param face forwarded unchanged to Module:links' full_link
-- @return whatever Module:links' full_link returns
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

-- `u` is called later in this file with Unicode code points (e.g. u(0x0300)) to build
-- combining accents; the remaining names are short aliases for mw.ustring functions
-- (NFD/NFC normalisation and lowercasing).
local u = require("Module:string/char") local decomp = mw.ustring.toNFD local recomp = mw.ustring.toNFC local lower = mw.ustring.lower

-- Short aliases for the mw.ustring search, length, match and substring functions.
local find = mw.ustring.find local len = mw.ustring.len local match = mw.ustring.match local sub = mw.ustring.sub

-- `rsubn` is the raw mw.ustring.gsub (the `rsub` helper in this file keeps only its first
-- return value); `rmatch` is mw.ustring.gmatch.
local rsubn = mw.ustring.gsub local rmatch = mw.ustring.gmatch

--- Version of rsubn that discards all but the first return value.
-- @param term string to operate on
-- @param foo pattern to search for
-- @param bar replacement (string, table or function, as accepted by rsubn)
-- @return the string after substitution
local function rsub(term, foo, bar)
	-- the surrounding parentheses truncate rsubn's results to the first one
	return (rsubn(term, foo, bar))
end

--- Apply rsub repeatedly until the string stops changing.
-- @param term string to operate on
-- @param foo pattern to search for
-- @param bar replacement passed to rsub
-- @return the string once a substitution pass leaves it unchanged
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- list of constants
local GR = u(0x0300) -- grave
local AC = u(0x0301) -- acute
local CR = u(0x0302) -- circumflex
local DR = u(0x0308) -- diaeresis
local accents = GR .. AC .. CR .. DR
local vowels = "aeiouyAEIOUY"
-- every consonant letter in both cases (the uppercase half must include "R" like the lowercase half)
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
-- characters that delimit a syllable: the hyphenation point and the word-border marker
local syll_boundary = "‧#"

-- list of valid trigraphs and digraphs, including diphthongs and long vowels
local graphemes = {
	["aai"] = "ɑːɪ̯",
	["eeu"] = "iʊ̯",
	["ieu"] = "iʊ̯",
	["oei"] = "uɪ̯",
	["ooi"] = "oːɪ̯",
	["aa"] = "ɑː",
	["ae"] = "ɑː",
	["ai"] = "aɪ̯",
	["au"] = "œʊ̯",
	["ee"] = "ɪə̯",
	["ei"] = "əɪ̯",
	["eu"] = "iʊ̯",
	["ie"] = "į", -- temporary value
	["oe"] = "ů", -- temporary value
	["oi"] = "ɔɪ̯",
	["oo"] = "ʊə̯",
	["ou"] = "œʊ̯",
	["ui"] = "uɪ̯",
	["uu"] = "ü" -- temporary value
}

-- sort trigraphs and digraphs in descending order of length; entries of equal length
-- are ordered alphabetically so the result does not depend on the unspecified
-- iteration order of pairs()
local graphemes_sorted = {}
for graph in pairs(graphemes) do
	graphemes_sorted[#graphemes_sorted + 1] = graph
end
table.sort(graphemes_sorted, function(a, b)
	local len_a, len_b = len(a), len(b)
	if len_a ~= len_b then
		return len_a > len_b
	end
	return a < b
end)

-- list of various grapheme sets
local sets = {
	-- long-short vowels: index 1 is used for closed syllables, index 2 for open syllables
	["vowel_length"] = {
		["a"] = {"a", "ɑː"},
		["e"] = {"ɛ", "ɪə̯"},
		["i"] = {"ə", "i"},
		["o"] = {"ɔ", "ʊə̯"},
		["u"] = {"œ", "y"}
	},
	-- voiced/voiceless consonant pairs
	["cons_voice"] = {
		{"b", "p"},
		{"d", "t"},
		{"ʤ", "ʧ"},
		{"ɡ", "k"},
		{"v", "f"},
		{"z", "s"},
		{"ʒ", "ʃ"},
	}
}

-- list of defined affixes
local affixes = {
	-- prefixes
	["prefixes"] = {
		"aan",
		"agter",
		"be",
		"deur",
		"er",
		"ge",
		"her",
		"om",
		"ont",
		"onder",
		"ver",
		"voor"
	},
	-- suffixes
	["suffixes"] = {
		"agtig",
		"baar",
		"dom",
		"end",
		"heid",
		"lik",
		"loos",
		"nis",
		"sel",
		"skap",
	}
}

-- sort prefixes and suffixes in ascending order of length
local function shorter_first(a, b)
	return len(a) < len(b)
end
table.sort(affixes.prefixes, shorter_first)
table.sort(affixes.suffixes, shorter_first)

-- list of unstressed words
local unstressed = {
	"die",
	"dit",
	"is",
	"nie",
	"'n"
}

-- list of stressed endings found in loanwords
-- (the order matters: the stress routine acts on the first ending that matches)
local stressed_endings = {
	"aal", "aan", "aans", "aar", "aard", "aat", "am", "ant", "at",
	"ee", "eel", "eem", "een", "eer", "ees", "eet", "ein", "ek",
	"el", -- "-el" only in loanwords
	"ent", "es", "et", "eur", "eus", "eut",
	"ieel", "ief", "iek", "iel", "iem", "ien", "ine", "ier", "iet",
	"o", -- "-o" only in french loanwords
	"oen", "on", "oof", "oog", "ooi", "ool", "oom", "oon", "oor",
	"teek", "teit",
	"u", "uum", "uur", "uus", "uut",
	"y", "yn", "ys"
}

-- list of respelling substitutions
-- Each entry is {pattern, replacement, origin}. An origin of "-" marks a general rule;
-- any other origin (e.g. "fr", "en", "af") is compared against the origin code passed to
-- the pronunciation routine. Entries are tried in order, and each pattern string is
-- applied at most once, so an origin-specific rule listed first pre-empts the general
-- rule that shares its pattern.
local subs = {
	-- 'N
	{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
	{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise

	-- CH
	{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in french loans
	{"sch", "sk", "-"}, -- pronounced /sk/ in the sequence "sch"
	{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before optional consonant cluster and "e" or "i"
	{"ch", "k", "-"}, -- otherwise /k/

	-- NG
	{"ng", "ŋ", "-"}, -- pronounced /ŋ/

	-- SH/SJ
	{"s[hj]", "ʃ", "-"}, -- pronounced /ʃ/

	-- DJ/TJ
	{"[dt]jie", "kį", "-"}, -- pronounced /-ci/ in the suffix "-djie"/"-tjie"
	{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
	{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/

	-- C
	{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
	{"c", "k", "-"}, -- otherwise /k/

	-- GH
	{"gh", "ɡ", "-"}, -- pronounced /ɡ/

	-- G
	{"g", "ɡ", "en"}, -- pronounced /ɡ/ in english loans
	{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
	{"g", "χ", "-"}, -- otherwise /χ/
	{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/

	-- V
	{"v", "f", "af"}, -- pronounced /f/ in native words

	-- W
	{"w", "w", "en"}, -- pronounced /w/ in english loans
	{"w", "v", "-"}, -- otherwise /v/

	-- EAU
	{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in french loans

	-- OI
	{"oi", "wA", "fr"}, -- pronounced /wa/ in french loans

	-- IJ
	{"ij([^" .. vowels .. "])", "EI%1", "-"}, -- pronounced /əɪ̯/ in dutch-based names

	-- X
	{"#x", "#s", "-"}, -- pronounced /s/ word-initially
	{"x", "ks", "-"}, -- otherwise /ks/

	-- H
	{"([" .. cons .. vowels .. "])h", "%1", "-"}, -- silent if part of consonant digraph or syllable-final
	{"h", "ɦ", "-"}, -- otherwise /ɦ/

	-- O
	{"o([" .. syll_boundary .. "])", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in english loans
	{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position

	-- U
	{"u([" .. cons .. "])", "A%1", "en"}, -- pronounced /a/ in closed syllables in english loans
	{"u", "jů", "en"}, -- otherwise /ju/ in english loans

	-- Y
	-- the origin code must be lowercase "en" like every other English rule,
	-- otherwise it can never equal the origin passed by the caller
	{"y", "j", "en"}, -- pronounced /j/ in english loans
	{"y", "EI", "-"}, -- otherwise /əɪ̯/

	-- circumflex accent
	{CR, "ː", "-"} -- lengthens a vowel with its short quality
}

--- Syllabification function.
-- Inserts the hyphenation point "‧" between syllables of a term whose words are
-- already wrapped in "#" border markers. The affix markers ">" (after a recognised
-- prefix) and "<" (before a recognised suffix) are left in the result for the caller
-- to use or strip.
-- @param term string with "#" around every word
-- @param orig unused here; accepted so all processing steps share one signature
-- @param pos unused here; accepted so all processing steps share one signature
-- @return the term (in decomposed form) with "‧" marking syllable breaks
local function syllabify(term, orig, pos)
	-- decompose accents
	term = decomp(term)

	-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
	term = rsub(term, "([" .. vowels .. "])" .. DR, "‧%1")

	-- mark trigraphs and digraphs with curly brackets.
	-- Each run of vowel letters is segmented once, left to right, preferring the longest
	-- grapheme at every position. Marking one grapheme after another over the whole term
	-- would re-mark a digraph inside an already marked trigraph ("{ooi}" -> "{{oo}i}")
	-- and the trigraph would later be split.
	-- NOTE: all graphemes consist only of the ASCII letters a, e, i, o, u, so byte-wise
	-- string operations are safe inside a run; graphemes_sorted is longest-first.
	term = rsub(term, "[aeiou]+", function(run)
		local pieces = {}
		local i, n = 1, #run
		while i <= n do
			local found
			for _, graph in ipairs(graphemes_sorted) do
				if run:sub(i, i + #graph - 1) == graph then
					found = graph
					break
				end
			end
			if found then
				pieces[#pieces + 1] = "{" .. found .. "}"
				i = i + #found
			else
				pieces[#pieces + 1] = run:sub(i, i)
				i = i + 1
			end
		end
		return table.concat(pieces)
	end)

	-- add > and < for prefix and suffixes respectively (only the first match of each kind is marked)
	for _, prefix in ipairs(affixes.prefixes) do
		if find(term, "#" .. prefix) then
			term = rsub(term, "#" .. prefix, "#" .. prefix .. ">")
			break
		end
	end
	for _, suffix in ipairs(affixes.suffixes) do
		if find(term, suffix .. "#") then
			term = rsub(term, suffix .. "#", "<" .. suffix .. "#")
			break
		end
	end

	-- add dot before consonant + vowel
	term = rsub(term, "([" .. cons .. "]?{?[" .. vowels .. "][" .. accents .. "]?)", "‧%1")

	-- remove any dots inside brackets
	term = rsub(term, "{[^}]*}", function(a) return rsub(a, "‧", "") end)

	-- shift dot before certain consonant clusters and digraphs
	term = rsub(term, "([bcfgkpvw])‧l", "‧%1l") -- clusters with l
	term = rsub(term, "([bcdfgkptwv])‧r", "‧%1r") -- clusters with r
	term = rsub(term, "([dst])‧j", "‧%1j") -- digraphs with j
	term = rsub(term, "([ckgt])‧h", "‧%1h") -- digraphs with h
	term = rsub(term, "n‧g", "ng‧") -- ng is syllable-final
	term = rsub(term, ">s‧", ">‧s") -- s can form a cluster after a prefix

	-- remove leading dots and brackets
	term = rsub(term, "#([^" .. vowels .. "]*)‧", "#%1")
	term = rsub(term, "%.", "‧") -- turn any literal full stop into a syllable dot
	term = rsub(term, "[{}+]", "") -- comment out to debug
	-- collapse runs of dots into one
	return rsub(term, "‧+", "‧")
end

--- Hyphenation function.
-- @param term the term to hyphenate, or a table with an `args` field (such as a
--   frame object), in which case the first argument is used
-- @param orig not used by this function
-- @param pos not used by this function
-- @return the term with "‧" between syllables and all internal markers removed
function export.hyphenation(term, orig, pos)
	-- get user input as table
	if type(term) == "table" then
		term = term.args[1]
	end

	-- mark all word borders
	term = rsub(term, "([^ ]+)", "#%1#")

	-- TODO: format hyphenation through the hyphenation module, along the lines of
	--   local data = { lang = lang, sc = sc, hyphs = ... }
	--   return hyph.format_hyphenations(data)

	-- recompose accents, then strip word-border and affix markers
	local syllabified = recomp(syllabify(term))
	return rsub(syllabified, "[#%[%]<>]", "")
end

--- Stress assignment function.
-- Places the stress mark "ˈ" either directly before a stressed ending or, when no
-- such ending applies, at the start of the term.
-- @param term syllabified term with "#" word-border markers
-- @param orig origin language code of the word (e.g. "af", "fr", "en"), or nil if unknown
-- @param pos not used by this function
-- @return the term with a stress mark inserted
local function stress(term, orig, pos)
	-- words with certain endings are syllable-final stressed;
	-- only the first ending (in list order) that matches the term is considered
	for _, ending in ipairs(stressed_endings) do
		if find(term, ending .. "#") then
			local is_stressed = true
			if ending == "el" then
				-- "-el" is only stressed in loanwords, i.e. when an origin is given
				-- and that origin is not Afrikaans itself
				is_stressed = orig ~= nil and orig ~= "af"
			elseif ending == "o" then
				-- "-o" is only stressed in french loanwords
				is_stressed = orig == "fr"
			end

			if is_stressed then
				return rsub(term, ending .. "#", "ˈ" .. ending .. "#")
			end
			break
		end
	end

	-- add stress mark to first syllable if no ending was stressed
	return rsub(term, "^#", "#ˈ")
end

--- Pronunciation function.
-- Lowercases, syllabifies and stresses the term, then rewrites spelling into phonemes.
-- @param term the term to transcribe
-- @param orig origin language code used to select origin-specific rules in `subs`
--   (e.g. "af", "fr", "en"); nil or "-" selects only the general rules
-- @param pos passed through to the syllabification and stress steps
-- @return the transcription, with "." between syllables and "ˈ" before the stressed one
local function pron(term, orig, pos)
	-- make text lowercase
	term = lower(term)

	-- mark word borders with #
	term = rsub(term, "([^ ]+)", "#%1#")

	-- syllabify term
	term = syllabify(term, orig, pos)

	-- add stress to term
	term = stress(term, orig, pos)

	-- shift stress leftwards to the start of its syllable
	term = rsub(term, "([^" .. syll_boundary .. "]*)ˈ", "ˈ%1")

	-- drop the affix markers left by syllabification; nothing below uses them, and
	-- they would otherwise block the letter rules and show up in the transcription
	term = rsub(term, "[<>]", "")

	-- substitute phonemes: a rule applies when its origin equals `orig` or is the
	-- general "-"; each pattern string is applied at most once, so an origin-specific
	-- rule pre-empts the general rule with the same pattern
	local subbed = {}
	for _, s in ipairs(subs) do
		local pattern, replacement, origin = s[1], s[2], s[3]
		if not subbed[pattern] and (origin == orig or origin == "-") then
			term = rsub(term, pattern, replacement)
			subbed[pattern] = true
		end
	end

	-- make text lowercase again (some rules emit uppercase placeholders such as "OU")
	term = lower(term)

	-- substitute graphemes, longest first so that a trigraph is never pre-empted by a
	-- digraph it contains (plain pairs() gives no ordering guarantee)
	for _, graph in ipairs(graphemes_sorted) do
		term = rsub(term, graph, graphemes[graph])
	end

	-- substitute single-letter vowels
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. "])", function(a, b)
		if b == "‧" or b == "#" then
			return sets.vowel_length[a][2] .. b -- for open syllables
		else
			return sets.vowel_length[a][1] .. b -- for closed syllables
		end
	end)

	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})

	-- remove double consonants
	term = rsub(term, "(.)(‧?)%1", "%2%1")

	-- final adjustments
	term = rsub(term, "‧", ".")
	return rsub(term, "[#%[%]]", "")
end

--- Main export function.
-- @param term the term to transcribe, or a table with an `args` field (such as a
--   frame object), in which case the first argument is used
-- @param orig origin language code, passed through to the pronunciation routine
-- @param pos passed through to the pronunciation routine
-- @return the transcription produced by the pronunciation routine
function export.toIPA(term, orig, pos)
	-- get user input as table
	if type(term) == "table" then
		term = term.args[1]
	end

	return pron(term, orig, pos)
end

-- Hand the table of public functions back to whoever loads this module.
return export