-- Module:cy-IPA/sandbox

--[=====[
Currently missing:
 * Dialects: should include North Wales, South Wales and standard vs. colloquial
   variants of each. These parameters are optional when there is no difference.
 * ch, dd, ff, ng, ll, ph, th should be treated as single letters; all other
   consonant combinations should not.
 * Function for de-aspiration of b, d, g in /sC/ clusters and word-finally, but
   remaining distinct from aspirated p, t, k.
 * Function to treat all voicing assimilation as becoming voiceless rather than
   progressive or regressive assimilation.
 * Function for pre-consonantal obstruent devoicing of d, g, b, s.
 * y should be treated as /ə/, unless in a final syllable, when it is /ɨ/ or /ɨː/
   depending on vowel length. This ensures it stays separate from u /ɨ/ or /ɨː/.
 * y (in final syllables) and u merge with i in South Wales, including in diphthongs.
 * ae should be treated as /ɑːɨ/ in final syllables and /eːɨ̯/ elsewhere.
 * ng is usually ŋ (marked for alphabetisation as g~ already) but may be ŋɡ (not
   considered one letter), especially in compound words.
 * Function to reduce double letters, after appropriate consideration of effects
   on vowel length.
 * Function to convert ⟨ai, au⟩ in final unstressed syllables to /ɛ/ in colloquial Welsh.
 * Function to convert ⟨ai, au, e⟩ in final unstressed syllables to /a/ in
   colloquial Northern Welsh.
 * Many other diphthongs (including stressed) are smoothed in South Welsh - need
   to research.
 * Rule to determine stress - always the penultimate syllable, unless there is a
   stressed suffix such as -(h)áu or the word is a recent loanword.
 * Rules to determine when to make vowels short vs. long. The best way to do this
   is by taking South Welsh as normative wrt. vowel length and North Welsh as
   normative wrt. vowel quality (and length in diphthongs). Whichever of the two
   has a long vowel before a cluster should be normative in this respect.
 * An input stating whether the word is a recent loan from English might make a
   lot of exceptions predictable/automatable, e.g. words with atypical short and
   long vowels or stress.
 * Rules to determine when to make vowels short vs. long. There will need to be
   ways to override this, e.g. by adding a circumflex to long vowels and a grave
   to short vowels.
   Some defaults:
   - vowels should be short if unstressed or /ə/
   - vowels should be long in a stressed open syllable (unless non-final in North Welsh)
   - vowels should be long in a stressed final syllable before
     /b, ch, d, dd, g, f, ff, g, h, l, n, r, ph, s, th/
   - note that exceptions to the above are common for /l, n, r/
   - vowels should also be long in stressed open syllables before
     /b, ch, d, dd, g, f, ff, g, h, l, n, r, ph, th/ but NOT /s/ (except in North
     Wales, where all non-final vowels are short)
   - all other vowels should be short, especially when an aspirated stop and some
     liquid consonants follow /c, m, ll, ng, nn, p, rr, t/
   - vowels should generally be short before clusters, with well-defined exceptions
   - vowels in North Welsh are long in stressed final syllables before /sC, ɬC/
     clusters - should form part of the norm with South Welsh automatically
     derived from it
   - diphthongs with long vowels in North Welsh (only in final syllables) include
     ae /ɑːɨ̯, eːɨ̯/, aw /ɑːu̯/, ew /eːu̯/, ey /e.ɨ̯/, oe /ɔːɨ̯/, ou /ɔːɨ̯/ - should
     form part of the norm with South Welsh automatically derived from it
   - syllables with secondary stress should be treated as if stressed
--]=====]

-- Table of functions exposed by this module.
local export = {}

-- Short local aliases for the mw.ustring / mw.text library functions.
local u      = mw.ustring.char
local rfind  = mw.ustring.find
local rsubn  = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub   = mw.ustring.sub
local ulen   = mw.ustring.len

-- Substitute like rsubn, but hand back only the resulting string; the
-- replacement count that rsubn also returns is dropped.
local function rsub(term, foo, bar)
	-- the extra parentheses truncate the call to its first return value
	return (rsubn(term, foo, bar))
end

-- Keep applying the same rsub substitution until a pass leaves the string
-- unchanged, then return that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- "If not empty": turn the empty string into nil and pass every other value
-- through untouched.
local function ine(x)
	if x ~= "" then
		return x
	end
	return nil
end

-- Combining marks: acute, grave, breve and diaeresis.
local AC, GR, BREVE, DIA = u(0x0301), u(0x0300), u(0x0306), u(0x0308)

-- The two stress-bearing accents, as a raw set and as a one-character class.
local stress_accent = AC .. GR
local stress_accent_c = ("[%s]"):format(stress_accent)

-- Every accent handled here, and a pattern for a (possibly empty) run of them.
local accent = stress_accent .. BREVE
local accents_r = ("[%s]*"):format(accent)

-- Vowel set: precomposed accented vowel letters plus the accents above.
-- NOTE(review): the list has no unaccented a/e/i/o/u/w/y, and export.IPA
-- decomposes its input with toNFD before these classes are used - confirm
-- that this is intended.
local vowel = "àáâäèéêëìíîïòóôöùúûẁẃŵẅüỳýŷÿ" .. accent
local vowel_c = ("[%s]"):format(vowel)

-- Anything that is not in the vowel set, a '.', the ⁀ boundary marker, a
-- space or a hyphen.
local cons_c = "[^" .. vowel .. ".⁀ %-]"

-- Artefact from Module:de-IPA, Welsh has i-umlaut so may be useful?
local front_vowel = "eiyæœ"
local front_vowel_c = ("[%s]"):format(front_vowel)

-- Letter-to-phoneme lookup used by export.IPA, keyed by the first letter of a
-- sequence. A value is one of:
--   * a string: the phoneme for that single letter;
--   * an array ({ "k", "s" }): several phonemes for that single letter;
--   * a table keyed by letter sequences starting with that letter: export.IPA
--     picks the longest key that matches the text at the current position,
--     and its value is again a string or an array of phonemes.
--
-- The key "f" used to be defined twice (the Welsh sub-table and a leftover
-- `["f"] = "f"` from Module:de-IPA). The order of assignments in a table
-- constructor is undefined, and in practice the leftover replaced the Welsh
-- mapping, so only the Welsh entry is kept.
local sequences = {
	["a"] = {
		["a"  ] = "a";
		["à"  ] = "a";
		["á"  ] = "a";
		["â"  ] = "a";
		["ä"  ] = "a";
		["ae" ] = "ɑːɨ̯";
		["ai" ] = "ai̯";
		["au" ] = "aɨ̯";
		["aw" ] = "ɑːu̯";
	};
	["b"] = {
		["b"  ] = "b";
	};
	["c"] = {
		["c"  ] = "k";
		["ch" ] = "ç";
	};
	["d"] = {
		["d"  ] = "d";
		-- NOTE(review): Welsh ⟨dd⟩ is normally /ð/; this value looks
		-- inherited from Module:de-IPA - confirm.
		["dd" ] = "d";
	};
	["e"] = {
		["e"  ] = "ɛ";
		["è"  ] = "ɛ";
		["é"  ] = "eː";
		["ê"  ] = "eː";
		["ë"  ] = "e";
		["ei" ] = "ɛi̯";
		["eu" ] = "əɨ̯";
		["ew" ] = "eːu̯";
		["ey" ] = "aɨ̯";
	};
	["f"] = {
		["f"  ] = "v";
		["ff" ] = "f";
	};
	-- Here, Arafsymudwr stopped editing and what follows is from Module:de-IPA
	["g"] = "ɡ";
	["h"] = "h";
	["i"] = {
		["i"  ] = "ɪ";
		["ie" ] = "iː";
	};
	["j"] = "j";
	["k"] = {
		["k"  ] = "k";
		["kk" ] = "k";
		["ck" ] = "k";
	};
	["l"] = "l";
	["m"] = "m";
	["n"] = {
		["n"  ] = "n";
		["ng" ] = "ŋ";
		["nn" ] = "n";
	};
	["o"] = {
		["oo" ] = "oː";
		["os" ] = { "ɔ", "s" };
		["o"  ] = "ɔ";
	};
	["ö"] = { -- XXX: sometimes /øː/
		["ö"  ] = "œ";
		["ös" ] = { "œ", "s" };
	};
	["p"] = {
		["ph" ] = "f";
		["pp" ] = "p";
		["p"  ] = "p";
	};
	["q"] = {
		["qu" ] = { "k", "f" };
		["q"  ] = "k"; -- XXX
	};
	["r"] = { -- XXX: /ʀ/? /r/?; sometimes /ɐ/ ("Uhr"); also /ər/ ("oder")
		["r"  ] = "r";
		["rr" ] = "r";
	};
	["s"] = {
		["s"  ] = "s";
		["sch" ] = "ʃ";
		["sp" ] = { "ʃ", "p" };
		["ss" ] = "s";
		["st" ] = { "ʃ", "t" };
	};
	["t"] = {
		["t"  ] = "t";
		["tsch"] = "t͡ʃ";
		["tt" ] = "t";
		["tion"] = { "t͡s", "i̯", "o", "n" };
	};
	["u"] = {
		["u"  ] = "ʊ";
		["uch" ] = { "ʊ", "x" };
	};
	["ü"] = {
		["ü"  ] = "yː";
		["üh" ] = "yː";
	};
	["v"] = "f";
	["w"] = "ʋ";
	["x"] = { "k", "s" }; -- XXX
	["y"] = "i";
	["z"] = "z"; -- already converted from s
	["ß"] = "s";
	-- key is the combining acute accent U+0301 (UTF-8 bytes 0xCC 0x81)
	["\204\129"] = "ˈ"; -- FIXME
	["-"] = {};
}

-- The devoicing rule in export.IPA refers to `cons_or_boundary_c` and
-- `devoiced_cons`, neither of which was defined anywhere in this module, so
-- building the pattern raised "attempt to concatenate a nil value" on every
-- call. Both are reconstructed here.
-- NOTE(review): the rule itself is inherited from Module:de-IPA; confirm that
-- it, and these reconstructed definitions, are what is wanted for Welsh.

-- A consonant or the ⁀ word-boundary marker: like cons_c but without
-- excluding ⁀. The unaccented vowel letters are excluded explicitly because
-- the text is NFD-decomposed by the time this class is applied.
local cons_or_boundary_c = "[^" .. vowel .. "aeiouwy. %-]"

-- Voiceless counterparts of the voiced stops. Letters without an entry
-- (such as r) are left unchanged by the rule.
local devoiced_cons = { b = "p", d = "t", g = "k" }

-- Convert a spelled word or phrase into a phoneme string.
--
-- text: either the string to convert, or a table with an `args` field (as
--       passed by a template invocation), in which case args[1], args.orig
--       and args.pos supply text, orig and pos. When no text is given, the
--       title of the current page is used.
-- orig, pos: accepted but not used by the code in this function.
-- Returns the converted string with the internal ⁀ boundary markers and
-- hyphens removed.
function export.IPA(text, orig, pos)
	if type(text) == 'table' then
		text, orig, pos = ine(text.args[1]), ine(text.args.orig), ine(text.args.pos)
	end
	-- getCurrentTitle is a function and has to be called; indexing it
	-- directly raised an error whenever no text was supplied.
	text = text or mw.title.getCurrentTitle().text
	text = ulower(text)

	-- decompose, then recompose umlauted vowels, and convert ae oe ue to
	-- umlauted vowels
	-- NOTE(review): because ae is rewritten to ä here, the "ae" entry in
	-- `sequences` can never match - confirm this step should apply to Welsh.
	text = mw.ustring.toNFD(text)
	-- while we're doing this, don't get confused by wrongly-ordered umlauts/e's
	-- and other accents
	text = rsub(text, "(" .. accents_r .. ")([e" .. DIA .. "])", "%2%1")
	text = rsub(text, "([aou])[e" .. DIA .. "]", {a="ä", o="ö", u="ü"})
	-- put breves before acute/grave accents
	text = rsub(text, "(" .. stress_accent_c .. ")" .. BREVE, BREVE .. "%1")

	-- To simplify checking for word boundaries and liaison markers, we
	-- add ⁀ at the beginning and end of all words, and remove it at the end.
	-- Note that the liaison marker is ‿.
	text = rsub(text, "%s*,%s*", '⁀⁀ | ⁀⁀')
	text = rsub(text, "%s+", '⁀ ⁀')
	text = rsub(text, "%-+", '⁀-⁀')
	text = '⁀⁀' .. text .. '⁀⁀'

	-- Spelling-level rewrites applied before the letter-by-letter lookup.
	text = rsub(text, "([aou]" .. accents_r .. ")" .. "ch", "%1χ")
	text = rsub(text, "sch", "ʃ")
	text = rsub(text, "ch", "ç")
	text = rsub(text, "ck", "kk")
	text = rsub(text, "z", "c")
	text = rsub(text, "s(" .. vowel_c .. ")", "z%1")
	-- devoice b/d/g before a consonant or word boundary
	text = rsub(text, "([bdgr])(" .. cons_or_boundary_c .. ")",
		function(c1, c2)
			-- fall back to the letter itself when it has no voiceless
			-- counterpart, instead of concatenating nil
			return (devoiced_cons[c1] or c1) .. c2
		end)

	-- Convert letters to phonemes: at each position take the longest letter
	-- sequence listed in `sequences` that matches, emit its phoneme(s), and
	-- skip past it. Characters with no entry are copied through unchanged.
	local phones, i, n = {}, 1, ulen(text)
	while i <= n do
		local bid = ulower(usub(text, i, i))
		local value = sequences[bid]
		-- a table without an array part is a map of multi-letter sequences
		if (type(value) == 'table') and not value[1] then
			local bidl = ulen(bid)
			for seq in pairs(value) do
				local seql = ulen(seq)
				if seql > bidl and ulower(usub(text, i, i + seql - 1)) == seq then
					bid = seq
					bidl = seql
				end
			end
			value = value[bid]
		end
		if type(value) == 'string' then
			table.insert(phones, value)
		elseif not value then
			table.insert(phones, bid)
		else
			for _, phone in ipairs(value) do
				table.insert(phones, phone)
			end
		end
		i = i + ulen(bid)
	end

	text = table.concat(phones)

	--remove hyphens and word-boundary markers
	text = rsub(text, '[⁀%-]', '')
	return text
end

-- Hand the module table (and with it export.IPA) back to whoever loads this module.
return export