-- Module:User:Surjection/fi-UPA

local export = {}

local m_hyph = require("Module:fi-hyphenation")
local langcode = "fi"

local gsub = mw.ustring.gsub

-- Combining marks inserted into the transcription, built from code points.
local U = mw.ustring.char
local diphthong = U(0x361)	-- tie placed between the two vowels of a diphthong
local hiatus = U(0x1DFC)	-- placed between adjacent vowels that do not form a diphthong
local unreleased = U(0x2FE)	-- added after the first copy of a plosive doubled by the "*" handling
local nasalized = U(0x330)	-- added to a vowel standing between two nasals

-- Spelling sequences and the phoneme strings they are converted to.
-- UPA_word always picks the longest key that matches at the current position.
local letters_phonemes = {
	["å"] = "o",
	["y"] = "ü",
	["q"] = "k",
	["x"] = "ks",
	["zz"] = "ts",
	["ng"] = "ŋŋ",
	["nk"] = "ŋk",
	["qu"] = "kv",
	["*"] = "ˣ",
	["’"] = "₍",
	["."] = "₍",
}

-- how many unstressed syllables at most in a single unit,
-- thus max consecutive unstressed syllables
local lookahead = 3

-- Character inventories and the pattern character classes built from them.
local long = "̄"
local vowels = "aeiouüäö"
local vowel = "[" .. vowels .. "]"
local consonants = "kptgbdfˀsnmŋlrhvšžrjɦχ"
local consonant = "[" .. consonants .. "]"
local diacriticsvv = long .. "̝̞̠̟̪́" .. unreleased
local diacriticsv = diacriticsvv .. diphthong .. nasalized
local diacritics = diacriticsv .. hiatus
local diacritic = "[" .. diacritics .. "]"

-- Letters that count as consonants/vowels in the spelling (before conversion),
-- in addition to the phonemic inventories above.
local spelled_consonants = "cvwxz"
local spelled_consonant = "[" .. consonants .. spelled_consonants .. "]"
local spelled_vowels = "y"
local spelled_vowel = "[" .. vowels .. spelled_vowels .. "]"

-- "tertiary stress", a weaker secondary stress (either rhythmic or in some
-- compound words). is there a better way to represent this?
-- NOTE: this is currently the same character as the secondary stress mark.
local tertiary = "ˌ"
export.tertiary = tertiary

-- Characters that, at the start of a syllable, indicate that it is stressed.
local stress_indicator = "[ ˈˌ" .. tertiary .. "/-]"
local plosives = "kptbdg"

-- When true, UPA_wordparts moves the stress marks behind the syllable nucleus
-- and rewrites them as "·" (primary) and ":" (secondary/tertiary).
local use_UPA_stress = true
-- Pattern classes for stress marks: *_p* includes the primary mark, *_s* does not.
local stress_p = "[ˈˌ" .. tertiary .. "]"
local stress_s = "[ˌ" .. tertiary .. "]"
local stress_pd = "[ˈˌ" .. tertiary .. "]"
local stress_sd = "[ˌ" .. tertiary .. "]"

-- This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
for letter in mw.ustring.gmatch("aeiouäödhfjklmnprstuv", ".") do
	letters_phonemes[letter] = letter
end

-- These patterns find the diphthongs in the UPA transcription, so that the
-- correct tie diacritic can be added. Each pattern captures the two vowels
-- separately.

-- /_i/ diphthongs can appear in any syllable
local diphthongs_i = {
	"([aeouüäö])(i)",
}

-- /_U/ diphthongs can appear in the initial syllable or later open syllables
-- (no consonantal coda)
local diphthongs_u = {
	"([aoei])(u)",
	"([eiäö])(ü)",
}

-- rising diphthongs can only appear in the initial syllable
-- (of a word, compound word part, etc.)
local diphthongs_rising = {
	"(u)(o)",
	"(i)(e)",
	"(ü)(ö)",
}

-- Adjustments applied to both the broad and the narrow transcription.
-- p: transcription string. Returns the adjusted string.
local function apply_post_fixes(p)
	-- ŋn directly after the primary stress mark (i.e. word-initial) is /gn/
	p = mw.ustring.gsub(p, "ˈŋn", "ˈɡn")

	-- ŋ is short before consonant (by default)
	p = mw.ustring.gsub(p, "ŋŋ(" .. consonant .. ")", "ŋ%1")

	-- dissimilation of vowels by sandhi: when the same vowel (with its
	-- diacritics) stands on both sides of a secondary/tertiary stress mark,
	-- "₍" is inserted after the first one
	p = mw.ustring.gsub(p, "(" .. vowel .. diacritic .. "*" .. long .. "?)(" .. stress_s .. ")%1", "%1₍%2%1")

	return p
end

-- Adjustments applied only to the narrow (phonetic) transcription.
-- p: transcription string. Returns the adjusted string.
-- The order of the substitutions matters; later rules see the output of
-- earlier ones.
local function apply_post_fixes_narrow(p)
	-- long j, v after i, u diphthong
	p = mw.ustring.gsub(p, "(" .. diphthong .. "i)j(" .. vowel .. ")", "%1j(" .. long .. ")%2")
	-- /ʋ/ after /u/ usually realized as /w/
	-- (see Suomi, Toivanen and Ylitalo 2008; page number missing in the original comment)
	p = mw.ustring.gsub(p, "(" .. diphthong .. "u)v(" .. vowel .. ")", "%1w(w)%2")
	-- cleanup: drop a "." directly after a secondary/tertiary stress mark
	p = mw.ustring.gsub(p, "(" .. stress_s .. ")%.", "%1")

	-- tautosyllabic nasals nasalize vowels between them
	-- (see Suomi, Toivanen and Ylitalo 2008, p. 22)
	p = mw.ustring.gsub(p, "([mnŋ][mnŋ]?)(" .. vowel .. ")(" .. diacritic .. "*)([mnŋ])(.?)",
		function (n0, nv, nvd, n1, anchor)
			-- this cannot be simplified to "(.?)" => "([^" .. vowels .. "]?)",
			-- otherwise a vowel after would match
			if not mw.ustring.find(anchor, vowel) then
				return n0 .. nv .. nasalized .. nvd .. n1 .. anchor
			end
			-- returning nothing keeps the original match unchanged
		end)

	-- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
	p = mw.ustring.gsub(p, "n%s-(" .. stress_pd .. "?%s*)([gk])", "ŋ‿%1%2")
	p = mw.ustring.gsub(p, "n%s-(" .. stress_pd .. "?%s*)([mpb])", "m‿%1%2")
	p = mw.ustring.gsub(p, "[nm]%s-(" .. stress_pd .. "?%s*)([f])", "ᴍ͔‿%1%2")
	p = mw.ustring.gsub(p, "n(" .. stress_pd .. "?%s*)([gk])", "ŋ%1%2")
	p = mw.ustring.gsub(p, "n(" .. stress_pd .. "?%s*)([mpb])", "m%1%2")
	p = mw.ustring.gsub(p, "[nm](" .. stress_pd .. "?%s*)([f])", "ᴍ͔%1%2")

	-- handle potentially long consonants over secondary stresses
	p = mw.ustring.gsub(p, "(" .. stress_s .. ")(" .. consonant .. diacritic .. "*)%(%2%)", "(%2)%1%2")
	p = mw.ustring.gsub(p, "(" .. consonant .. diacritic .. "*)%(%1%)(" .. stress_s .. ")", "%2%1(%1)")
	p = mw.ustring.gsub(p, "(ŋ" .. diacritic .. "*)" .. tertiary .. "ɡ", "%1" .. tertiary .. "ŋ")

	-- [k] allophone before front vowels
	-- (see Suomi, Toivanen and Ylitalo 2008, p. 27)
	-- This module writes the front vowels as e, i, ü, ä, ö; the IPA letters
	-- y, æ, ø are still accepted so that any input using them behaves as before.
	p = mw.ustring.gsub(p, "k([eiüäöyæø])", "k̟%1")

	return p
end

-- Tests whether a (spelled) syllable is light: an optional separator symbol,
-- at most one consonant letter, exactly one vowel letter, and optionally a
-- trailing "*" that may be parenthesised.
-- Returns the result of mw.ustring.find, i.e. a truthy position on a match
-- and nil otherwise.
function export.is_light_syllable(syllable)
	local light_pattern = "^[" .. m_hyph.sep_symbols .. "]?"
		.. spelled_consonant .. "?"
		.. spelled_vowel
		.. "%(?%*?%)?$"
	local lowered = mw.ustring.lower(syllable)
	return mw.ustring.find(lowered, light_pattern)
end

-- Reports whether any syllable of hyph from position start onwards is heavy
-- (not light). The scan covers at most lookahead syllables past start and
-- never includes the final syllable.
function export.has_later_heavy_syllable(hyph, start)
	local last = math.min(start + lookahead, #hyph - 1)
	local i = start
	while i <= last do
		if not export.is_light_syllable(hyph[i]) then
			return true
		end
		i = i + 1
	end
	return false
end

-- applied *before* UPA conversion
--
-- Hyphenates word and marks the syllables that receive rhythmic stress.
-- Returns two values:
--   1. the word with the tertiary stress mark inserted before every syllable
--      that got rhythmic stress (existing hyphens are kept as "-");
--   2. a table indexed by component number (counting from 1, incremented at
--      every hyphen or inserted stress mark) that is true for components
--      started by an inserted stress mark and false for those started by a
--      hyphen that was already present.
local function add_secondary_stress(word)
	-- keep_sep_symbols = true
	local hyph = m_hyph.generate_hyphenation(word, true)
	local parts = {}
	local last_index = #hyph
	-- whether the previous syllable ended up stressed; kept local so that no
	-- state leaks into the global environment
	local prev_stress = false

	-- find stressed syllables and add secondary stress before each syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		local has_symbol = mw.ustring.find(syllable, "^[" .. m_hyph.sep_symbols .. "₍ˈˌ" .. tertiary .. "]")
		if has_symbol then
			-- check if symbol indicates stress
			stressed = mw.ustring.find(syllable, "^" .. stress_indicator)
			has_symbol = stressed
		end

		if not stressed then
			if index == 1 then
				stressed = true
			elseif not prev_stress and index < last_index then
				-- shift stress if current syllable light and a heavy syllable
				-- occurs later (except as the last syllable)
				stressed = index == last_index - 1
					or not export.is_light_syllable(syllable)
					or not export.has_later_heavy_syllable(hyph, index + 1)
			end
		end

		-- check if next syllable already stressed
		-- if is, do not stress this syllable
		if stressed and index < last_index then
			stressed = stressed and not mw.ustring.find(hyph[index + 1], "^" .. stress_indicator)
		end

		if index > 1 and stressed and not has_symbol then
			-- "-$" is a temporary marker, resolved into a stress mark below
			parts[#parts + 1] = "-$"
		end
		parts[#parts + 1] = syllable

		prev_stress = stressed
	end

	local noninitial = {}
	local component = 1
	local res = mw.ustring.gsub(table.concat(parts), "-([$]?)",
		function (dollar)
			component = component + 1
			noninitial[component] = #dollar > 0
			return #dollar > 0 and tertiary or "-"
		end)
	return res, noninitial
end

-- Marks vowel sequences in UPA as diphthongs (tie) or hiatus.
-- UPA: transcription string.
-- strict_initial: when true, "initial syllable" means only the very start of
--   the string; otherwise it means the first vowels after any stress
--   indicator character.
-- Returns the marked-up string.
local function handle_diphthongs(UPA, strict_initial)
	-- Decides between tie and hiatus for a /_U/ diphthong candidate outside
	-- the initial syllable; after is the text following the two vowels.
	local function open_noninitial(v1, v2, after)
		if mw.ustring.find(after, "^" .. consonant .. diacritic .. "*" .. vowel) then
			-- consonant after diphthong
			-- must be followed by vowel so that it's part of the
			-- following syllable, else it's in this syllable
			-- and thus this syllable is closed
			return v1 .. diphthong .. v2 .. after
		elseif mw.ustring.find(after, "^" .. consonant) then
			-- consonant after diphthong
			-- must be in this syllable
			return v1 .. hiatus .. v2 .. after
		end
		-- no consonant after diphthong => open
		return v1 .. diphthong .. v2 .. after
	end

	-- Same decision for a candidate at the very end of the string.
	local function open_final(v1, v2)
		return open_noninitial(v1, v2, "")
	end

	for _, diphthong_regex in pairs(diphthongs_i) do
		UPA = mw.ustring.gsub(UPA, diphthong_regex, "%1" .. diphthong .. "%2")
	end

	local only_initial = "(" .. stress_indicator .. "[^" .. vowels .. "]*)"
	if strict_initial then
		only_initial = "^([^" .. vowels .. "]*)"
	end

	for _, diphthong_regex in pairs(diphthongs_rising) do
		-- initial syllables
		UPA = mw.ustring.gsub(UPA, only_initial .. diphthong_regex, "%1%2" .. diphthong .. "%3")
	end

	for _, diphthong_regex in pairs(diphthongs_u) do
		-- initial syllables
		UPA = mw.ustring.gsub(UPA, only_initial .. diphthong_regex, "%1%2" .. diphthong .. "%3")

		-- open non-initial syllables
		UPA = mw.ustring.gsub(UPA, diphthong_regex .. "(.+)", open_noninitial)
		-- "$" only anchors when it is the last character of the pattern, so
		-- it must not be wrapped in a capture; the empty "after" is supplied
		-- by open_final instead
		UPA = mw.ustring.gsub(UPA, diphthong_regex .. "$", open_final)
	end

	-- any two vowels still directly adjacent are in hiatus
	UPA = mw.ustring.gsub(UPA, "(" .. vowel .. "[" .. diacriticsvv .. "]*)(" .. vowel .. "[" .. diacriticsvv .. "]*)", "%1" .. hiatus .. "%2")

	return UPA
end

-- Converts one word part from spelling to the UPA transcription.
-- term: the (lower-cased) spelling of the part.
-- is_narrow: when true, also applies the allophones of h, vowel length and
--   (if has_initial is true) diphthong marking with a strict initial syllable.
-- has_initial: whether the part begins with an initial syllable.
-- Returns the transcription string.
local function UPA_word(term, is_narrow, has_initial)
	local rest = term
	local phonemes = {}

	while mw.ustring.len(rest) > 0 do
		-- Find the longest string of letters that matches a recognised sequence in the list
		local longestmatch = ""
		for letter in pairs(letters_phonemes) do
			local letter_len = mw.ustring.len(letter)
			if mw.ustring.sub(rest, 1, letter_len) == letter and letter_len > mw.ustring.len(longestmatch) then
				longestmatch = letter
			end
		end

		-- Convert the string to UPA
		if mw.ustring.len(longestmatch) > 0 then
			table.insert(phonemes, letters_phonemes[longestmatch])
			rest = mw.ustring.sub(rest, mw.ustring.len(longestmatch) + 1)
		else
			-- unknown character: copy it through unchanged
			table.insert(phonemes, mw.ustring.sub(rest, 1, 1))
			rest = mw.ustring.sub(rest, 2)
		end
	end

	local result = table.concat(phonemes)

	if is_narrow then
		-- articulation of h (Suomi, Toivanen & Ylitalo 2008, p. 28)
		result = mw.ustring.gsub(result, "(.?)h(.?)",
			function (before, after)
				-- before/after are arbitrary characters from the input, so they
				-- are looked up with plain find (4th argument true); used as
				-- patterns, characters such as "(", "[" or "%" would raise an
				-- error and "^" or "$" would match spuriously
				local h
				if after ~= "" and after ~= "h" then
					if before ~= "" and vowels:find(before, 1, true) then
						if consonants:find(after, 1, true) then
							-- vihma, yhtiö
							if before == "i" or before == "ü" then
								h = "h́"
							-- mahti, kohme, tuhka
							elseif before == "a" or before == "o" or before == "u" then
								h = "χ"
							end
						-- maha
						elseif vowels:find(after, 1, true) then
							h = "ɦ"
						end
					end
				end
				if h then
					return before .. h .. after
				end
				-- returning nothing keeps the original match unchanged
			end)

		-- double letter replacement and diphthongs must be handled earlier here
		result = mw.ustring.gsub(result, "(" .. vowel .. ")%1", "%1" .. long)
		if has_initial then
			result = handle_diphthongs(result, true)
		end
	end

	return result
end

-- Transcribes a whole term, which may consist of several parts separated by
-- "-" (or "/", which is kept as a marker at the start of the following part).
-- term: the spelling; a leading hyphen marks a suffix, a trailing one a prefix.
-- is_narrow: true for the narrow (phonetic) transcription, false for the
--   broad one.
-- Returns the transcription string.
function export.UPA_wordparts(term, is_narrow)
	term = mw.ustring.lower(term)
	local notinitial = {} -- true if the component is not an initial component
	local hyphenstress = "ˌ" -- secondary by default
	local is_prefix = false
	local is_suffix = false

	if mw.ustring.find(term, "%/") then
		hyphenstress = tertiary -- tertiary if we have slashes
	end
	if is_narrow then
		term, notinitial = add_secondary_stress(term)
	end

	local found
	term, found = mw.ustring.gsub(term, "^%-+", "")
	is_suffix = found > 0
	term, found = mw.ustring.gsub(term, "%-+$", "")
	is_prefix = found > 0

	-- make sure we keep slashes to figure out if secondary or tertiary
	term = mw.ustring.gsub(term, "%/", "-%1")
	local wordparts = mw.text.split(term, "-", true)

	for key, val in ipairs(wordparts) do
		local stress = key > 1 and hyphenstress or "ˈ"
		local part = val

		if mw.ustring.find(part, "^%/") then
			stress = "ˌ" -- always secondary
			part = part:sub(2) -- "/" is a single byte, so a byte-wise sub is safe
		end

		wordparts[key] = stress .. UPA_word(part, is_narrow, not notinitial[key])
	end

	-- declared local so the result does not leak into the global environment
	local UPA = table.concat(wordparts, "")

	if is_narrow then
		-- handle * in narrow transcription
		UPA = mw.ustring.gsub(UPA, "ˣ(%)?%s*" .. stress_p .. "?)((.?)" .. diacritic .. "*)",
			function (post, after, potential_consonant)
				if potential_consonant == "" then
					-- nothing follows: the * is realised as a glottal stop
					if mw.ustring.find(post, "^%)") then
						return "ˀ" .. post .. after
					else
						return post .. "(ˀ)" .. after
					end
				elseif consonants:find(potential_consonant, 1, true) then
					-- a consonant follows: it is doubled. Plain find is used
					-- because potential_consonant is an arbitrary input
					-- character and must not be interpreted as a pattern.
					if #post > 0 then
						local amark = ""
						if plosives:find(mw.ustring.sub(after, 1, 1), 1, true) then
							amark = unreleased
						end
						return after .. amark .. post .. after
					else
						return post .. after .. after
					end
				else
					return post .. "ˀ" .. after
				end
			end)
	else
		-- Replace double vowels with single vowel plus length sign.
		UPA = gsub(UPA, "(" .. vowel .. ")%1", "%1" .. long)
		UPA = handle_diphthongs(UPA, false)
	end

	UPA = apply_post_fixes(UPA)
	if is_narrow then
		UPA = apply_post_fixes_narrow(UPA)
	end

	if is_prefix then
		UPA = UPA .. "-"
	end
	if is_suffix then
		UPA = "-" .. UPA
	end

	if use_UPA_stress then
		-- move each stress mark behind the vowels of its syllable, drop a
		-- hiatus mark directly following it, then switch to the UPA symbols
		UPA = mw.ustring.gsub(UPA, "([ˈˌ])([" .. consonants .. diacriticsv .. "]-)([" .. vowels .. diacriticsv .. "]+)", "%2%3%1")
		UPA = mw.ustring.gsub(UPA, "([ˈˌ])" .. hiatus, "%1")
		UPA = mw.ustring.gsub(UPA, "ˈ", "·")
		UPA = mw.ustring.gsub(UPA, "ˌ", ":")
	end

	return UPA
end

-- Entry point. term is either a string or a frame object; for a frame, the
-- first argument of the parent frame is used.
-- When no term is given the current page title is transcribed; a term of "*"
-- means the page title followed by "*".
-- Returns the formatted output of format_UPA_full for the broad and the
-- narrow transcription.
function export.UPA(term)
	if type(term) == "table" then
		-- getParent is a method and must be called before indexing .args
		term = term:getParent().args[1]
	end

	local title = mw.title.getCurrentTitle().text

	if not term then
		term = title
	elseif term == "*" then
		term = title .. "*"
	end

	--local no_count = mw.ustring.match(term, " ")

	-- locals, so the results do not leak into the global environment
	local UPA_narrow = export.UPA_wordparts(term, true)
	local UPA = export.UPA_wordparts(term, false)

	return require("Module:User:Surjection/UPA").format_UPA_full(
		require("Module:languages").getByCode(langcode),
		{{pron = UPA, phonetic = false}, {pron = UPA_narrow, phonetic = true}})
end

return export