-- Module:pl-pronunciation/sandbox

local export = {}

local langcode = "pl" local lang = require("Module:languages").getByCode(langcode)

local m_IPA = require("Module:IPA") local m_pl_IPA = require("Module:pl-IPA")

-- Orthographic vowel letters, and the character class matching one of them.
local vowels = "aeiouyąęó"
local vowel = "[" .. vowels .. "]"
-- Orthographic consonant letters, and the character class matching one of them.
-- "u" and "y" are listed here as well as in `vowels`; partition() tests the
-- vowel class first, so those two letters are always classified as vowels there.
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = "[" .. consonants .. "]"
-- vowel digraphs, not necessarily actual phonetic diphthongs
-- (key = first letter, value = pattern for the possible second letters)
local diphthong_i_v2 = "[aąoeęuói]"
local diphthongs = {
	a = "u",
	e = "u",
	i = diphthong_i_v2,
}
-- consonant digraphs (key = first letter, value = possible second letters)
local digraphs = {
	c = "[hz]",
	d = "[zźż]",
	q = "u",
	r = "z",
	s = "z",
}

-- Word endings that make get_word_suffix() report category 1
-- (past tense verb stressed antepenultimately).
local past_tense_suffixes = {
	"liśmy",
	"liście",
	"łyśmy",
	"łyście",
}

-- Word endings that make get_word_suffix() report category 2
-- (Latin borrowing stressed antepenultimately), listed as -ik-/-yk- pairs.
-- A few endings appear twice; that is harmless because the list is only
-- scanned for a matching ending.
local latin_borrowing_suffixes = {
	"ika", "yka",
	"iki", "yki",
	"ika", "yka",
	"ice", "yce",
	"ikom", "ykom",
	"ikę", "ykę",
	"iką", "yką",
	"ice", "yce",
	"ikach", "ykach",
	"iko", "yko",
}

-- if this is changed, the next two functions also need to be changed

-- Reports whether a respelling still spells the same word as `word`.
-- Apostrophes and full stops are removed from the respelling; then, in both
-- strings, a "j" that precedes one of the diphthong_i_v2 letters is rewritten
-- as "i", so spellings differing only in j/i before such a letter compare equal.
-- Returns true when the two normalised strings are identical.
local function is_respelling_close_enough(respelling, word)
	local j_before_vowel = "j(" .. diphthong_i_v2 .. ")"

	local function j_to_i(text)
		-- the extra parentheses drop gsub's second result (the match count)
		return (mw.ustring.gsub(text, j_before_vowel, "i%1"))
	end

	local unmarked = mw.ustring.gsub(respelling, "['.]", "")
	return j_to_i(word) == j_to_i(unmarked)
end

-- Splits a respelling into a list of { kind, text } parts.
--   word   the respelling; "'" and "-" in it are treated like "." (a break)
--   oword  the string the part texts are copied from (the caller passes the
--          page title)
-- Kinds: "v" = vowel letter or vowel digraph, "c" = consonant letter or
-- consonant digraph, "b" = explicit syllable break (no text).
-- Returns nil when the word contains a space (multiword) or any character
-- that is neither a listed letter nor a break marker.
local function partition(word, oword)
	local parts = {}
	local lenword = mw.ustring.len(word)
	local pos = 1
	-- number of break markers seen so far in `word` that have no counterpart
	-- in `oword`; subtracted from `pos` to index into `oword`
	local offset = 0
	word = mw.ustring.gsub(word, "['-]", ".")
	-- lower-case once up front: `word` does not change inside the loop
	local lowered = mw.ustring.lower(word)

	-- Length (1 or 2) of the letter group starting at `pos`; `seconds` maps a
	-- first letter to the pattern of letters that may complete a digraph.
	local function group_length(seconds)
		local initial = mw.ustring.sub(lowered, pos, pos)
		local second = seconds[initial]
		if second and mw.ustring.find(lowered, "^" .. initial .. second, pos) then
			return 2
		end
		return 1
	end

	while pos <= lenword do
		local kind, seq
		if mw.ustring.find(lowered, "^" .. vowel, pos) then
			kind, seq = "v", group_length(diphthongs)
		elseif mw.ustring.find(lowered, "^" .. consonant, pos) then
			kind, seq = "c", group_length(digraphs)
		elseif mw.ustring.find(word, "^% ", pos) then
			-- multiword, do not hyphenate
			return nil
		elseif mw.ustring.find(word, "^%.", pos) then
			-- syllable break
			if not mw.ustring.find(oword, "^['-]", pos - offset) then
				-- the break exists only in the respelling, so `oword` is now
				-- one more character behind `word`
				offset = offset + 1
			end
			table.insert(parts, { "b", nil })
			pos = pos + 1
		else
			-- unrecognized symbol
			return nil
		end

		if kind then
			-- copy the letters from `oword` so the original casing is kept
			local first = pos - offset
			table.insert(parts, { kind, mw.ustring.sub(oword, first, first + seq - 1) })
			pos = pos + seq
		end
	end
	return parts
end

-- Classifies a word by its ending.
-- Stress marks, apostrophes, full stops and commas are removed first.
-- Returns:
--   0 - no listed ending matched
--   1 - the word ends in one of past_tense_suffixes
--   2 - the word ends in one of latin_borrowing_suffixes (checked last, so it
--       wins if both lists match)
local function get_word_suffix(word)
	-- "ˈ" and "ˌ" are multi-byte in UTF-8, so the character class has to be
	-- matched per code point; the byte-oriented string.gsub would strip their
	-- individual bytes and could damage other characters sharing them.
	word = mw.ustring.gsub(word, "[ˈ'.,ˌ]", "")
	local word_suffix = 0
	for _, suffix in ipairs(past_tense_suffixes) do
		if word:sub(-#suffix) == suffix then
			word_suffix = 1
		end
	end
	for _, suffix in ipairs(latin_borrowing_suffixes) do
		if word:sub(-#suffix) == suffix then
			word_suffix = 2
		end
	end
	return word_suffix
end

-- Builds a list of syllables (strings) for `word`.
--   word    the respelling, passed to partition() as the string to analyse
--   otitle  passed to partition() as the string the letters are copied from
-- Returns nil when partition() rejects the word or yields an unknown part
-- kind; otherwise returns the table of syllables.
function export.generate_hyphenation(word, otitle)
	local parts = partition(word, otitle)
	if not parts then
		return nil
	end

	local syllables = {}
	local cursyl = ""
	-- true while the syllable being built already contains a vowel group
	local nucleus = false
	-- length (in characters) of the consonant group that closed the previous
	-- syllable, or nil when there is none that may still be moved
	local coda = nil

	-- Stores the pending text: as a new syllable if it has a vowel (or nothing
	-- has been stored yet), otherwise appended to the previous syllable.
	local function flush()
		if #cursyl > 0 then
			if nucleus or #syllables < 1 then
				table.insert(syllables, cursyl)
			else
				syllables[#syllables] = syllables[#syllables] .. cursyl
			end
		end
	end

	for _, p in ipairs(parts) do
		local kind, part = p[1], p[2]
		if kind == "v" then
			if coda then
				-- the consonant group that ended the previous syllable is
				-- directly followed by a vowel: move it over as the onset
				local previous = syllables[#syllables]
				cursyl = cursyl .. mw.ustring.sub(previous, -coda)
				syllables[#syllables] = mw.ustring.sub(previous, 1, -coda - 1)
				coda = nil
			end
			if nucleus then
				-- two vowel groups in a row: close the first one
				table.insert(syllables, cursyl)
				cursyl = ""
			end
			nucleus = true
			coda = nil
			cursyl = cursyl .. part
		elseif kind == "c" then
			cursyl = cursyl .. part
			if nucleus then
				-- first consonant group after a vowel closes the syllable,
				-- but is remembered in case the next part is a vowel
				table.insert(syllables, cursyl)
				cursyl = ""
				nucleus = false
				coda = mw.ustring.len(part)
			else
				coda = nil
			end
		elseif kind == "b" then
			-- explicit syllable break
			flush()
			cursyl = ""
			nucleus = false
			coda = nil
		else
			-- unrecognized kind
			return nil
		end
	end

	flush()
	return syllables
end

-- Character class of the IPA vowel symbols searched for by generate_rhyme.
local ipavowel = "[aɛiɨɔu]"

-- Derives a rhyme from an IPA transcription.
-- The rhyme is the text from the second-to-last vowel symbol (or from the
-- only one, if there is just one) to the end, with "ˈ", "ˌ" and "." removed.
-- Returns nil when the transcription has no vowel symbol, or when the
-- resulting text contains a space.
function export.generate_rhyme(ipa)
	-- only the two most recent vowel positions are needed
	local before_last, last
	local from = 1
	repeat
		local hit = mw.ustring.find(ipa, ipavowel, from)
		if hit then
			before_last, last = last, hit
			from = hit + 1
		end
	until not hit

	if not last then
		return nil
	end

	local vend = before_last or last
	local snippet = mw.ustring.sub(ipa, vend)
	snippet = mw.ustring.gsub(snippet, "[ˈˌ.]", "")
	if mw.ustring.find(snippet, " ") then
		return nil -- copout, something must be wrong
	end
	return snippet
end

-- Template entry point. Reads the parent frame's arguments and returns the
-- wikitext of the pronunciation section: IPA line(s), audio lines, rhymes,
-- syllabification and homophones, followed by the formatted categories.
--   frame  the Scribunto frame object of the #invoke call
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { list = true },
		["ipa"] = { list = true, default = nil, allow_holes = true },
		["qual"] = { list = true, allow_holes = true },
		["n"] = { list = true, allow_holes = true },
		["h"] = { list = true, allow_holes = true }, ["hyphen"] = {},
		["r"] = { list = true, allow_holes = true }, ["rhymes"] = {},
		["a"] = { list = true, default = nil }, ["audio"] = {},
		["ac"] = { list = true, default = nil }, ["caption"] = {},
		["hh"] = { default = "" }, ["homophones"] = {},
		["q"] = { list = true, default = nil, allow_holes = true },
		["hp"] = { list = true, default = nil, allow_holes = true },
		["rp"] = { list = true, default = nil, allow_holes = true },
		["hhp"] = { list = true, default = nil, allow_holes = true },
		["nohyphen"] = { type = "boolean", default = false },
		["norhymes"] = { type = "boolean", default = false },
		["fs"] = { type = "boolean" }, ["fixstress"] = {},
		["title"] = { default = nil }, -- for debugging or demonstration only
	})

	local words, transcriptions, transcriptions_raw
	local lines = {}
	local categories = {}

	-- the respellings default to the page title when none are given
	local actual = args["title"] or mw.title.getCurrentTitle().text
	if next(args[1]) ~= nil then
		words = args[1]
	else
		words = { actual }
	end
	local multiword = mw.ustring.find(words[1], " ")

	local hyphenations = args["h"]
	local rhymes = args["r"]
	local ipa = args["ipa"]
	if #ipa < 1 then
		ipa = nil
	end
	local qualifiers = args["q"]
	if not qualifiers or qualifiers.maxindex < 1 then
		qualifiers = args["qual"]
	end
	local hyphlabels = args["hp"]
	local rhymlabels = args["rp"]
	local nohyphen = args["nohyphen"]
	local norhymes = args["norhymes"]
	local fixstress = args["fs"]
	if args["fixstress"] then
		fixstress = args["fixstress"]
	end
	local homophones = mw.text.split(args["hh"], ",")
	local homophonelabels = args["hhp"]
	if #homophones == 1 and homophones[1] == "" then
		homophones = {}
	end
	local audio = {}
	local audios = args["a"]
	local captions = args["ac"]

	local word_suffix = 0
	if not ipa and #words == 1 then
		-- 0 - normal word
		-- 1 - past tense verb stressed antepenultimately
		-- 2 - Latin borrowing stressed antepenultimately
		word_suffix = get_word_suffix(words[1])
	end
	-- The two-variant stress display is kept only when fixstress is truthy,
	-- or when fixstress was not given at all and the word has a past-tense
	-- ending; in every other case the word is treated as a normal word.
	if not (fixstress or (fixstress == nil and word_suffix == 1)) then
		word_suffix = 0
	end

	-- long-named aliases override the first item of the matching list
	if args["hyphen"] then
		hyphenations[1] = args["hyphen"]
	end
	if args["rhymes"] then
		rhymes[1] = args["rhymes"]
	end
	if args["homophones"] then
		homophones = mw.text.split(args["homophones"], ",")
	end
	if args["audio"] then
		audios[1] = args["audio"]
	end
	if args["caption"] then
		captions[1] = args["caption"]
	end

	-- automatic hyphenation is only attempted when every respelling still
	-- matches the page title closely enough
	local respelling_ok = true
	for _, w in ipairs(words) do
		if not is_respelling_close_enough(w, actual) then
			respelling_ok = false
			break
		end
	end

	for i, audiofile in ipairs(audios) do
		if audiofile then
			table.insert(audio, { file = audiofile, caption = captions[i] or "Audio" })
		end
	end
	if not next(audio) then
		table.insert(categories, "Requests for audio pronunciation in Polish entries")
	end

	-- a lone "-" switches the corresponding line off
	if #hyphenations == 1 and hyphenations[1] == "-" then
		nohyphen = true
	end
	if #rhymes == 1 and rhymes[1] == "-" then
		norhymes = true
	end

	if word_suffix == 0 then
		transcriptions = {}
		transcriptions_raw = {}
		if ipa then
			-- explicit IPA given: use it verbatim
			for i = 1, #ipa do
				local qual = qualifiers[i]
				table.insert(transcriptions, {
					pron = ipa[i],
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		else
			-- generate IPA from each respelling
			for i = 1, #words do
				local qual = qualifiers[i]
				local ipaconv = m_pl_IPA.convert_to_IPA(words[i])
				table.insert(transcriptions_raw, ipaconv)
				table.insert(transcriptions, {
					pron = "/" .. ipaconv .. "/",
					qualifiers = qual and { qual } or nil,
					note = args.n[i]
				})
			end
		end
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
	else
		-- Two lines are shown: the generated transcription with the stress
		-- mark moved to the third-from-last syllable, then the transcription
		-- exactly as generated.
		transcriptions = {}
		local transcriptions2 = {}
		transcriptions_raw = {}
		local qualifier1, qualifier2
		if word_suffix == 1 then
			qualifier1 = { "prescriptive standard; rarely used" }
			qualifier2 = { "colloquial; overall more common" }
		elseif word_suffix == 2 then
			qualifier1 = { "standard" }
			qualifier2 = { "colloquial; common in casual speech" }
		end

		local ipaconv = m_pl_IPA.convert_to_IPA(words[1])
		table.insert(transcriptions_raw, ipaconv)

		-- split at every stress mark / syllable dot, then re-join with the
		-- stress mark placed before the third-from-last piece
		local ipaconv_syllables = mw.text.split(ipaconv, "([ˈ.])")
		for j, syl in ipairs(ipaconv_syllables) do
			if j == (#ipaconv_syllables - 2) then
				ipaconv_syllables[j] = "ˈ" .. syl
			elseif j ~= 1 then
				ipaconv_syllables[j] = "." .. syl
			end
		end
		local ipaconv_fixed_stress = table.concat(ipaconv_syllables)

		table.insert(transcriptions, {
			pron = "/" .. ipaconv_fixed_stress .. "/",
			qualifiers = qualifier1,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions })
		table.insert(transcriptions2, {
			pron = "/" .. ipaconv .. "/",
			qualifiers = qualifier2,
		})
		table.insert(lines, "* " .. m_IPA.format_IPA_full { lang = lang, items = transcriptions2 })
	end

	for _, a in ipairs(audio) do
		table.insert(lines, "* " .. frame:expandTemplate{title = "audio", args = {langcode, a["file"], a["caption"]}})
	end

	-- After this step every item of `hyphenations` is a table of syllables.
	if not ipa and #hyphenations < 1 and respelling_ok and not multiword then
		local autohyph = export.generate_hyphenation(words[1], actual)
		if autohyph then
			table.insert(hyphenations, autohyph)
		end
	elseif #hyphenations >= 1 then
		-- manual hyphenations are given as dot-separated strings
		local newhyphenations = {}
		for i, h in ipairs(hyphenations) do
			local t = {}
			for x in mw.text.gsplit(h, "[.]") do
				table.insert(t, x)
			end
			newhyphenations[i] = t
		end
		hyphenations = newhyphenations
	end

	if not norhymes then
		if not ipa and #rhymes < 1 and #transcriptions_raw > 0 then
			local autorhyme = export.generate_rhyme(transcriptions_raw[1])
			if autorhyme then
				table.insert(rhymes, autorhyme)
			end
		end
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}
			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end
			table.insert(new_rhymes, current_list)
			table.insert(new_labels, last_label)
			rhymes = new_rhymes
			rhymlabels = new_labels
		end
		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylkeys = {}
				local sylcounts = {}
				-- get all possible syllable counts from syllabifications
				for _, h in ipairs(hyphenations) do
					local hl = #h
					if hl > 0 and not sylkeys[hl] then
						table.insert(sylcounts, hl)
						sylkeys[hl] = true
					end
				end
				local rhymeobjs = {}
				for _, rhyme in ipairs(r) do
					table.insert(rhymeobjs, { rhyme = rhyme })
				end
				table.insert(lines, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if not nohyphen then
		if #transcriptions > 0 and #hyphenations > 0 then
			-- flag entries whose first hyphenation disagrees with the count
			-- returned by Module:syllables for the first transcription
			local syl_IPA = require("Module:syllables").getVowels(transcriptions[1].pron, lang)
			local syl_hyph = #hyphenations[1]
			if syl_IPA and syl_IPA ~= syl_hyph then
				table.insert(categories, "pl-pronunciation syllable count mismatch")
			end
		end
		if not actual:find("[ %.]") and #hyphenations < 1 then
			table.insert(categories, "pl-pronunciation without hyphenation")
		end
		for i, h in ipairs(hyphenations) do
			local label = ""
			if hyphlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(hyphlabels[i])
			end
			table.insert(lines, "* Syllabification: " .. require("Module:links").full_link({lang = lang, alt = table.concat(h, "‧"), tr = "-"}) .. label)
		end
	end

	if #homophones > 0 then
		local homophone_param = { langcode }
		for i, h in ipairs(homophones) do
			table.insert(homophone_param, h)
			if homophonelabels[i] then
				homophone_param["q" .. i] = homophonelabels[i]
			end
		end
		table.insert(lines, "* " .. frame:expandTemplate{title = "homophones", args = homophone_param})
	end

	return table.concat(lines, "\n") .. require("Module:utilities/format_categories")(categories, lang)
end

return export