-- Module:tl-pron/sandbox

-- Based on Module:es-pronunc by Benwing2. -- Adaptation by TagaSanPedroAko.

local export = {}

local m_IPA = require("Module:IPA") local m_table = require("Module:table") local put_module = "Module:parse utilities"

local lang = require("Module:languages").getByCode("tl")

local u = mw.ustring.char local rfind = mw.ustring.find local rsubn = mw.ustring.gsub local rsplit = mw.text.split local ulower = mw.ustring.lower local uupper = mw.ustring.upper local usub = mw.ustring.sub local ulen = mw.ustring.len local unfd = mw.ustring.toNFD local unfc = mw.ustring.toNFC

-- Combining diacritics, in the form they take after NFD decomposition.
local AC = u(0x0301) -- acute
local GR = u(0x0300) -- grave
local CFLEX = u(0x0302) -- circumflex
local TILDE = u(0x0303) -- tilde
local DIA = u(0x0308) -- diaeresis
local MACRON = u(0x0304) -- macron

-- Private-use style placeholders standing in for user-specified syllable breaks.
local SYLDIV = u(0xFFF0)
local SYLDIV2 = u(0xFFF1)

-- Vowel letters and the character class matching one of them.
local vowel = "aeiouAEIOU"
local V = string.format("[%s]", vowel)

-- Accent marks recognised on vowels, and the subset treated as stress marks.
local accent = table.concat({AC, GR, CFLEX, MACRON})
local accent_c = string.format("[%s]", accent)
local stress_c = string.format("[%s%s]", AC, GR)

-- IPA primary/secondary stress marks.
local ipa_stress = "ˈˌ"
local ipa_stress_c = string.format("[%s]", ipa_stress)

-- Syllable separators; the hyphen is included for syllabifying from spelling.
local sylsep = "%-." .. SYLDIV
local sylsep_c = string.format("[%s]", sylsep)

-- Word separators and the combined separator sets.
local wordsep = "# "
local separator_not_wordsep = table.concat({accent, ipa_stress, sylsep})
local separator = separator_not_wordsep .. wordsep
local separator_c = string.format("[%s]", separator)

-- Anything that is neither a vowel nor a separator counts as a consonant.
local C = string.format("[^%s%s]", vowel, separator)
-- Consonants or word separator.
local C_OR_WORDSEP = string.format("[^%s%s]", vowel, separator_not_wordsep)

-- Set of words (and affixes) that are left unstressed by export.IPA unless they carry an explicit accent.
local unstressed_words = m_table.listToSet({
	-- Case markers. "nang" here is for written "ng", but can also work with nang as in the contraction na'ng and
	-- the conjunction "nang".
	"ang", "sa", "nang", "si", "ni", "kay",
	-- Letter names (abakada and modern Filipino).
	"a", "ar", "ba", "bi", "da", "di", "e", "ef", "eks", "dyi", "jey", "key", "em", "ma", "en", "pi", "ra", "es",
	"ta", "ti", "u", "wa", "way", "ya", "yu", "zey", "zi",
	-- Single-syllable personal pronouns.
	"ko", "mo", "ka",
	-- Linker, also temporal particle.
	"na",
	-- Particles.
	"daw", "ga", "ha", "pa",
	-- Negation words.
	"di7", "de7",
	-- Single-syllable existential.
	"may",
	-- Subordinating conjunctions.
	"pag", "kung",
	-- Coordinating conjunctions.
	"at", "o",
	-- Interjections.
	"hay",
	-- In some Spanish-derived terms and names.
	"de", "del", "el", "la", "las", "los",
	-- Affixes.
	"-an", "-en", "-han", "hi-", "-hin", "hin-", "hing-", "-in", "mag-", "mang-", "pa-", "pag-", "pang-",
})

-- Like rsubn, but yields only the substituted string (the substitution count is dropped).
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end

-- Like rsubn, but the second return value is a boolean telling whether any substitution was made.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Keep applying the same substitution until the string stops changing, then return it.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ] --

-- NFD-decompose `text`, then put ñ and ü (either case) back into their precomposed form so that they
-- are handled as single letters.
local function decompose(text)
	local recompose = {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	}
	local decomposed = unfd(text)
	-- Any character followed by a tilde or diaeresis is looked up in the table; sequences without an
	-- entry are left untouched.
	return rsub(decomposed, ".[" .. TILDE .. DIA .. "]", recompose)
end

-- Split `term` on commas. When a comma followed by whitespace is present, defer to the splitter in
-- the parse-utilities module; otherwise a plain split is enough.
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(put_module).split_on_comma(term)
end

-- Strip HTML tags from the formatted text and resolve wikilinks, since those extra characters don't
-- contribute to the displayed length.
local function convert_to_raw_text(text)
	local stripped = rsub(text, "<.->", "")
	if not stripped:find("%[%[") then
		return stripped
	end
	return require("Module:links").remove_links(stripped)
end

-- Approximate displayed length of `text`, in characters, after markup has been removed.
local function textual_len(text)
	local raw = convert_to_raw_text(text)
	return ulen(raw)
end

-- Main syllable-division algorithm. Can be called either directly on spelling (when hyphenating) or after
-- non-trivial processing of respelling in the direction of pronunciation (when generating pronunciation).
-- `is_spelling` is accepted for the callers' benefit but is not consulted by the body.
local function syllabify_from_spelling_or_pronun(text, is_spelling)
	-- A vowel together with any accent marks attached to it.
	local accented_vowel = V .. accent_c .. "*"

	-- Part 1: in a consonant cluster between vowels, break before the last consonant.
	text = rsub_repeatedly(text, "(" .. accented_vowel .. C .. "+)(" .. C .. V .. ")", "%1.%2")
	-- NOTE: When run on pronun, we have already eliminated c and v, but not when run on spelling.
	-- When run on pronun, don't include r, which at this point represents the trill.
	-- Keep ch and sh together (the pattern only covers c/s before h).
	text = rsub(text, "([cs])%.h", ".%1h")
	-- Keep ll and rr together.
	text = rsub(text, "([lr])%.%1", ".%1%1")
	-- Keep ts together where pronounced like "ch" (tsika, tsaleko, Tsina) or at end of word (e.g. gets,
	-- tropatuts). This can be overridden when it's actually pronounced separately (e.g. tatsulok).
	text = rsub(text, "t%.s", ".ts")

	-- Part 2: break between adjacent vowels (saan, leeg, giit, poot).
	text = rsub_repeatedly(text, "(" .. accented_vowel .. ")([aeiou])", "%1.%2")
	text = rsub_repeatedly(text, "(" .. accented_vowel .. ")(" .. V .. stress_c .. ")", "%1.%2")

	return text
end

-- Syllabify a spelled term. User-supplied "." and "7" are hidden behind placeholders while the generic
-- algorithm runs, and prevocalic y/w are swapped for placeholder characters so they are treated as
-- consonants (the consonant classes treat every unknown character as a consonant).
local function syllabify_from_spelling(text)
	text = decompose(text)

	-- Placeholders start at U+FFF4; U+FFF0 and U+FFF1 are SYLDIV and SYLDIV2, and U+FFF2/U+FFF3 are
	-- left unassigned here.
	local TEMP_Y_CONS = u(0xFFF4)
	local TEMP_W_CONS = u(0xFFF5)
	local TEMP_QU = u(0xFFF6)
	local TEMP_QU_CAPS = u(0xFFF7)
	local TEMP_GU = u(0xFFF8)
	local TEMP_GU_CAPS = u(0xFFF9)

	-- Change user-specified divisions into placeholders so we don't shuffle them around when dividing
	-- into syllables.
	text = text:gsub("%.", SYLDIV)
	text = text:gsub("7", SYLDIV2)
	text = rsub(text, "y(" .. V .. ")", TEMP_Y_CONS .. "%1")
	text = rsub(text, "w(" .. V .. ")", TEMP_W_CONS .. "%1")

	text = syllabify_from_spelling_or_pronun(text, "is spelling")

	-- Map every placeholder back to the text it stands for.
	local restorations = {
		{SYLDIV, "."},
		{SYLDIV2, "7"},
		{TEMP_Y_CONS, "y"},
		{TEMP_W_CONS, "w"},
		{TEMP_QU, "qu"},
		{TEMP_QU_CAPS, "Qu"},
		{TEMP_GU, "gu"},
		{TEMP_GU_CAPS, "Gu"},
	}
	for _, pair in ipairs(restorations) do
		-- Assigning to a single variable discards gsub's substitution count.
		text = text:gsub(pair[1], pair[2])
	end
	return text
end

-- Generate the IPA of a given respelling, where a respelling is the representation of the pronunciation of a given -- Tagalog term using Tagalog spelling conventions -- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]

-- Generate the IPA of a respelling.
--
-- `text` is the respelling (defaults to the current page title when nil); `phonetic`, when true, requests the
-- phonetic rather than the phonemic transcription. Returns a table `{text = IPA_STRING}`.
--
-- ĉ, ć, ɟ and ĵ are internal stand-ins that are converted to real IPA at the end.
function export.IPA(text, phonetic)
	-- getCurrentTitle is a function and must be called before its `text` field is read.
	text = ulower(text or mw.title.getCurrentTitle().text)
	-- decompose everything but ñ and ü
	text = decompose(text)

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	-- Make prefixes unstressed unless they have an explicit stress marker; also make certain
	-- monosyllabic words (e.g. ang, ng, si, na, etc.) without stress marks be unstressed.
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
			-- add macron to the last vowel, not the first one
			words[i] = rsub(word, "^(.*" .. V .. ")", "%1" .. MACRON)
		end
	end
	text = table.concat(words, " ")
	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- Add glottal stop for words starting with vowel and double vowel
	text = rsub(text, "([#])([aeiou])", "%1ʔ%2")
	text = rsub(text, "([aeiou])([aeiou])", "%1ʔ%2")

	-- determining whether "y" or "w" is a consonant or a vowel
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text,"y([ˈˌ]?)([bdɡjklmnprstw])","i%1%2")
	text = rsub(text, "y#", "i")
	text = rsub(text, "w(" .. V .. ")","w%1")
	text = rsub(text,"w([ˈˌ]?)([bdɡjklmnprstw])","u%1%2")
	text = rsub(text, "w#","u")

	-- handle certain combinations; ch ng and sh handling needs to go first
	text = rsub(text, "ch", "ts") -- not the real sound
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "sh", "ʃ")

	-- x
	text = rsub(text, "x", "ks")

	-- c, gü/gu+e or i, q
	text = rsub(text, "c([ie])", "s%1")
	text = rsub(text, "gü([ie])", "ɡw%1")
	text = rsub(text, "gu([ie])", "ɡ%1")
	text = rsub(text, "qu([ie])", "k%1")
	text = rsub(text, "ü", "u")

	-- alphabet-to-phoneme
	text = rsub(text, "[cfgjñqrvz7]",
	-- ["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{ ["c"] = "k", ["f"] = "p", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "ɲ", ["q"] = "k", ["r"] = "ɾ", ["v"] = "b", ["z"] = "s", ["7"] = "ʔ"})

	-- trill in rr
	text = rsub(text, "ɾɾ", "r")

	-- ts
	text = rsub(text, "ts", "ĉ") -- not the real sound

	-- syllable division
	text = syllabify_from_spelling_or_pronun(text, false)

	local accent_to_stress_mark = { [AC] = "ˈ", [CFLEX] = "ˈ", [GR] = " ", [MACRON] = "" }

	-- Stress one word in place. `syllables` is the word split on "."; it is modified directly.
	local function accent_word(word, syllables, last_word)
		-- If any accent exists in the word (including macron indicating an unaccented word), put the stress
		-- mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default stress rule.
		local stress_syllable = 0
		local last_accent = ""
		if rfind(word, accent_c) then
			for i = 1, #syllables do
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
					function(pre, accent, post)
						last_accent = accent
						if last_accent == AC then
							stress_syllable = i
						end
						return accent_to_stress_mark[accent] .. pre .. post
					end
				)
			end
			if last_accent == CFLEX then
				if last_word then
					syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
				end
				if stress_syllable == 0 then
					syllables[#syllables] = "ˈ" .. syllables[#syllables]
				end
			elseif last_accent == GR then
				if last_word then
					syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
				end
				-- A one-syllable word has no penult; without this guard syllables[0] would be nil and the
				-- concatenation would raise an error.
				if #syllables > 1 and stress_syllable ~= #syllables-1 then
					syllables[#syllables-1] = "ˈ" .. syllables[#syllables-1]
				end
			end
			-- (A former branch for a final-syllable acute only assigned an undeclared global that nothing
			-- read; it has been dropped.)
		else
			-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
			if #syllables > 1 and rfind(word, "[^aeiouʔbcĉdfɡghjɟĵklmnñŋpqrɾstvwxz#]#") or #syllables == 1 and rfind(word, "[aeiou]")
			then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables >= 2 then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			end
		end
	end

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- accentuation
		local syllables = rsplit(word, "%.")

		accent_word(word, syllables, j == #words)

		-- Reconstruct the word.
		words[j] = table.concat(syllables, ".")
		-- suppress syllable mark before IPA stress indicator
		words[j] = rsub(words[j], "%.(" .. ipa_stress_c .. ")", "%1")
		-- make all primary stresses but the last one be secondary
		words[j] = rsub_repeatedly(words[j], "ˈ(.+)ˈ", "ˌ%1ˈ")
	end

	text = table.concat(words, " ")

	-- remove "ɟ" and "w" inserted on vowel pair starting with "i" and "u"
	text = rsub(text,"([i])([ˈˌ]?)ɟ([aeou])","%1%2%3")
	text = rsub(text,"([u])([ˈˌ]?)w([aei])","%1%2%3")

	-- add temporary macron for /a/, /i/ and /u/ in stressed syllables so they don't get replaced by unstressed form
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([a])([ʔbdfɡiklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ā%6%7")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([i])([ʔbdfɡklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ī%6%7")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([u])([ʔbdfɡiklmnŋpɾst]?)([bdɡklmnpɾst]?)","%1%2%3%4ū%6%7")

	-- Corrections for diphthongs
	text = rsub(text,"([aā])i","%1j") -- ay
	text = rsub(text,"([aā])u","%1w") -- aw
	text = rsub(text,"([e])i","%1j") -- ey
	text = rsub(text,"([iī])u","%1w") -- iw
	text = rsub(text,"([o])i","%1j") -- oy
	text = rsub(text,"([o])u","%1w") -- ow
	text = rsub(text,"([uū])i","%1j") -- uy (mostly in proper nouns)

	-- phonetic transcription
	if phonetic then
		-- Turn phonemic diphthongs to phonetic diphthongs
		text = rsub(text, "([aāeouū])j", "%1ɪ̯")
		text = rsub(text, "([aāeiīo])w", "%1ʊ̯")

		-- change a, i, u to unstressed equivalents (certain forms to restore)
		text = rsub(text,"a","ɐ")
		text = rsub(text,"i","ɪ")
		text = rsub(text,"u","ʊ")

		-- Combine consonants (except H) followed by I/U and certain stressed vowels
		text = rsub(text,"([bkdɡlmnpɾst])ɪ([ˈˌ])([āeoū])","%2%1ɟ%3")
		text = rsub(text,"([bkdɡlmnpɾst])ʊ([ˈˌ])([āeīo])","%2%1w%3")

		text = rsub(text,"([nŋ])([ˈˌ# .]*[bfpm])","m%2")
		text = rsub(text,"([mŋ])([ˈˌ# .]*[dlst])","n%2")
		text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([#]?)([ ]?)([ˈˌ#.])([k])([ɐāeɪīoʊū])","%1%2%3%4x%6") -- /k/ between vowels
		text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([ˈˌ.])ɡ([ɐāeɪīoʊū])","%1%2ɣ%3") -- /ɡ/ between vowels
		text = rsub(text,"d([ˈˌ.])ɟ","%1ĵ") -- /d/ before /j/
		text = rsub(text,"n([ˈˌ.])k","ŋ%1k") -- /n/ before /k/ (some proper nouns)
		text = rsub(text,"n([ˈˌ.])ɡ","ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
		text = rsub(text,"n([ˈˌ.])h","ŋ%1h") -- /n/ before /h/ (some proper nouns)
		text = rsub(text,"n([ˈˌ.])m","m%1m") -- /n/ before /m/
		text = rsub(text,"n([ˈˌ.])ɟ","%1ɲ") -- /n/ before /j/
		text = rsub(text,"s([ˈˌ.])ɟ","%1ʃ") -- /s/ before /j/
		text = rsub(text,"t([ˈˌ.])ɟ","%1ĉ") -- /t/ before /j/
		text = rsub(text,"t([ˈˌ.])s","%1ć") -- /t/ before /s/
		text = rsub(text,"([ˈˌ.])d([ɟj])([ɐāeɪīoʊū])","%1ĵ%3") -- /dj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])n([ɟj])([ɐāeɪīoʊū])","%1ɲ%3") -- /nj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])s([ɟj])([ɐāeɪīoʊū])","%1ʃ%3") -- /sj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])t([ɟj])([ɐāeɪīoʊū])","%1ĉ%3") -- /tj/ before any vowel following stress
		text = rsub(text,"([oʊ])([m])([ˈ]?)([pb])","u%2%3%4") -- /o/ and /ʊ/ before /mb/ or /mp/

		-- final fix for phonetic diphthongs
		text = rsub(text,"([ɐ])ɪ̯","aɪ̯") -- ay
		text = rsub(text,"([ɐ])ʊ̯","aʊ̯") -- aw
		text = rsub(text,"([ɪ])ʊ̯","iʊ̯") -- iw
	end

	-- delete temporary macron in /a/, /i/ and /u/ (runs for both transcriptions, so the phonetic branch
	-- does not need its own copy of this step)
	text = rsub(text,"ā","a")
	text = rsub(text,"ī","i")
	text = rsub(text,"ū","u")

	-- convert fake symbols to real ones
	local final_conversions = {
		["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ɟ"] = "j", -- fake "y" to real "y"
		["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
	}

	local final_conversions_phonetic = {
		["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ć"] = "t͡s", -- fake "t.s" to real "t.s"
		["ɟ"] = "j", -- fake "y" to real "y"
		["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
	}

	if phonetic then
		text = rsub(text, "[ĉćɟĵ]", final_conversions_phonetic)
	end
	text = rsub(text, "[ĉɟĵ]", final_conversions)

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#([.]?)", "")
	-- resuppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")

	text = unfc(text)

	local ret = {
		text = text,
	}
	return ret
end

-- For bot usage; -- where -- --  SPELLING is the word or respelling to generate pronunciation for;

-- Entry point for bot usage: returns just the IPA string for the respelling given in |1=, phonetic
-- when |phonetic= is true and phonemic otherwise.
function export.IPA_string(frame)
	local param_spec = {
		[1] = {},
		["phonetic"] = {type = "boolean"},
	}
	local parsed = require("Module:parameters").process(frame.args, param_spec)
	return export.IPA(parsed[1], parsed.phonetic).text
end

-- The PRONUN table has the following form for the full phonemic/phonetic pronunciation:
--
-- {
--   phonemic = "PHONEMIC",
--   phonetic = "PHONETIC",
--   differences = {FLAG = BOOLEAN, FLAG = BOOLEAN, ...},
-- }
--
-- Here, `phonemic` is the phonemic pronunciation (displayed as /.../) and `phonetic` is the phonetic pronunciation
-- (displayed as [...]).
--
-- The PRONUN table has the following form for the rhyme pronunciation:
--
-- {
--   rhyme = "RHYME_PRONUN",
--   num_syl = {NUM, NUM, ...},
--   qualifiers = nil or {QUALIFIER, QUALIFIER, ...},
--   differences = {FLAG = BOOLEAN, FLAG = BOOLEAN, ...},
-- }
--
-- Here, `rhyme` is a phonemic pronunciation such as "an" for [[saan]], and `num_syl` is a list of the possible
-- numbers of syllables for the term(s) that have this rhyme (e.g. {2} for saan, {3} for paraan and {4} for
-- makiraan). `num_syl` is used to generate syllable-count rhyme categories in addition to the plain rhyme
-- category (the category names were lost from this comment). `num_syl` may be nil to suppress the generation
-- of syllable-count categories; this is typically the case with multiword terms. `qualifiers`, if non-nil,
-- comes from the user using the inline-modifier syntax (the example was lost from this comment).
--

-- Build the pronunciation object for the terms in `args.terms` and format it for display.
--
-- Returns a table with `pronun` (one entry per term, holding `phonemic`/`phonetic` plus any qualifiers),
-- `text` (the formatted line) and `text_len` (its approximate displayed length).
--
-- NOTE(review): the block as found was structurally broken -- the loop header over the pronunciations was
-- missing (leaving an unbalanced `end`), `is_first` was never defined, and the formatter was never invoked
-- even though export.show reads `ret.text`. The pieces marked below were reconstructed from what the rest of
-- this file establishes (export.show sets `args.terms`, parse_respelling yields `raw`/`raw_phonemic`/
-- `raw_phonetic`/`term`, export.IPA returns `{text = ...}`); please confirm against the intended design.
local function generate_pronun(args)
	local ret = {
		pronun = {},
	}

	-- NOTE(review): reconstructed -- compute one pronunciation entry per term.
	for i, term in ipairs(args.terms or {}) do
		local phonemic, phonetic
		if term.raw then
			phonemic = term.raw_phonemic
			phonetic = term.raw_phonetic
		else
			phonemic = export.IPA(term.term, false).text
			phonetic = export.IPA(term.term, true).text
		end
		ret.pronun[i] = {
			raw = term.raw,
			phonemic = phonemic,
			phonetic = phonetic,
			refs = term.refs,
			q = term.q,
			qq = term.qq,
			a = term.a,
			aa = term.aa,
		}
	end

	-- Format all pronunciations on one bulleted line. `tag`, if given, is added as a left qualifier of the
	-- first pronunciation; `is_first` controls whether `args.pre`/`args.post` are attached.
	local function format_pron(tag, is_first)
		local pronunciations = {}
		local formatted_pronuns = {}

		local function ins(formatted_part)
			table.insert(formatted_pronuns, formatted_part)
		end

		-- Loop through each pronunciation. For each one, add the phonemic and phonetic versions to
		-- `pronunciations`, for formatting by Module:IPA, and also create an approximation of the formatted
		-- version so that the displayed length can be estimated.
		-- NOTE(review): loop header reconstructed; iterating over `ret.pronun` is an assumption.
		for j, pronun in ipairs(ret.pronun) do
			-- Add tag to left qualifiers if first one
			-- FIXME: Consider using accent qualifier for the tag instead.
			local qs = pronun.q
			if j == 1 and tag then
				if qs then
					qs = m_table.deepcopy(qs)
					table.insert(qs, tag)
				else
					qs = {tag}
				end
			end

			local first_pronun = #pronunciations + 1

			if not pronun.phonemic and not pronun.phonetic then
				error("Internal error: Saw neither phonemic nor phonetic pronunciation")
			end

			if pronun.phonemic then -- missing if 'raw:[...]' given
				-- don't display syllable division markers in phonemic
				local slash_pron = "/" .. pronun.phonemic:gsub("%.", "") .. "/"
				table.insert(pronunciations, {
					pron = slash_pron,
				})
				ins(slash_pron)
			end

			if pronun.phonetic then -- missing if 'raw:/.../' given
				local bracket_pron = "[" .. pronun.phonetic .. "]"
				table.insert(pronunciations, {
					pron = bracket_pron,
				})
				ins(bracket_pron)
			end

			local last_pronun = #pronunciations

			if qs then
				pronunciations[first_pronun].q = qs
			end
			if pronun.a then
				pronunciations[first_pronun].a = pronun.a
			end
			if j > 1 then
				pronunciations[first_pronun].separator = ", "
				ins(", ")
			end
			if pronun.qq then
				pronunciations[last_pronun].qq = pronun.qq
			end
			if pronun.aa then
				pronunciations[last_pronun].aa = pronun.aa
			end
			if qs or pronun.a or pronun.qq or pronun.aa then
				local data = {
					q = qs,
					a = pronun.a,
					qq = pronun.qq,
					aa = pronun.aa
				}
				-- Note: This inserts the actual formatted qualifier text, including HTML and such, but the
				-- later call to textual_len removes all HTML and reduces links.
				ins(require("Module:pron qualifier").format_qualifiers(data, ""))
			end

			if pronun.refs then
				pronunciations[last_pronun].refs = pronun.refs
				-- Approximate the reference using a footnote notation. This will be slightly inaccurate if
				-- there are more than nine references but that is rare.
				ins(string.rep("[1]", #pronun.refs))
			end
			if first_pronun ~= last_pronun then
				pronunciations[last_pronun].separator = " "
				ins(" ")
			end
		end

		-- `args.bullets` defaults to 1 in export.show; fall back to 1 for callers that omit it.
		local bullet = string.rep("*", args.bullets or 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent
		-- without HTML and wiki markup in `formatted_for_len`, so we can compute the approximate textual
		-- length for use in sizing the toggle box with the "more" button on the right.
		local pre = is_first and args.pre and args.pre .. " " or ""
		local post = is_first and args.post and " " .. args.post or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations, separator = "" } .. post
		local formatted_for_len = bullet .. pre .. "IPA(key): " .. table.concat(formatted_pronuns) .. post
		return formatted, textual_len(formatted_for_len)
	end

	-- NOTE(review): reconstructed -- export.show returns `ret.text`, so the formatted line is stored here.
	ret.text, ret.text_len = format_pron(nil, true)

	return ret
end

-- Parse one respelling argument.
--
-- * "raw:/.../", "raw:[...]" or "raw:/.../ [...]" gives `{raw = true, raw_phonemic = ..., raw_phonetic = ...}`;
--   any other "raw:" payload is reported through `parse_err`.
-- * "+" stands for `pagename`.
-- * Anything else is returned as `{term = respelling}`.
local function parse_respelling(respelling, pagename, parse_err)
	local raw_respelling = respelling:match("^raw:(.*)$")

	if not raw_respelling then
		local term = respelling
		if term == "+" then
			term = pagename
		end
		return {term = term}
	end

	-- Try the combined form first, then phonemic-only, then phonetic-only.
	local raw_phonemic, raw_phonetic = raw_respelling:match("^/(.*)/ %[(.*)%]$")
	if not raw_phonemic then
		raw_phonemic = raw_respelling:match("^/(.*)/$")
	end
	if not raw_phonemic then
		raw_phonetic = raw_respelling:match("^%[(.*)%]$")
	end
	if not (raw_phonemic or raw_phonetic) then
		parse_err(("Unable to parse raw respelling '%s', should be one of /.../, [...] or /.../ [...]")
			:format(raw_respelling))
	end

	return {
		raw = true,
		raw_phonemic = raw_phonemic,
		raw_phonetic = raw_phonetic,
	}
end

-- External entry point for the pronunciation template.
-- NOTE(review): the template name was lost from the original comment.
--
-- Reads |1= (the respelling; defaults to the page title), |pre=, |post=, |ref= and |bullets= from the parent
-- frame and returns the formatted pronunciation line.
function export.show(frame)
	local params = {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["bullets"] = {type = "number", default = 1},
	}
	-- getParent and getCurrentTitle are functions; both must be called before indexing the result.
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local text = args[1] or mw.title.getCurrentTitle().text
	-- A single term object, in the {term = ...} shape that parse_respelling produces.
	args.terms = {{term = text}}
	local ret = generate_pronun(args)
	return ret.text
end

-- Count the syllables of a phonemic representation, which should contain syllable dividers but no hyphens.
-- (Maybe we should just count vowels instead.)
local function get_num_syl_from_phonemic(phonemic)
	-- IPA foot boundaries become plain word breaks.
	local parts = rsplit(rsub(phonemic, "|", " "), " +")
	for idx = 1, #parts do
		-- A stress mark between two characters is a syllable division; any other stress mark is dropped.
		local with_dots = rsub(parts[idx], "(.)[ˌˈ](.)", "%1.%2")
		parts[idx] = rsub(with_dots, "[ˌˈ]", "")
	end
	-- Words are separated by a syllable boundary too.
	local joined = table.concat(parts, ".")
	local dots_only = rsub(joined, "[^.]", "")
	return ulen(dots_only) + 1
end

-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and
-- removing syllable boundary markers. Returns exactly one value, the rhyme string.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	local rhyme = rsub(rsub(phonemic, ".*[ˌˈ]", ""), "^[^" .. vowel .. "]*", "")
	rhyme = rhyme:gsub("%.", "")
	-- Assigning to a single variable discards gsub's substitution count; returning the gsub call directly
	-- would leak it as a second return value (which breaks calls such as table.insert(t, f(x))).
	rhyme = rhyme:gsub("t͡ʃ", "tʃ")
	return rhyme
end

-- Break a syllabified spelling into its syllables at each ".".
local function split_syllabified_spelling(spelling)
	local syllables = rsplit(spelling, "%.")
	return syllables
end

-- "Align" a syllabification with the original spelling, character by character. Extra syllable markers (.)
-- in the syllabification are kept, extra acute/grave/circumflex marks are dropped, and any other mismatch
-- (including left-over characters on either side) makes the alignment fail with nil. On success the
-- NFC-normalised aligned string is returned.
local function align_syllabification_to_spelling(syllab, spelling)
	local droppable = {[AC] = true, [GR] = true, [CFLEX] = true}
	local syl = rsplit(decompose(syllab), "")
	local spl = rsplit(decompose(spelling), "")
	local n_syl, n_spl = #syl, #spl
	local out = {}
	local si, pi = 1, 1

	-- The loop only finishes once both sides are used up; running out of one side early falls into the
	-- mismatch branch below.
	while si <= n_syl or pi <= n_spl do
		local c = syl[si]
		if c == spl[pi] then
			out[#out + 1] = c
			si = si + 1
			pi = pi + 1
		elseif c == "." then
			out[#out + 1] = c
			si = si + 1
		elseif droppable[c] then
			-- extra accent mark: skip it
			si = si + 1
		else
			-- non-matching character
			return nil
		end
	end

	return unfc(table.concat(out))
end

-- Wrap a syllabified term in a hyphenation object: the term itself plus its list of syllables.
local function generate_hyph_obj(term)
	local obj = {}
	obj.syllabification = term
	obj.hyph = split_syllabified_spelling(term)
	return obj
end

-- Look for a vowel letter in `word`, which should already be decomposed. The result is whatever rfind
-- returns (truthy on a match, nil otherwise).
local function word_has_vowels(word)
	return rfind(word, V)
end

-- True when every space- or hyphen-separated word of `term` contains a vowel. Empty words are allowed;
-- they occur with prefixes and suffixes.
local function all_words_have_vowels(term)
	local parts = rsplit(decompose(term), "[ %-]")
	for idx = 1, #parts do
		local part = parts[idx]
		local acceptable = part == "" or word_has_vowels(part)
		if not acceptable then
			return false
		end
	end
	return true
end

-- Decide whether a rhyme can be derived from the respelling `term`. The result is false for a rejected
-- term and otherwise whatever word_has_vowels yields for the single word (truthy or nil).
local function should_generate_rhyme_from_respelling(term)
	local parts = rsplit(decompose(term), " +")
	-- no if multiple words
	if #parts ~= 1 then
		return false
	end
	local word = parts[1]
	-- no if word is composed of hyphenated parts (e.g. Asya-Pasipiko)
	if word:find(".%-.") then
		return false
	end
	-- no if word is a prefix
	if word:find("%-$") then
		return false
	end
	-- no if word is an unstressed suffix
	if word:find("^%-") and word:find(CFLEX) then
		return false
	end
	-- no if word has no vowels (e.g. a single letter); parentheses keep this to a single return value
	return (word_has_vowels(word))
end

-- Decide whether a rhyme can be derived from an IPA string: it must contain no whitespace and must
-- contain a vowel. Returns false when whitespace is present, otherwise the word_has_vowels result.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	-- parentheses keep this to a single return value
	return (word_has_vowels(decompose(ipa)))
end

-- Parse an argument that may carry inline modifiers in angle brackets (e.g. `term<q:...>`), returning a list
-- of objects.
--
-- * `arg`: the raw argument text.
-- * `put`: the parse-utilities module, or nil to have it loaded on demand.
-- * `parse_err`: function called with a message when the argument is malformed.
-- * `generate_obj`: function turning the bare term text into the object that modifiers are stored on.
-- * `param_mods`: table of extra recognised modifier prefixes; each entry may give `item_dest` (key to store
--   under instead of the prefix) and `convert` (function applied to the value).
-- * `no_split_on_comma`: if truthy, the argument is treated as a single item rather than a comma-separated list.
--
-- The prefixes q, qq, a and aa are always recognised and accumulate into lists on the object.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local retval = {}

	if arg:find("<") then
		if not put then
			put = require(put_module)
		end

		-- Sorted list of every prefix accepted here, for use in error messages.
		local function get_valid_prefixes()
			local valid_prefixes = {}
			for param_mod, _ in pairs(param_mods) do
				table.insert(valid_prefixes, param_mod)
			end
			table.insert(valid_prefixes, "q")
			table.insert(valid_prefixes, "qq")
			table.insert(valid_prefixes, "a")
			table.insert(valid_prefixes, "aa")
			table.sort(valid_prefixes)
			return valid_prefixes
		end

		local segments = put.parse_balanced_segment_run(arg, "<", ">")
		local comma_separated_groups = no_split_on_comma and {segments} or
			put.split_alternating_runs_on_comma(segments)
		for _, group in ipairs(comma_separated_groups) do
			local obj = generate_obj(group[1])
			-- Even positions hold the <...> modifiers; odd positions after the first must be empty.
			for j = 2, #group - 1, 2 do
				if group[j + 1] ~= "" then
					parse_err("Extraneous text '" .. group[j + 1] .. "' after modifier")
				end
				local modtext = group[j]:match("^<(.*)>$")
				if not modtext then
					parse_err("Internal error: Modifier '" .. group[j] .. "' isn't surrounded by angle brackets")
				end
				local prefix, val = modtext:match("^([a-z]+):(.*)$")
				if not prefix then
					local valid_prefixes = get_valid_prefixes()
					for i, valid_prefix in ipairs(valid_prefixes) do
						valid_prefixes[i] = "'" .. valid_prefix .. ":'"
					end
					parse_err("Modifier " .. group[j] .. " lacks a prefix, should begin with one of " ..
						m_table.serialCommaJoin(valid_prefixes))
				end
				if prefix == "q" or prefix == "qq" or prefix == "a" or prefix == "aa" then
					if not obj[prefix] then
						obj[prefix] = {}
					end
					table.insert(obj[prefix], val)
				elseif param_mods[prefix] then
					local key = param_mods[prefix].item_dest or prefix
					if obj[key] then
						parse_err("Modifier '" .. prefix .. "' specified more than once")
					end
					local convert = param_mods[prefix].convert
					if convert then
						obj[key] = convert(val)
					else
						obj[key] = val
					end
				else
					local valid_prefixes = get_valid_prefixes()
					for i, valid_prefix in ipairs(valid_prefixes) do
						valid_prefixes[i] = "'" .. valid_prefix .. "'"
					end
					parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. group[j]
						.. ", should be " .. m_table.serialCommaJoin(valid_prefixes))
				end
			end
			table.insert(retval, obj)
		end
	elseif no_split_on_comma then
		table.insert(retval, generate_obj(arg))
	else
		for _, term in ipairs(split_on_comma(arg)) do
			table.insert(retval, generate_obj(term))
		end
	end

	return retval
end

-- Parse a rhyme argument into a list of `{rhyme = ...}` objects. The `s` modifier gives a comma-separated
-- list of syllable counts, stored as numbers under `num_syl`.
local function parse_rhyme(arg, put, parse_err)
	-- Convert "2,3" into {2, 3}, reporting any non-numeric entry.
	local function to_syllable_counts(spec)
		local counts = rsplit(spec, ",")
		for idx, count in ipairs(counts) do
			if not count:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. count .. "' should be numeric")
			end
			counts[idx] = tonumber(count)
		end
		return counts
	end

	local function make_rhyme(term)
		return {rhyme = term}
	end

	local param_mods = {
		s = {
			item_dest = "num_syl",
			convert = to_syllable_counts,
		},
	}

	return parse_pron_modifier(arg, put, parse_err, make_rhyme, param_mods)
end

-- Parse a hyphenation argument into hyphenation objects. Only the qualifier modifiers are accepted, so
-- the table of extra modifiers is empty.
local function parse_hyph(arg, put, parse_err)
	local no_extra_modifiers = {}
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, no_extra_modifiers)
end

-- Parse a homophone argument into a list of `{term = ...}` objects carrying any link modifiers.
local function parse_homophone(arg, put, parse_err)
	local function split_genders(spec)
		return rsplit(spec, ",")
	end

	local function make_term(term)
		return {term = term}
	end

	local param_mods = {
		-- The `t` inline modifier is stored under the "gloss" key of the parsed term, because that is what
		-- Module:links (called from Module:homophones) expects.
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		-- The `g` inline modifier is stored under the "genders" key of the parsed term, because that is what
		-- Module:links (called from Module:homophones) expects.
		g = {
			item_dest = "genders",
			convert = split_genders,
		},
	}

	return parse_pron_modifier(arg, put, parse_err, make_term, param_mods)
end

-- Split an audio argument into file name and gloss. The two are separated by "#" when the argument
-- contains one, otherwise by ";"; whitespace around the separator is ignored. Without a separator the
-- whole argument is the file name and the gloss is "Audio".
local function generate_audio_obj(arg)
	local sep = arg:find("#") and "#" or ";"
	local file, gloss = arg:match("^(.-)%s*" .. sep .. "%s*(.*)$")
	if file then
		return {file = file, gloss = gloss}
	end
	return {file = arg, gloss = "Audio"}
end

-- Parse an audio argument into audio objects. Only the qualifier modifiers are accepted. The argument is
-- not split on commas, because some filenames have embedded commas not followed by a space (typically
-- followed by an underscore).
local function parse_audio(arg, put, parse_err)
	local no_extra_modifiers = {}
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, no_extra_modifiers, no_split_on_comma)
end

-- External entry point for. -- External entry point for. function export.show(frame) local params = { [1] = {},		["pre"] = {}, ["post"] = {}, ["ref"] = {}, ["bullets"] = {type = "number", default = 1}, }	local parargs = frame:getParent.args local args = require("Module:parameters").process(parargs, params) local text = args[1] or mw.title.getCurrentTitle.text args.terms = local ret = generate_pronun(args) return ret.text end

-- Return the number of syllables of a phonemic representation, which should have syllable dividers in it but no -- hyphens. local function get_num_syl_from_phonemic(phonemic) -- Maybe we should just count vowels instead of the below code. phonemic = rsub(phonemic, "|", " ") -- remove IPA foot boundaries local words = rsplit(phonemic, " +") for i, word in ipairs(words) do -- IPA stress marks are syllable divisions if between characters; otherwise just remove. word = rsub(word, "(.)[ˌˈ](.)", "%1.%2") word = rsub(word, "[ˌˈ]", "") words[i] = word end -- There should be a syllable boundary between words. phonemic = table.concat(words, ".") return ulen(rsub(phonemic, "[^.]", "")) + 1 end

-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove -- syllable boundary markers. local function convert_phonemic_to_rhyme(phonemic) -- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate -- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`. return rsub(rsub(phonemic, ".*[ˌˈ]", ""), "^[^" .. vowel .. "]*", ""):gsub("%.", ""):gsub("t͡ʃ", "tʃ") end

local function split_syllabified_spelling(spelling) return rsplit(spelling, "%.") end

-- "Align" syllabification to original spelling by matching character-by-character, allowing for extra syllable and -- accent markers in the syllabification. If we encounter an extra syllable marker (.), we allow and keep it. If we -- encounter an extra accent marker in the syllabification, we drop it. In any other case, we return nil indicating -- the alignment failed. local function align_syllabification_to_spelling(syllab, spelling) local result = {} local syll_chars = rsplit(decompose(syllab), "") local spelling_chars = rsplit(decompose(spelling), "") local i = 1 local j = 1 while i <= #syll_chars or j <= #spelling_chars do		local ci = syll_chars[i] local cj = spelling_chars[j] if ci == cj then table.insert(result, ci) i = i + 1 j = j + 1 elseif ci == "." then table.insert(result, ci) i = i + 1 elseif ci == AC or ci == GR or ci == CFLEX then -- skip character i = i + 1 else -- non-matching character return nil end end if i <= #syll_chars or j <= #spelling_chars then -- left-over characters on one side or the other return nil end return unfc(table.concat(result)) end

-- Build a hyphenation object from a dot-syllabified term: the syllabified string itself goes in `syllabification`
-- and the list of its syllables in `hyph`.
local function generate_hyph_obj(term)
	local obj = {}
	obj.syllabification = term
	obj.hyph = split_syllabified_spelling(term)
	return obj
end

-- Check whether `word` contains at least one vowel letter (anything matched by `V`). The caller is responsible for
-- passing an already-decomposed word. The result is whatever `rfind` returns: truthy on a match, nil otherwise.
local function word_has_vowels(word)
	return rfind(word, V)
end

-- Return true if every space- or hyphen-separated part of `term` contains a vowel, false as soon as one does not.
local function all_words_have_vowels(term)
	for _, piece in ipairs(rsplit(decompose(term), "[ %-]")) do
		-- Empty pieces are tolerated; they show up with prefixes and suffixes (leading/trailing hyphen).
		local is_vowelless = piece ~= "" and not word_has_vowels(piece)
		if is_vowelless then
			return false
		end
	end
	return true
end

-- Decide whether a rhyme can be derived automatically from a respelling. The answer is falsy for multiword terms,
-- hyphenated compounds, prefixes, circumflex-marked suffixes and terms without any vowel.
local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	-- no if multiple words
	if #words ~= 1 then
		return false
	end
	local word = words[1]
	-- no if word is composed of hyphenated parts (e.g. Austria-Hungría)
	if word:find(".%-.") then
		return false
	end
	-- no if word is a prefix
	if word:find("%-$") then
		return false
	end
	-- no if word is an unstressed suffix
	if word:find("^%-") and word:find(CFLEX) then
		return false
	end
	-- no if word has no vowels (e.g. a single letter); the parentheses keep just the first result
	return (word_has_vowels(word))
end

-- Decide whether a rhyme can be derived from raw IPA: it must contain no whitespace (i.e. be a single word) and
-- must contain at least one vowel.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	-- The parentheses keep just the first result of the vowel search.
	return (word_has_vowels(decompose(ipa)))
end

-- Process explicitly specified rhymes. For every entry of `rhymes` an object {rhyme, num_syl, qualifiers} is
-- appended to `rhyme_ret.pronun` (which is reset to an empty list first).
--
-- `rhymes` is a list of parsed rhyme objects; `hyphs` is a list of hyphenation objects (with a `syllabification`
-- field); `parsed_respellings` is a list of parsed respelling sets whose `pronun.pronun` list holds the generated
-- pronunciations; `rhyme_ret` is the table that receives the result.
--
-- When a rhyme does not specify its number of syllables, that number is inferred first from the hyphenations and
-- then, if there are none, from the phonemic pronunciations. `num_syl` is left nil when it cannot be inferred.
local function do_rhymes(rhymes, hyphs, parsed_respellings, rhyme_ret)
	rhyme_ret.pronun = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false

		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it
		-- from the hyphenation.
		if not num_syl then
			num_syl = {}
			for _, hyph in ipairs(hyphs) do
				if should_generate_rhyme_from_respelling(hyph.syllabification) then
					local this_num_syl = 1 + ulen(rsub(hyph.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		-- If that fails and term is single-word, try to take it from the phonemic.
		if not no_num_syl and not num_syl then
			-- `num_syl` is nil at this point, so start a fresh list before inserting into it.
			num_syl = {}
			for _, parsed in ipairs(parsed_respellings) do
				-- The pronunciations are a plain list here (there is no per-dialect sub-table to index into).
				for _, pronun in ipairs(parsed.pronun.pronun) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given).
					if pronun.phonemic then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_phonemic(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		table.insert(rhyme_ret.pronun, {
			rhyme = rhyme.rhyme,
			num_syl = num_syl,
			qualifiers = rhyme.qualifiers,
		})
	end
end

-- Parse an argument consisting of one or more comma-separated items, each optionally followed by inline modifiers
-- of the form <prefix:value>, and return a list of objects.
--
-- `arg` is the raw argument text. `put` is the already-loaded parse utilities module, or nil to load it on demand.
-- `parse_err` is a function of one argument (the message) used to report errors. `generate_obj` converts the text
-- of an item into its object. `param_mods` maps each recognized prefix to a spec table with optional fields
-- `item_dest` (key to store the value under; defaults to the prefix) and `convert` (function applied to the value).
-- The prefixes q, qq, a and aa are always recognized and accumulate their values in a list. If
-- `no_split_on_comma` is truthy, the whole argument is treated as a single item.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local retval = {}

	if arg:find("<") then
		if not put then
			put = require(put_module)
		end

		-- Return the sorted list of recognized prefixes, each wrapped in single quotes with `suffix` appended
		-- inside the quotes (for use in error messages).
		local function get_quoted_valid_prefixes(suffix)
			local valid_prefixes = {}
			for param_mod, _ in pairs(param_mods) do
				table.insert(valid_prefixes, param_mod)
			end
			table.insert(valid_prefixes, "q")
			table.insert(valid_prefixes, "qq")
			table.insert(valid_prefixes, "a")
			table.insert(valid_prefixes, "aa")
			table.sort(valid_prefixes)
			for i, valid_prefix in ipairs(valid_prefixes) do
				valid_prefixes[i] = "'" .. valid_prefix .. suffix .. "'"
			end
			return valid_prefixes
		end

		local segments = put.parse_balanced_segment_run(arg, "<", ">")
		local comma_separated_groups =
			no_split_on_comma and {segments} or put.split_alternating_runs_on_comma(segments)
		for _, group in ipairs(comma_separated_groups) do
			-- group[1] is the item text; after it come alternating <modifier> and intervening-text elements.
			local obj = generate_obj(group[1])
			for j = 2, #group - 1, 2 do
				if group[j + 1] ~= "" then
					parse_err("Extraneous text '" .. group[j + 1] .. "' after modifier")
				end
				local modtext = group[j]:match("^<(.*)>$")
				if not modtext then
					parse_err("Internal error: Modifier '" .. group[j] .. "' isn't surrounded by angle brackets")
				end
				local prefix, val = modtext:match("^([a-z]+):(.*)$")
				if not prefix then
					parse_err("Modifier " .. group[j] .. " lacks a prefix, should begin with one of " ..
						m_table.serialCommaJoin(get_quoted_valid_prefixes(":")))
				end
				if prefix == "q" or prefix == "qq" or prefix == "a" or prefix == "aa" then
					-- Qualifier-type modifiers may be repeated; collect them in a list.
					if not obj[prefix] then
						obj[prefix] = {}
					end
					table.insert(obj[prefix], val)
				elseif param_mods[prefix] then
					local key = param_mods[prefix].item_dest or prefix
					if obj[key] then
						parse_err("Modifier '" .. prefix .. "' specified more than once")
					end
					local convert = param_mods[prefix].convert
					if convert then
						obj[key] = convert(val)
					else
						obj[key] = val
					end
				else
					parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. group[j]
						.. ", should be " .. m_table.serialCommaJoin(get_quoted_valid_prefixes("")))
				end
			end
			table.insert(retval, obj)
		end
	elseif no_split_on_comma then
		table.insert(retval, generate_obj(arg))
	else
		for _, term in ipairs(split_on_comma(arg)) do
			table.insert(retval, generate_obj(term))
		end
	end

	return retval
end

-- Parse a rhyme argument into a list of {rhyme = ...} objects. Besides the qualifier modifiers, the inline modifier
-- <s:...> is accepted; it gives the number(s) of syllables, comma-separated, and is stored under `num_syl` as a list
-- of numbers.
local function parse_rhyme(arg, put, parse_err)
	-- Convert the value of the `s` modifier into a list of numbers, reporting non-numeric entries.
	local function parse_syllable_counts(counts_text)
		local counts = rsplit(counts_text, ",")
		for idx = 1, #counts do
			local count = counts[idx]
			if not count:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. count .. "' should be numeric")
			end
			counts[idx] = tonumber(count)
		end
		return counts
	end

	local function make_rhyme_obj(term)
		return {rhyme = term}
	end

	local param_mods = {
		s = {
			item_dest = "num_syl",
			convert = parse_syllable_counts,
		},
	}

	return parse_pron_modifier(arg, put, parse_err, make_rhyme_obj, param_mods)
end

-- Parse a hyphenation argument into a list of hyphenation objects (as built by generate_hyph_obj).
local function parse_hyph(arg, put, parse_err)
	-- No modifiers of its own are supported; only the qualifier modifiers handled by parse_pron_modifier apply.
	local no_extra_mods = {}
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, no_extra_mods)
end

-- Parse a homophone argument into a list of {term = ...} objects, accepting the inline modifiers t, gloss, pos, alt,
-- lit and id in addition to the qualifier modifiers.
local function parse_homophone(arg, put, parse_err)
	local param_mods = {
		-- The value of the `t` inline modifier is stored under the "gloss" key of the parsed term, because that
		-- is what Module:links (called from Module:homophones) expects.
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
	}

	local function make_homophone_obj(term)
		return {term = term}
	end

	return parse_pron_modifier(arg, put, parse_err, make_homophone_obj, param_mods)
end

-- Turn an audio argument into {file = ..., gloss = ...}. The argument is either a bare filename, in which case the
-- gloss defaults to "Audio", or a filename and a gloss separated by "#" or ";" (with optional surrounding
-- whitespace).
local function generate_audio_obj(arg)
	-- "#" takes precedence as the delimiter; ";" is only tried when the argument contains no "#".
	local delimiter = arg:find("#") and "#" or ";"
	local file, gloss = arg:match("^(.-)%s*" .. delimiter .. "%s*(.*)$")
	if not file then
		-- No delimiter at all: the whole argument is the filename.
		return {file = arg, gloss = "Audio"}
	end
	return {file = file, gloss = gloss}
end

-- Parse an audio argument into a list of audio objects (as built by generate_audio_obj).
local function parse_audio(arg, put, parse_err)
	-- No modifiers of its own are supported; only the qualifier modifiers handled by parse_pron_modifier apply.
	local no_extra_mods = {}

	-- The argument is deliberately not split on commas, because some filenames have embedded commas not followed
	-- by a space (typically followed by an underscore). The last argument is just a truthy flag.
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, no_extra_mods, no_split_on_comma)
end

-- External entry point, meant to be invoked from a template: the arguments are read from the parent frame.
--
-- Parameters of the calling template:
--   1=, 2=, ...  respellings (default "+"); each may carry inline modifiers (pre, post, style, bullets, rhyme, hyph,
--                hmp, audio, ref, q, qq, a, aa)
--   rhyme=, hyph=, hmp=, audio= (list)  overall rhymes, syllabifications, homophones and audio files
--   pagename=    override for the page name
--
-- Returns the wikitext of the pronunciation section: one entry per respelling set, followed by audio, rhymes,
-- syllabifications and homophones. Rhymes, syllabifications and homophones that are identical across all sets are
-- shown once at the bottom; otherwise they are shown beneath each set, indented by one more bullet.
function export.show_pr(frame)
	local params = {
		[1] = {list = true},
		["rhyme"] = {},
		["hyph"] = {},
		["hmp"] = {},
		["audio"] = {list = true},
		["pagename"] = {},
	}
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local pagename = args.pagename or mw.title.getCurrentTitle().subpageText

	-- Parse the arguments.
	local respellings = #args[1] > 0 and args[1] or {"+"}
	local parsed_respellings = {}
	local function overall_parse_err(msg, arg, val)
		error(msg .. ": " .. arg .. "= " .. val)
	end
	local overall_rhyme = args.rhyme and
		parse_rhyme(args.rhyme, nil, function(msg) overall_parse_err(msg, "rhyme", args.rhyme) end) or nil
	local overall_hyph = args.hyph and
		parse_hyph(args.hyph, nil, function(msg) overall_parse_err(msg, "hyph", args.hyph) end) or nil
	local overall_hmp = args.hmp and
		parse_homophone(args.hmp, nil, function(msg) overall_parse_err(msg, "hmp", args.hmp) end) or nil
	local overall_audio
	if args.audio then
		overall_audio = {}
		for _, audio in ipairs(args.audio) do
			local parsed_audio = parse_audio(audio, nil, function(msg) overall_parse_err(msg, "audio", audio) end)
			if #parsed_audio > 1 then
				error("Internal error: Saw more than one object returned from parse_audio")
			end
			table.insert(overall_audio, parsed_audio[1])
		end
	end
	local put

	for i, respelling in ipairs(respellings) do
		local function parse_err(msg)
			error(msg .. ": " .. i .. "= " .. respelling)
		end
		if respelling:find("<") then
			if not put then
				put = require(put_module)
			end

			local param_mods = {
				pre = {},
				post = {},
				style = {},
				bullets = {
					convert = function(arg)
						if not arg:find("^[0-9]+$") then
							parse_err("Modifier 'bullets' should have a number as argument, but saw '" .. arg .. "'")
						end
						return tonumber(arg)
					end,
				},
				rhyme = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_rhyme(arg, put, parse_err) end,
				},
				hyph = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_hyph(arg, put, parse_err) end,
				},
				hmp = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_homophone(arg, put, parse_err) end,
				},
				audio = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_audio(arg, put, parse_err) end,
				},
			}

			-- Return the sorted list of recognized prefixes, each wrapped in single quotes with `suffix`
			-- appended inside the quotes (for use in error messages).
			local function get_quoted_valid_prefixes(suffix)
				local valid_prefixes = {}
				for param_mod, _ in pairs(param_mods) do
					table.insert(valid_prefixes, param_mod)
				end
				table.insert(valid_prefixes, "ref")
				table.insert(valid_prefixes, "q")
				table.insert(valid_prefixes, "qq")
				table.insert(valid_prefixes, "a")
				table.insert(valid_prefixes, "aa")
				table.sort(valid_prefixes)
				for n, valid_prefix in ipairs(valid_prefixes) do
					valid_prefixes[n] = "'" .. valid_prefix .. suffix .. "'"
				end
				return valid_prefixes
			end

			local segments = put.parse_balanced_segment_run(respelling, "<", ">")
			-- NOTE(review): parse_pron_modifier calls this helper without a second argument; confirm against
			-- the parse utilities module that passing "," here is intended.
			local comma_separated_groups = put.split_alternating_runs_on_comma(segments, ",")
			local parsed = {terms = {}, audio = {}, rhyme = {}, hyph = {}, hmp = {}}
			for j, group in ipairs(comma_separated_groups) do
				-- group[1] is the respelling; after it come alternating <modifier> and intervening-text
				-- elements.
				local termobj = parse_respelling(group[1], pagename, parse_err)
				for k = 2, #group - 1, 2 do
					if group[k + 1] ~= "" then
						parse_err("Extraneous text '" .. group[k + 1] .. "' after modifier")
					end
					local modtext = group[k]:match("^<(.*)>$")
					if not modtext then
						parse_err("Internal error: Modifier '" .. group[k] .. "' isn't surrounded by angle brackets")
					end
					local prefix, arg = modtext:match("^([a-z]+):(.*)$")
					if not prefix then
						parse_err("Modifier " .. group[k] .. " lacks a prefix, should begin with one of " ..
							m_table.serialCommaJoin(get_quoted_valid_prefixes(":")))
					end
					if prefix == "ref" or prefix == "q" or prefix == "qq" or prefix == "a" or prefix == "aa" then
						-- Per-term modifiers; may be repeated, so collect them in a list.
						if not termobj[prefix] then
							termobj[prefix] = {}
						end
						table.insert(termobj[prefix], arg)
					elseif param_mods[prefix] then
						-- Modifiers applying to the whole set must follow the last term.
						if j < #comma_separated_groups then
							parse_err("Modifier '" .. prefix .. "' should occur after the last comma-separated term")
						end
						if not param_mods[prefix].insert and parsed[prefix] then
							parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. group[k])
						end
						local converted
						if param_mods[prefix].convert then
							converted = param_mods[prefix].convert(arg)
						else
							converted = arg
						end
						if param_mods[prefix].insert then
							if param_mods[prefix].flatten then
								for _, obj in ipairs(converted) do
									table.insert(parsed[prefix], obj)
								end
							else
								table.insert(parsed[prefix], converted)
							end
						else
							parsed[prefix] = converted
						end
					else
						parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. group[k]
							.. ", should be " .. m_table.serialCommaJoin(get_quoted_valid_prefixes("")))
					end
				end
				table.insert(parsed.terms, termobj)
			end
			if not parsed.bullets then
				parsed.bullets = 1
			end
			table.insert(parsed_respellings, parsed)
		else
			local termobjs = {}
			for _, term in ipairs(split_on_comma(respelling)) do
				table.insert(termobjs, parse_respelling(term, pagename, parse_err))
			end
			table.insert(parsed_respellings, {
				terms = termobjs,
				audio = {},
				rhyme = {},
				hyph = {},
				hmp = {},
				bullets = 1,
			})
		end
	end

	-- In the overall syllabifications, "+" stands for the syllabification derived from the page name, and "-"
	-- suppresses syllabification altogether.
	if overall_hyph then
		for _, hyph in ipairs(overall_hyph) do
			if hyph.syllabification == "+" then
				hyph.syllabification = syllabify_from_spelling(pagename)
				hyph.hyph = split_syllabified_spelling(hyph.syllabification)
			elseif hyph.syllabification == "-" then
				overall_hyph = {}
				break
			end
		end
	end

	-- Loop over individual respellings, processing each.
	for _, parsed in ipairs(parsed_respellings) do
		parsed.pronun = generate_pronun(parsed)
		local no_auto_rhyme = false
		for _, term in ipairs(parsed.terms) do
			if term.raw then
				if not should_generate_rhyme_from_ipa(term.raw_phonemic or term.raw_phonetic) then
					no_auto_rhyme = true
					break
				end
			elseif not should_generate_rhyme_from_respelling(term.term) then
				no_auto_rhyme = true
				break
			end
		end

		if #parsed.hyph == 0 then
			-- No explicit syllabification for this set: derive it from the respellings, keeping only those
			-- that can be aligned with the page name.
			if not overall_hyph and all_words_have_vowels(pagename) then
				for _, term in ipairs(parsed.terms) do
					if not term.raw then
						local syllabification = syllabify_from_spelling(term.term)
						local aligned_syll = align_syllabification_to_spelling(syllabification, pagename)
						if aligned_syll then
							m_table.insertIfNot(parsed.hyph, generate_hyph_obj(aligned_syll))
						end
					end
				end
			end
		else
			for _, hyph in ipairs(parsed.hyph) do
				if hyph.syllabification == "+" then
					hyph.syllabification = syllabify_from_spelling(pagename)
					hyph.hyph = split_syllabified_spelling(hyph.syllabification)
				elseif hyph.syllabification == "-" then
					parsed.hyph = {}
					break
				end
			end
		end

		-- Generate the rhymes from the phonemic pronunciations of this set. Fills in `rhyme_ret.pronun` with
		-- objects {rhyme, num_syl} (num_syl being a list) and returns `rhyme_ret`.
		local function do_rhyme(rhyme_ret)
			rhyme_ret.pronun = {}
			for _, pronun in ipairs(parsed.pronun.pronun) do
				if pronun.phonemic then
					-- Count number of syllables by looking at syllable boundaries (including stress marks).
					local num_syl = get_num_syl_from_phonemic(pronun.phonemic)
					-- Get the rhyme by truncating everything up through the last stress mark + any following
					-- consonants, and remove syllable boundary markers.
					local rhyme = convert_phonemic_to_rhyme(pronun.phonemic)
					local saw_already = false
					for _, existing in ipairs(rhyme_ret.pronun) do
						if existing.rhyme == rhyme then
							saw_already = true
							-- We already saw this rhyme but possibly with a different number of syllables.
							m_table.insertIfNot(existing.num_syl, num_syl)
							break
						end
					end
					if not saw_already then
						table.insert(rhyme_ret.pronun, {
							rhyme = rhyme,
							num_syl = {num_syl},
						})
					end
				end
			end
			return rhyme_ret
		end

		if #parsed.rhyme == 0 then
			if overall_rhyme or no_auto_rhyme then
				parsed.rhyme = nil
			else
				local rhyme_ret = do_rhyme({})
				-- Nothing to show if no pronunciation had a phonemic form.
				parsed.rhyme = #rhyme_ret.pronun > 0 and rhyme_ret or nil
			end
		else
			-- Explicitly specified rhymes; "-" suppresses rhymes for this set.
			local no_rhyme = false
			for _, rhyme in ipairs(parsed.rhyme) do
				if rhyme.rhyme == "-" then
					no_rhyme = true
					break
				end
			end
			if no_rhyme then
				parsed.rhyme = nil
			else
				local rhyme_ret = {}
				do_rhymes(parsed.rhyme, parsed.hyph, {parsed}, rhyme_ret)
				parsed.rhyme = rhyme_ret
			end
		end
	end

	if overall_rhyme then
		-- "-" suppresses the overall rhymes.
		local no_overall_rhyme = false
		for _, orhyme in ipairs(overall_rhyme) do
			if orhyme.rhyme == "-" then
				no_overall_rhyme = true
				break
			end
		end
		if no_overall_rhyme then
			overall_rhyme = nil
		else
			-- The syllable counts of overall rhymes are inferred from the overall syllabifications if given,
			-- else from the syllabifications of all sets.
			local all_hyphs
			if overall_hyph then
				all_hyphs = overall_hyph
			else
				all_hyphs = {}
				for _, parsed in ipairs(parsed_respellings) do
					for _, hyph in ipairs(parsed.hyph) do
						m_table.insertIfNot(all_hyphs, hyph)
					end
				end
			end
			local rhyme_ret = {}
			do_rhymes(overall_rhyme, all_hyphs, parsed_respellings, rhyme_ret)
			overall_rhyme = rhyme_ret
		end
	end

	-- If all sets of pronunciations have the same rhymes, display them only once at the bottom.
	-- Otherwise, display rhymes beneath each set, indented.
	local first_rhyme_ret
	local all_rhyme_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_rhyme_ret = parsed.rhyme
		elseif not m_table.deepEquals(first_rhyme_ret, parsed.rhyme) then
			all_rhyme_sets_eq = false
			break
		end
	end

	-- Format the rhymes in `rhyme_ret.pronun` as a single bulleted line. Returns exactly one value so that the
	-- result can be passed directly to table.insert().
	local function format_rhyme(rhyme_ret, num_bullets)
		local rhymes = {}
		for _, pronun in ipairs(rhyme_ret.pronun) do
			table.insert(rhymes, pronun)
		end
		local data = {
			lang = lang,
			rhymes = rhymes,
		}
		local bullet = string.rep("*", num_bullets) .. " "
		-- The formatting function exported by Module:rhymes is format_rhymes().
		return bullet .. require("Module:rhymes").format_rhymes(data)
	end

	-- If all sets of pronunciations have the same hyphenations, display them only once at the bottom.
	-- Otherwise, display hyphenations beneath each set, indented.
	local first_hyphs
	local all_hyph_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hyphs = parsed.hyph
		elseif not m_table.deepEquals(first_hyphs, parsed.hyph) then
			all_hyph_sets_eq = false
			break
		end
	end

	local function format_hyphenations(hyphs, num_bullets)
		local hyphtext = require("Module:hyphenation").format_hyphenations {
			lang = lang,
			hyphs = hyphs,
			caption = "Syllabification",
		}
		return string.rep("*", num_bullets) .. " " .. hyphtext
	end

	-- If all sets of pronunciations have the same homophones, display them only once at the bottom.
	-- Otherwise, display homophones beneath each set, indented.
	local first_hmps
	local all_hmp_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hmps = parsed.hmp
		elseif not m_table.deepEquals(first_hmps, parsed.hmp) then
			all_hmp_sets_eq = false
			break
		end
	end

	local function format_homophones(hmps, num_bullets)
		local hmptext = require("Module:homophones").format_homophones {
			lang = lang,
			homophones = hmps,
		}
		return string.rep("*", num_bullets) .. " " .. hmptext
	end

	local function format_audio(audios, num_bullets)
		local ret = {}
		for _, audio in ipairs(audios) do
			-- FIXME! There should be a module for this.
			local text = frame:expandTemplate {
				title = "audio",
				args = {"tl", audio.file, audio.gloss},
			}
			if audio.q and audio.q[1] or audio.qq and audio.qq[1]
				or audio.a and audio.a[1] or audio.aa and audio.aa[1] then
				text = require("Module:pron qualifier").format_qualifiers(audio, text)
			end
			table.insert(ret, string.rep("*", num_bullets) .. " " .. text)
		end
		return table.concat(ret, "\n")
	end

	local textparts = {}
	local min_num_bullets = 9999
	for j, parsed in ipairs(parsed_respellings) do
		if parsed.bullets < min_num_bullets then
			min_num_bullets = parsed.bullets
		end
		if j > 1 then
			table.insert(textparts, "\n")
		end
		table.insert(textparts, parsed.pronun.text)
		if #parsed.audio > 0 then
			table.insert(textparts, "\n")
			-- If only one pronunciation set, add the audio with the same number of bullets, otherwise
			-- indent audio by one more bullet.
			table.insert(textparts, format_audio(parsed.audio,
				#parsed_respellings == 1 and parsed.bullets or parsed.bullets + 1))
		end
		-- Rhymes, syllabifications and homophones that differ between sets go beneath their own set.
		if not all_rhyme_sets_eq and parsed.rhyme then
			table.insert(textparts, "\n")
			table.insert(textparts, format_rhyme(parsed.rhyme, parsed.bullets + 1))
		end
		if not all_hyph_sets_eq and #parsed.hyph > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_hyphenations(parsed.hyph, parsed.bullets + 1))
		end
		if not all_hmp_sets_eq and #parsed.hmp > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_homophones(parsed.hmp, parsed.bullets + 1))
		end
	end
	if overall_audio and #overall_audio > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_audio(overall_audio, min_num_bullets))
	end
	-- Rhymes, syllabifications and homophones shared by all sets are shown once, followed by the overall ones.
	if all_rhyme_sets_eq and first_rhyme_ret then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(first_rhyme_ret, min_num_bullets))
	end
	if overall_rhyme then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(overall_rhyme, min_num_bullets))
	end
	if all_hyph_sets_eq and #first_hyphs > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(first_hyphs, min_num_bullets))
	end
	if overall_hyph and #overall_hyph > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(overall_hyph, min_num_bullets))
	end
	if all_hmp_sets_eq and #first_hmps > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(first_hmps, min_num_bullets))
	end
	if overall_hmp and #overall_hmp > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(overall_hmp, min_num_bullets))
	end

	return table.concat(textparts)
end

return export