-- Module:id-pron/sandbox

local export = {}

local m_IPA = require("Module:IPA") local m_str_utils = require("Module:string utilities") local m_table = require("Module:table") local put_module = "Module:parse utilities" local set_utilities_module = "Module:set utilities" local headword_data_module = "Module:headword/data" local accent_qualifier_module = "Module:accent qualifier" local accent_qualifier_data_module = "Module:accent qualifier/data" local rhymes_module = "Module:rhymes" local hyphenation_module = "Module:hyphenation"

local lang = require("Module:languages").getByCode("id")

local maxn = table.maxn local rfind = m_str_utils.find local rsubn = m_str_utils.gsub local rsplit = m_str_utils.split local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local trim = mw.text.trim local u = m_str_utils.char local ulen = m_str_utils.len local ulower = m_str_utils.lower

-- Combining diacritics recognised in respellings.
local AC = u(0x0301)    -- combining acute
local GR = u(0x0300)    -- combining grave
local CFLEX = u(0x0302) -- combining circumflex
local MAC = u(0x0304)   -- combining macron
local BR = u(0x0306)    -- combining breve

-- Vowel letters, and the character classes built from them.
local vowel = "aeéèioòuəɛɔ"
local V = "[" .. vowel .. "]"
local NV = "[^" .. vowel .. "]"

-- Accent marks that may appear in a respelling, and the IPA stress mark.
local accent = AC .. GR .. MAC .. BR
local accent_c = "[" .. accent .. "]"
local stress_c = "[" .. MAC .. BR .. "]"
local ipa_stress = "ˈ"
local ipa_stress_c = "[" .. ipa_stress .. "]"

-- Word/syllable separators; a consonant is anything that is neither a vowel nor a separator.
local separator = "# ."
local separator_c = "[" .. separator .. "]"
local C = "[^" .. vowel .. separator .. "]"

-- Words that export.IPA() marks with a breve (i.e. leaves unstressed).
-- Feel free to add more unstressed words.
local unstressed_words = m_table.listToSet {
	-- prepositions
	"di", "ké",
	-- conjunctions
	"dan",
	-- pronouns
	"ku", "mu", "nya",
}

-- Wrapper around rsubn() that hands back only the substituted string; the
-- substitution count is dropped.
local function rsub(term, foo, bar)
	-- The parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end

-- Wrapper around rsubn() whose second return value is a boolean telling
-- whether at least one substitution was made.
local function rsubb(term, foo, bar)
	local replaced, count = rsubn(term, foo, bar)
	local changed = count > 0
	return replaced, changed
end

-- Keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- ĵ, ɟ and ć are used internally to represent [d͡ʒ], [j] and [t͡ʃ] --

-- Convert an Indonesian respelling to IPA.
--
-- `text` is the respelling; when nil, the current page title is used.
-- `phonetic`, when truthy, keeps "." as the syllable joiner while words are
-- reassembled; otherwise syllables are joined with nothing.
--
-- Returns a table with the keys "phonemic" and "phonetic", each holding an
-- NFC-normalized IPA string (without enclosing slashes or brackets).
--
-- Internally ĵ, ɟ and ć stand for [d͡ʒ], [j] and [t͡ʃ] until the final conversion.
function export.IPA(text, phonetic)
	text = ulower(text or mw.title.getCurrentTitle().text)

	-- decompose everything, then recompose é, è and ò
	text = mw.ustring.toNFD(text)
	text = rsub(text, "." .. "[" .. AC .. CFLEX .. GR .. "]", {
		["e" .. AC] = "é",
		["e" .. GR] = "è",
		["o" .. GR] = "ò", -- O as in the Javanese place names "Solo", "Purwokerto", "Probolinggo"
	})

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	-- Mark prefixes (words ending in a hyphen) that carry no explicit accent, and
	-- words listed in `unstressed_words`, with a breve so they are left unstressed.
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
			-- the greedy ".*" puts the breve after the LAST vowel of the word
			words[i] = rsub(word, "^(.*" .. V .. ")", "%1" .. BR)
		end
	end
	text = table.concat(words, " ")

	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- "i" or "u" to glide (as part of a diphthong)
	text = rsub(text, "(" .. V .. ")i([#.])", "%1ɟ%2")
	text = rsub(text, "(" .. V .. ")u([#.])", "%1w%2")

	-- syllable-initial X (e.g. in xenofobia, xenon, xilofon)
	text = rsub(text, "x(" .. V .. ")", "s%1")

	-- handle certain combinations; kh, ng, ny and sy handling needs to go first
	text = rsub(text, "kh", "x")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "ny", "ɲ")
	text = rsub(text, "sy", "ʃ")

	-- alphabet-to-phoneme; letters in the class with no table entry are kept as-is
	-- ["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
	text = rsub(text, "[ceéègjòqvy]", {
		["c"] = "ć",
		["e"] = "ə",
		["é"] = "e",
		["è"] = "ɛ",
		["g"] = "ɡ",
		["j"] = "ĵ",
		["ò"] = "ɔ",
		["q"] = "k",
		["y"] = "j",
	})

	-- glottal stop. use also to replace "k" when this corresponds to it
	text = rsub(text, "7", "ʔ")

	-- syllable division
	local vowel_to_glide = { ["i"] = "j", ["u"] = "w" }
	-- i and u between vowels -> j and w (e.g. rangkaian)
	text = rsub_repeatedly(text, "(" .. V .. ")([iu])(" .. V .. ")",
		function(v1, iu, v2)
			return v1 .. vowel_to_glide[iu] .. v2
		end
	)

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	text = rsub_repeatedly(text, "([aeiouɛɔ]" .. accent_c .. "*)([aeiouɛɔ])", "%1.%2")

	local accent_to_stress_mark = { [MAC] = "ˈ", [BR] = "" }

	local function accent_word(word, syllables)
		-- Now stress the word. If any accent exists in the word (including breves indicating an unaccented word),
		-- put the stress mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default
		-- stress rule.
		if rfind(word, accent_c) then
			for i = 1, #syllables do
				-- NOTE(review): `accent_c` also matches acute and grave, which have no entry in
				-- `accent_to_stress_mark`; such an accent makes the concatenation below raise. Confirm
				-- whether those marks should map to a stress mark or be rejected earlier.
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
					function(pre, accent, post)
						return accent_to_stress_mark[accent] .. pre .. post
					end
				)
			end
		else
			-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
			if #syllables > 1 and (rfind(word, "[^aəeéèioòuɛɔʔbcdfgɡhjɟĵklmnŋɲpqrstvwxz#]#")) or #syllables == 1 and rfind(word, V) then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables <= 2 and rfind(word, "[ə]") then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables >= 3 and rfind(word, "[ə]") then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			elseif #syllables > 1 then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			end
		end
	end

	words = rsplit(text, " ")
	for j, word in ipairs(words) do
		local syllables = rsplit(word, "%.")
		accent_word(word, syllables)
		-- Reconstruct the word.
		words[j] = table.concat(syllables, phonetic and "." or "")
	end
	text = table.concat(words, " ")

	-- suppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")

	-- Both transcriptions start from the same string; only "phonetic" gets the allophonic rules.
	local id_IPA_table = { ["phonetic"] = text, ["phonemic"] = text }

	-- Only existing keys are reassigned inside the loop, which pairs() permits.
	for key in pairs(id_IPA_table) do
		text = id_IPA_table[key]

		-- phonetic transcription
		if key == "phonetic" then
			-- diphthongs
			text = rsub(text, "([aeou])([ɟj])([#.ˈ])", "%1i̯%3")
			text = rsub(text, "([a])w([#.ˈ])", "%1u̯%2")

			-- change e, i, u in closed final syllables
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])e([bćdfhjĵɟklmnɲŋprstwz])([#])", "%1ɛ%2%3")
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])i([bćdfhjĵɟklmnɲŋprstwz])([#])", "%1ɪ%2%3")
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])u([bćdfhjĵɟklmnɲŋprstwz])([#])", "%1ʊ%2%3")

			-- i, u in closed stressed syllables with nasal coda
			text = rsub(text, "([ˈ])([bćdfhjĵɟklmnɲŋprsʃtwz])ɪ([mnŋ])([.#])", "%1%2i%3%4")
			text = rsub(text, "([ˈ])([bćdfhjĵɟklmnɲŋprsʃtwz])ʊ([mnŋ])([.#])", "%1%2u%3%4")

			-- devoice final B, D and G
			text = rsub(text, "b([#.ˈ])", "p̚%1")
			text = rsub(text, "d([#.ˈ])", "t̚%1")
			text = rsub(text, "ɡ([#.ˈ])", "k̚%1")

			-- /n/ and /ŋ/ sandhi
			text = rsub(text, "([nŋ])([# .]*[bpm])", "m%2")
			text = rsub(text, "([ŋ])([ˈˌ# .]*[dlstz])", "n%2")
			text = rsub(text, "([n])([ˈˌ# .]*[ćĵʃ])", "ɲ%2")

			-- final K to glottal stop
			text = rsub(text, "k([#.ˈ])", "ʔ%1")

			-- dental T
			text = rsub(text, "t", "t̪")

			-- V to F
			text = rsub(text, "v", "f")

			mw.log(text)
		end

		-- convert fake symbols to real ones
		local final_conversions = {
			["ć"] = "t͡ʃ", -- internal ć to real affricate
			["ɟ"] = "j", -- internal ɟ to real glide
			["ĵ"] = "d͡ʒ", -- internal ĵ to real affricate
		}
		-- The class must list exactly the table's keys: a matched character with no
		-- table entry is left in the output unchanged.
		text = rsub(text, "[ćɟĵ]", final_conversions)

		-- Do not have multiple syllable break consecutively
		text = rsub_repeatedly(text, "([.]+)", ".")
		text = rsub_repeatedly(text, "([.]?)(‿)([.]?)", "%2")

		-- remove # symbols at word and text boundaries
		text = rsub_repeatedly(text, "([.]?)#([.]?)", "")

		-- resuppress syllable mark before IPA stress indicator
		text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
		text = rsub_repeatedly(text, "([.]?)(" .. ipa_stress_c .. ")([.]?)", "%2")

		id_IPA_table[key] = toNFC(text)
	end

	return id_IPA_table
end

-- Template entry point: format the phonemic and phonetic IPA for a respelling.
--
-- Reads the parent frame's arguments:
--   1       - the respelling; defaults to the current page title
--   pre     - optional text placed before the pronunciation
--   bullets - number, default 1; 0 suppresses the leading "* "
-- Returns the wikitext produced by Module:IPA's format_IPA_full, prefixed by
-- the bullet and `pre` text.
function export.show(frame)
	local params = {
		[1] = {},
		["pre"] = {},
		["bullets"] = {type = "number", default = 1},
	}

	-- getParent and getCurrentTitle are functions and must be called before indexing.
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	local results = {}

	local text = args[1] or mw.title.getCurrentTitle().text

	local IPA_result = export.IPA(text)
	table.insert(results, { pron = "/" .. IPA_result["phonemic"] .. "/" })
	table.insert(results, { pron = "[" .. IPA_result["phonetic"] .. "]" })

	local pre = args.pre and args.pre .. " " or ""
	local bullet = (args.bullets ~= 0) and "* " or ""

	return bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = results }
end

-- Parse a gloss argument of the form "POS1,POS2^GLOSS" or just "GLOSS".
--
-- Returns a table {poses = ..., gloss = ...}. `poses` is nil when `arg` has no
-- "^"; otherwise it is the list of parts of speech before the "^", each
-- replaced by its entry in the headword data's `pos_aliases` when one exists.
-- `gloss` is nil when the text after "^" is empty.
local function parse_gloss(arg)
	local poses, gloss
	if arg:find("%^") then
		poses, gloss = arg:match("^(.-)%^(.*)$")
		if gloss == "" then
			gloss = nil
		end
	else
		gloss = arg
	end
	if poses then
		-- No `split_on_comma` helper is defined in this module, so use the one
		-- exported by the parse utilities module.
		poses = require(put_module).split_on_comma(poses)
		local m_headword_data = mw.loadData(headword_data_module)
		for i, pos in ipairs(poses) do
			poses[i] = m_headword_data.pos_aliases[pos] or pos
		end
	end
	return {
		poses = poses,
		gloss = gloss,
	}
end

-- Parse a raw accent spec: one or more comma-separated accents, each of which may be an alias listed in the
-- accent data in Module:accent qualifier/data. Returns the list of accents with aliases canonicalized.
-- FIXME: The separate accent qualifier data will be going away and merged into label data, at which point
-- we'll have to rewrite this.
local function parse_accents(arg)
	local aliases = mw.loadData(accent_qualifier_data_module).aliases

	-- Split on commas (swallowing surrounding whitespace) and canonicalize aliases.
	local parsed = rsplit(arg, "%s*,%s*")
	for idx = 1, #parsed do
		local name = parsed[idx]
		parsed[idx] = aliases[name] or name
	end

	return parsed
end

-- Count the syllables of a phonemic or phonetic representation, which should have syllable dividers in it
-- but no hyphens. The count is one more than the number of syllable boundaries.
local function get_num_syl_from_ipa(pron)
	-- Maybe we should just count vowels instead of the below code.
	local without_feet = rsub(pron, "|", " ") -- remove IPA foot boundaries
	local pieces = rsplit(without_feet, " +")
	for idx = 1, #pieces do
		-- IPA stress marks are syllable divisions if between characters; otherwise just remove.
		local piece = rsub(pieces[idx], "(.)[ˌˈ](.)", "%1.%2")
		pieces[idx] = rsub(piece, "[ˌˈ]", "")
	end
	-- There should be a syllable boundary between words.
	local joined = table.concat(pieces, ".")
	local boundaries = rsub(joined, "[^.]", "")
	return ulen(boundaries) + 1
end

-- Get the rhyme of the last space-separated word of `phonemic`: drop everything up through the last stress
-- mark, then any leading non-vowels, then all syllable boundary markers. Returns a single string.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: "Non-vowel" here means the module-level class `NV`, i.e. anything not listed in `vowel`.
	local words = rsplit(phonemic, " ")
	local last_word = words[#words]
	local rhyme = rsub(last_word, ".*[ˌˈ]", "")
	rhyme = rsub(rhyme, "^" .. NV .. "*", "")
	-- The parentheses drop gsub's second return value (the substitution count) so that callers
	-- receive exactly one value.
	return (rhyme:gsub("%.", ""))
end

-- Split a syllabified spelling on "." and return the resulting list of syllables.
local function split_syllabified_spelling(spelling)
	local syllables = rsplit(spelling, "%.")
	return syllables
end

-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in the
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we log the mismatch
-- and return nil, indicating the alignment failed.
local function align_syllabification_to_spelling(syllab, spelling)
	local result = {}
	local function concat_result()
		-- Postprocess to remove dots (syllable boundaries) next to hyphens.
		return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
	end
	-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
	-- spelling. Both strings are fully decomposed (NFD) so that diacritics are separate characters that can be
	-- matched or skipped individually; concat_result() recomposes the output.
	syllab = (toNFD(syllab):gsub("7", ""))
	spelling = toNFD(spelling)
	local syll_chars = rsplit(ulower(syllab), "")
	local spelling_chars = rsplit(spelling, "")
	local i = 1
	local j = 1
	local function matches(uci, ucj)
		-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
		-- Both uci and ucj should be lowercase.
		return uci == ucj or
			uci == "h" and (ucj == "g" or ucj == "j" or ucj == "x") or
			uci == "j" and ucj == "g" or
			uci == "y" and ucj == "i" or
			uci == "w" and ucj == "u"
	end
	local function silent_spelling_letter(ucj)
		return ucj == "h" or ucj == "'" or ucj == "-"
	end
	-- Out-of-range positions yield "" so the comparisons below never see nil.
	local function syll_at(pos)
		return syll_chars[pos] or ""
	end
	local function spell_at(pos)
		return spelling_chars[pos] or ""
	end
	local function uspell_at(pos)
		local c = spelling_chars[pos]
		return c and ulower(c) or ""
	end
	while i <= #syll_chars or j <= #spelling_chars do
		local uci = syll_at(i)
		local cj = spell_at(j)
		local ucj = uspell_at(j)
		if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
			not matches(syll_at(i + 2), uspell_at(j + 1)) then
			-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
			-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
			-- next respelling character (taking into account the syllable boundary). Otherwise we would match the
			-- first respelling g against the spelling g and the second respelling g wouldn't match. The extra
			-- condition keeps this hack from firing when the spelling really has a doubled g.
			i = i + 1
		elseif matches(uci, ucj) then
			table.insert(result, cj)
			i = i + 1
			j = j + 1
		elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
			-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
			-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
			-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
			-- exemplified by:
			-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
			-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
			-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
			-- If we copy the dot first, we get (1) and (2) right but not (3).
			-- If we copy the double letter first, we get (2) and (3) right but not (1).
			-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
			-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
			-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
			-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
			not rfind(uspell_at(j + 1), V) then
			-- This condition is parallel to the one directly above for silent doubled letters in spelling: a silent
			-- letter facing a syllable boundary is copied before the boundary. But we need a check that the next
			-- spelling character isn't a vowel, because in that case we want the silent letter to go after the
			-- boundary.
			table.insert(result, cj)
			j = j + 1
		elseif uci == "." then
			table.insert(result, uci)
			i = i + 1
		elseif ucj == uspell_at(j - 1) then
			-- A doubled letter in spelling that is pronounced single; keep the extra letter.
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) then
			-- A silent h, apostrophe or hyphen in spelling; keep it.
			table.insert(result, cj)
			j = j + 1
		elseif uci == AC or uci == GR or uci == CFLEX or uci == MAC or uci == BR or
			uci == "y" or uci == "w" then
			-- Skip a respelling-only character: one of this module's accent marks, or an inserted glide.
			i = i + 1
		else
			-- non-matching character
			mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
				):format(spelling, j, ucj, syllab, i, uci, concat_result()))
			return nil
		end
	end
	if i <= #syll_chars or j <= #spelling_chars then
		-- left-over characters on one side or the other
		mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
			):format(
				spelling,
				j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
				syllab,
				i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
				concat_result()))
		return nil
	end
	return concat_result()
end

-- Build the object for one syllabification: the syllabified term itself plus
-- its list of syllables.
local function generate_syll_obj(term)
	local obj = {}
	obj.syllabification = term
	obj.hyph = split_syllabified_spelling(term)
	return obj
end

-- Return a truthy value if `word` (after lowercasing) contains a vowel from
-- `V` or the letter "y", else nil. Word should already be decomposed.
local function word_has_vowels(word)
	local lowered = ulower(word)
	local vowel_pos = rfind(lowered, V)
	if vowel_pos then
		return vowel_pos
	end
	return (lowered:find("y"))
end

-- Return true if any space- or hyphen-separated word of `term` contains a vowel
-- (as judged by word_has_vowels), false otherwise.
local function any_words_have_vowels(term)
	-- No `decompose` helper is defined in this module; plain NFD separates every
	-- diacritic, which leaves the base vowel letters for word_has_vowels().
	local words = rsplit(toNFD(term), "[ %-]")
	for _, word in ipairs(words) do
		-- Allow empty word; this occurs with prefixes and suffixes.
		if word_has_vowels(word) then
			return true
		end
	end
	return false
end

-- Decide, from a respelling, whether a rhyme should be generated and whether it
-- should be categorized.
--
-- Returns two values: `should_generate_rhyme` (truthy unless the last word is a
-- prefix, a macron-marked suffix, or has no vowels) and `should_generate_cat`
-- (true only for single-word terms).
local function should_generate_rhyme_from_respelling(term)
	-- No `decompose` helper is defined in this module; NFD makes the combining
	-- macron a separate character so it can be found below.
	local words = rsplit(toNFD(term), " +")
	local last_word = words[#words]
	local should_generate_cat = #words == 1
	-- NOTE(review): the original tested for `MACRON`, which is not defined in this
	-- module; `MAC` is the macron defined here. export.IPA() marks unstressed
	-- words with the breve (`BR`) instead, so confirm which mark is intended.
	local should_generate_rhyme =
		not last_word:find("%-$") and -- no if word is a prefix
		not (last_word:find("^%-") and last_word:find(MAC)) and -- no if word is a marked suffix
		word_has_vowels(last_word) -- no if word has no vowels (e.g. a single letter)
	return should_generate_rhyme, should_generate_cat
end

-- Decide, from a raw IPA string, whether a rhyme should be generated and whether
-- it should be categorized.
--
-- Returns two values: `should_generate_rhyme` (truthy when the IPA contains a
-- vowel as judged by word_has_vowels) and `should_generate_cat` (true when the
-- IPA contains no whitespace).
local function should_generate_rhyme_from_ipa(ipa)
	local should_generate_cat = not ipa:find("%s")
	-- No `decompose` helper is defined in this module; use plain NFD.
	local should_generate_rhyme = word_has_vowels(toNFD(ipa))
	return should_generate_rhyme, should_generate_cat
end

-- Dispatch on the kind of term object: raw-IPA objects are judged by their raw
-- phonemic (or, failing that, raw phonetic) string; all others by their respelling.
local function should_generate_rhyme_from_termobj(termobj)
	if not termobj.raw then
		return should_generate_rhyme_from_respelling(termobj.term)
	end
	local raw_ipa = termobj.raw_phonemic or termobj.raw_phonetic
	return should_generate_rhyme_from_ipa(raw_ipa)
end

-- Fill in the number of syllables for explicitly specified rhymes.
--
-- `rhymes` is a list of rhyme objects; `sylls` is a list of syllabification objects (with a `syllabification`
-- field); `parsed_respellings` is a list of objects each with a `pronuns` list. Returns a new list of shallow
-- copies of the rhyme objects whose `num_syl` is either the user-specified value, a list of syllable counts
-- derived from the syllabifications or the phonemic pronunciations, or nil if no count could be determined.
local function process_specified_rhymes(rhymes, sylls, parsed_respellings)
	local rhyme_ret = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false

		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it from
		-- the syllabification.
		if not num_syl then
			num_syl = {}
			for _, syll in ipairs(sylls) do
				if should_generate_rhyme_from_respelling(syll.syllabification) then
					local this_num_syl = 1 + ulen(rsub(syll.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		-- If that yielded nothing (without being vetoed), try to take it from the phonemic pronunciations.
		if not no_num_syl and not num_syl then
			-- `num_syl` is nil at this point; it must be a list again before counts are inserted into it.
			num_syl = {}
			for _, parsed in ipairs(parsed_respellings) do
				for _, pronun in ipairs(parsed.pronuns) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given), and rhyme
					-- isn't suppressed.
					if pronun.phonemic and not pronun.no_rhyme then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_ipa(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		local rhymeobj = m_table.shallowcopy(rhyme)
		rhymeobj.num_syl = num_syl
		table.insert(rhyme_ret, rhymeobj)
	end
	-- Hand the processed list back; without this the function's work would be discarded.
	return rhyme_ret
end

-- Parse a pronunciation modifier in `arg`, the argument portion in an inline modifier (after the prefix), which
-- specifies a pronunciation property such as rhyme, syllabification, homophones or audio. The argument can itself have
-- inline modifiers (text in angle brackets). The allowed inline modifiers are specified by `param_mods` (of the format
-- expected by `parse_inline_modifiers`); in addition to any modifiers specified there, the modifiers <q:...>,
-- <qq:...>, <a:...> and <aa:...> are always accepted; they are added to `param_mods` (which is modified in place)
-- with `store = "insert"`. `generate_obj` and `parse_err` are like in `parse_inline_modifiers` and specify
-- respectively a function to generate the object into which modifier properties are stored given the non-modifier
-- part of the argument, and a function to generate an error message (given the message). Normally, a comma-separated
-- list of pronunciation properties is accepted and parsed. If `no_split_on_comma` is given, only a single
-- pronunciation property is accepted. If `has_outer_container` is given, the list of pronunciation properties is
-- embedded in the `terms` property of an outer container. The return value is a list if neither `no_split_on_comma`
-- nor `has_outer_container` are given, otherwise a container object (which, in the case of `has_outer_container`,
-- will contain a list inside of it, in the `terms` property).
local function parse_pron_modifier(arg, parse_err, generate_obj, param_mods, no_split_on_comma, has_outer_container)
	if arg:find("<") then
		local insert = { store = "insert" }
		param_mods.q = insert
		param_mods.qq = insert
		param_mods.a = insert
		param_mods.aa = insert
		return require(put_module).parse_inline_modifiers(arg, {
			param_mods = param_mods,
			generate_obj = generate_obj,
			parse_err = parse_err,
			splitchar = not no_split_on_comma and "," or nil,
			outer_container = has_outer_container and {} or nil,
		})
	elseif no_split_on_comma then
		return generate_obj(arg)
	else
		local retval = {}
		-- No `split_on_comma` helper is defined in this module, so use the one exported by the parse
		-- utilities module.
		for _, term in ipairs(require(put_module).split_on_comma(arg)) do
			table.insert(retval, generate_obj(term))
		end
		if has_outer_container then
			retval = {
				terms = retval,
			}
		end
		return retval
	end
end

-- Parse a rhyme spec. Each rhyme becomes {rhyme = TERM}; the inline modifier `s` gives a comma-separated list of
-- syllable counts, stored as a list of numbers under `num_syl`.
local function parse_rhyme(arg, parse_err)
	-- Convert the value of the `s` modifier to a list of numbers, reporting non-numeric entries.
	local function parse_syllable_counts(spec, report_err)
		local counts = rsplit(spec, ",")
		for idx = 1, #counts do
			local raw = counts[idx]
			if not raw:find("^[0-9]+$") then
				report_err("Number of syllables '" .. raw .. "' should be numeric")
			end
			counts[idx] = tonumber(raw)
		end
		return counts
	end

	local function make_rhyme_obj(term)
		return {rhyme = term}
	end

	local param_mods = {}
	param_mods.s = {
		item_dest = "num_syl",
		convert = parse_syllable_counts,
	}

	return parse_pron_modifier(arg, parse_err, make_rhyme_obj, param_mods)
end

-- Parse a syllabification spec into an outer container whose `terms` are syllabification objects.
local function parse_syll(arg, parse_err)
	local param_mods = {}
	param_mods.cap = { overall = true }

	-- We need to pass in has_outer_container because the `cap` modifier is an overall property applying to the
	-- whole set of syllabifications rather than to a single one.
	local has_outer_container = "has outer container"
	return parse_pron_modifier(arg, parse_err, generate_syll_obj, param_mods, nil, has_outer_container)
end

-- Parse a homophone spec. Each homophone becomes {term = TERM}, with the inline modifiers below stored on it.
local function parse_homophone(arg, parse_err)
	local function make_homophone_obj(term)
		return {term = term}
	end

	local function split_genders(spec)
		return rsplit(spec, ",")
	end

	local param_mods = {
		-- The `t` modifier is stored under the "gloss" key of the parsed term.
		t = { item_dest = "gloss" },
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		-- The `g` modifier is stored under the "genders" key of the parsed term, as a list split on commas.
		g = { item_dest = "genders", convert = split_genders },
	}

	return parse_pron_modifier(arg, parse_err, make_homophone_obj, param_mods)
end

-- Build an audio object from "FILE#GLOSS" (whitespace around the "#" is ignored). When there is no "#", the
-- whole argument is the file name and the gloss is "Audio".
local function generate_audio_obj(arg)
	local file, gloss = arg:match("^(.-)%s*#%s*(.*)$")
	if file then
		return {file = file, gloss = gloss}
	end
	return {file = arg, gloss = "Audio"}
end

-- Parse an audio spec into a single audio object. No inline modifiers other than the always-accepted
-- qualifier ones are allowed.
local function parse_audio(arg, parse_err)
	-- Don't split on comma because some filenames have embedded commas not followed by a space (typically
	-- followed by an underscore).
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, parse_err, generate_audio_obj, {}, no_split_on_comma)
end

return export