-- Module:amf-utilities

local export = {}

-- Internal encoding: every letter that is not a single ASCII character
-- (a digraph or a non-ASCII letter) is represented by one capital letter
-- from [A-Z], so that the rest of the module can work with byte patterns.
export.encode = {
	["ɓ"] = "B",
	["cʼ"] = "C",
	["ɗ"] = "D",
	["ɛ"] = "E",
	["ɠ"] = "G",
	["ɲ"] = "N",
	["ɔ"] = "O",
	["sh"] = "S",
	["tʼ"] = "T",
	["ʔ"] = "Q",
}

-- Reverse mapping (internal capital letter -> original spelling),
-- obtained by inverting export.encode.
export.decode = {}
for spelling, code in pairs(export.encode) do
	export.decode[code] = spelling
end

-- Replacement used for each internal capital letter when building sort keys.
export.sortkey = {
	B = "b",
	C = "c",
	D = "d",
	E = "e",
	G = "g",
	N = "n",
	O = "o",
	S = "sh",
	T = "t",
	Q = "ʔ",
}

-- Parse a word into syllables using the internal encoding.
--
-- The word is NFD-normalized and lowercased, letters listed in
-- export.encode are replaced by their one-letter codes, and a syllable
-- boundary is inserted before every consonant+vowel pair (and between two
-- vowels when the second one carries the diacritic).
--
-- Returns a table whose array part holds the syllables with the diacritic
-- removed, plus extra fields, e.g. "Wucʼê" becomes:
--   { "wu", "Ce", accent=2, cap=true, falling=true }
--   accent:  index of the syllable that carried the diacritic, 0 if none
--   cap:     true if lowercasing changed the word
--   falling: true if the diacritic is "\204\130" (U+0302, circumflex)
--
-- Raises an error if the word contains more than one diacritic, or a ʼ
-- that is not preceded by t or c.
function export.syllabify(word)
	word = mw.ustring.toNFD(word)
	local lowered = word:ulower()
	local cap = lowered ~= word
	word = lowered

	-- "\204\129" is U+0301 (acute), "\204\130" is U+0302 (circumflex)
	local accent = word:match('\204[\129\130]')
	local _, count = word:gsub('\204[\129\130]', '')
	if count > 1 then
		error("More than one diacritic found.")
	end

	-- switch to the internal encoding: ejectives first, so that any
	-- remaining ʼ is known to be stray
	word = word:gsub("[tc]ʼ", export.encode)
	if word:match("ʼ") then
		error("Uncoupled ʼ found.")
	end
	word = word:gsub("sh", "S")
	-- every two-byte UTF-8 sequence is looked up in export.encode;
	-- sequences without an entry (e.g. the diacritics) are kept as they are
	word = word:gsub("[\194-\223][\128-\191]", export.encode)

	-- mark syllable boundaries with "."
	word = word:gsub("[bBcCdDgGhjklmnNpqrsStwxyzQ][aeiouEO]", ".%0")
		:gsub("([aeiouEO])([aeiouEO]\204[\129\130])", "%1.%2") -- e.g. tiá -> ti.á
		:gsub("^%.", "")
		:gsub("%.%.+", ".")

	local syllables = mw.text.split(word, ".", true)

	-- strip the diacritic and remember which syllable carried it; there is
	-- at most one, so the search can stop at the first hit
	local accented = 0
	for i, syl in ipairs(syllables) do
		syllables[i], count = syl:gsub("\204[\129\130]", "")
		if count == 1 then
			accented = i
			break
		end
	end

	syllables.accent = accented
	syllables.cap = cap
	syllables.falling = accent == "\204\130"
	return syllables
end

-- Inverse of export.syllabify: rebuild the spelled word from a syllable
-- table. Reads the fields `accent` (index of the syllable that gets the
-- diacritic), `cap` (capitalize the first character) and `falling`
-- (use "\204\130" instead of "\204\129" as the diacritic).
-- The input table is left unmodified. Returns the NFC-normalized word.
function export.combine(syllables)
	local accented_index = syllables.accent
	local capitalize = syllables.cap
	local mark = "\204\129"
	if syllables.falling then
		mark = "\204\130"
	end

	-- collect the pieces in a fresh table so the caller's table stays intact
	local pieces = {}
	for index, syllable in ipairs(syllables) do
		if index == accented_index then
			-- the diacritic goes on the first vowel of the syllable
			syllable = syllable:gsub("[aeiouEO]", "%0" .. mark, 1)
		end
		pieces[index] = syllable
	end

	-- leave the internal encoding
	local word = table.concat(pieces):gsub("[BCDEGNOSTQ]", export.decode)

	if capitalize then
		-- uppercase the first UTF-8 character (lead byte + continuation bytes)
		word = word:gsub("^[\1-\127\194-\255][\128-\191]*", string.uupper, 1)
	end

	return mw.ustring.toNFC(word)
end

-- Generates the sort key for categorization.
--   wucʼê --> wuce2'
--   (2: accent on second syllable)
--   (apostrophe at the end: falling tone)
--
-- text: the text to build a key for; it is split on single spaces and each
--       word is converted separately.
-- lang, sc: language and script codes; unless they are "amf" and "Latn"
--       the text is returned unchanged (and the call is tracked).
--
-- Words that export.syllabify rejects are kept as they are and tracked
-- instead of raising an error.
function export.makeSortKey(text, lang, sc)
	if lang ~= "amf" or sc ~= "Latn" then
		require("Module:debug").track("amf-utilities/sort")
		return text
	end

	-- keep the word list local so that it does not leak into the globals
	local words = mw.text.split(text, " ", true)
	for i, word in ipairs(words) do
		local success, syllables = pcall(export.syllabify, word)
		if success then
			words[i] = table.concat(syllables):gsub("[BCDEGNOSTQ]", export.sortkey)
				.. syllables.accent
				.. (syllables.falling and "'" or "")
		else
			require("Module:debug").track("amf-utilities/sort")
		end
	end
	return table.concat(words, " ")
end

-- Return the module table so that whatever loads this module receives the
-- tables and functions defined on it.
return export