-- Module:za-sortkey

-- Table of public functions; returned at the end of the module.
local export = {}

-- Helper from Module:string/char. Its results are concatenated into strings
-- and patterns below; presumably it maps a code point to that character
-- (TODO confirm against Module:string/char).
local u = require("Module:string/char")

-- Byte-level Lua pattern for a single UTF-8 sequence: one lead (or ASCII/NUL)
-- byte followed by any number of continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

-- Placeholder characters built from Private Use Area code points. They stand
-- in for digraphs/trigraphs and act as secondary ordering marks in the keys.
-- (The original statement also declared e, f and g, but supplied only four
-- values, so those three were always nil and were never referenced.)
local a, b, c, d = u(0xF000), u(0xF001), u(0xF002), u(0xF003)
local b2 = u(0xF100)
local g2, g3 = u(0xF200), u(0xF201)
local m2, m4 = u(0xF300), u(0xF301)
local n2, n4, n6, n7, n8 = u(0xF400), u(0xF401), u(0xF402), u(0xF403), u(0xF404)

-- Characters deleted from the decomposed sort key (inserted into a "[...]" set).
local remove_diacritics = "'" -- apostrophe

-- Single characters replaced in the first pass. Values ending in "!" come
-- from the non-Latin-alphabet letters; "!" is later moved to the end of the key.
local oneCharInit = {
	["z"] = "2",
	["ƨ"] = "2!",
	["j"] = "3",
	["з"] = "3!",
	["x"] = "4",
	["ч"] = "4!",
	["q"] = "5",
	["ƽ"] = "5!",
	["ƅ"] = "6!",
}

-- Two-letter clusters collapsed to one placeholder character each.
local twoCharsInit = {
	["by"] = b2,
	["gv"] = g2,
	["gy"] = g3,
	["mb"] = m2,
	["my"] = m4,
	["nd"] = n2,
	["ng"] = n4,
	["ŋv"] = n7,
	["ny"] = n8,
}

-- Three-letter clusters; applied before the two-letter table.
local threeCharsInit = {
	ngv = n6,
}

-- Letters turned into a tone number only at the end of the text or before a
-- character that is neither a tone digit nor a vowel.
local conditionalTones1 = {
	h = "6",
}

-- Consonants that get a tone number appended in those same positions.
local conditionalTones2 = {
	["m"] = "m1",
	["n"] = "n1",
	[n4] = n4 .. "1",
	["ŋ"] = "ŋ1",
	["k"] = "k7",
	["p"] = "p7",
	["t"] = "t7",
	["b"] = "b8",
	["d"] = "d8",
	["g"] = "g8",
}

-- Final single-character pass: placeholders and non-ASCII letters become a
-- base letter plus an ordering mark (and "!" where applicable).
local oneCharFinal = {
	["ə"] = "a" .. a .. "!",
	[b2] = "b" .. a,
	[g2] = "g" .. a,
	[g3] = "g" .. b,
	[m2] = "m" .. a,
	["ƃ"] = "m" .. a .. "!",
	[m4] = "m" .. b,
	[n2] = "n" .. a,
	["ƌ"] = "n" .. a .. "!",
	[n4] = "n" .. b,
	["ŋ"] = "n" .. b .. "!",
	[n6] = "n" .. c,
	[n7] = "n" .. c .. "!",
	[n8] = "n" .. d,
	["ɵ"] = "o" .. a .. "!",
	["ɯ"] = "w!",
}

-- Vowel digraphs rewritten just before the final single-character pass.
local twoCharsFinal = {
	ae = "a" .. a,
	oe = "o" .. a,
}

-- Returns the wikitext of the page named title_text, or "" when the name is
-- missing/empty, is not a valid title, or the page has no content.
local function get_page_content(title_text)
	if not title_text or title_text == "" then
		return ""
	end
	local title = mw.title.new(title_text)
	return title and title:getContent() or ""
end

-- Builds the sort key for a term.
--
-- text: the term to sort; it is also used as a page title to look up the
--       entry's wikitext.
-- lang, sc: accepted for the sort-key interface but not read by this function.
-- Returns the upper-cased, NFC-normalised sort key string.
function export.makeSortKey(text, lang, sc)
	local origText = text
	text = mw.ustring.lower(text)

	-- convert any consonant clusters to single characters, which is necessary
	-- for later regexes, and unconditional tone letters to numbers
	for from, to in pairs(threeCharsInit) do
		text = text:gsub(from, to)
	end
	for from, to in pairs(twoCharsInit) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharInit)

	-- conditionally convert any conditional tone letters to numbers
	-- (e.g. "h" can be a consonant or a tone letter)
	for from, to in pairs(conditionalTones1) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "([^1-8aeiouwəɵɯ])", to .. "%1")
	end

	-- conditionally add a tone number to any syllable-final consonants which
	-- do not have them
	for from, to in pairs(conditionalTones2) do
		text = text:gsub(from .. "$", to)
		text = mw.ustring.gsub(text, from .. "([^1-8aeiouwəɵɯ])", to .. "%1")
	end

	-- conditionally add a tone number to any syllable-final vowels which do
	-- not have them
	text = mw.ustring.gsub(text, "([^1-8%s%p])$", "%11")
	text = mw.ustring.gsub(text, "([1-8][" .. a .. "-" .. d .. "])1$", "%1")
	text = mw.ustring.gsub(text, "([aeiouwəɵɯ])([^1-8aeiouwəɵɯ][^1-8])", "%11%2")

	-- convert clusters and non-ASCII characters to final form, to achieve
	-- correct order
	for from, to in pairs(twoCharsFinal) do
		text = text:gsub(from, to)
	end
	text = text:gsub(UTF8_char, oneCharFinal)

	-- move "!" to the end and remove any duplicates, to ensure old orthography
	-- terms are sorted immediately after their new equivalents; one pass per
	-- "!" found in the text as it stood when the loop started
	for _ in text:gmatch("!") do
		text = text:gsub("(!)(.+)", "%2%1")
	end
	text = text:gsub("!+", "!")

	-- if tone 5 is substituted for tone 1 in pronunciation, also substitute in
	-- sortkey (i.e. as though "q" were written)
	--
	-- NOTE(review): every pattern literal in this section is the empty string.
	-- An empty pattern matches any input, so the first branch is always taken
	-- and the elseif branches are unreachable. The intended template patterns
	-- appear to be missing from this copy of the module; restore them before
	-- relying on this logic. The literals are left exactly as found.
	local page = get_page_content(origText)
	if mw.ustring.match(page, "") or mw.ustring.match(page, "") then
		text = mw.ustring.gsub(text, "1", "5")
	-- if the page has the old orthography template, then check the modern
	-- orthography page and substitute if present there (i.e. as though "ƽ"
	-- were written)
	elseif mw.ustring.match(page, "") then
		local parentPage = get_page_content(mw.ustring.match(page, ""))
		if mw.ustring.match(parentPage, "") or mw.ustring.match(parentPage, "") then
			text = mw.ustring.gsub(text, "1", "5" .. a)
		end
	elseif mw.ustring.match(page, "") then
		local parentPage = get_page_content(mw.ustring.match(page, ""))
		if mw.ustring.match(parentPage, "") or mw.ustring.match(parentPage, "") then
			text = mw.ustring.gsub(text, "1", "5" .. a)
		end
	end

	-- decompose, remove appropriate diacritics, then recompose again; the
	-- intermediate local drops gsub's second return value (the match count)
	local stripped = mw.ustring.gsub(mw.ustring.toNFD(text), "[" .. remove_diacritics .. "]", "")
	return mw.ustring.upper(mw.ustring.toNFC(stripped))
end

-- Language object for code "za", obtained once when the module loads.
local za = require("Module:languages").getByCode("za")

-- Hands text to tag_text from Module:script utilities together with the za
-- language object and returns whatever tag_text returns. The utilities module
-- is required at call time rather than at module load.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, za)
end

-- Display substitutions for single digits in a sort key (superscript forms).
-- Applied after showsubst2 by the display functions.
local showsubst1 = {
	["0"] = "⁰",
	["1"] = "¹",
	["2"] = "²",
	["3"] = "³",
	["4"] = "⁴",
	["5"] = "⁵",
	["6"] = "⁶",
	["7"] = "⁷",
	["8"] = "⁸",
}

-- Display substitutions for multi-character sequences in a sort key:
-- digit + "!" pairs, and upper-case letters followed by an ordering mark
-- (optionally with a trailing "!").
local showsubst2 = {
	["2!"] = "²ᵃ",
	["3!"] = "³ᵃ",
	["4!"] = "⁴ᵃ",
	["5!"] = "⁵ᵃ",
	["6!"] = "⁶ᵃ",
	["A" .. a] = "A₂",
	["A" .. a .. "!"] = "A₂ₐ",
	["B" .. a] = "B₂",
	["G" .. a] = "G₂",
	["G" .. b] = "G₃",
	["M" .. a] = "M₂",
	["M" .. a .. "!"] = "M₂ₐ",
	["M" .. b] = "M₃",
	["N" .. a] = "N₂",
	["N" .. a .. "!"] = "N₂ₐ",
	["N" .. b] = "N₃",
	["N" .. b .. "!"] = "N₃ₐ",
	["N" .. c] = "N₄",
	["N" .. c .. "!"] = "N₄ₐ",
	["N" .. d] = "N₅",
	["O" .. a] = "O₂",
	["O" .. a .. "!"] = "O₂ₐ",
	["W!"] = "Wₐ",
}

-- Produces one wikitext list item per positional argument of the frame.
--
-- frame: object whose args table holds the words as positional arguments.
-- Returns the concatenated list items as a single string.
function export.showSortkey(frame)
	local output = {}
	for _, word in ipairs(frame.args) do
		-- getCode must be called; without the parentheses the statement
		-- does not parse.
		local sc = za:findBestScript(word):getCode()
		local sortkey = export.makeSortKey(word, "za", sc)
		-- make the key readable: multi-character sequences first, then digits
		for from, to in pairs(showsubst2) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		for from, to in pairs(showsubst1) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		-- NOTE(review): the prettified sortkey is computed but never placed in
		-- the output string; presumably the markup that displayed it is
		-- missing from this copy. The literal is left exactly as found.
		local example = "\n* \n: " .. tag(word)
		output[#output + 1] = example
	end
	return table.concat(output)
end

-- Sorts the frame's positional arguments by their sort keys and returns them
-- as wikitext list items.
--
-- frame: object whose args table holds the terms as positional arguments.
-- Returns the concatenated list items as a single string.
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
	end

	-- the comparator goes through Module:fun's memoize wrapper around
	-- makeSortKey, so it is not handed the raw function directly
	local makeSortKey = require("Module:fun").memoize(export.makeSortKey)
	local function comp(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end
	table.sort(terms, comp)

	-- terms is a plain sequence, so walk it in index order; each entry is
	-- overwritten in place with its rendered list item
	for i, term in ipairs(terms) do
		-- getCode must be called; without the parentheses the statement
		-- does not parse.
		local sc = za:findBestScript(term):getCode()
		local sortkey = export.makeSortKey(term, "za", sc)
		for from, to in pairs(showsubst2) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		for from, to in pairs(showsubst1) do
			sortkey = mw.ustring.gsub(sortkey, from, to)
		end
		-- NOTE(review): the prettified sortkey is computed but never placed in
		-- the output string; presumably the markup that displayed it is
		-- missing from this copy. The literal is left exactly as found.
		terms[i] = "\n* " .. tag(term) .. " "
	end
	return table.concat(terms)
end

return export