-- Module:Tibt-sortkey

local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub local len = m_str_utils.len local match = m_str_utils.match local sub = m_str_utils.sub local toNFC = mw.ustring.toNFC local u = m_str_utils.char

-- Tibt: shared Tibetan-script helpers; makeSortKey calls its getWords,
-- getSyllables and findMainStack functions.
-- a, b, c, d: four marker characters built from code points U+E000..U+E003
-- (Unicode Private Use Area) via `u` (presumably code point -> character;
-- confirm in Module:string utilities). sortRadical inserts them after the
-- radical letter.
local Tibt = require("Module:Tibt-common") local a, b, c, d = u(0xE000), u(0xE001), u(0xE002), u(0xE003)

-- Ordered letter table that drives the whole sort key.
--   * An entry's 1-based position is the numeric weight sortValue assigns to it,
--     and baseConvert uses #letters + 1 as its radix, so the ORDER and the COUNT
--     of entries are both significant.
--   * [1] and [2] are two written forms of the same letter; mainStackParts and
--     makeSortKey rewrite form [2] into form [1] in places.
--   * [3] is only present on the entries that carry the ༹ mark; makeSortKey
--     substitutes it for [1]/[2], and sortRadical maps it back to [1].
-- NOTE(review): every [3] reads as an empty string here. An empty string used as
-- a gsub pattern/replacement (as sortRadical and makeSortKey do) looks wrong;
-- confirm these were not meant to be unique placeholder characters that were
-- lost in transit.
local letters = { {"ཀ", "ྐ"}, {"ཀ༹", "ྐ༹", ""}, {"ཫ", "ཫ"}, {"ཫ༹", "ཫ༹", ""}, {"ཁ", "ྑ"}, {"ཁ༹", "ྑ༹", ""}, {"ག", "ྒ"}, {"ག༹", "ྒ༹", ""}, {"ང", "ྔ"}, {"ང༹", "ྔ༹", ""}, {"ཅ", "ྕ"}, {"ཆ", "ྖ"}, {"ཇ", "ྗ"}, {"ཉ", "ྙ"}, {"ཉ༹", "ྙ༹", ""}, {"ཊ", "ྚ"}, {"ཊ༹", "ྚ༹", ""}, {"ཋ", "ྛ"}, {"ཋ༹", "ྛ༹", ""}, {"ཌ", "ྜ"}, {"ཌ༹", "ྜ༹", ""}, {"ཎ", "ྞ"}, {"ཎ༹", "ྞ༹", ""}, {"ཏ", "ྟ"}, {"ཏ༹", "ྟ༹", ""}, {"ཐ", "ྠ"}, {"ཐ༹", "ྠ༹", ""}, {"ད", "ྡ"}, {"ད༹", "ྡ༹", ""}, {"ན", "ྣ"}, {"ན༹", "ྣ༹", ""}, {"པ", "ྤ"}, {"པ༹", "ྤ༹", ""}, {"ཕ", "ྥ"}, {"ཕ༹", "ྥ༹", ""}, {"བ", "ྦ"}, {"བ༹", "ྦ༹", ""}, {"མ", "ྨ"}, {"མ༹", "ྨ༹", ""}, {"ཙ", "ྩ"}, {"ཚ", "ྪ"}, {"ཛ", "ྫ"}, {"ཝ", "ྭ"}, {"ཝ༹", "ྭ༹", ""}, {"ཞ", "ྮ"}, {"ཞ༹", "ྮ༹", ""}, {"ཟ", "ྯ"}, {"ཟ༹", "ྯ༹", ""}, {"འ", "ྰ"}, {"འ༹", "ྰ༹", ""}, {"ཡ", "ྱ"}, {"ཡ༹", "ྱ༹", ""}, {"ར", "ྲ"}, {"ར༹", "ྲ༹", ""}, {"ཬ", "ཬ"}, {"ཬ༹", "ཬ༹", ""}, {"ལ", "ླ"}, {"ལ༹", "ླ༹", ""}, {"ཤ", "ྴ"}, {"ཤ༹", "ྴ༹", ""}, {"ཥ", "ྵ"}, {"ཥ༹", "ྵ༹", ""}, {"ས", "ྶ"}, {"ས༹", "ྶ༹", ""}, {"ཧ", "ྷ"}, {"ཧ༹", "ྷ༹", ""}, {"ཨ", "ྸ"}, {"ཨ༹", "ྸ༹", ""}, {"ཱ", "ཱ"}, {"ི", "ི"}, {u(0xF73), "ཱི"}, {"ུ", "ུ"}, {u(0xF75), "ཱུ"}, {u(0xF76), "ྲྀ"}, {u(0xF77), "ྲཱྀ"}, {u(0xF78), "ླྀ"}, {u(0xF79), "ླཱྀ"}, {"ེ", "ེ"}, {"ཻ", "ཻ"}, {"ོ", "ོ"}, {"ཽ", "ཽ"} }

--- Split a syllable around its main stack.
-- `mainStack` is spliced into the patterns as-is (it is not escaped).
-- @param text       the whole syllable
-- @param mainStack  the main stack located inside `text`
-- @return the text captured before the main stack, then the text captured
--         after it (each is `text` unchanged if its pattern does not match)
local function findAffixes(text, mainStack)
	local before_pattern = "(.*)" .. mainStack .. ".*"
	local after_pattern = ".*" .. mainStack .. "(.*)"
	-- Plain assignment keeps only gsub's first result (the string, not the count).
	local leading = gsub(text, before_pattern, "%1")
	local trailing = gsub(text, after_pattern, "%1")
	return leading, trailing
end

--- Separate the vowel signs from a main stack.
-- @param mainStack  main stack of a syllable
-- @return the stack with every run of vowel-sign characters removed, then the
--         first such run found (or "" when there is none)
local function findVowel(mainStack)
	local vowel_run = "[ཱ-ཽྀ]+"
	local consonants = gsub(mainStack, vowel_run, "")
	local vowel = match(mainStack, vowel_run)
	if not vowel then
		vowel = ""
	end
	return consonants, vowel
end

--- Break a main stack into its superjoined letter, radical and subjoined rest.
-- @param mainStack  main stack (makeSortKey passes it with vowel signs removed)
-- @return superjoined ("" when none is recognised), radical (rewritten from
--         the second-column form in `letters` to the first-column form),
--         subjoined (whatever follows the radical)
local function mainStackParts(mainStack)
	-- A head letter only counts when followed by one of the listed subjoined
	-- letters; the candidates are tried in this fixed order and the first hit wins.
	local head_patterns = {
		"(ར)[ྐྒྔྗྙྟྡྣྦྨྩྫ]",
		"(ལ)[ྐྒྔྕྗྟྡྤྦྷ]",
		"(ས)[ྐྒྔྙྟྡྣྤྦྨྩ]",
	}
	local superjoined = ""
	for _, pattern in ipairs(head_patterns) do
		local head = match(mainStack, pattern)
		if head then
			superjoined = head
			break
		end
	end

	-- Certain three-letter shapes cancel the head-letter reading again.
	local cancelled = false
	if superjoined == "ར" then
		cancelled = match(mainStack, "ར[^ྐྒྨ]ྱ") ~= nil
	elseif superjoined == "ས" then
		cancelled = match(mainStack, "ས[^ྐྒྤྦྨ]ྱ") ~= nil
			or match(mainStack, "ས[^ྐྒྣྤྦྨ]ྲ") ~= nil
	end
	if cancelled then
		superjoined = ""
	end

	-- The radical is the single character right after the (possibly empty) head.
	local radical = match(mainStack, "^" .. superjoined .. "(.)")
	local subjoined = match(mainStack, "^" .. superjoined .. radical .. "(.*)")

	for i = 1, #letters do
		local entry = letters[i]
		radical = gsub(radical, entry[2], entry[1])
	end

	return superjoined, radical, subjoined
end

--- Build the radical portion of a syllable's sort key.
-- @param radical  the radical letter as returned by mainStackParts
-- @return the radical followed by one of the marker characters a, b, c or d
local function sortRadical(radical)
	-- Map third-column forms from `letters` back onto the first-column letter.
	for i = 1, #letters do
		local entry = letters[i]
		if entry[3] then
			radical = gsub(radical, entry[3], entry[1])
		end
	end

	-- The ༹ mark becomes marker b.
	radical = gsub(radical, "༹", b)

	-- ཫ and ཬ are filed under ཀ and ར: marker d when they carried ༹ (now b),
	-- marker c otherwise. The two marked cases must run before the bare ones.
	radical = gsub(radical, "ཫ" .. b, "ཀ" .. d)
	radical = gsub(radical, "ཬ" .. b, "ར" .. d)
	radical = gsub(radical, ".", { ["ཫ"] = "ཀ" .. c, ["ཬ"] = "ར" .. c })

	-- Anything that does not already end in a marker from b..d gets marker a.
	local keyed = gsub(radical, "([^" .. b .. "-" .. d .. "])$", "%1" .. a)
	return keyed
end

-- Convert into base-6724 to reduce length.
-- Consecutive pairs of values are packed into one character each: the code
-- point is 0x4E00 + first * (#letters + 1) + second.
-- An odd-length list is treated as if it had a leading 0.
-- @param value  sequence of weights (numbers, or strings Lua can coerce to
--               numbers); it is not modified
-- @return string with one character per pair of input values
local function baseConvert(value)
	local radix = #letters + 1

	-- Pad on a copy so the caller's table is left untouched.
	local digits = value
	if #value % 2 ~= 0 then
		digits = { 0 }
		for i = 1, #value do
			digits[i + 1] = value[i]
		end
	end

	local newValue = {}
	for i = 1, #digits / 2 do
		local high, low = digits[i * 2 - 1], digits[i * 2]
		newValue[i] = u(0x4E00 + (high * radix) + low)
	end
	return table.concat(newValue)
end

--- Turn one part of a syllable into a fixed-length list of numeric weights.
-- @param part      string holding the characters of this part
-- @param partType  "superjoined", "prefix" or "vowel" (1 slot),
--                  "subjoined" (9 slots) or "suffix" (6 slots)
-- @return sequence with exactly that many slots; each slot is the position in
--         `letters` of the character at that index, or 0 for padding and for
--         characters that were not found
local function sortValue(part, partType)
	local slot_counts = {
		superjoined = 1,
		prefix = 1,
		vowel = 1,
		subjoined = 9,
		suffix = 6,
	}
	local length = slot_counts[partType]

	local weights = {}
	for i = 1, length do
		-- Characters past the end of `part` are padding.
		local char = ""
		if len(part) >= i then
			char = sub(part, i, i)
		end

		-- No early exit: when several entries match, the last one wins.
		local weight = char
		for position, entry in ipairs(letters) do
			if char == entry[1] or char == entry[2] or char == entry[3] then
				weight = position
			end
		end

		-- Whatever is still empty or not purely made of 0-9 collapses to 0.
		if weight == "" or match(tostring(weight), "[^0-9]") then
			weight = 0
		end
		weights[i] = weight
	end
	return weights
end

--- Build a sort key for Tibetan-script text.
-- Each syllable contributes: the radical (see sortRadical), then the packed
-- weights of superjoined + prefix, of subjoined + vowel, and of the suffix.
-- @param text  string to build the key for
-- @param lang  language code, handed to Module:languages' getByCode; required
-- @param sc    optional script code; when omitted it is taken from
--              langObj:findBestScript(text):getCode()
-- @return `text` unchanged when the script code is not "Tibt"; otherwise the
--         NFC-normalised sort key
-- Raises an error when `lang` is missing.
function export.makeSortKey(text, lang, sc)
	if not lang then
		error("Language code required.")
	end
	local langObj = require("Module:languages").getByCode(lang)

	if not sc then
		-- getCode is a method and has to be called; without the parentheses
		-- this statement does not even parse.
		sc = langObj:findBestScript(text):getCode()
	end
	if sc ~= "Tibt" then
		return text
	end

	-- Parentheses keep only the first value returned by makeEntryName.
	text = (langObj:makeEntryName(text))

	-- Fold variant letter forms into their regular counterparts.
	local initSubs = { ["ཪ"] = "ར", ["ྺ"] = "ྭ", ["ྻ"] = "ྱ", ["ྼ"] = "ྲ" }
	text = gsub(text, ".", initSubs)

	local syllables = {}
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local mainStack = Tibt.findMainStack(syllable, lang)

			-- Entries that have a third column: replace both of their written
			-- forms with that third-column value.
			for _, letter in ipairs(letters) do
				if letter[3] then
					syllable = gsub(syllable, letter[1], letter[3])
					syllable = gsub(syllable, letter[2], letter[3])
					mainStack = gsub(mainStack, letter[1], letter[3])
					mainStack = gsub(mainStack, letter[2], letter[3])
				end
			end

			-- From entry 42 onward, rewrite second-column forms as first-column forms.
			-- NOTE(review): 42 is a hard-coded position in `letters`; confirm it
			-- still points at the intended first entry whenever the table changes.
			for i = 42, #letters do
				syllable = gsub(syllable, letters[i][2], letters[i][1])
				mainStack = gsub(mainStack, letters[i][2], letters[i][1])
			end

			local prefix, suffix = findAffixes(syllable, mainStack)
			local vowel
			mainStack, vowel = findVowel(mainStack)
			local superjoined, radical, subjoined = mainStackParts(mainStack)

			-- set1: superjoined + prefix (one slot each, joined to digit strings).
			local set1 = {
				table.concat(sortValue(superjoined, "superjoined")),
				table.concat(sortValue(prefix, "prefix")),
			}
			-- set2: nine subjoined slots followed by the vowel slot.
			local set2 = sortValue(subjoined, "subjoined")
			table.insert(set2, table.concat(sortValue(vowel, "vowel")))
			-- set3: six suffix slots.
			local set3 = sortValue(suffix, "suffix")

			table.insert(
				syllables,
				sortRadical(radical) .. baseConvert(set1) .. baseConvert(set2) .. baseConvert(set3)
			)
		end
	end

	text = table.concat(syllables)

	-- Drop the ་ / ༌ marks whenever one of them sits next to another character.
	if match(text, ".[་༌]") or match(text, "[་༌].") then
		text = gsub(text, "[་༌]", "")
	end

	return toNFC(text)
end

-- Language object for code "bo", used when tagging terms for display.
local bo = require("Module:languages").getByCode("bo")

--- Wrap `text` using Module:script utilities' tag_text for the `bo` language.
-- @param text  term to tag
-- @return whatever tag_text returns for (text, bo)
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, bo)
end

--- Template entry point: show the given terms in sort-key order.
-- @param frame  Scribunto frame; its positional arguments are the terms
-- @return wikitext with one "\n* " bullet per term, each term passed through
--         `tag`, ordered by makeSortKey(term, "bo", "Tibt")
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
	end

	-- Memoise so each term's key is computed once however often sort compares it.
	local makeSortKey = require("Module:fun").memoize(export.makeSortKey)
	table.sort(terms, function(term1, term2)
		return makeSortKey(term1, "bo", "Tibt") < makeSortKey(term2, "bo", "Tibt")
	end)

	-- Only the tagged term is displayed; the sort key itself is not part of the
	-- output, so nothing is recomputed here.
	for i, term in ipairs(terms) do
		terms[i] = "\n* " .. tag(term)
	end
	return table.concat(terms)
end

return export