-- Module:Tibt-common

local export = {}

local m_str_utils = require("Module:string utilities")

-- Cache the string-utility functions (and mw.ustring.toNFC) in locals, one
-- per line, so the hot paths below avoid repeated table lookups.
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

-- Finds the main stack of a given syllable, which allows all other components
-- to be determined (and is the basis for sorting and transliteration).
-- Currently defaults to Classical Tibetan, but if a ruleset for a specific
-- language exists, it will use that instead. Once the main stack has been
-- located, the process of sorting and transliteration is the same. Because of
-- this, Module:Tibt-sortkey and Module:Tibt-translit only need to be pointed
-- at this common function.
-- Uses a (somewhat expanded) implementation of the algorithm found in
-- "Algorithmic description of the decomposition and checking of a Classical
-- Tibetan syllable" by Roux, Hildt & Drupchen:
-- https://escholarship.org/uc/item/70z8069f
--
-- Parameters:
--   text     - a single syllable in Tibetan script.
--   langCode - language code; "Module:<langCode>-common" is tried first.
-- Returns the main stack; when the syllable is ambiguous, also returns a
-- second value (true). Raises an error on an invalid syllable.
function export.findMainStack(text, langCode)
	-- If a language-specific module exists, use the ruleset in that. If not,
	-- fall back on the Tibetan module, Module:bo-common.
	-- (Fix: the original wrapped require in an anonymous function that
	-- assigned to a global; pcall can call require directly.)
	local langModuleCheck, langModule = pcall(require, "Module:" .. langCode .. "-common")
	if not langModuleCheck then
		langModule = require("Module:bo-common")
	end
	local sc = require("Module:scripts").getByCode("Tibt")
	text = sc:fixDiscouragedSequences(text)
	text = sc:toFixedNFC(text)
	-- origText keeps the normalized input for error messages and for the
	-- halanta check below; mainStack intentionally starts out nil.
	local origText, mainStack = text
	-- If halantas are present, the input must be modified so as to treat the
	-- parent consonant + any that follow as a pseudo-stack (subjoined forms)
	-- before being processed by the rules. The locations are then stored, so
	-- that the pseudo-stack can be converted back again if it is found to be
	-- the main stack.
	local halantaSubs, halantas = {}, {}
	if match(text, "྄") and match(text, "[^྄]$") then
		halantaSubs = {
			{"྄ཀ", "ྐ"}, {"྄ཁ", "ྑ"}, {"྄ག", "ྒ"}, {"྄ང", "ྔ"},
			{"྄ཅ", "ྕ"}, {"྄ཆ", "ྖ"}, {"྄ཇ", "ྗ"}, {"྄ཉ", "ྙ"},
			{"྄ཊ", "ྚ"}, {"྄ཋ", "ྛ"}, {"྄ཌ", "ྜ"}, {"྄ཎ", "ྞ"},
			{"྄ཏ", "ྟ"}, {"྄ཐ", "ྠ"}, {"྄ད", "ྡ"}, {"྄ན", "ྣ"},
			{"྄པ", "ྤ"}, {"྄ཕ", "ྥ"}, {"྄བ", "ྦ"}, {"྄མ", "ྨ"},
			{"྄ཙ", "ྩ"}, {"྄ཚ", "ྪ"}, {"྄ཛ", "ྫ"}, {"྄ཝ", "ྭ"},
			{"྄ཞ", "ྮ"}, {"྄ཟ", "ྯ"}, {"྄འ", "ྰ"}, {"྄ཡ", "ྱ"},
			{"྄ར", "ྲ"}, {"྄ལ", "ླ"}, {"྄ཤ", "ྴ"}, {"྄ཥ", "ྵ"},
			{"྄ས", "ྶ"}, {"྄ཧ", "ྷ"}, {"྄ཨ", "ྸ"}, {"྄ཪ", "ྼ"}
		}
		-- Forward map: halanta + consonant -> subjoined consonant.
		local convHalantas = {}
		for _, halantaSub in pairs(halantaSubs) do
			convHalantas[halantaSub[1]] = halantaSub[2]
		end
		for halanta in gmatch(text, "྄.") do
			-- U+F000 (private use) temporarily marks the substitution site so
			-- that its position can be recorded in `halantas`.
			-- (Fix: `halantaSub` leaked as a global in the original.)
			local halantaSub = u(0xF000) .. (gsub(halanta, ".*", convHalantas))
			text = gsub(text, halanta, halantaSub, 1)
			table.insert(halantas, find(text, u(0xF000)))
			text = gsub(text, u(0xF000), "")
		end
		halantas = require("Module:table").compressSparseArray(halantas)
	end
	local function err()
		return error("Invalid syllable " .. toNFC(origText) .. ".")
	end
	text = langModule.preconvert(text)
	for _, check in pairs(langModule.mainStackChecks(text)) do
		if check then
			mainStack = check
			if match(origText, "྄") then
				-- Reverse map: subjoined consonant -> halanta + consonant, to
				-- restore any pseudo-stack inside the main stack.
				local convHalantas = {}
				for _, halantaSub in pairs(halantaSubs) do
					convHalantas[halantaSub[2]] = halantaSub[1]
				end
				local offset = find(text, mainStack)
				for i, halanta in ipairs(halantas) do
					mainStack = gsub(mainStack, sub(mainStack, (halanta - offset) + i, (halanta - offset) + i), convHalantas, 1)
				end
			end
			return mainStack
		end
	end
	-- If ambiguous, return the most likely stack, along with a second value
	-- (true) so that this can be taken into account.
	for syllable, stack in pairs(langModule.ambiguousSyllables) do
		if match(text, "^" .. syllable .. "$") then
			return langModule.postconvert(stack), true
		end
	end
	-- Fix: the original did `return err`, handing the error-raising function
	-- back to the caller instead of raising "Invalid syllable ...".
	return err()
end

-- Returns an iterator over the words in `text`: maximal runs of characters
-- drawn from the Tibetan-script class below (which, unlike getSyllables,
-- includes the "་-༒" range, so separator/punctuation marks stay inside a word).
function export.getWords(text)
	local wordClass = "[ༀ་-༒" .. u(0xF35) .. u(0xF37) .. u(0xF39) .. "-ྼ]+"
	return gmatch(text, wordClass)
end

-- Returns an iterator over the syllables in `text`: like getWords, but the
-- character class omits the "་-༒" range, so runs break at those marks
-- (e.g. at each tsheg).
function export.getSyllables(text)
	local syllableClass = "[ༀ" .. u(0xF35) .. u(0xF37) .. u(0xF39) .. "-ྼ]+"
	return gmatch(text, syllableClass)
end

-- Hand the module table back to require().
return export