-- Module:languages/doSubstitutions

local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint local gsub = m_str_utils.gsub local safe_require = require("Module:utilities").safe_require local u = m_str_utils.char

-- Replaces every match of the patterns listed in `remove_exceptions` with a
-- single placeholder character, so that a later pass over `text` cannot touch
-- the matched material.
--
-- text:              the string to protect.
-- sc:                script object; its toFixedNFD method is applied to each
--                    exception pattern before matching.
-- remove_exceptions: list of patterns, walked in order with ipairs.
--
-- Returns the rewritten text and the list of stashed matches. The n-th stashed
-- match is represented in the text by u(0x80000 + n), which is the mapping
-- undoRemoveExceptions reverses.
local function doRemoveExceptions(text, sc, remove_exceptions)
	local saved, count = {}, 0

	-- gsub replacement callback: remember the match, hand back its placeholder.
	local function stash(match)
		count = count + 1
		saved[count] = match
		return u(0x80000 + count)
	end

	for _, pattern in ipairs(remove_exceptions) do
		-- Only the first result of gsub (the new string) is kept.
		text = gsub(text, sc:toFixedNFD(pattern), stash)
	end

	return text, saved
end

-- Reverses doRemoveExceptions: swaps placeholder characters in `text` back for
-- the strings stored in `substitutes`.
--
-- The pattern matches byte 0xF2 (\242) followed by any run of bytes in the
-- range 0x80-0xBF (\128-\191). The code point of each match, minus 0x80000,
-- is used as the index into `substitutes`. If that lookup yields nil, gsub
-- leaves the matched text as it was.
--
-- Returns both results of string.gsub unchanged: the restored text and the
-- number of matches found.
local function undoRemoveExceptions(text, substitutes)
	local function restore(placeholder)
		return substitutes[codepoint(placeholder) - 0x80000]
	end

	return text:gsub("\242[\128-\191]*", restore)
end

-- Applies the substitutions described by `substitution_data` to `text`.
--
-- Parameters:
--   text              The string to transform.
--   self              Object whose getCode() result is passed to a dedicated
--                     substitution module (presumably the language object;
--                     confirm against callers).
--   sc                Script object. This function uses its getCode,
--                     toFixedNFD, toFixedNFC and fixDiscouragedSequences
--                     methods.
--   substitution_data Either a table of substitution rules or a string naming
--                     a module ("Module:" is prepended) that does the work.
--                     Any other type leaves the text untouched apart from the
--                     final normalization.
--   function_name     Name of the function to call in a dedicated module.
--                     "tr" receives the text as-is; any other function
--                     receives it after sc:toFixedNFD.
--   recursed          True on internal recursive calls; suppresses the final
--                     normalization step.
--
-- Returns text, fail, cats. `cats` starts as an empty table and `fail` as nil;
-- both are overwritten by whatever a recursive call or a dedicated module
-- returns.
--
-- Raises an error if `substitution_data` is a string and safe_require cannot
-- load the module it names.
local function doSubstitutions(text, self, sc, substitution_data, function_name, recursed)
	local fail, cats = nil, {}

	-- If there are language-specific substitutes given in the data module, use those.
	if type(substitution_data) == "table" then
		-- If a script is specified, run this function with the script-specific data before continuing.
		local sc_code = sc:getCode()
		if substitution_data[sc_code] then
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data[sc_code], function_name, true)
		-- Hant, Hans and Hani are usually treated the same, so add a special case to avoid having to specify each one separately.
		elseif sc_code:match("^Han") and substitution_data.Hani then
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data.Hani, function_name, true)
		-- Substitution data with key 1 in the outer table may be given as a fallback.
		elseif substitution_data[1] then
			text, fail, cats = doSubstitutions(text, self, sc, substitution_data[1], function_name, true)
		end

		-- Iterate over all strings in the "from" subtable, and gsub with the corresponding string in "to".
		-- We work with the NFD decomposed forms, as this simplifies many substitutions.
		if substitution_data.from then
			-- A missing "to" table is treated like a missing "to" entry: the match is deleted.
			local to = substitution_data.to
			for i, from in ipairs(substitution_data.from) do
				-- We normalize each loop, to ensure multi-stage substitutions work correctly.
				text = sc:toFixedNFD(text)
				-- NOTE(review): any choice between byte-wise and UTF-8-aware matching is presumably
				-- made inside the string utilities gsub; confirm in that module.
				text = gsub(text, sc:toFixedNFD(from), to and to[i] or "")
			end
		end

		if substitution_data.remove_diacritics then
			text = sc:toFixedNFD(text)
			-- Swap exceptions for placeholder characters so that the strip below cannot touch them.
			local substitutes
			if substitution_data.remove_exceptions then
				text, substitutes = doRemoveExceptions(text, sc, substitution_data.remove_exceptions)
			end
			-- Strip diacritics. remove_diacritics is used as the body of a character set.
			text = gsub(text, "[" .. substitution_data.remove_diacritics .. "]", "")
			-- Convert exceptions back.
			if substitution_data.remove_exceptions then
				text = undoRemoveExceptions(text, substitutes)
			end
		end
	elseif type(substitution_data) == "string" then
		-- If there is a dedicated function module, use that.
		local substitution_module = safe_require("Module:" .. substitution_data)
		if substitution_module then
			if function_name == "tr" then
				-- "tr" gets the text exactly as it was passed in.
				text, fail, cats = substitution_module[function_name](text, self:getCode(), sc:getCode())
			else
				text, fail, cats = substitution_module[function_name](sc:toFixedNFD(text), self:getCode(), sc:getCode())
			end
		else
			error("Substitution data '" .. substitution_data .. "' does not match an existing module.")
		end
	end

	-- Don't normalize to NFC if this is the inner loop or if a module returned nil.
	if recursed or not text then
		return text, fail, cats
	else
		-- Fix any discouraged sequences created during the substitution process, and normalize into the final form.
		text = sc:fixDiscouragedSequences(text)
		return sc:toFixedNFC(text), fail, cats
	end
end

-- The module's return value is a thin wrapper rather than doSubstitutions
-- itself. Recursive calls inside doSubstitutions use the local function
-- directly, so recursion never has to go back through require. The wrapper
-- forwards only five arguments, so `recursed` is always nil for outside
-- callers.
local function export(text, self, sc, substitution_data, function_name)
	return doSubstitutions(text, self, sc, substitution_data, function_name)
end

return export