-- Module:User:Theknightwho/string utilities

local byte = string.byte local char = string.char local concat = table.concat local find = string.find local format = string.format local gmatch = string.gmatch local gsub = string.gsub local lower = string.lower local match = string.match local select = select local sub = string.sub local tonumber = tonumber local tostring = tostring local type = type local ucodepoint = mw.ustring.codepoint local ufind = mw.ustring.find local ugmatch = mw.ustring.gmatch local ugsub = mw.ustring.gsub local ulower = mw.ustring.lower local umatch = mw.ustring.match local unpack = unpack local upper = string.upper local usub = mw.ustring.sub local uupper = mw.ustring.upper

-- Forward declarations for helpers that are assigned further down the file,
-- so that functions defined before them can still reach them as upvalues.
local codepoint, decode_entities, format_fun, get_indefinite_article, len
local pattern_escape, pattern_simplifier, php_trim, replacement_escape
local split, u

-- Prefix used in the error messages raised by the formatting functions.
local module_name = "string_utilities"

-- Table of public functions; returned at the end of the module.
local export = {}

--[==[Breaks a string up into an array holding one UTF-8 character per element.
Warning: to keep speed and memory use down, no validation of the input is done, so strings containing byte sequences which are not valid UTF-8 give undefined results.]==]
function export.explode_utf8(str)
	local chars = {}
	local count = 0
	-- One lead (or ASCII) byte followed by any number of continuation bytes.
	for utf8_char in gmatch(str, "[%z\1-\127\194-\244][\128-\191]*") do
		count = count + 1
		chars[count] = utf8_char
	end
	return chars
end

--[==[Escapes the magic characters used in patterns (Lua's version of regular expressions), i.e. `$ % ( ) * + - . ? [ ] ^`.
For example, `^[abc]$` becomes `%^%[abc%]%$`. This is necessary when constructing a pattern involving arbitrary text (e.g. from user input).]==]
function export.pattern_escape(str)
	-- "(" and ")" have to be escaped as well: left alone they would open and
	-- close a capture instead of matching literal parentheses.
	-- The outer parentheses discard gsub's second return value (the count).
	return (gsub(str, "[$%%()*+%-.?[%]^]", "%%%0"))
end
pattern_escape = export.pattern_escape

--[==[Counterpart of pattern_escape for the replacement string given to string.gsub and mw.ustring.gsub: every percent sign is doubled so that it is taken literally.]==]
function export.replacement_escape(str)
	-- Assigning to a single local drops gsub's substitution count.
	local escaped = gsub(str, "%%", "%%%%")
	return escaped
end
replacement_escape = export.replacement_escape

do
	-- Cache of previous results, keyed by the input pattern. A cached value is
	-- either the simplified pattern (a string) or false.
	local memo = {}

	local function memoize(input, result)
		memo[input] = result
		return result
	end

	-- If the preceding set has an optional trail, then it's not safe for it to
	-- be followed by an optional multibyte character.
	-- e.g. "[%s\194]\160?" cannot be followed by "[%c\194]?[\128-\159]?", since
	-- "[%s\194]" and "[\128-\159]"? should not be able to pair.
	local function no_optional_multibyte_char(output, n)
		-- Nothing has been emitted yet when n is 0, so there is no preceding
		-- set to clash with (and output[0] must not be indexed).
		return output and n > 0 and match(sub(output[n], -3), "[\128-\191]%]%?")
	end

	--[==[Tries to rewrite a pattern so that the byte-oriented string library can be used in place of mw.ustring.
	Returns the rewritten pattern (which may be the input unchanged), or `false` if the pattern uses a construct this function does not convert. Results are memoized per pattern.]==]
	function export.pattern_simplifier(pattern)
		local memoized = memo[pattern]
		if memoized ~= nil then
			return memoized
		end
		-- `start` is the first byte of `pattern` not yet copied into `output`;
		-- `output` is only created once a rewrite is actually needed.
		local input, pos, captures, start, n, output = pattern, 1, 0, 1, 0
		while true do
			local ch, nxt_pos
			-- Find the next item of interest: "%", "(", ".", "[" or a multibyte
			-- character. The two "()" position captures give its start and the
			-- position just after it.
			pos, ch, nxt_pos = match(pattern, "()([%%(.%[\194-\244][\128-\191]*)()", pos)
			if not ch then
				break
			end
			local nxt = sub(pattern, nxt_pos, nxt_pos)
			if ch == "%" then
				if nxt == "b" then
					if match(sub(pattern, pos + 2, pos + 3), "[\194-\244]") then
						return memoize(input, false)
					end
					pos = pos + 4
				elseif nxt == "c" then
					pos = pos + 2
					nxt = sub(pattern, pos, pos)
					if nxt == "*" or nxt == "+" or nxt == "-" then
						return memoize(input, false)
					end
					output = output or {}
					if nxt == "?" then
						if no_optional_multibyte_char(output, n) then
							return memoize(input, false)
						end
						n = n + 1
						output[n] = sub(pattern, start, pos - 3) .. "[%c\194]?[\128-\159]?"
						pos = pos + 1
					else
						n = n + 1
						output[n] = sub(pattern, start, pos - 3) .. "[%c\194][\128-\159]?"
					end
					start = pos
				elseif nxt == "Z" then
					pos = pos + 2
					nxt = sub(pattern, pos, pos)
					if nxt == "*" or nxt == "+" or nxt == "-" then -- "%Z*, %Z+ and %Z- are the same
						pos = pos + 1
					else
						output = output or {}
						if nxt == "?" then
							if no_optional_multibyte_char(output, n) then
								return memoize(input, false)
							end
							n = n + 1
							output[n] = sub(pattern, start, pos - 3) .. "[\1-\127\194-\244]?[\128-\191]*"
							pos = pos + 1
						else
							n = n + 1
							output[n] = sub(pattern, start, pos - 3) .. "[\1-\127\194-\244][\128-\191]*"
						end
						start = pos
					end
				elseif find("adlpsuwxACDLPSUWX", nxt, 1, true) then -- %z is the same
					return memoize(input, false)
				else
					pos = pos + 2
				end
			elseif ch == "(" then
				-- Position captures and patterns with more than 32 captures
				-- are not converted.
				if nxt == ")" or captures == 32 then
					return memoize(input, false)
				end
				captures = captures + 1
				pos = pos + 1
			elseif ch == "." then
				if nxt == "*" or nxt == "+" or nxt == "-" then -- .*, .+ and .- are the same
					pos = pos + 2
				else
					output = output or {}
					if nxt == "?" then
						if no_optional_multibyte_char(output, n) then
							return memoize(input, false)
						end
						n = n + 1
						output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244]?[\128-\191]*"
						pos = pos + 2
					else
						n = n + 1
						output[n] = sub(pattern, start, pos - 1) .. "[%z\1-\127\194-\244][\128-\191]*"
						pos = pos + 1
					end
					start = pos
				end
			elseif ch == "[" then
				if nxt == "^" then
					return memoize(input, false)
				end
				pos = pos + 1
				-- Every member of the set must have the same byte length as
				-- its first member.
				local first_member = match(pattern, "[%z\1-\127\194-\244][\128-\191]*", pos)
				if not first_member then
					-- Unterminated set at the very end of the pattern.
					return memoize(input, false)
				end
				local ch_len = #first_member
				while true do
					pos, ch, nxt_pos = match(pattern, "()([%z\1-\127\194-\244][\128-\191]*)()", pos)
					if ch == "%" then
						local nxt = sub(pattern, nxt_pos, nxt_pos)
						if nxt == "%" or nxt == "]" then
							if ch_len ~= 1 then
								return memoize(input, false)
							end
							pos = pos + 2
						elseif find("acdlpsuwxACDLPSUWXZ", nxt, 1, true) then
							return memoize(input, false)
						else
							pos = nxt_pos
						end
					elseif ch == "]" then
						pos = pos + 1
						break
					elseif not ch or #ch ~= ch_len then
						return memoize(input, false)
					else
						pos = nxt_pos
					end
				end
			elseif nxt == "+" then
				-- A two-byte character followed by "+": emit its first byte, any
				-- run of its two bytes, then its second byte.
				if #ch ~= 2 then
					return memoize(input, false)
				end
				output = output or {}
				n = n + 1
				output[n] = sub(pattern, start, pos) .. "[" .. ch .. "]*" .. sub(ch, 2, 2)
				pos = nxt_pos + 1
				start = pos
			elseif nxt == "?" or nxt == "*" or nxt == "-" then
				return memoize(input, false)
			else
				pos = nxt_pos
			end
		end
		-- Nothing was rewritten, so the pattern can be used as it is.
		if start == 1 then
			return memoize(input, pattern)
		end
		n = n + 1
		output[n] = sub(pattern, start)
		return memoize(input, concat(output))
	end
	pattern_simplifier = export.pattern_simplifier
end

-- Returns the number of UTF-8 characters in `str` (converted with tostring
-- first), computed as the number of bytes which are not continuation bytes.
function export.len(str)
	str = tostring(str)
	-- gsub's second return value is the number of bytes that matched.
	local _, count = gsub(str, "[^\128-\191]", "")
	return count
end
len = export.len

-- Returns the substring of `str` running from character `i` to character `j`
-- (inclusive), counting UTF-8 characters rather than bytes. As with
-- string.sub, `i` defaults to 1, `j` defaults to -1, and negative indices count
-- back from the end of the string. The input is not validated: byte sequences
-- which are not valid UTF-8 give undefined results.
function export.sub(str, i, j)
	str = tostring(str)
	i, j = i or 1, j or -1
	-- Pure ASCII: characters and bytes coincide.
	if not find(str, "[\128-\255]") then
		return sub(str, i, j)
	end
	-- The character count is only needed to resolve negative indices.
	if i < 0 or j < 0 then
		local _, total = gsub(str, "[^\128-\191]", "")
		if i < 0 then
			i = total + i + 1
		end
		if j < 0 then
			j = total + j + 1
		end
	end
	if i < 1 then
		i = 1
	end
	if j < i then
		return ""
	end
	-- Walk the characters once, noting the byte offsets at which character i
	-- begins and character j ends. If j is past the end, stop at the last byte.
	local n, first, last = 0, nil, #str
	for from, after in gmatch(str, "()[%z\1-\127\194-\244][\128-\191]*()") do
		n = n + 1
		if n == i then
			first = from
		end
		if n == j then
			last = after - 1
			break
		end
	end
	-- `first` is unset when the string has fewer than i characters.
	return first and sub(str, first, last) or ""
end

do
	-- Post-processes the results of string.find. If there was a match and `str`
	-- contains multibyte characters, the byte offsets loc1 and loc2 are
	-- converted into character offsets; any captures are passed through as-is.
	local function _find(str, loc1, loc2, ...)
		if loc1 and match(str, "[\194-\244]") then
			-- Use raw values of loc1 and loc2 to get loc1 and the length of the match.
			loc1, loc2 = len(sub(str, 1, loc1)), len(sub(str, loc1, loc2))
			-- Offset length with loc1 to get loc2.
			loc2 = loc1 + loc2 - 1
		end
		return loc1, loc2, ...
	end

	--[==[A version of find which uses string.find when possible, but otherwise uses mw.ustring.find.]==]
	function export.find(str, pattern, init, plain)
		str, init = tostring(str), init or 1
		-- A start offset other than 1 is a character offset, which only
		-- coincides with a byte offset if there are no multibyte characters.
		if init ~= 1 and match(str, "[\194-\244]") then
			return ufind(str, pattern, init, plain)
		elseif plain then
			return _find(str, find(str, pattern, init, true))
		end
		local simple = pattern_simplifier(pattern)
		if simple then
			return _find(str, find(str, simple, init))
		end
		return ufind(str, pattern, init)
	end
end

--[==[Matches `pattern` against `str`, going through string.match whenever that is possible and through mw.ustring.match otherwise.]==]
function export.match(str, pattern, init)
	str = tostring(str)
	init = init or 1
	-- With a start offset other than 1 and multibyte characters present, the
	-- pattern is not simplified and mw.ustring.match is used directly.
	local multibyte_offset = init ~= 1 and match(str, "[\194-\244]")
	local simple = not multibyte_offset and pattern_simplifier(pattern)
	if simple then
		return match(str, simple, init)
	end
	return umatch(str, pattern, init)
end

--[==[Iterates over the matches of `pattern` in `str`, going through string.gmatch whenever that is possible and through mw.ustring.gmatch otherwise.]==]
function export.gmatch(str, pattern)
	str = tostring(str)
	local iterate, target = ugmatch, pattern
	local simple = pattern_simplifier(pattern)
	if simple then
		-- The pattern could be simplified, so the string library will do.
		iterate, target = gmatch, simple
	end
	return iterate(str, target)
end

--[==[Performs a substitution on `str`, going through string.gsub whenever that is possible and through mw.ustring.gsub otherwise.]==]
function export.gsub(str, pattern, repl, n)
	str = tostring(str)
	local substitute, target = ugsub, pattern
	local simple = pattern_simplifier(pattern)
	if simple then
		-- The pattern could be simplified, so the string library will do.
		substitute, target = gsub, simple
	end
	return substitute(str, target, repl, n)
end

-- A gsub in which `pattern` is matched literally, and in which a string `repl`
-- is inserted literally too (a table or function `repl` is passed to gsub
-- unchanged). Returns both of gsub's results.
function export.plain_gsub(str, pattern, repl, n)
	local literal_pattern = pattern_escape(pattern)
	if type(repl) == "string" then
		repl = replacement_escape(repl)
	end
	return gsub(str, literal_pattern, repl, n)
end

do
	-- Raises the out-of-range error; `cp` is the already-formatted codepoint.
	local function err(cp)
		error("Codepoint " .. cp .. " is out of range: codepoints must be between 0x0 and 0x10FFFF.", 2)
	end

	-- Floor division. Produces a whole number, so that string.char is never
	-- handed a fractional byte value.
	local function div(a, b)
		return (a - a % b) / b
	end

	-- Converts a single codepoint into its UTF-8 encoding.
	local function utf8_char(cp)
		cp = tonumber(cp)
		if cp < 0 then
			-- Format the magnitude, so that -1 is reported as "-0x1".
			err("-0x" .. format("%X", -cp))
		elseif cp < 0x80 then
			return char(cp)
		elseif cp < 0x800 then
			return char(
				0xC0 + div(cp, 0x40),
				0x80 + cp % 0x40
			)
		elseif cp < 0x10000 then
			if cp >= 0xD800 and cp < 0xE000 then
				return "?" -- mw.ustring.char returns "?" for surrogates.
			end
			return char(
				0xE0 + div(cp, 0x1000),
				0x80 + div(cp, 0x40) % 0x40,
				0x80 + cp % 0x40
			)
		elseif cp < 0x110000 then
			return char(
				0xF0 + div(cp, 0x40000),
				0x80 + div(cp, 0x1000) % 0x40,
				0x80 + div(cp, 0x40) % 0x40,
				0x80 + cp % 0x40
			)
		end
		err("0x" .. format("%X", cp))
	end

	-- Returns the string made up of the UTF-8 encodings of the given
	-- codepoints. Raises an error for codepoints outside 0x0-0x10FFFF.
	function export.char(cp, ...)
		-- Fast path for a single codepoint, which avoids creating a table.
		if ... == nil then
			return utf8_char(cp)
		end
		local ret = {cp, ...}
		for i = 1, #ret do
			ret[i] = utf8_char(ret[i])
		end
		return concat(ret)
	end
	u = export.char
end

do
	-- Decodes one UTF-8 character from its bytes. Returns the codepoint and the
	-- number of bytes used. The constant subtracted in each branch removes the
	-- marker bits of the lead and continuation bytes in one step.
	local function get_codepoint(b1, b2, b3, b4)
		if b1 < 128 then
			return b1, 1
		elseif b1 < 224 then
			return 0x40 * b1 + b2 - 0x3080, 2
		elseif b1 < 240 then
			return 0x1000 * b1 + 0x40 * b2 + b3 - 0xE2080, 3
		end
		return 0x40000 * b1 + 0x1000 * b2 + 0x40 * b3 + b4 - 0x3C82080, 4
	end

	-- Returns the codepoints of characters `i` to `j` of `str` (`i` defaults
	-- to 1 and `j` to `i`). Nothing is returned for positions past the end of
	-- the string, in the same way as string.byte. Negative indices are handed
	-- over to mw.ustring.codepoint.
	function export.codepoint(str, i, j)
		i, j = i or 1, j or i or 1
		if i == 1 and j == 1 then
			local b1, b2, b3, b4 = byte(str, 1, 4)
			if not b1 then
				-- Empty string.
				return
			end
			return (get_codepoint(b1, b2, b3, b4))
		elseif i < 0 or j < 0 then
			return ucodepoint(str, i, j) -- FIXME
		end
		-- n counts characters, nb is the byte offset of the current character,
		-- and nr is the number of codepoints collected in ret.
		local n, nb, ret, nr = 0, 1, {}, 0
		while n < j do
			n = n + 1
			local b1, b2, b3, b4 = byte(str, nb, nb + 3)
			if not b1 then
				-- Ran off the end of the string.
				break
			end
			if n < i then
				-- Not in range yet: just step over the character.
				nb = nb + (b1 < 128 and 1 or b1 < 224 and 2 or b1 < 240 and 3 or 4)
			else
				nr = nr + 1
				local add
				ret[nr], add = get_codepoint(b1, b2, b3, b4)
				nb = nb + add
			end
		end
		return unpack(ret, 1, nr)
	end
	codepoint = export.codepoint
end

--[==[Converts `str` to lowercase, going through string.lower if the string has no multibyte characters and through mw.ustring.lower otherwise.]==]
function export.lower(str)
	str = tostring(str)
	if match(str, "[\194-\244]") then
		return ulower(str)
	end
	return lower(str)
end

--[==[Converts `str` to uppercase, going through string.upper if the string has no multibyte characters and through mw.ustring.upper otherwise.]==]
function export.upper(str)
	str = tostring(str)
	if match(str, "[\194-\244]") then
		return uupper(str)
	end
	return upper(str)
end

do
	-- Appends the captures in `...` to `parts` after index `count`, stopping at
	-- the first nil or false value. Returns the new element count.
	local function append_captures(parts, count, ...)
		local index, capture = 1, ...
		while capture do
			count = count + 1
			parts[count] = capture
			index = index + 1
			capture = select(index, ...)
		end
		return count
	end

	-- Handles the result (loc1, loc2, captures...) of one search starting at
	-- `start`. Returns the new element count and the position to search from
	-- next; returning no next position ends the split.
	local function step(str, str_len, parts, count, start, _sub, loc1, loc2, ...)
		if not loc1 or start > str_len then
			-- If no match, or there is but we're past the end of the string
			-- (which happens when the match is the empty string), then add
			-- the final chunk and return.
			parts[count + 1] = _sub(str, start)
			return
		end
		count = count + 1
		if loc2 >= start then
			-- Add chunk up to the current match.
			parts[count] = _sub(str, start, loc1 - 1)
			start = loc2 + 1
		else
			-- Special case: If we don't advance by any characters, then advance
			-- by one character; this avoids an infinite loop, and makes splitting
			-- by an empty string work the way mw.text.split does. If we reach
			-- the end of the string this way, return immediately, so we don't
			-- get a final empty string.
			parts[count] = _sub(str, start, start)
			if start == str_len then
				return append_captures(parts, count, ...)
			end
			start = start + 1
		end
		return append_captures(parts, count, ...), start
	end

	-- Splits `str` using the given substring and find functions, which must
	-- agree with each other (and with `str_len`) on what a position means.
	local function _split(str, pattern, str_len, _sub, _find, plain)
		local parts, count, start = {}, 0, 1
		while start do
			count, start = step(str, str_len, parts, count, start, _sub, _find(str, pattern, start, plain))
		end
		return parts
	end

	--[==[
	Reimplementation of mw.text.split that includes any capturing
	groups in the splitting pattern. This works like Python's re.split
	function, except that it has Lua's behavior when the split pattern
	is empty (i.e. advancing by one character at a time; Python returns the
	whole remainder of the string).
	`mode` is optional, and can take two values: if "string", then the
	pattern is interpreted as a string library pattern (instead of a
	ustring one); if "plain", then pattern matching facilities are turned
	off.
	]==]
	function export.split(str, pattern, mode)
		local plain = mode == "plain"
		if not (plain or mode == "string") then
			local simple = pattern_simplifier(pattern)
			if not simple then
				-- The pattern needs mw.ustring, so work in characters.
				return _split(str, pattern, len(str), usub, ufind)
			end
			pattern = simple
		end
		-- Work in bytes with the string library.
		return _split(str, pattern, #str, sub, find, plain or nil)
	end
	split = export.split
	export.capturing_split = split -- To be removed.
end

-- Iterator version of split: returns a function which yields the pieces of
-- `str` one per call, and nil once they are exhausted. The whole split is
-- carried out up front. `pattern` and `mode` are passed on to split.
function export.gsplit(str, pattern, mode)
	local t, i = split(str, pattern, mode), 0
	return function()
		i = i + 1
		return t[i]
	end
end

do
	-- Named entity data; only loaded once a named entity is encountered.
	local entities

	-- Decodes the digits of a numeric entity. `pattern` checks that `code` is
	-- made up of digits valid for `base` (decimal if `base` is omitted).
	-- Returns nil for invalid digits or values above 0x10FFFF.
	local function decode_numeric_entity(code, pattern, base)
		local cp = match(code, pattern) and tonumber(code, base)
		return cp and cp < 0x110000 and u(cp) or nil
	end

	-- gsub callback: returns the decoded entity, or nil (which leaves the text
	-- unchanged) if it is not recognised.
	local function decode_entity(hash, x, code)
		if hash == "#" then
			-- Without an "x" the digits must be decimal; an invalid decimal
			-- reference such as "&#ff;" must not be retried as hexadecimal.
			if x == "" then
				return decode_numeric_entity(code, "^%d+$")
			end
			return decode_numeric_entity(code, "^%x+$", 16)
		end
		entities = entities or mw.loadData("Module:data/entities")
		-- For named entities, `x` is simply the first letter of the name if
		-- that happens to be "x" or "X" (e.g. "&xi;").
		return entities[x .. code]
	end

	-- Replaces the HTML entities in `this` with the characters they represent.
	-- Non-ASCII characters aren't valid in proper HTML named entities, but
	-- MediaWiki uses them in some custom aliases which have also been included
	-- in Module:data/entities.
	function export.decode_entities(this)
		return (gsub(this, "&(#?)([xX]?)([%w\128-\244]+);", decode_entity))
	end
	decode_entities = export.decode_entities
end

do
	-- Turns a single character into a hexadecimal numeric entity.
	local function to_entity(ch)
		return format("&#x%X;", codepoint(ch))
	end

	-- Replaces every character of `str` which occurs in `charset` with a
	-- numeric HTML entity. Unless `raw` is set, entities already present in
	-- `str` are decoded first. An empty `charset` leaves the string alone,
	-- and an omitted one selects a default set.
	function export.encode_entities(str, charset, raw)
		if not raw then
			str = decode_entities(str)
		end
		if charset == "" then
			return str
		end
		if not charset then
			charset = "\"&'<>\194\160"
		elseif not match(charset, "[\194-\244]") then
			-- An ASCII-only charset can be used directly as a pattern set.
			local set = "[" .. pattern_escape(charset) .. "]"
			return (gsub(str, set, to_entity))
		end
		-- Otherwise, look each character of the string up in the charset.
		local function encode_if_listed(ch)
			if find(charset, ch, 1, true) then
				return to_entity(ch)
			end
			return nil
		end
		return (gsub(str, "[%z\1-\127\194-\244][\128-\191]*", encode_if_listed))
	end
end

do
	local data = mw.loadData("Module:string/nowiki/data")
	-- Lookup tables keyed by character:
	local absolute = data.absolute -- escaped wherever they occur
	local after_newline = data.after_newline -- escaped at the start of a line
	local after_magic_link = data.after_magic_link -- escaped after ISBN, PMID or RFC
	local uri_schemes = data.uri_schemes -- keyed by lowercase scheme name

	-- gsub callback for "word:"; the colon is escaped if the word is listed in
	-- uri_schemes.
	local function escape_uri(uri)
		return uri_schemes[lower(uri)] and uri .. "&#58;" or uri .. ":"
	end

	-- Returns `str` with the characters and sequences listed in the nowiki
	-- data replaced by numeric HTML entities.
	function export.nowiki(str)
		-- `head` is where the next search starts; `start` (set below) is the
		-- first byte which has not yet been copied into `ret`.
		local ret, head, n, first = {}, 1, 0, sub(str, 1, 1)
		-- The start of the string is treated like the start of a line.
		if after_newline[first] then
			n = n + 1
			ret[n] = "&#" .. byte(first) .. ";"
			head = 2
		elseif sub(str, 1, 4) == "----" then
			n = n + 1
			ret[n] = "&#45;---"
			head = 5
		end
		local start = head
		while true do
			-- The "()" position capture gives the location of the character.
			local loc, this = match(str, "()([\n\r\"&':;<=>IPR[%]_{|}])", head)
			if not loc then
				n = n + 1
				ret[n] = sub(str, start)
				return (gsub(concat(ret), "([%w_]+):", escape_uri))
			elseif absolute[this] then
				n = n + 1
				ret[n] = sub(str, start, loc - 1) .. "&#" .. byte(this) .. ";"
				head = loc + 1
				start = head
			elseif this == "\n" or this == "\r" then
				local nxt = loc + 1
				nxt = sub(str, nxt, nxt)
				if after_newline[nxt] then
					n = n + 1
					ret[n] = sub(str, start, loc) .. "&#" .. byte(nxt) .. ";"
					head = loc + 2
					start = head
				elseif sub(str, loc + 1, loc + 4) == "----" then
					n = n + 1
					ret[n] = sub(str, start, loc) .. "&#45;---"
					head = loc + 5
					start = head
				else
					-- Nothing to escape: resume just after this character,
					-- rather than finding it again from every earlier position.
					head = loc + 1
				end
			elseif this == "_" then
				local nxt = loc + 1
				if sub(str, nxt, nxt) == "_" then
					-- "__": keep the first underscore and escape the second.
					n = n + 1
					ret[n] = sub(str, start, loc) .. "&#95;"
					head = loc + 2
					start = head
				else
					head = loc + 1
				end
			elseif this == ":" and sub(str, loc + 1, loc + 2) == "//" then
				-- "://": escape the colon, so that no character is lost.
				n = n + 1
				ret[n] = sub(str, start, loc - 1) .. "&#58;//"
				head = loc + 3
				start = head
			elseif (
				this == "I" and sub(str, loc + 1, loc + 3) == "SBN" or
				this == "P" and sub(str, loc + 1, loc + 3) == "MID"
			) then
				local nxt = loc + 4
				nxt = sub(str, nxt, nxt)
				if after_magic_link[nxt] then
					n = n + 1
					ret[n] = sub(str, start, loc + 3) .. "&#" .. byte(nxt) .. ";"
					head = loc + 5
					start = head
				else
					head = loc + 1
				end
			elseif this == "R" and sub(str, loc + 1, loc + 2) == "FC" then
				local nxt = loc + 3
				nxt = sub(str, nxt, nxt)
				if after_magic_link[nxt] then
					n = n + 1
					ret[n] = sub(str, start, loc + 2) .. "&#" .. byte(nxt) .. ";"
					head = loc + 4
					start = head
				else
					head = loc + 1
				end
			else
				head = loc + 1
			end
		end
	end
end

-- Removes leading and trailing NUL, tab, newline, vertical tab, carriage
-- return and space characters from `str`. Returns "" if nothing else is left.
-- Note: PHP does not trim \f but does trim \0.
function export.php_trim(str)
	-- Position of the first character to keep.
	local first = find(str, "[^%z\t-\v\r ]")
	if not first then
		return ""
	end
	-- From there, greedily match up to the last character to keep.
	return match(str, ".*[^%z\t-\v\r ]", first) or ""
end
php_trim = export.php_trim

-- Normalizes a parameter key. Non-string keys are returned untouched. String
-- keys are trimmed with php_trim, and are then converted to a number if they
-- are written as an integer without leading zeros whose magnitude is at most
-- 9007199254740992 (2^53); any other key is returned as the trimmed string.
function export.scribunto_parameter_key(key)
	if type(key) ~= "string" then
		return key
	end
	key = php_trim(key)
	if key == "0" then
		return 0
	elseif not match(key, "^-?[1-9]%d*$") then
		return key
	end
	local num = tonumber(key)
	if num <= 9007199254740991 and num >= -9007199254740991 then
		return num
	elseif key == "9007199254740992" or key == "-9007199254740992" then
		-- The limit itself is compared as a string.
		return num
	end
	return key
end

-- Replaces each `{name}` in `str` with the string returned by `fun(name)`.
-- `{\op}` and `{\cl}` produce literal "{" and "}", and a name which begins
-- with a backslash is written with that backslash doubled.
-- Raises an error for an unrecognized escape sequence, for a name which `fun`
-- returns nil or false for, and for a value which is not a string.
-- `fun` is called exactly once per placeholder.
function export.format_fun(str, fun)
	return (gsub(str, "{(\\?)((\\?)[^{}]*)}", function(p1, name, p2)
		-- p1 and p2 each hold at most one backslash: exactly one backslash in
		-- total marks an escape sequence, while two mark an escaped name.
		if #p1 + #p2 == 1 then
			return name == "op" and "{" or
				name == "cl" and "}" or
				error(module_name .. ".format: unrecognized escape sequence '{\\" .. name .. "}'")
		end
		local value = fun(name)
		if not value then
			error(module_name .. ".format: \"" .. name .. "\" not found in table")
		elseif type(value) ~= "string" then
			error(module_name .. ".format: \"" .. name .. "\" is a " .. type(value) .. ", not a string")
		end
		return value
	end))
end
format_fun = export.format_fun

--[==[This function, unlike `string.format` and `mw.ustring.format`, takes just two parameters—a format string and a table—and replaces all instances of `{param_name}` in the format string with the table's entry for `param_name`. The opening and closing brace characters can be escaped with `{\op}` and `{\cl}`, respectively. A table entry beginning with a slash can be escaped by doubling the initial slash.

Examples
 * `format("{foo} fish, {bar} fish, {baz} fish, {quux} fish", {["foo"]="one", ["bar"]="two", ["baz"]="red", ["quux"]="blue"})`
 * produces: `"one fish, two fish, red fish, blue fish"`
 * `format("The set {\\op}1, 2, 3{\\cl} contains {\\\\hello} elements.", {["\\hello"]="three"})`
 * produces: `"The set {1, 2, 3} contains three elements."`
 * Note that the single and double backslashes should be entered as double and quadruple backslashes when quoted in a literal string.]==]
function export.format(str, tbl)
	return format_fun(str, function(key)
		return tbl[key]
	end)
end

do
	local function do_uclcfirst(str, case_func)
		-- Actual function to re-case of the first letter.
		local first_letter = match(str, "^[%z\1-\127\194-\244][\128-\191]*") or ""
		-- Skip the bytes of the original letter: the re-cased letter may have
		-- a different byte length.
		return case_func(first_letter) .. sub(str, #first_letter + 1)
	end

	local function uclcfirst(str, case_func)
		-- If there's a link at the beginning, re-case the first letter of the
		-- link text. This pattern matches both piped and unpiped links.
		-- If the link is not piped, the second capture (linktext) will be empty.
		local link, linktext, remainder = match(str, "^%[%[([^|%]]+)%|?(.-)%]%](.*)$")
		if link then
			-- Keep the link target as it is, and put the re-cased text in the
			-- piped part.
			return "[[" .. link .. "|" .. do_uclcfirst(linktext ~= "" and linktext or link, case_func) .. "]]" .. remainder
		end
		return do_uclcfirst(str, case_func)
	end

	-- Returns `str` with its first letter (or the first letter of the text of
	-- an initial link) in uppercase.
	function export.ucfirst(str)
		return uclcfirst(str, uupper)
	end

	-- Returns `str` with its first letter (or the first letter of the text of
	-- an initial link) in lowercase.
	function export.lcfirst(str)
		return uclcfirst(str, ulower)
	end

	local function capitalize(w)
		return uclcfirst(w, uupper)
	end

	--[==[Capitalize each word of a string. WARNING: May be broken in the presence of multiword links.]==]
	function export.capitalize(str)
		if type(str) == "table" then
			-- allow calling from a template
			str = str.args[1]
		end
		-- Capitalize multi-word that is separated by spaces
		-- by uppercasing the first letter of each part.
		-- I assume nobody will input all CAP text.
		return (ugsub(str, "%S+", capitalize))
	end
end

do
	local function word_ends_in_consonant_plus_y(str)
		-- FIXME, a subrule of rule #1 above says the -ies ending doesn't
		-- apply to proper nouns, hence "the Gettys", "the public Ivys".
		-- We should maybe consider applying this rule here; but it may not
		-- be important as this function is almost always called on common nouns
		-- (e.g. parts of speech, place types).
		return find(str, "[^aeiouAEIOU ]y$")
	end

	local function word_takes_es_plural(str)
		return find(str, "[sxz]$") or find(str, "[cs]h$")
	end

	-- Pluralizes plain text (no link handling).
	local function do_pluralize(str)
		if word_ends_in_consonant_plus_y(str) then
			-- avoid returning multiple values
			return (gsub(str, "y$", "ies"))
		elseif word_takes_es_plural(str) then
			return str .. "es"
		end
		return str .. "s"
	end

	--[==[
	Pluralize a word in a smart fashion, according to normal English rules.
	# If word ends in consonant + -y, replace the -y with -ies.
	# If the word ends in -s, -x, -z, -sh, -ch, add -es.
	# Otherwise, add -s.

	This handles links correctly:
	# If a piped link, change the second part appropriately.
	# If a non-piped link and rule #1 above applies, convert to a piped link with the second part containing the plural.
	# If a non-piped link and rules #2 or #3 above apply, add the plural outside the link.
	]==]
	function export.pluralize(str)
		if type(str) == "table" then
			-- allow calling from a template
			str = str.args[1]
		end
		-- Check for a link. This pattern matches both piped and unpiped links.
		-- If the link is not piped, the second capture (linktext) will be empty.
		local beginning, link, linktext = match(str, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
		if not link then
			return do_pluralize(str)
		elseif linktext ~= "" then
			return beginning .. "[[" .. link .. "|" .. do_pluralize(linktext) .. "]]"
		elseif word_ends_in_consonant_plus_y(link) then
			return beginning .. "[[" .. link .. "|" .. gsub(link, "y$", "ies") .. "]]"
		end
		return beginning .. "[[" .. link .. "]]" .. (word_takes_es_plural(link) and "es" or "s")
	end
end

do
	-- Singularizes plain text (no link handling).
	local function do_singularize(str)
		local sing = match(str, "^(.-)ies$")
		if sing then
			return sing .. "y"
		end
		-- Handle cases like "[[parish]]es"
		return match(str, "^(.-[sc]h%]*)es$") or
			-- Handle cases like "[[box]]es"
			match(str, "^(.-x%]*)es$") or
			-- Handle regular plurals
			match(str, "^(.-)s$") or
			-- Otherwise, return input
			str
	end

	-- Builds a link, using the simple form if the target and text are the same.
	local function collapse_link(link, linktext)
		if link == linktext then
			return "[[" .. link .. "]]"
		end
		return "[[" .. link .. "|" .. linktext .. "]]"
	end

	--[==[
	Singularize a word in a smart fashion, according to normal English rules. Works analogously to {pluralize}.

	NOTE: This doesn't always work as well as {pluralize}. Beware. It will mishandle cases like "passes" -> "passe", "eyries" -> "eyry".
	# If word ends in -ies, replace -ies with -y.
	# If the word ends in -xes, -shes, -ches, remove -es. [Does not affect -ses, cf. "houses", "impasses".]
	# Otherwise, remove -s.

	This handles links correctly:
	# If a piped link, change the second part appropriately. Collapse the link to a simple link if both parts end up the same.
	# If a non-piped link, singularize the link.
	# A link like "[[parish]]es" will be handled correctly because the code that checks for -shes etc. allows ] characters between the 'sh' etc. and final -es.
	]==]
	function export.singularize(str)
		if type(str) == "table" then
			-- allow calling from a template
			str = str.args[1]
		end
		-- Check for a link. This pattern matches both piped and unpiped links.
		-- If the link is not piped, the second capture (linktext) will be empty.
		local beginning, link, linktext = match(str, "^(.*)%[%[([^|%]]+)%|?(.-)%]%]$")
		if not link then
			return do_singularize(str)
		elseif linktext ~= "" then
			return beginning .. collapse_link(link, do_singularize(linktext))
		end
		return beginning .. "[[" .. do_singularize(link) .. "]]"
	end
end

do
	--[==[
	Return the appropriate indefinite article to prefix to `str`. Correctly handles links and capitalized text.
	Does not correctly handle words like [[union]], [[uniform]] and [[university]] that take "a" despite beginning with
	a 'u'. The returned article will have its first letter capitalized if `ucfirst` is specified, otherwise lowercase.
	]==]
	function export.get_indefinite_article(str, ucfirst)
		str = str or ""
		-- If there's a link at the beginning, examine the first letter of the
		-- link text. This pattern matches both piped and unpiped links.
		-- If the link is not piped, the second capture (linktext) will be empty.
		local link, linktext = match(str, "^%[%[([^|%]]+)%|?(.-)%]%]")
		local word = str
		if link then
			word = linktext ~= "" and linktext or link
		end
		if find(word, "^[AEIOUaeiou]") then
			return ucfirst and "An" or "an"
		end
		return ucfirst and "A" or "a"
	end
	get_indefinite_article = export.get_indefinite_article
end

--[==[
Return `text` with the appropriate indefinite article and a space placed in front of it. Correctly handles links and capitalized text.
Does not correctly handle words like union, uniform and university that take "a" despite beginning with a 'u'.
The article will have its first letter capitalized if `ucfirst` is specified, otherwise lowercase.
]==]
function export.add_indefinite_article(text, ucfirst)
	local article = get_indefinite_article(text, ucfirst)
	return article .. " " .. text
end

return export