Module:ar-nominals

-- Author: Benwing, based on early version by CodeCat.

-- FIXME: Nouns/adjectives to create to exemplify complex declensions: -- riḍan (رِضًا or رِضًى) --

local m_utilities = require("Module:utilities") local m_links = require("Module:links") local ar_utilities = require("Module:ar-utilities")

local lang = require("Module:languages").getByCode("ar") local u = require("Module:string/char") local rfind = mw.ustring.find local rsubn = mw.ustring.gsub local rmatch = mw.ustring.match local rsplit = mw.text.split

-- This is used in place of a transliteration when no manual -- translit is specified and we're unable to automatically generate -- one (typically because some vowel diacritics are missing). local BOGUS_CHAR = u(0xFFFD)

-- hamza variants local HAMZA           = u(0x0621) -- hamza on the line (stand-alone hamza) = ء local HAMZA_ON_ALIF   = u(0x0623) local HAMZA_ON_W      = u(0x0624) local HAMZA_UNDER_ALIF = u(0x0625) local HAMZA_ON_Y      = u(0x0626) local HAMZA_ANY = "[" .. HAMZA .. HAMZA_ON_ALIF .. HAMZA_UNDER_ALIF .. HAMZA_ON_W .. HAMZA_ON_Y .. "]" local HAMZA_PH = u(0xFFF0) -- hamza placeholder

-- various letters local ALIF  = u(0x0627) -- ʾalif = ا local AMAQ  = u(0x0649) -- ʾalif maqṣūra = ى local AMAD  = u(0x0622) -- ʾalif madda = آ local TAM   = u(0x0629) -- tāʾ marbūṭa = ة local T     = u(0x062A) -- tāʾ = ت local HYPHEN = u(0x0640) local N     = u(0x0646) -- nūn = ن local W     = u(0x0648) -- wāw = و local Y     = u(0x064A) -- yā = ي

-- diacritics local A = u(0x064E) -- fatḥa local AN = u(0x064B) -- fatḥatān (fatḥa tanwīn) local U = u(0x064F) -- ḍamma local UN = u(0x064C) -- ḍammatān (ḍamma tanwīn) local I = u(0x0650) -- kasra local IN = u(0x064D) -- kasratān (kasra tanwīn) local SK = u(0x0652) -- sukūn = no vowel local SH = u(0x0651) -- šadda = gemination of consonants local DAGGER_ALIF = u(0x0670) local DIACRITIC_ANY_BUT_SH = "[" .. A .. I .. U .. AN .. IN .. UN .. SK .. DAGGER_ALIF .. "]"

-- common combinations local NA   = N .. A local NI   = N .. I local AH   = A .. TAM local AT   = A .. T local AA   = A .. ALIF local AAMAQ = A .. AMAQ local AAH  = AA .. TAM local AAT  = AA .. T local II   = I .. Y local IIN  = II .. N local IINA = II .. NA local IY	= II local UU   = U .. W local UUN  = UU .. N local UUNA = UU .. NA local AY   = A .. Y local AW   = A .. W local AYSK = AY .. SK local AWSK = AW .. SK local AAN  = AA .. N local AANI = AA .. NI local AYN  = AYSK .. N local AYNI = AYSK .. NI local AWN  = AWSK .. N local AWNA = AWSK .. NA local AYNA = AYSK .. NA local AYAAT = AY .. AAT local UNU = "[" .. UN .. U .. "]"

-- optional diacritics/letters local AOPT = A .. "?" local AOPTA = A .. "?" .. ALIF local IOPT = I .. "?" local UOPT = U .. "?" local UNOPT = UN .. "?" local UNUOPT = UNU .. "?" local SKOPT = SK .. "?"

-- lists of consonants -- exclude tāʾ marbūṭa because we don't want it treated as a consonant -- in patterns like أَفْعَل local consonants_needing_vowels_no_tam = "بتثجحخدذرزسشصضطظعغفقكلمنهپچڤگڨڧأإؤئء" -- consonants on the right side; includes alif madda local rconsonants_no_tam = consonants_needing_vowels_no_tam .. "ويآ" -- consonants on the left side; does not include alif madda local lconsonants_no_tam = consonants_needing_vowels_no_tam .. "وي" local CONS = "[" .. lconsonants_no_tam .. "]" local CONSPAR = "([" .. lconsonants_no_tam .. "])"

local LRM = u(0x200E) --left-to-right mark

-- First syllable or so of elative/color-defect adjective local ELCD_START = "^" .. HAMZA_ON_ALIF .. AOPT .. CONSPAR

local export = {}

-- Utility functions

function ine(x) -- If Not Empty if x == nil then return nil elseif rfind(x, '^".*"$') then local ret = rmatch(x, '^"(.*)"$') return ret elseif rfind(x, "^'.*'$") then local ret = rmatch(x, "^'(.*)'$") return ret elseif x == "" then return nil else return x	end end

-- Compare two items, recursively comparing arrays. -- FIXME, doesn't work for tables that aren't arrays. function equals(x, y)	if type(x) == "table" and type(y) == "table" then if #x ~= #y then return false end for key, value in ipairs(x) do			if not equals(value, y[key]) then return false end end return true end return x == y end

-- true if array contains item function contains(tab, item) for _, value in pairs(tab) do		if equals(value, item) then return true end end return false end

-- append to array if element not already present function insert_if_not(tab, item) if not contains(tab, item) then table.insert(tab, item) end end

-- version of rsubn that discards all but the first return value function rsub(term, foo, bar) local retval = rsubn(term, foo, bar) return retval end

-- version of rsub that asserts that a match occurred function assert_rsub(term, foo, bar) local retval, numsub = rsubn(term, foo, bar) assert(numsub > 0) return retval end

function make_link(arabic) --return m_links.full_link(nil, arabic, lang, nil, "term", nil, {tr = "-"}, false) return m_links.full_link({lang = lang, alt = arabic}, "term") end

function track(page) require("Module:debug").track("ar-nominals/" .. page) return true end

- -- Functions for building inflections -

-- Functions that do the actual inflecting by creating the forms of a basic term. local inflections = {}

local max_mods = 9 -- maximum number of modifiers local mod_list = {"mod"} -- list of "mod", "mod2", "mod3", ... for i=2,max_mods do table.insert(mod_list, "mod" .. i) end

-- Create and return the 'data' structure that will hold all of the -- generated declensional forms, as well as other ancillary information -- such as the possible numbers, genders and cases the the actual numbers -- and states to store (in 'data.numbers' and 'data.states' respectively). function init_data -- FORMS contains a table of forms for each inflectional category, -- e.g. "nom_sg_ind" for nouns or "nom_m_sg_ind" for adjectives. The value -- of an entry is an array of alternatives (e.g. different plurals), where -- each alternative is either a string of the form "ARABIC" or	-- "ARABIC/TRANSLIT", or an array of such strings (this is used for	-- alternative spellings involving different hamza seats,	-- e.g. مُبْتَدَؤُون or مُبْتَدَأُون). Alternative hamza spellings are separated -- in display by an "inner separator" (/), while alternatives on -- the level of different plurals are separated by an "outer separator". return {forms = {}, title = nil, categories = {}, allgenders = {"m", "f"}, allstates = {"ind", "def", "con"}, allnumbers = {"sg", "du", "pl"}, states = {}, -- initialized later numbers = {}, -- initialized later engnumbers = {sg="singular", du="dual", pl="plural", coll="collective", sing="singulative", pauc="paucal"}, engnumberscap = {sg="Singular", du="Dual", pl="Plural", coll="Collective", sing="Singulative", pauc="Paucal (3-10)"}, allcases = {"nom", "acc", "gen", "inf"}, allcases_with_lemma = {"nom", "acc", "gen", "inf", "lemma"}, -- index into endings array indicating correct ending for given -- combination of state and case statecases = { ind = {nom = 1, acc = 2, gen = 3, inf = 10, lemma = 13}, def = {nom = 4, acc = 5, gen = 6, inf = 11, lemma = 14}, -- used for a definite adjective modifying a construct-state noun defcon = {nom = 4, acc = 5, gen = 6, inf = 11, lemma = 14}, con = {nom = 7, acc = 8, gen = 9, inf = 12, lemma = 15}, },	} end

-- Initialize and return ARGS, ORIGARGS and DATA (see init_data). -- ARGS is a table of user-supplied arguments, massaged from the original -- arguments by converting empty-string arguments to nil and appending -- translit arguments to their base arguments with a separating slash. -- ORIGARGS is the original table of arguments. function init(origargs) -- Massage arguments by converting empty arguments to nil, and -- "" or '' arguments to empty. local args = {} for k, v in pairs(origargs) do		args[k] = ine(v) end

-- Further massage arguments by appending translit arguments to the -- corresponding base arguments, with a slash separator, as is expected -- in the rest of the code. --	-- FIXME: We should consider separating translit and base arguments by the -- separators ;, | (used in overrides; see handle_lemma_and_overrides) -- and matching up individual parts, to allow separate translit arguments -- to be specified for overrides. But maybe not; the point of allowing -- separate translit arguments is for compatibility with headword -- templates such as "ar-noun" and "ar-adj", and those templates don't	-- handle override arguments.

local function dotr(arg, argtr) if not args[arg] then error("Argument '" .. argtr .."' specified but not corresponding base argument '" .. arg .. "'") end args[arg] = args[arg] .. "/" .. args[argtr] end

-- By convention, corresponding to arg 1 is tr; corresponding to -- head2, head3, ... is tr2, tr3, ...; corresponding to -- modhead2, modhead3, ... is modtr2, modtr3, ...; corresponding to -- modNhead2, modNhead3, ... is modNtr2, modNtr3, ..; corresponding to -- all other arguments FOO, FOO2, ... is FOOtr, FOO2tr, ... for k, v in pairs(args) do		if k == "tr" then dotr(1, "tr") elseif rfind(k, "tr[0-9]+$") then dotr(assert_rsub(k, "tr([0-9]+)$", "head%1"), k)		elseif rfind(k, "tr$") then dotr(assert_rsub(k, "tr$", ""), k)		end end

-- Construct data. local data = init_data

return args, origargs, data end

-- Parse the user-specified state spec and other related arguments. The -- user can specify, using idafaN=, how modifiers are related to previous -- words. The user can also manually specify which states are to appear; -- whether to omit the definite article in the definite state; and -- how/whether to restrict modifiers to a particular state, case or number. -- Normally the modN_* parameters and basestate= do not need to be set -- directly; instead, use idafaN=. It may be necessary to explicitly -- specify state= in the presence of proper nouns or definite-only -- adjectival expressions. NOTE: At the time this function is called, -- data.numbers has not yet been initialized. function parse_state_etc_spec(data, args) local function check(arg, dataval, allvalues) if args[arg] then if not contains(allvalues, args[arg]) then error("For " .. arg .. "=, value '" .. args[arg] .. "' should be one of " ..					table.concat(allvalues, ", ")) end data[dataval] = args[arg] end end

local function check_boolean(arg, dataval) check(arg, dataval, {"yes", "no"}) if data[dataval] == "yes" then data[dataval] = true elseif data[dataval] == "no" then data[dataval] = false end end

-- Make sure no holes in mod values for i=1,(#mod_list)-1 do		if args[mod_list[i+1]] and not args[mod_list[i]] then error("Hole in modifier arguments -- " .. mod_list[i+1] ..				" present but not " .. mod_list[i]) end end -- FIXME! Remove this once we're sure there are no instances of mod2 -- that haven't been converted to modhead2. if args["mod2"] then track("mod2") end

-- Set default value; may be overridden e.g. by arg["state"] or -- by idafaN=. data.states = data.allstates

-- List of pairs of idafaN/modN parameters local idafa_mod_list = for i=2,max_mods do table.insert(idafa_mod_list, {"idafa" .. i, "mod" .. i}) end

-- True if the value of an |idafa= param is a valid adjectival modifier -- value. local function valid_adjectival_idafaval(idafaval) return idafaval == "adj" or idafaval == "adj-base" or			idafaval == "adj-mod" or rfind(idafaval, "^adj%-mod[0-9]+$") end

-- Extract the referent (base or modifier) of an adjectival |idafa= param. -- Assumes the value is valid. local function adjectival_idafaval_referent(idafaval) if idafaval == "adj" then return "base" end return assert_rsub(idafaval, "^adj%-", "") end

-- Convert a base/mod spec to an index: 0=base, 1=mod, 2=mod2, etc.	local function basemod_to_index(basemod) if basemod == "base" then return 0 end if basemod == "mod" then return 1 end return tonumber(assert_rsub(basemod, "^mod", "")) end

-- Recognize idafa spec and handle it. -- We do the following: -- (1) Check that if idafaN= is given, then modN= is also given. -- (2) Check that adjectival modifiers aren't followed by idafa modifiers. -- (3) Check that adjectival modifiers are modifying the base or an --    ʾidāfa modifier, not another adjectival modifier. -- (4) Support idafa values "adj-base", "adj-mod", "adj-mod2", "adj" --    (="adj-base") etc. and check that we're referring to an earlier --    word. -- (5) For ʾidāfa modifiers, set basestate=con, set modN_case=gen, --    set modN_idafa=true, and set modN_number to the number specified --    in the parameter value (e.g. 'sg' or 'def-pl'); and if the --    parameter value specifies a state (e.g. 'def' or 'ind-du'), --    set modN_state= to this value, and if this is the last ʾidāfa --    modifier, also set state= to this value; if this is not the last --    ʾidāfa modifier, set modN_state=con and disallow a state to be --    specified in the parameter value. -- (6) For adjectival modifiers of the base, do nothing. -- (7) For adjectival modifiers of ʾidāfa modifiers, set modN_case=gen; --    set modN_idafa=false; and set modN_number=, modN_numgen= and --    modN_state= to match the values of the idafa modifier.

-- error checking and find last ʾidāfa modifier local last_is_idafa = true local last_idafa_mod = "base" for _, idafa_mod in ipairs(idafa_mod_list) do		local idafaparam = idafa_mod[1] local mod = idafa_mod[2] local idafaval = args[idafaparam]

if idafaval then local paramval = idafaparam .. "=" .. idafaval if not args[mod] then error("'" .. idafaparam .. "' parameter without corresponding '"					.. mod .. "' parameter") end if not valid_adjectival_idafaval(idafaval) then -- We're a construct (ʾidāfa) modifier if not last_is_idafa then error("ʾidāfa modifier " .. paramval .. " follows adjectival modifier") end last_idafa_mod = mod else last_is_idafa = false local adjref = adjectival_idafaval_referent(idafaval) if adjref ~= "base" then if basemod_to_index(adjref) >= basemod_to_index(mod) then error(paramval .. " can only refer to an earlier element") end local idafaref = assert_rsub(adjref, "^mod", "idafa") if not args[idafaref] then error(paramval .. " cannot refer to a missing modifier") elseif valid_adjectival_idafaval(args[idafaref]) then error(paramval .. " cannot refer to an adjectival modifier") end end end end end

-- Now go through and set all the modN_ data values appropriately. for _, idafa_mod in ipairs(idafa_mod_list) do		local idafaparam = idafa_mod[1] local mod = idafa_mod[2] local idafaval = args[idafaparam]

if idafaval then local paramval = idafaparam .. "=" .. idafaval local bad_idafa = true if idafaval == "yes" then idafaval = "sg" end if idafaval == "ind-def" or contains(data.allstates, idafaval) then idafaval = idafaval .. "-sg" end if not idafaval then bad_idafa = false elseif valid_adjectival_idafaval(idafaval) then local adjref = adjectival_idafaval_referent(idafaval) if adjref ~= "base" then data[mod .. "_case"] = "gen" data[mod .. "_state"] = data[adjref .. "_state"] -- if agreement is with ind-def, make it def if data[mod .. "_state"] == "ind-def" then data[mod .. "_state"] = "def" end data[mod .. "_number"] = data[adjref .. "_number"] data[mod .. "_numgen"] = data[adjref .. "_numgen"] data[mod .. "_idafa"] = false end bad_idafa = false elseif contains(data.allnumbers, idafaval) then data.basestate = "con" data[mod .. "_case"] = "gen" data[mod .. "_number"] = idafaval data[mod .. "_idafa"] = true if mod ~= last_idafa_mod then data[mod .. "_state"] = "con" end bad_idafa = false elseif rfind(idafaval, "%-") then local state_num = rsplit(idafaval, "%-") -- Support ind-def as a possible value. We set modstate to				-- ind-def, which will signal definite agreement with adjectival -- modifiers; then later on we change the value to ind. if #state_num == 3 and state_num[1] == "ind" and state_num[2] == "def" then state_num[1] = "ind-def" state_num[2] = state_num[3] table.remove(state_num) end if #state_num == 2 then local state = state_num[1] local num = state_num[2] if (state == "ind-def" or contains(data.allstates, state)) and contains(data.allnumbers, num) then if mod == last_idafa_mod then if state == "ind-def" then data.states = {"def"} else data.states = {state} end else error(paramval .. " cannot specify a state because it is not the last ʾidāfa modifier") end data.basestate = "con" data[mod .. "_case"] = "gen" data[mod .. "_state"] = state data[mod .. "_number"] = num data[mod .. "_idafa"] = true bad_idafa = false end end end if bad_idafa then error(paramval .. " should be one of yes, def, sg, def-sg, adj, adj-base, adj-mod, adj-mod2 or similar") end end end

if args["state"] == "ind-def" then data.states = {"def"} data.basestate = "ind" elseif args["state"] then data.states = rsplit(args["state"], ",") for _, state in ipairs(data.states) do			if not contains(data.allstates, state) then error("For state=, value '" .. state .. "' should be one of " ..					table.concat(data.allstates, ", ")) end end end

-- Now process explicit settings, so that they can override the -- settings based on idafaN=. check("basestate", "basestate", data.allstates) check_boolean("noirreg", "noirreg") check_boolean("omitarticle", "omitarticle") data.prefix = args.prefix

for _, mod in ipairs(mod_list) do check(mod .. "state", mod .. "_state", data.allstates) check(mod .. "case", mod .. "_case", data.allcases) check(mod .. "number", mod .. "_number", data.allnumgens) check(mod .. "numgen", mod .. "_numgen", data.allnumgens) check_boolean(mod .. "idafa", mod .. "_idafa") check_boolean(mod .. "omitarticle", mod .. "_omitarticle") data[mod .. "_prefix"] = args[mod .. "prefix"] end

-- Make sure modN_numgen is initialized, to modN_number if necessary. -- This simplifies logic in certain places, e.g. call_inflections. -- Also convert ind-def to ind. for _, mod in ipairs(mod_list) do data[mod .. "_numgen"] = data[mod .. "_numgen"] or data[mod .. "_number"] if data[mod .. "_state"] == "ind-def" then data[mod.. "_state"] = "ind" end end end

-- Parse the user-specified number spec. The user can manually specify which -- numbers are to appear. Return true if |number= was specified. function parse_number_spec(data, args) if args["number"] then data.numbers = rsplit(args["number"], ",") for _, num in ipairs(data.numbers) do			if not contains(data.allnumbers, num) then error("For number=, value '" .. num .. "' should be one of " ..					table.concat(data.allnumbers, ", ")) end end return true else data.numbers = data.allnumbers return false end end

-- Determine which numbers will appear using the logic for nouns. -- See comment just below. function determine_noun_numbers(data, args, pls) -- Can manually specify which numbers are to appear, and exactly those -- numbers will appear. Otherwise, if any plurals given, duals and plurals -- appear; else, only singular (on the assumption that the word is a proper	-- noun or abstract noun that exists only in the singular); however, -- singular won't appear if "-" given for singular, and similarly for dual. if not parse_number_spec(data, args) then data.numbers = {} local sgarg1 = args[1] local duarg1 = args["d"] if sgarg1 ~= "-" then table.insert(data.numbers, "sg") end if #pls["base"] > 0 then -- Dual appears if either: explicit dual stem (not -) is given, or -- default dual is used and explicit singular stem (not -) is given. if (duarg1 and duarg1 ~= "-") or (not duarg1 and sgarg1 ~= "-") then table.insert(data.numbers, "du") end table.insert(data.numbers, "pl") elseif duarg1 and duarg1 ~= "-" then -- If explicit dual but no plural given, include it. Useful for -- dual tantum words. table.insert(data.numbers, "du") end end end

-- For stem STEM, convert to stem-and-type format and insert stem and type -- into RESULTS, checking to make sure it's not already there. SGS is the -- list of singular items to base derived forms off of (masculine or feminine -- as appropriate), an array of length-two arrays of {COMBINED_STEM, TYPE} as -- returned by stem_and_type; ISFEM is true if this is feminine gender; -- NUM is "sg", "du" or "pl". POS is the part of speech, generally "noun" or -- "adjective". function insert_stems(stem, results, sgs, isfem, num, pos) if stem == "-" then return end for _, sg in ipairs(sgs) do		local combined_stem, ty = export.stem_and_type(stem,			sg[1], sg[2], isfem, num, pos) insert_if_not(results, {combined_stem, ty}) end end

-- Handle manually specified overrides of individual forms. Separate -- outer-level alternants with ; or, or the Arabic equivalents; separate -- inner-level alternants with | (we can't use / because it's already in -- use separating Arabic from translit). -- -- Also determine lemma and allow it to be overridden. -- Also allow POS (part of speech) to be overridden. function handle_lemma_and_overrides(data, args) local function handle_override(arg) if args[arg] then local ovval = {} local alts1 = rsplit(args[arg], "[;,؛،]") for _, alt1 in ipairs(alts1) do				local alts2 = rsplit(alt1, "|") table.insert(ovval, alts2) end data.forms[arg] = ovval end end

local function do_overrides(mod) for _, numgen in ipairs(data.allnumgens) do			for _, state in ipairs(data.allstates) do				for _, case in ipairs(data.allcases) do local arg = mod .. case .. "_" .. numgen .. "_" .. state handle_override(arg) if args[arg] and not data.noirreg then insert_cat(data, mod, numgen,							"Arabic NOUNs with irregular SINGULAR",							"SINGULAR of irregular NOUN") end end end end end

do_overrides("") for _, mod in ipairs(mod_list) do do_overrides(mod .. "_") end

local function get_lemma(mod) for _, numgen in ipairs(data.numgens) do			for _, state in ipairs(data.states) do local arg = mod .. "lemma_" .. numgen .. "_" .. state if data.forms[arg] and #data.forms[arg] > 0 then return data.forms[arg] end end end return nil end

data.forms["lemma"] = get_lemma("") for _, mod in ipairs(mod_list) do data.forms[mod .. "_lemma"] = get_lemma(mod .. "_") end handle_override("lemma") for _, mod in ipairs(mod_list) do handle_override(mod .. "_lemma") end end

-- Return the part of speech based on the part of speech contained in -- data.pos and MOD (either "", "mod_", "mod2_", etc., same as in -- do_gender_number_1). If we're a modifier, don't use data.pos but -- instead choose based on whether modifier is adjectival or nominal -- (ʾiḍāfa). function get_pos(data, mod) local ismod = mod ~= "" if not ismod then return data.pos elseif data[mod .. "idafa"] then return "noun" else return "adjective" end end

-- Find the stems associated with a particular gender/number combination. -- ARGS is the set of all arguments. ARGPREFS is an array of argument prefixes -- (e.g. "f" for the actual arguments "f", "f2", ..., for the feminine -- singular; we allow more than one to handle "cpl"). SGS is a -- "stem-type list" (see do_gender_number), and is the list of stems to -- base derived forms off of (masculine or feminine as appropriate), an array -- of length-two arrays of {COMBINED_STEM, TYPE} as returned by -- stem_and_type. DEFAULT, ISFEM and NUM are as in do_gender_number. -- MOD is either "", "mod_", "mod2_", etc. depending if we're working on a -- base or modifier argument (in the latter case, basically if the argument -- begins with "mod"). function do_gender_number_1(data, args, argprefs, sgs, default, isfem, num, mod) local results = {} local function handle_stem(stem) insert_stems(stem, results, sgs, isfem, num, get_pos(data, mod)) end

-- If no arguments specified, use the default instead. need_default = true for _, argpref in ipairs(argprefs) do		if args[argpref] then need_default = false break end end if need_default then if not default then return results end handle_stem(default) return results end

-- For explicitly specified arguments, make sure there's at least one -- stem to generate off of; otherwise specifying e.g. 'sing=- pauc=فُلَان' -- won't override paucal. if #sgs == 0 then sgs = end for _, argpref in ipairs(argprefs) do		if args[argpref] then handle_stem(args[argpref]) end local i = 2 while args[argpref .. i] do handle_stem(args[argpref .. i]) i = i + 1 end end return results end

-- For a given gender/number combination, parse and return the full set -- of stems for both base and modifier. The return value is a -- "stem specification", i.e. table with a "base" key for the base, a -- "mod" key for the first modifier (see below), a "mod2" key for the -- second modifier, etc. listing all stems for both the base and modifier(s). -- The value of each key is a "stem-type list", i.e. an array of stem-type -- pairs, where each element is a size-two array of {COMBINED_STEM, STEM_TYPE}. -- COMBINED_STEM is a stem with attached transliteration in the form -- STEM/TRANSLIT (where the transliteration is either manually specified in -- the stem argument, e.g. 'pl=لُورْدَات/lordāt', or auto-transliterated from -- the Arabic, with BOGUS_CHAR substituting for the transliteration if -- auto-translit fails). STEM_TYPE is the declension of the stem, either -- manually specified, e.g. 'بَبَّغَاء:di' for manually-specified diptote, or -- auto-detected (see stem_and_type and detect_type). -- -- DATA and ARGS are as in init. ARGPREFS is an array of the prefixes for -- the argument(s) specifying the stem (and optional translit and declension -- type). For a given ARGPREF, we check ARGPREF, ARGPREF2, ARGPREF3, ... in -- turn for the base, and modARGPREF, modARGPREF2, modARGPREF3, ... in turn -- for the first modifier, and mod2ARGPREF, mod2ARGPREF2, mod2ARGPREF3, ... -- for the second modifier, etc. SGS is a stem specification (see above), -- giving the stems that are used to base derived forms off of (e.g. if a stem -- type "smp" appears in place of a stem, the sound masculine plural of the -- stems in SGS will be derived). DEFAULT is a single stem (i.e. a string) that -- is used when no stems were explicitly given by the user (typically either -- "f", "m", "d" or "p"), or nil for no default. ISFEM is true if we're -- accumulating stems for a feminine number/gender category, and NUM is the -- number (expected to be "sg", "du" or "pl") of the number/gender category -- we're accumulating stems for. -- -- About bases and modifiers: Note that e.g. in the noun phrase يَوْم الاِثْنَيْن -- the head noun يَوْم is the base and the noun الاِثْنَيْن is the modifier. -- In a noun phrase like البَحْر الأَبْيَض المُتَوَسِّط, there are two modifiers. -- Note that modifiers come in two varieties, adjectival modifiers and -- construct (ʾidāfa) modifiers. The first above noun phrase is an example -- of a noun phrase with a construct modifier, where the base is fixed in -- the construct state and the modifier is fixed in number and case -- (which is always genitive) and possibly in state. The second above noun -- phrase is an example of a noun phrase with two adjectival modifiers. -- A construct modifier is generally a noun, whereas an adjectival modifier -- is an adjective that usually agrees in state, number and case with the -- base noun. (Note that in the case of multiple modifiers, it is possible -- for e.g. the second modifier to be an adjectival modifier that agrees -- with the first, construct, modifier, in which case its case will be fixed -- to genitive, its number will be fixed to the same number as the first -- modifier and its state will vary or not depending on whether the first -- modifier's state varies. It is not possible in general to distinguish -- adjectival and construct modifiers by looking at the values of -- modN_state, modN_case or modN_number, since e.g. a third modifier could -- have all of them specified and be either kind. Thus we have modN_idafa, -- which is true for a construct modifier, false otherwise.) function do_gender_number(data, args, argprefs, sgs, default, isfem, num) local results = do_gender_number_1(data, args, argprefs, sgs["base"],		default, isfem, num, "") basemodtable = {base=results} for _, mod in ipairs(mod_list) do		local modn_argprefs = {} for _, argpref in ipairs(argprefs) do table.insert(modn_argprefs, mod .. argpref) end local modn_results = do_gender_number_1(data, args, modn_argprefs,			sgs[mod] or {}, default, isfem, num, mod .. "_") basemodtable[mod] = modn_results end return basemodtable end

-- Generate inflections for the given combined stem and type, for MOD -- (either "" if we're working on the base or "mod_", "mod2_", etc. if we're -- working on a modifier) and NUMGEN (number or number-gender combination, -- of the sort that forms part of the keys in DATA.FORMS). function call_inflection(combined_stem, ty, data, mod, numgen) if ty == "-" then return end

if not inflections[ty] then error("Unknown inflection type '" .. ty .. "'") end local ar, tr = split_arabic_tr(combined_stem) inflections[ty](ar, tr, data, mod, numgen) end

-- Generate inflections for the stems of a given number/gender combination -- and for either the base or the modifier. STEMTYPES is a stem-type list -- (see do_gender_number), listing all the stems and corresponding -- declension types. MOD is either "", "mod_", "mod2_", etc. depending on -- whether we're working on the base or a modifier. NUMGEN is the number or -- number-gender combination we're working on, of the sort that forms part -- of the keys in DATA.FORMS, e.g. "sg" or "m_sg". function call_inflections(stemtypes, data, mod, numgen) local mod_with_modnumgen = mod ~= "" and data[mod .. "numgen"] -- If modN_numgen= is given, do nothing if NUMGEN isn't the same if mod_with_modnumgen and data[mod .. "numgen"] ~= numgen then return end -- always call inflection if mod_with_modnumgen since it may affect -- other numbers (cf. يَوْم الاِثْنَيْن) if mod_with_modnumgen or contains(data.numbers, rsub(numgen, "^.*_", "")) then for _, stemtype in ipairs(stemtypes) do			call_inflection(stemtype[1], stemtype[2], data, mod, numgen) end end end

-- Generate the entire set of inflections for a noun or adjective. -- Also handle any manually-specified part of speech and any manual -- inflection overrides. The value of INFLECTIONS is an array of stem -- specifications, one per number, where each element is a size-two -- array of a stem specification (containing the set of stems and -- corresponding declension types for the base and any modifiers; -- see do_gender_number) and a NUMGEN string, i.e. a string identifying -- the number or number/gender in question (e.g. "sg", "du", "pl", -- "m_sg", "f_pl", etc.). function do_inflections_and_overrides(data, args, inflections) -- do this before generating inflections so POS change is reflected in	-- categories if args["pos"] then data.pos = args["pos"] end

for _, inflection in ipairs(inflections) do		call_inflections(inflection[1]["base"] or {}, data, "", inflection[2]) for _, mod in ipairs(mod_list) do			call_inflections(inflection[1][mod] or {}, data,				mod .. "_", inflection[2]) end end

handle_lemma_and_overrides(data, args) end

-- Helper function for get_heads. Parses the stems for either the -- base or the modifier (see do_gender_number). ARG1 is the argument -- for the first stem and ARGN is the prefix of the arguments for the -- remaining stems. For example, for the singular base, ARG1=1 and -- ARGN="head"; for the first singular modifier, ARG1="mod" and -- ARGN="modhead"; for the plural base, ARG1=ARGN="pl". The arguments -- other than the first are numbered 2, 3, ..., which is appended to -- ARGN. MOD is either "", "mod_", "mod2_", etc. depending if we're -- working on a base or modifier argument. The returned value is an -- array of stems, where each element is a size-two array of -- {COMBINED_STEM, STEM_TYPE}. See do_gender_number. function get_heads_1(data, args, arg1, argn, mod) if not args[arg1] then return {} end local heads if args[arg1] == "-" then heads = else heads = {} insert_stems(args[arg1], heads,, false, "sg",			get_pos(data, mod)) end local i = 2 while args[argn .. i] do local arg = args[argn .. i]		insert_stems(arg, heads,, false, "sg",			get_pos(data, mod)) i = i + 1 end return heads end

-- Very similar to do_gender_number, and returns the same type of -- structure, but works specifically for the stems of the head (the -- most basic gender/number combiation, e.g. singular for nouns, -- masculine singular for adjectives and gendered nouns, collective -- for collective nouns, etc.), including both base and modifier. -- See do_gender_number. Note that the actual return value is -- two items, the first of which is the same type of structure -- returned by do_gender_number and the second of which is a boolean -- indicating whether we were called from within a template documentation -- page (in which case no user-specified arguments exist and we -- substitute sample ones). The reason for this boolean is to indicate -- whether sample arguments need to be substituted for other numbers -- as well. function get_heads(data, args, headtype) if not args[1] and mw.title.getCurrentTitle.nsText == "Template" then return {base=}, true end

if not args[1] then error("Parameter 1 (" .. headtype .. " stem) may not be empty.") end local base = get_heads_1(data, args, 1, "head", "") basemodtable = {base=base} for _, mod in ipairs(mod_list) do local modn = get_heads_1(data, args, mod, mod .. "head", mod .. "_") basemodtable[mod] = modn end return basemodtable, false end

-- The main entry point for noun tables. function export.show_noun(frame) local args, origargs, data = init(frame:getParent.args) data.pos = "noun" data.numgens = function return data.numbers end data.allnumgens = data.allnumbers

local sgs, is_template = get_heads(data, args, "singular") local pls = is_template and {base=} or		do_gender_number(data, args, {"pl", "cpl"}, sgs, nil, false, "pl") -- always do dual so cases like يَوْم الاِثْنَيْن work -- a singular with -- a dual modifier, where data.number refers only the singular -- but we need to go ahead and compute the dual so it parses the -- "modd" modifier dual argument. When the modifier dual argument -- is parsed, it will store the resulting dual declension for اِثْنَيْن -- in the modifier slot for all numbers, including specifically -- the singular. local dus = do_gender_number(data, args, {"d"}, sgs, "d", false, "du")

parse_state_etc_spec(data, args)

determine_noun_numbers(data, args, pls)

do_inflections_and_overrides(data, args,		{{sgs, "sg"}, {dus, "du"}, {pls, "pl"}})

-- Make the table return make_noun_table(data) end

function any_feminine(data, stem_spec) for basemod, stemtypelist in pairs(stem_spec) do -- Only check modifiers if modN_numgen= not given. If not given, the -- modifier needs to be declined for all numgens; else only for the -- given numgen, which should be explicitly specified. if not (basemod ~= "base" and data[basemod .. "_numgen"]) then for _, stemtype in ipairs(stemtypelist) do if rfind(stemtype[1], TAM .. UNUOPT .. "/") then return true end end end end return false end

function all_feminine(data, stem_spec) for basemod, stemtypelist in pairs(stem_spec) do -- Only check modifiers if modN_numgen= not given. If not given, the -- modifier needs to be declined for all numgens; else only for the -- given numgen, which should be explicitly specified. if not (basemod ~= "base" and data[basemod .. "_numgen"]) then for _, stemtype in ipairs(stemtypelist) do if not rfind(stemtype[1], TAM .. UNUOPT .. "/") then return false end end end end return true end

-- The main entry point for collective noun tables. function export.show_coll_noun(frame) local args, origargs, data = init(frame:getParent.args) data.pos = "noun" data.allnumbers = {"coll", "sing", "du", "pauc", "pl"} data.engnumberscap["pl"] = "Plural of variety" data.numgens = function return data.numbers end data.allnumgens = data.allnumbers

local colls, is_template = get_heads(data, args, "collective") local pls = is_template and {base=} or		do_gender_number(data, args, {"pl", "cpl"}, colls, nil, false, "pl")

parse_state_etc_spec(data, args)

-- If collective noun is already feminine in form, don't try to	-- form a feminine singulative local collfem = any_feminine(data, colls)

local sings = do_gender_number(data, args, {"sing"}, colls,		not already_feminine and "f" or nil, true, "sg") local singfem = all_feminine(data, sings) local dus = do_gender_number(data, args, {"d"}, sings, "d", singfem, "du") local paucs = do_gender_number(data, args, {"pauc"}, sings, "paucp",		singfem, "pl")

-- Can manually specify which numbers are to appear, and exactly those -- numbers will appear. Otherwise, if any plurals given, plurals appear, -- and if singulative given, dual and paucal appear. if not parse_number_spec(data, args) then data.numbers = {} if args[1] ~= "-" then table.insert(data.numbers, "coll") end if #sings["base"] > 0 then table.insert(data.numbers, "sing") end if #dus["base"] > 0 then table.insert(data.numbers, "du") end if #paucs["base"] > 0 then table.insert(data.numbers, "pauc") end if #pls["base"] > 0 then table.insert(data.numbers, "pl") end end

-- Generate the collective, singulative, dual, paucal and plural forms do_inflections_and_overrides(data, args,		{{colls, "coll"}, {sings, "sing"}, {dus, "du"}, {paucs, "pauc"}, {pls, "pl"}})

-- Make the table return make_noun_table(data) end

-- The main entry point for singulative noun tables. function export.show_sing_noun(frame) local args, origargs, data = init(frame:getParent.args) data.pos = "noun" data.allnumbers = {"sing", "coll", "du", "pauc", "pl"} data.engnumberscap["pl"] = "Plural of variety" data.numgens = function return data.numbers end data.allnumgens = data.allnumbers

parse_state_etc_spec(data, args)

local sings, is_template = get_heads(data, args, "singulative")

-- If all singulative nouns feminine in form, form a masculine collective local singfem = all_feminine(data, sings)

local colls = do_gender_number(data, args, {"coll"}, sings,		singfem and "m" or nil, false, "sg") local dus = do_gender_number(data, args, {"d"}, sings, "d", singfem, "du") local paucs = do_gender_number(data, args, {"pauc"}, sings, "paucp",		singfem, "pl") local pls = is_template and {base=} or		do_gender_number(data, args, {"pl", "cpl"}, colls, nil, false, "pl")

-- Can manually specify which numbers are to appear, and exactly those -- numbers will appear. Otherwise, if any plurals given, plurals appear; -- if singulative given or derivable, it and dual and paucal will appear. if not parse_number_spec(data, args) then data.numbers = {} if args[1] ~= "-" then table.insert(data.numbers, "sing") end if #colls["base"] > 0 then table.insert(data.numbers, "coll") end if #dus["base"] > 0 then table.insert(data.numbers, "du") end if #paucs["base"] > 0 then table.insert(data.numbers, "pauc") end if #pls["base"] > 0 then table.insert(data.numbers, "pl") end end

-- Generate the singulative, collective, dual, paucal and plural forms do_inflections_and_overrides(data, args,		{{sings, "sing"}, {colls, "coll"}, {dus, "du"}, {paucs, "pauc"}, {pls, "pl"}})

-- Make the table return make_noun_table(data) end

-- The implementation of the main entry point for adjective and -- gendered noun tables. function show_gendered(frame, isadj, pos) local args, origargs, data = init(frame:getParent.args) data.pos = pos data.numgens = function local numgens = {} for _, gender in ipairs(data.allgenders) do			for _, number in ipairs(data.numbers) do table.insert(numgens, gender .. "_" .. number) end end return numgens end data.allnumgens = {} for _, gender in ipairs(data.allgenders) do		for _, number in ipairs(data.allnumbers) do table.insert(data.allnumgens, gender .. "_" .. number) end end

parse_state_etc_spec(data, args)

local msgs = get_heads(data, args, 'masculine singular') -- Always do all of these so cases like يَوْم الاِثْنَيْن work. -- See comment in show_noun. local fsgs = do_gender_number(data, args, {"f"}, msgs, "f", true, "sg") local mdus = do_gender_number(data, args, {"d"}, msgs, "d", false, "du") local fdus = do_gender_number(data, args, {"fd"}, fsgs, "d", true, "du") local mpls = do_gender_number(data, args, {"pl", "cpl"}, msgs,		isadj and "p" or nil, false, "pl") local fpls = do_gender_number(data, args, {"fpl", "cpl"}, fsgs, "fp",		true, "pl")

if isadj then parse_number_spec(data, args) else determine_noun_numbers(data, args, mpls) end

-- Generate the singular, dual and plural forms do_inflections_and_overrides(data, args,		{{msgs, "m_sg"}, {fsgs, "f_sg"}, {mdus, "m_du"}, {fdus, "f_du"},		 {mpls, "m_pl"}, {fpls, "f_pl"}})

-- Make the table if isadj then return make_adj_table(data) else return make_gendered_noun_table(data) end end

-- The main entry point for gendered noun tables. function export.show_gendered_noun(frame) return show_gendered(frame, false, "noun") end

-- The main entry point for numeral tables. Same as using show_gendered_noun -- with pos=numeral. function export.show_numeral(frame) return show_gendered(frame, false, "numeral") end

-- The main entry point for adjective tables. function export.show_adj(frame) return show_gendered(frame, true, "adjective") end

-- Inflection functions

function do_translit(term) return (lang:transliterate(term)) or track("cant-translit") and BOGUS_CHAR end

function split_arabic_tr(term) if term == "" then return "", "" elseif not rfind(term, "/") then return term, do_translit(term) else splitvals = rsplit(term, "/") if #splitvals ~= 2 then error("Must have at most one slash in a combined Arabic/translit expr: '" .. term .. "'") end return splitvals[1], splitvals[2] end end

function reorder_shadda(word) -- shadda+short-vowel (including tanwīn vowels, i.e. -an -in -un) gets -- replaced with short-vowel+shadda during NFC normalisation, which -- MediaWiki does for all Unicode strings; however, it makes the -- detection process inconvenient, so undo it. word = rsub(word, "(" .. DIACRITIC_ANY_BUT_SH .. ")" .. SH, SH .. "%1") return word end

-- Combine PREFIX, AR/TR, and ENDING in that order. PREFIX and ENDING -- can be of the form ARABIC/TRANSLIT. The Arabic and translit parts are -- separated out and grouped together, resulting in a string of the -- form ARABIC/TRANSLIT (TRANSLIT will always be present, computed -- automatically if not present in the source). The return value is actually a -- list of ARABIC/TRANSLIT strings because hamza resolution is applied to -- ARABIC, which may produce multiple outcomes (all of which will have the -- same TRANSLIT). function combine_with_ending(prefix, ar, tr, ending) local prefixar, prefixtr = split_arabic_tr(prefix) local endingar, endingtr = split_arabic_tr(ending) -- When calling hamza_seat, leave out prefixes, which we expect to be -- clitics like وَ. (In case the prefix is a separate word, it won't matter	-- whether we include it in the text passed to hamza_seat.) allar = hamza_seat(ar .. endingar) -- Convert ...īān to ...iyān in case of stems ending in -ī or -ū -- (e.g. kubrī "bridge"). if rfind(endingtr, "^[aeiouāēīōū]") then if rfind(tr, "ī$") then tr = rsub(tr, "ī$", "iy") elseif rfind(tr, "ū$") then tr = rsub(tr, "ū$", "uw") end end tr = prefixtr .. tr .. endingtr allartr = {} for _, arval in ipairs(allar) do table.insert(allartr, prefixar .. arval .. "/" .. tr) end return allartr end

-- Combine PREFIX, STEM/TR and ENDING in that order and insert into the -- list of items in DATA[KEY], initializing it if empty and making sure -- not to insert duplicates. ENDING can be a list of endings, will be -- distributed over the remaining parts. PREFIX and/or ENDING can be -- of the form ARABIC/TRANSLIT (the stem is already split into Arabic STEM -- and Latin TR). Note that what's inserted into DATA[KEY] is actually a -- list of ARABIC/TRANSLIT strings; if more than one is present in the list, -- they represent hamza variants, i.e. different ways of writing a hamza -- sound, such as مُبْتَدَؤُون vs. مُبْتَدَأُون (see init_data). function add_inflection(data, key, prefix, stem, tr, ending) if data.forms[key] == nil then data.forms[key] = {} end if type(ending) ~= "table" then ending = {ending} end for _, endingval in ipairs(ending) do		insert_if_not(data.forms[key],			combine_with_ending(prefix, stem, tr, endingval)) end end

-- Form inflections from combination of STEM, with transliteration TR, -- and ENDINGS (and definite article where necessary, plus any specified -- prefixes) and store in DATA, for the number or gender/number -- determined by MOD ("", "mod_", "mod2_", etc.; see call_inflection) and -- NUMGEN ("sg", "du", "pl", or "m_sg", "f_pl", etc. for adjectives). ENDINGS -- is an array of 15 values, each of which is a string or array of -- alternatives. The order of ENDINGS is indefinite nom, acc, gen; definite -- nom, acc, gen; construct-state nom, acc, gen; informal indefinite, definite, -- construct; lemma indefinite, definite, construct. (Normally the lemma is -- based off of the indefinite, but if the inflection has been restricted to -- particular states, it comes from one of those states, in the order -- indefinite, definite, construct.) See also add_inflection for more info -- on exactly what is inserted into DATA. function add_inflections(stem, tr, data, mod, numgen, endings) stem = canon_hamza(stem) assert(#endings == 15) local ismod = mod ~= "" -- If working on modifier and modN_numgen= is given, it better agree with -- NUMGEN; the case where it doesn't agree should have been caught in -- call_inflections. if ismod and data[mod .. "numgen"] then assert(data[mod .. "numgen"] == numgen) end -- Return a list of combined of ar/tr forms, with the ending tacked on. -- There may be more than one form because of alternative hamza seats that -- may be supplied, e.g. مُبْتَدَؤُون or مُبْتَدَأُون (mubtadaʾūn "(grammatical) subjects"). local defstem, deftr if stem == "?" or data[mod .. "omitarticle"] then defstem = stem deftr = tr	else -- apply sun-letter assimilation and hamzat al-wasl elision defstem = rsub("الْ" .. stem, "^الْ([سشصتثطدذضزژظنرل])", "ال%1ّ") defstem = rsub(defstem, "^الْ([اٱ])([ًٌٍَُِ])", "ال%2%1") deftr = rsub("al-" .. tr, "^al%-([sšṣtṯṭdḏḍzžẓnrḷ])", "a%1-%1") end -- For a given MOD spec, is the previous word (base or modifier) a noun? -- We assume the base is always a noun in this case, and otherwise -- look at the value of modN_idafa. local function prev_mod_is_noun(mod) if mod == "mod_" then return true end if mod == "mod2_" then return data["mod_idafa"] end modnum = assert_rsub(mod, "^mod([0-9]+)_$", "%1") modnum = modnum - 1 return data["mod" .. modnum .. "_idafa"] end

local numgens = ismod and data[mod .. "numgen"] and data.numgens or {numgen} -- "defcon" means definite adjective modifying construct state noun. We -- add a ... before the adjective (and after the construct-state noun) to -- indicate that a nominal modifier would go between noun and adjective. local stems = {ind = stem, def = defstem, con = stem, defcon = "... " .. defstem} local trs = {ind = tr, def = deftr, con = tr, defcon = "... " .. deftr} for _, ng in ipairs(numgens) do		for _, state in ipairs(data.allstates) do			for _, case in ipairs(data.allcases_with_lemma) do				-- We are generating the inflections for STATE, but sometimes -- we want to use the inflected form of a different state, e.g. -- if modN_state= or basestate= is set to some particular state. -- If we're dealing with an adjectival modifier, then in				-- place of "con" we use "defcon" if immediately after a noun -- (see comment above), else "def". local thestate = ismod and data[mod .. "state"] or ismod and not data[mod .. "idafa"] and state == "con" and (prev_mod_is_noun(mod) and "defcon" or "def") or					not ismod and data.basestate or					state local is_lemmainf = case == "lemma" or case == "inf" -- Don't substitute value of modcase for lemma/informal "cases" local thecase = is_lemmainf and case or ismod and data[mod .. "case"] or case add_inflection(data, mod .. case .. "_" .. ng .. "_" .. state,					data[mod .. "prefix"] or "",					stems[thestate], trs[thestate],					endings[data.statecases[thestate][thecase]]) end end end end

-- Insert into a category and a type variable (e.g. m_sg_type) for the -- declension type of a particular declension (e.g. masculine singular for -- adjectives). MOD and NUMGEN are as in call_inflection. CATVALUE is the -- category and ENGVALUE is the English description of the declension type. -- In these values, NOUN is replaced with either "noun" or "adjective", -- SINGULAR is replaced with the English equivalent of the number in NUMGEN -- (e.g. "singular", "dual" or "plural") while BROKSING is the same but uses -- "broken plural" in place of "plural" and "broken paucal" in place of -- "paucal". function insert_cat(data, mod, numgen, catvalue, engvalue) local singpl = data.engnumbers[rsub(numgen, "^.*_", "")] assert(singpl ~= nil) local broksingpl = rsub(singpl, "plural", "broken plural") broksingpl = rsub(broksingpl, "paucal", "broken paucal") if rfind(broksingpl, "broken plural") and (rfind(catvalue, "BROKSING") or			rfind(engvalue, "BROKSING")) then table.insert(data.categories, "Arabic " .. data.pos .. "s with broken plural") end if rfind(catvalue, "irregular") or rfind(engvalue, "irregular") then table.insert(data.categories, "Arabic irregular " .. data.pos .. "s") end catvalue = rsub(catvalue, "NOUN", data.pos) catvalue = rsub(catvalue, "SINGULAR", singpl) catvalue = rsub(catvalue, "BROKSING", broksingpl) engvalue = rsub(engvalue, "NOUN", data.pos) engvalue = rsub(engvalue, "SINGULAR", singpl) engvalue = rsub(engvalue, "BROKSING", broksingpl) if mod == "" and catvalue ~= "" then insert_if_not(data.categories, catvalue) end if engvalue ~= "" then local key = mod .. numgen .. "_type" if data.forms[key] == nil then data.forms[key] = {} end insert_if_not(data.forms[key], engvalue) end if contains(data.states, "def") and not contains(data.states, "ind") then insert_if_not(data.categories, "Arabic definite " .. data.pos .. "s") end end

-- Return true if we're handling modifier inflections and the modifier's -- case is limited to an oblique case (gen or acc; typically genitive, -- in an ʾidāfa construction). This is used when returning lemma -- inflections -- the modifier part of the lemma should agree in case -- with modifier's case if it's restricted in case. function mod_oblique(mod, data) return mod ~= "" and data[mod .. "case"] and (		data[mod .. "case"] == "acc" or data[mod .. "case"] == "gen") end

-- Similar to mod_oblique but specifically when the modifier case is -- limited to the accusative (which is rare or nonexistent in practice). function mod_acc(mod, data) return mod ~= "" and data[mod .. "case"] and data[mod .. "case"] == "acc" end

-- Handle triptote and diptote inflections function triptote_diptote(stem, tr, data, mod, numgen, is_dip, lc) -- Remove any case ending if rfind(stem, "[" .. UN .. U .. "]$") then stem = rsub(stem, "[" .. UN .. U .. "]$", "") tr = rsub(tr, "un?$", "") end

-- special-case for صلوة pronounced ṣalāh; check translit local is_aah = rfind(stem, TAM .. "$") and rfind(tr, "āh$")

if rfind(stem, TAM .. "$") then if rfind(tr, "h$") then tr = rsub(tr, "h$", "t") elseif not rfind(tr, "t$") then tr = tr .. "t" end end

add_inflections(stem, tr, data, mod, numgen,		{is_dip and U or UN,		 is_dip and A or AN .. ((rfind(stem, "[" .. HAMZA_ON_ALIF .. TAM .. "]$")			or rfind(stem, "[" .. AMAD .. ALIF .. "]" .. HAMZA .. "$")			) and "" or ALIF),		 is_dip and A or IN,		 U, A, I,		 lc and UU or U,		 lc and AA or A,		 lc and II or I,		 {}, {}, {}, -- omit informal inflections		 {}, {}, {}, -- omit lemma inflections		})

-- add category and informal and lemma inflections local tote = lc and "long construct" or is_dip and "diptote" or "triptote" local singpl_tote = "BROKSING " .. tote local cat_prefix = "Arabic NOUNs with " .. tote .. " BROKSING" -- since we're checking translit for -āh we probably don't need to	-- check stem too if is_aah or rfind(stem, "[" .. AMAD .. ALIF .. "]" .. TAM .. "$") then add_inflections(stem, rsub(tr, "t$", ""), data, mod, numgen,			{{}, {}, {},			 {}, {}, {},			 {}, {}, {},			 "/t", "/t", "/t", -- informal pron. is -āt			 "/h", "/h", "/t", -- lemma uses -āh			}) insert_cat(data, mod, numgen, cat_prefix .. " in -āh",			singpl_tote .. " in " .. make_link(HYPHEN .. AAH)) elseif rfind(stem, TAM .. "$") then add_inflections(stem, rsub(tr, "t$", ""), data, mod, numgen,			{{}, {}, {},			 {}, {}, {},			 {}, {}, {},			 "", "", "/t",			 "", "", "/t",			}) insert_cat(data, mod, numgen, cat_prefix .. " in -a",			singpl_tote .. " in " .. make_link(HYPHEN .. AH)) elseif lc then add_inflections(stem, tr, data, mod, numgen,			{{}, {}, {},			 {}, {}, {},			 {}, {}, {},			 "", "", UU,			 "", "", UU,			}) insert_cat(data, mod, numgen, cat_prefix,			singpl_tote) else -- also special-case the nisba ending, which has an informal -- pronunciation. if rfind(stem, IY .. SH .. "$") then local infstem = rsub(stem, SH .. "$", "") local inftr = rsub(tr, "iyy$", "ī") -- add informal and lemma inflections separately add_inflections(infstem, inftr, data, mod, numgen,				{{}, {}, {},				 {}, {}, {},				 {}, {}, {},				 "", "", "",				 {}, {}, {},				}) add_inflections(stem, tr, data, mod, numgen,				{{}, {}, {},				 {}, {}, {},				 {}, {}, {},				 {}, {}, {},				 "", "", "",				}) else add_inflections(stem, tr, data, mod, numgen,				{{}, {}, {},				 {}, {}, {},				 {}, {}, {},				 "", "", "",				 "", "", "",				}) end insert_cat(data, mod, numgen, "Arabic NOUNs with basic " .. tote .. " BROKSING",			"basic " .. singpl_tote) end end

-- Regular triptote inflections["tri"] = function(stem, tr, data, mod, numgen) triptote_diptote(stem, tr, data, mod, numgen, false) end

-- Regular diptote inflections["di"] = function(stem, tr, data, mod, numgen) triptote_diptote(stem, tr, data, mod, numgen, true) end

-- Elative and color/defect adjective: usually same as diptote, -- might be invariable function elative_color_defect(stem, tr, data, mod, numgen) if rfind(stem, "[" .. ALIF .. AMAQ .. "]$") then invariable(stem, tr, data, mod, numgen) else triptote_diptote(stem, tr, data, mod, numgen, true) end end

-- Elative: usually same as diptote, might be invariable inflections["el"] = function(stem, tr, data, mod, numgen) elative_color_defect(stem, tr, data, mod, numgen) end

-- Color/defect adjective: Same as elative inflections["cd"] = function(stem, tr, data, mod, numgen) elative_color_defect(stem, tr, data, mod, numgen) end

-- Triptote with lengthened ending in the construct state inflections["lc"] = function(stem, tr, data, mod, numgen) triptote_diptote(stem, tr, data, mod, numgen, false, true) end

function in_defective(stem, tr, data, mod, numgen, tri) if not rfind(stem, IN .. "$") then error("'in' declension stem should end in -in: '" .. stem .. "'") end

stem = rsub(stem, IN .. "$", "") tr = rsub(tr, "in$", "")

local acc_ind_ending = tri and IY .. AN .. ALIF or IY .. A	add_inflections(stem, tr, data, mod, numgen,		{IN, acc_ind_ending, IN,		 II, IY .. A, II,		 II, IY .. A, II,		 II, II, II,		 -- FIXME: What should happen with the lemma when modifier case		 -- is limited to the accusative and modifier state is e.g. definite?		 -- Should the lemma end in -iya or -ī? In practice this will rarely		 -- if ever happen.		 mod_acc(mod, data) and acc_ind_ending or IN, II, II,		}) local tote = tri and "triptote" or "diptote"

insert_cat(data, mod, numgen, "Arabic NOUNs with " .. tote .. " BROKSING in -in",		"BROKSING " .. tote .. " in " .. make_link(HYPHEN .. IN)) end

function detect_in_type(stem, ispl) if ispl and rfind(stem, "^" .. CONS .. AOPT .. CONS .. AOPTA .. CONS .. IN .. "$") then -- layālin return "diin" else -- other -in words return "triin" end end

-- Defective in -in inflections["in"] = function(stem, tr, data, mod, numgen) in_defective(stem, tr, data, mod, numgen,		detect_in_type(stem, rfind(numgen, "pl")) == "triin") end

-- Defective in -in, force "triptote" variant inflections["triin"] = function(stem, tr, data, mod, numgen) in_defective(stem, tr, data, mod, numgen, true) end

-- Defective in -in, force "diptote" variant inflections["diin"] = function(stem, tr, data, mod, numgen) in_defective(stem, tr, data, mod, numgen, false) end

-- Defective in -an (comes in two variants, depending on spelling with tall alif or alif maqṣūra) inflections["an"] = function(stem, tr, data, mod, numgen) local tall_alif if rfind(stem, AN .. ALIF .. "$") then tall_alif = true stem = rsub(stem, AN .. ALIF .. "$", "") elseif rfind(stem, AN .. AMAQ .. "$") then tall_alif = false stem = rsub(stem, AN .. AMAQ .. "$", "") else error("Invalid stem for 'an' declension type: " .. stem) end tr = rsub(tr, "an$", "")

if tall_alif then add_inflections(stem, tr, data, mod, numgen,			{AN .. ALIF, AN .. ALIF, AN .. ALIF,			 AA, AA, AA,			 AA, AA, AA,			 AA, AA, AA,			 AN .. ALIF, AA, AA,			}) else add_inflections(stem, tr, data, mod, numgen,			{AN .. AMAQ, AN .. AMAQ, AN .. AMAQ,			 AAMAQ, AAMAQ, AAMAQ,			 AAMAQ, AAMAQ, AAMAQ,			 AAMAQ, AAMAQ, AAMAQ,			 AN .. AMAQ, AAMAQ, AAMAQ,			}) end

-- FIXME: Should we distinguish between tall alif and alif maqṣūra? insert_cat(data, mod, numgen, "Arabic NOUNs with BROKSING in -an",		"BROKSING in " .. make_link(HYPHEN .. AN .. (tall_alif and ALIF or AMAQ))) end

function invariable(stem, tr, data, mod, numgen) add_inflections(stem, tr, data, mod, numgen,		{"", "", "",		 "", "", "",		 "", "", "",		 "", "", "",		 "", "", "",		}) insert_cat(data, mod, numgen, "Arabic NOUNs with invariable BROKSING",		"BROKSING invariable") end

-- Invariable in -ā (non-loanword type) inflections["inv"] = function(stem, tr, data, mod, numgen) invariable(stem, tr, data, mod, numgen) end

-- Invariable in -ā (loanword type, behaving in the dual as if ending in -a, I think!) inflections["lwinv"] = function(stem, tr, data, mod, numgen) invariable(stem, tr, data, mod, numgen) end

-- Duals inflections["d"] = function(stem, tr, data, mod, numgen) if rfind(stem, ALIF .. NI .. "?$") then stem = rsub(stem, AOPTA .. NI .. "?$", "") elseif rfind(stem, AMAD .. NI .. "?$") then stem = rsub(stem, AMAD .. NI .. "?$", HAMZA_PH) else error("Dual stem should end in -ān(i): '" .. stem .. "'") end tr = rsub(tr, "āni?$", "") local mo = mod_oblique(mod, data) add_inflections(stem, tr, data, mod, numgen,		{AANI, AYNI, AYNI,		 AANI, AYNI, AYNI,		 AA, AYSK, AYSK,		 AYN, AYN, AYSK,		 mo and AYN or AAN, mo and AYN or AAN, mo and AYSK or AA,		}) insert_cat(data, mod, numgen, "", "dual in " .. make_link(HYPHEN .. AANI)) end

-- Sound masculine plural inflections["smp"] = function(stem, tr, data, mod, numgen) if not rfind(stem, UUNA .. "?$") then error("Sound masculine plural stem should end in -ūn(a): '" .. stem .. "'") end stem = rsub(stem, UUNA .. "?$", "") tr = rsub(tr, "ūna?$", "") local mo = mod_oblique(mod, data) add_inflections(stem, tr, data, mod, numgen,		{UUNA, IINA, IINA,		 UUNA, IINA, IINA,		 UU,  II,   II,		 IIN,  IIN,  II,		 mo and IIN or UUN, mo and IIN or UUN, mo and II or UU,		}) -- use SINGULAR because conceivably this might be used with the paucal -- instead of plural insert_cat(data, mod, numgen, "Arabic NOUNs with sound masculine SINGULAR",		"sound masculine SINGULAR") end

-- Sound feminine plural inflections["sfp"] = function(stem, tr, data, mod, numgen) if not rfind(stem, "[" .. ALIF .. AMAD .. "]" .. T .. UN .. "?$") then error("Sound feminine plural stem should end in -āt(un): '" .. stem .. "'") end stem = rsub(stem, UN .. "$", "") tr = rsub(tr, "un$", "") add_inflections(stem, tr, data, mod, numgen,		{UN, IN, IN,		 U, I,  I,		 U,  I,  I,		 "", "", "",		 "", "", "",		}) -- use SINGULAR because this might be used with the paucal -- instead of plural insert_cat(data, mod, numgen, "Arabic NOUNs with sound feminine SINGULAR",		"sound feminine SINGULAR") end

-- Plural of defective in -an inflections["awnp"] = function(stem, tr, data, mod, numgen) if not rfind(stem, AWNA .. "?$") then error("'awnp' plural stem should end in -awn(a): '" .. stem .. "'") end stem = rsub(stem, AWNA .. "?$", "") tr = rsub(tr, "awna?$", "") local mo = mod_oblique(mod, data) add_inflections(stem, tr, data, mod, numgen,		{AWNA, AYNA, AYNA,		 AWNA, AYNA, AYNA,		 AWSK, AYSK, AYSK,		 AYN, AYN, AYSK,		 mo and AYN or AWN, mo and AYN or AWN, mo and AYSK or AWSK,		}) -- use SINGULAR because conceivably this might be used with the paucal -- instead of plural insert_cat(data, mod, numgen, "Arabic NOUNs with sound SINGULAR in -awna",		"sound SINGULAR in " .. make_link(HYPHEN .. AWNA)) end

-- Unknown inflections["?"] = function(stem, tr, data, mod, numgen) add_inflections("?", "?", data, mod, numgen,		{"", "", "",		 "", "", "",		 "", "", "",		 "", "", "",		 "", "", "",		}) insert_cat(data, mod, numgen, "Arabic NOUNs with unknown SINGULAR",		"SINGULAR unknown") end

-- Detect declension of noun or adjective stem or lemma. We allow triptotes, -- diptotes and sound plurals to either come with ʾiʿrāb or not. We detect -- some cases where vowels are missing, when it seems fairly unambiguous to -- do so. ISFEM is true if we are dealing with a feminine stem (not -- currently used and needs to be rethought). NUM is "sg", "du", or "pl", -- depending on the number of the stem. -- -- POS is the part of speech, generally "noun" or "adjective". Used to -- distinguish nouns and adjectives of the فَعْلَان type. There are nouns of -- this type and they generally are triptotes, e.g. قَطْرَان "tar" -- and شَيْطَان "devil". An additional complication is that the user can set -- the POS to something else, like "numeral". We don't use this POS for -- modifiers, where we determine whether they are noun-like or adjective-like -- according to whether mod_idafa= is true. -- -- Some unexpectedly diptote nouns/adjectives: -- -- jiʿrān in ʾabū jiʿrān "dung beetle" -- distributive numbers: ṯunāʾ "two at a time", ṯulāṯ/maṯlaṯ "three at a time", --  rubāʿ "four at a time" (not a regular diptote pattern, cf. triptote --   junāḥ "misdemeanor, sin", nujār "origin, root", nuḥām "flamingo") -- jahannam (f.) "hell" -- many names: jilliq/jillaq "Damascus", judda/jidda "Jedda", jibrīl (and --  variants) "Gabriel", makka "Mecca", etc. -- jibriyāʾ "pride" -- kibriyāʾ "glory, pride" -- babbaḡāʾ "parrot" -- ʿayāyāʾ "incapable, tired" -- suwaidāʾ "black bile, melancholy" -- Note also: ʾajhar "day-blind" (color-defect) and ʾajhar "louder" (elative) function export.detect_type(stem, isfem, num, pos) local function dotrack(word) track(word) track(word .. "/" .. pos) return true end -- Not strictly necessary because the caller (stem_and_type) already -- reorders, but won't hurt, and may be necessary if this function is -- called from an external caller. stem = reorder_shadda(stem) local origstem = stem -- So that we don't get tripped up by alif madda, we replace alif madda -- with the sequence hamza + fatḥa + alif before the regexps below. stem = rsub(stem, AMAD, HAMZA .. AA) if num == "du" then if rfind(stem, ALIF .. NI .. "?$") then return "d" else error("Malformed stem for dual, should end in the nominative dual ending -ān(i): '" .. origstem .. "'") end end if rfind(stem, IN .. "$") then -- -in words return detect_in_type(stem, num == "pl") elseif rfind(stem, AN .. "[" .. ALIF .. AMAQ .. "]$") then return "an" elseif rfind(stem, AN .. "$") then error("Malformed stem, fatḥatan should be over second-to-last letter: " .. origstem) elseif num == "pl" and rfind(stem, AW .. SKOPT .. N .. AOPT .. "$") then return "awnp" elseif num == "pl" and rfind(stem, ALIF .. T .. UNOPT .. "$") and -- Avoid getting tripped up by plurals like ʾawqāt "times", -- ʾaḥwāt "fishes", ʾabyāt "verses", ʾazyāt "oils", ʾaṣwāt "voices", -- ʾamwāt "dead (pl.)". not rfind(stem, HAMZA_ON_ALIF .. A .. CONS .. SK .. CONS .. AAT .. UNOPT .. "$") then return "sfp" elseif num == "pl" and rfind(stem, W .. N .. AOPT .. "$") and -- Avoid getting tripped up by plurals like ʿuyūn "eyes", -- qurūn "horns" (note we check for U between first two consonants		-- so we correctly ignore cases like sinūn "hours" (from sana),		-- riʾūn "lungs" (from riʾa) and banūn "sons" (from ibn).		not rfind(stem, "^" .. CONS .. U .. CONS .. UUN .. AOPT .. "$") then		return "smp"	elseif rfind(stem, UN .. "$") then -- explicitly specified triptotes (we catch sound feminine plurals above)		return "tri"	elseif rfind(stem, U .. "$") then -- explicitly specified diptotes		return "di"	elseif -- num == "pl" and		( -- various diptote plural patterns; these are diptote even in the singular (e.g. yanāyir "January", falāfil "falafel", tuʾabāʾ "yawn, fatigue"		 -- currently we sometimes end up with such plural patterns in the "singular" in a singular		  -- ʾidāfa construction with plural modifier. (FIXME: These should be fixed to the correct number.)		rfind(stem, "^" .. CONS .. AOPT .. CONS .. AOPTA .. CONS .. IOPT .. Y .. "?" .. CONS .. "$") and dotrack("fawaakih") or -- fawākih, daqāʾiq, makātib, mafātīḥ		rfind(stem, "^" .. CONS .. AOPT .. CONS .. AOPTA .. CONS .. SH .. "$")			and not rfind(stem, "^" .. T) and dotrack("mawaadd") or -- mawādd, maqāmm, ḍawāll; exclude t- so we don't catch form-VI verbal nouns like taḍādd (HACK!!!)		rfind(stem, "^" .. CONS .. U .. CONS .. AOPT .. CONS .. AOPTA .. HAMZA .. "$") and dotrack("wuzaraa") or -- wuzarāʾ "ministers", juhalāʾ "ignorant (pl.)"		rfind(stem, ELCD_START .. SKOPT .. CONS .. IOPT .. CONS .. AOPTA .. HAMZA .. "$") and dotrack("asdiqaa") or -- ʾaṣdiqāʾ		rfind(stem, ELCD_START .. IOPT .. CONS .. SH .. AOPTA .. HAMZA .. "$") and dotrack("aqillaa") -- ʾaqillāʾ, ʾajillāʾ "important (pl.)", ʾaḥibbāʾ "lovers"		) then return "di" elseif num == "sg" and ( -- diptote singular patterns (nouns/adjectives)		rfind(stem, "^" .. CONS .. A .. CONS .. SK .. CONS .. AOPTA .. HAMZA .. "$") and dotrack("qamraa") or -- qamrāʾ "moon-white, moonlight"; baydāʾ "desert"; ṣaḥrāʾ "desert-like, desert"; tayhāʾ "trackless, desolate region"; not pl. to avoid catching e.g. ʾabnāʾ "sons", ʾaḥmāʾ "fathers-in-law", ʾamlāʾ "steppes, deserts" (pl. of malan), ʾanbāʾ "reports" (pl. of nabaʾ)		rfind(stem, ELCD_START .. SK .. CONS .. A .. CONS .. "$") and dotrack("abyad") or -- ʾabyaḍ "white", ʾakbar "greater"; FIXME nouns like ʾaʿzab "bachelor", ʾaḥmad "Ahmed" but not ʾarnab "rabbit", ʾanjar "anchor", ʾabjad "abjad", ʾarbaʿ "four", ʾandar "threshing floor" (cf. diptote ʾandar "rarer")		rfind(stem, ELCD_START .. A .. CONS .. SH .. "$") and dotrack("alaff") or -- ʾalaff "plump", ʾaḥabb "more desirable"		-- do the following on the origstem so we can check specifically for alif madda		rfind(origstem, "^" .. AMAD .. CONS .. A .. CONS .. "$") and dotrack("aalam") -- ʾālam "more painful", ʾāḵar "other"		) then return "di" elseif num == "sg" and pos == "adjective" and ( -- diptote singular patterns (adjectives)		rfind(stem, "^" .. CONS .. A .. CONS .. SK .. CONS .. AOPTA .. N .. "$") and dotrack("kaslaan") or -- kaslān "lazy", ʿaṭšān "thirsty", jawʿān "hungry", ḡaḍbān "angry", tayhān "wandering, perplexed"; but not nouns like qaṭrān "tar", šayṭān "devil", mawtān "plague", maydān "square"		-- rfind(stem, "^" .. CONS .. A .. CONS .. SH .. AOPTA .. N .. "$") and dotrack("laffaa") -- excluded because of too many false positives e.g. ḵawwān "disloyal", not to mention nouns like jannān "gardener"; only diptote example I can find is ʿayyān "incapable, weary" (diptote per Lane but not Wehr)		rfind(stem, "^" .. CONS .. A .. CONS .. SH .. AOPTA .. HAMZA .. "$") and dotrack("laffaa") -- laffāʾ "plump (fem.)"; but not nouns like jarrāʾ "runner", ḥaddāʾ "camel driver", lawwāʾ "wryneck"		) then return "di" elseif rfind(stem, AMAQ .. "$") then -- kaslā, ḏikrā (spelled with alif maqṣūra) return "inv" elseif rfind(stem, "[" .. ALIF .. SK .. "]" .. Y .. AOPTA .. "$") then -- dunyā, hadāyā (spelled with tall alif after yāʾ) return "inv" elseif rfind(stem, ALIF .. "$") then -- kāmērā, lībiyā (spelled with tall alif; we catch dunyā and hadāyā above) return "lwinv" elseif rfind(stem, II .. "$") then -- cases like كُوبْرِي kubrī "bridge" and صَوَانِي ṣawānī pl. of ṣīniyya; modern words that would probably end with -in dotrack("ii") return "inv" elseif rfind(stem, UU .. "$") then -- FIXME: Does this occur? Check the tracking dotrack("uu") return "inv" else return "tri" end end

-- Replace hamza (of any sort) at the end of a word, possibly followed by -- a nominative case ending or -in or -an, with HAMZA_PH, and replace alif -- madda at the end of a word with HAMZA_PH plus fatḥa + alif. To undo these -- changes, use hamza_seat. function canon_hamza(word) word = rsub(word, AMAD .. "$", HAMZA_PH .. AA) word = rsub(word, HAMZA_ANY .. "([" .. UN .. U .. IN .. "]?)$", HAMZA_PH .. "%1") word = rsub(word, HAMZA_ANY .. "(" .. AN .. "[" .. ALIF .. AMAQ .. "])$", HAMZA_PH .. "%1") return word end

-- Supply the appropriate hamza seat(s) for a placeholder hamza. function hamza_seat(word) if rfind(word, HAMZA_PH) then -- optimization to avoid many regexp substs return ar_utilities.process_hamza(word) end return {word} end

-- -- Supply the appropriate hamza seat for a placeholder hamza in a combined -- Arabic/translation expression. function split_and_hamza_seat(word)	if rfind(word, HAMZA_PH) then -- optimization to avoid many regexp substs		local ar, tr = split_arabic_tr(word)		-- FIXME: Do something with all values returned		ar = ar_utilities.process_hamza(ar)[1]		return ar .. "/" .. tr	end	return word end --

-- Return stem and type of an argument given the singular stem and whether -- this is a plural argument. WORD may be of the form ARABIC, ARABIC/TR, -- ARABIC:TYPE, ARABIC/TR:TYPE, or TYPE, for Arabic stem ARABIC with -- transliteration TR and of type (i.e. declension) TYPE. If the type -- is omitted, it is auto-detected using detect_type. If the transliteration -- is omitted, it is auto-transliterated from the Arabic. If only the type -- is present, it is a sound plural type ("sf", "sm" or "awn"), -- in which case the stem and translit are generated from the singular by -- regular rules. SG may be of the form ARABIC/TR or ARABIC. ISFEM is true -- if WORD is a feminine stem. NUM is either "sg", "du" or "pl" according to -- the number of the stem. The return value will be in the ARABIC/TR format. -- -- POS is the part of speech, generally "noun" or "adjective". Used to -- distinguish nouns and adjectives of the فَعْلَان type. There are nouns of -- this type and they generally are triptotes, e.g. قَطْرَان "tar" -- and شَيْطَان "devil". An additional complication is that the user can set -- the POS to something else, like "numeral". We don't use this POS for -- modifiers, where we determine whether they are noun-like or adjective-like -- according to whether mod_idafa= is true. function export.stem_and_type(word, sg, sgtype, isfem, num, pos) local rettype = nil if rfind(word, ":") then local split = rsplit(word, ":") if #split > 2 then error("More than one colon found in argument: '" .. word .. "'") end word, rettype = split[1], split[2] end

local ar, tr = split_arabic_tr(word) -- Need to reorder shaddas here so that shadda at the end of a stem -- followed by ʾiʿrāb or a plural ending or whatever can get processed -- correctly. This processing happens in various places so make sure -- we return the reordered Arabic in all circumstances. ar = reorder_shadda(ar) local artr = ar .. "/" .. tr

-- Now return split-out ARABIC/TR and TYPE, with shaddas reordered in -- the Arabic. if rettype then return artr, rettype end

-- Likewise, do shadda reordering for the singular. local sgar, sgtr = split_arabic_tr(sg) sgar = reorder_shadda(sgar)

-- Apply a substitution to the singular Arabic and translit. If a	-- substitution could be made, return the combined ARABIC/TR with -- substitutions made; else, return nil. The Arabic has ARFROM -- replaced with ARTO, while the translit has TRFROM replaced with -- TRTO, and if that doesn't match, replace TRFROM2 with TRTO2. local function sub(arfrom, arto, trfrom, trto, trfrom2, trto2, trfrom3, trto3) if rfind(sgar, arfrom) then local arret = rsub(sgar, arfrom, arto) local trret = sgtr if rfind(sgtr, trfrom) then trret = rsub(sgtr, trfrom, trto) elseif trfrom2 and rfind(sgtr, trfrom2) then trret = rsub(sgtr, trfrom2, trto2) elseif trfrom3 and rfind(sgtr, trfrom3) then trret = rsub(sgtr, trfrom3, trto3) elseif not rfind(sgtr, BOGUS_CHAR) then error("Transliteration '" .. sgtr .."' does not have same ending as Arabic '" .. sgar .. "'") end return arret .. "/" .. trret else return nil end end

if (num ~= "sg" or not isfem) and (word == "elf" or word == "cdf" or word == "intf" or word == "rf" or word == "f") then error("Inference of form for inflection type '" .. word .. "' only allowed in singular feminine") end if num ~= "du" and word == "d" then error("Inference of form for inflection type '" .. word .. "' only allowed in dual") end if num ~= "pl" and (word == "sfp" or word == "smp" or word == "awnp" or word == "cdp" or word == "sp" or word == "fp" or word == "p") then error("Inference of form for inflection type '" .. word .. "' only allowed in plural") end

local function is_intensive_adj(ar) return rfind(ar, "^" .. CONS .. A .. CONS .. SK .. CONS .. AOPTA .. N .. UOPT .. "$") or rfind(ar, "^" .. CONS .. A .. CONS .. SK .. AMAD .. N .. UOPT .. "$") or rfind(ar, "^" .. CONS .. A .. CONS .. SH .. AOPTA .. N .. UOPT .. "$") end local function is_feminine_cd_adj(ar) return pos == "adjective" and (rfind(ar, "^" .. CONS .. A .. CONS .. SK .. CONS .. AOPTA .. HAMZA .. UOPT .. "$") or -- ʾḥamrāʾ/ʿamyāʾ/bayḍāʾ			rfind(ar, "^" .. CONS .. A .. CONS .. SH .. AOPTA .. HAMZA .. UOPT .. "$") -- laffāʾ			) end

local function is_elcd_adj(ar) return rfind(ar, ELCD_START .. SK .. CONS .. A .. CONS .. UOPT .. "$") or -- ʾabyaḍ "white", ʾakbar "greater" rfind(ar, ELCD_START .. A .. CONS .. SH .. UOPT .. "$") or -- ʾalaff "plump", ʾaqall "fewer" rfind(ar, ELCD_START .. SK .. CONS .. AAMAQ .. "$") or -- ʾaʿmā "blind", ʾadnā "lower" rfind(ar, "^" .. AMAD .. CONS .. A .. CONS .. UOPT .. "$") -- ʾālam "more painful", ʾāḵar "other" end

if word == "?" or			(rfind(word, "^[a-z][a-z]*$") and sgtype == "?") then --if 'word' is a type, actual value inferred from sg; if sgtype is ?, --propagate it to all derived types return "", "?" end

if word == "intf" then if not is_intensive_adj(sgar) then error("Singular stem not in CACCān form: " .. sgar) end local ret = (			sub(AMAD .. N .. UOPT .. "$", AMAD, "nu?$", "") or -- ends in -ʾān			sub(AOPTA .. N .. UOPT .. "$", AMAQ, "nu?$", "") -- ends in -ān		) return ret, "inv" end

if word == "elf" then local ret = (			sub(ELCD_START .. SK .. "[" .. Y .. W .. "]" .. A .. CONSPAR .. UOPT .. "$",				"%1" .. UU .. "%2" .. AMAQ, "ʔa(.)[yw]a(.)u?", "%1ū%2ā") or -- ʾajyad			sub(ELCD_START .. SK .. CONSPAR .. A .. CONSPAR .. UOPT .. "$",				"%1" .. U .. "%2" .. SK .. "%3" .. AMAQ, "ʔa(.)(.)a(.)u?", "%1u%2%3ā") or -- ʾakbar			sub(ELCD_START .. A .. CONSPAR .. SH .. UOPT .. "$",				"%1" .. U .. "%2" .. SH .. AMAQ, "ʔa(.)a(.)%2u?", "%1u%2%2ā") or -- ʾaqall			sub(ELCD_START .. SK .. CONSPAR .. AAMAQ .. "$",				"%1" .. U .. "%2" .. SK .. Y .. ALIF, "ʔa(.)(.)ā", "%1u%2yā") or -- ʾadnā			sub("^" .. AMAD .. CONSPAR .. A .. CONSPAR .. UOPT .. "$",				HAMZA_ON_ALIF .. U .. "%1" .. SK .. "%2" .. AMAQ, "ʔā(.)a(.)u?", "ʔu%1%2ā") -- ʾālam "more painful", ʾāḵar "other"		) if not ret then error("Singular stem not an elative adjective: " .. sgar) end return ret, "inv" end

if word == "cdf" then local ret = (			sub(ELCD_START .. SK .. CONSPAR .. A .. CONSPAR .. UOPT .. "$",				"%1" .. A .. "%2" .. SK .. "%3" .. AA .. HAMZA, "ʔa(.)(.)a(.)u?", "%1a%2%3āʔ") or -- ʾaḥmar			sub(ELCD_START .. A .. CONSPAR .. SH .. UOPT .. "$",				"%1" .. A .. "%2" .. SH .. AA .. HAMZA, "ʔa(.)a(.)%2u?", "%1a%2%2āʔ") or -- ʾalaff			sub(ELCD_START .. SK .. CONSPAR .. AAMAQ .. "$",				"%1" .. A .. "%2" .. SK .. Y .. AA .. HAMZA, "ʔa(.)(.)ā", "%1a%2yāʔ") -- ʾaʿmā		) if not ret then error("Singular stem not a color/defect adjective: " .. sgar) end return ret, "cd" -- so plural will be correct end

-- Regular feminine -- add ة, possibly with stem modifications if word == "rf" then sgar = canon_hamza(sgar) if rfind(sgar, TAM .. UNUOPT .. "$") then --Don't do this or we have problems when forming singulative from --collective with a construct modifier that's feminine --error("Singular stem is already feminine: " .. sgar) return sgar .. "/" .. sgtr, "tri" end

local ret = (			sub(AN .. "[" .. ALIF .. AMAQ .. "]$", AAH, "an$", "āh") or -- ends in -an			sub(IN .. "$", IY .. AH, "in$", "iya") or -- ends in -in			sub(AOPT .. "[" .. ALIF .. AMAQ .. "]$", AAH, "ā$", "āh") or -- ends in alif or alif maqṣūra			-- We separate the ʾiʿrāb and no-ʾiʿrāb cases even though we can			-- do a single Arabic regexp to cover both because we want to			-- remove u(n) from the translit only when ʾiʿrāb is present to			-- lessen the risk of removing -un in the actual stem. We also			-- allow for cases where the ʾiʿrāb is present in Arabic but not			-- in translit.			sub(UNU .. "$", AH, "un?$", "a", "$", "a") or -- anything else + -u(n)			sub("$", AH, "$", "a") -- anything else		) return ret, "tri" end

if word == "f" then if sgtype == "cd" then return export.stem_and_type("cdf", sg, sgtype, true, "sg", pos) elseif sgtype == "el" then return export.stem_and_type("elf", sg, sgtype, true, "sg", pos) elseif sgtype =="di" and is_intensive_adj(sgar) then return export.stem_and_type("intf", sg, sgtype, true, "sg", pos) elseif sgtype == "di" and is_elcd_adj(sgar) then -- If form is elative or color-defect, we don't know which of			-- the two it is, and each has a special feminine which isn't -- the regular "just add ة", so shunt to unknown. This will -- ensure that ?'s appear in place of the inflection -- also -- for dual and plural. return export.stem_and_type("?", sg, sgtype, true, "sg", pos) else return export.stem_and_type("rf", sg, sgtype, true, "sg", pos) end end

if word == "rm" then sgar = canon_hamza(sgar) --Don't do this or we have problems when forming collective from --singulative with a construct modifier that's not feminine, --e.g. شَجَرَة التُفَّاح --if not rfind(sgar, TAM .. UNUOPT .. "$") then --	error("Singular stem is not feminine: " .. sgar) --end

local ret = (			sub(AAH .. UNUOPT .. "$", AN .. AMAQ, "ātun?$", "an", "ā[ht]$", "an") or -- in -āh			sub(IY .. AH .. UNUOPT .. "$", IN, "iyatun?$", "in", "iya$", "in") or -- ends in -iya			sub(AOPT .. TAM .. UNUOPT .. "$", "", "atun?$", "", "a$", "") or --ends in -a			sub("$", "", "$", "") -- do nothing		) return ret, "tri" end

if word == "m" then -- FIXME: handle cd (color-defect) -- FIXME: handle el (elative) -- FIXME: handle int (intensive) return export.stem_and_type("rm", sg, sgtype, false, "sg", pos) end

-- The plural used for feminine adjectives. If the singular type is	-- color/defect or it looks like a feminine color/defect adjective, -- use color/defect plural. Otherwise shunt to sound feminine plural. if word == "fp" then if sgtype == "cd" or is_feminine_cd_adj(sgar) then return export.stem_and_type("cdp", sg, sgtype, true, "pl", pos) else return export.stem_and_type("sfp", sg, sgtype, true, "pl", pos) end end

if word == "sp" then if sgtype == "cd" then return export.stem_and_type("cdp", sg, sgtype, isfem, "pl", pos) elseif isfem then return export.stem_and_type("sfp", sg, sgtype, true, "pl", pos) elseif sgtype == "an" then return export.stem_and_type("awnp", sg, sgtype, false, "pl", pos) else return export.stem_and_type("smp", sg, sgtype, false, "pl", pos) end end

-- Conservative plural, as used for masculine plural adjectives. -- If singular type is color-defect, shunt to color-defect plural; else -- shunt to unknown, so ? appears in place of the inflections. if word == "p" then if sgtype == "cd" then return export.stem_and_type("cdp", sg, sgtype, isfem, "pl", pos) else return export.stem_and_type("?", sg, sgtype, isfem, "pl", pos) end end

-- Special plural used for paucal plurals of singulatives. If ends in -ة -- (most common), use strong feminine plural; if ends with -iyy (next	-- most common), use strong masculine plural; ends default to "p" -- (conservative plural). if word == "paucp" then if rfind(sgar, TAM .. UNUOPT .. "$") then return export.stem_and_type("sfp", sg, sgtype, true, "pl", pos) elseif rfind(sgar, IY .. SH .. UNUOPT .. "$") then return export.stem_and_type("smp", sg, sgtype, false, "pl", pos) else return export.stem_and_type("p", sg, sgtype, isfem, "pl", pos) end end

if word == "d" then sgar = canon_hamza(sgar) local ret = (			sub(AN .. "[" .. ALIF .. AMAQ .. "]$", AY .. AAN, "an$", "ayān") or -- ends in -an			sub(IN .. "$", IY .. AAN, "in$", "iyān") or -- ends in -in			sgtype == "lwinv" and sub(AOPTA .. "$", AT .. AAN, "[āa]$", "atān") or -- lwinv, ends in alif; allow translit with short -a			sub(AOPT .. "[" .. ALIF .. AMAQ .. "]$", AY .. AAN, "ā$", "ayān") or -- ends in alif or alif maqṣūra			-- We separate the ʾiʿrāb and no-ʾiʿrāb cases even though we can			-- do a single Arabic regexp to cover both because we want to			-- remove u(n) from the translit only when ʾiʿrāb is present to			-- lessen the risk of removing -un in the actual stem. We also			-- allow for cases where the ʾiʿrāb is present in Arabic but not			-- in translit.			--			-- NOTE: Collapsing the "h$" and "$" cases into "h?$" doesn't work			-- in the case of words ending in -āh, which end up having the			-- translit end in -tāntān.			sub(TAM .. UNU .. "$", T .. AAN, "[ht]un?$", "tān", "h$", "tān", "$", "tān") or -- ends in tāʾ marbuṭa + -u(n)			sub(TAM .. "$", T .. AAN, "h$", "tān", "$", "tān") or -- ends in tāʾ marbuṭa			-- Same here as above			sub(UNU .. "$", AAN, "un?$", "ān", "$", "ān") or -- anything else + -u(n)			sub("$", AAN, "$", "ān") -- anything else		) return ret, "d" end

-- Strong feminine plural in -āt, possibly with stem modifications if word == "sfp" then sgar = canon_hamza(sgar) sgar = rsub(sgar, AMAD .. "(" .. TAM .. UNUOPT .. ")$", HAMZA_PH .. AA .. "%1") sgar = rsub(sgar, HAMZA_ANY .. "(" .. AOPT .. TAM .. UNUOPT .. ")$", HAMZA_PH .. "%1") local ret = (			sub(AOPTA .. TAM .. UNUOPT .. "$", AYAAT, "ā[ht]$", "ayāt", "ātun?$", "ayāt") or -- ends in -āh			sub(AOPT .. TAM .. UNUOPT .. "$", AAT, "a$", "āt", "atun?$", "āt") or -- ends in -a			sub(AN .. "[" .. ALIF .. AMAQ .. "]$", AYAAT, "an$", "ayāt") or -- ends in -an			sub(IN .. "$", IY .. AAT, "in$", "iyāt") or -- ends in -in			sgtype == "inv" and ( sub(AOPT .. "[" .. ALIF .. AMAQ .. "]$", AYAAT, "ā$", "ayāt") -- ends in alif or alif maqṣūra ) or			sgtype == "lwinv" and ( sub(AOPTA .. "$", AAT, "[āa]$", "āt") -- loanword ending in tall alif; allow translit with short -a ) or			-- We separate the ʾiʿrāb and no-ʾiʿrāb cases even though we can			-- do a single Arabic regexp to cover both because we want to			-- remove u(n) from the translit only when ʾiʿrāb is present to			-- lessen the risk of removing -un in the actual stem. We also			-- allow for cases where the ʾiʿrāb is present in Arabic but not			-- in translit.			sub(UNU .. "$", AAT, "un?$", "āt", "$", "āt") or -- anything else + -u(n)			sub("$", AAT, "$", "āt") -- anything else		) return ret, "sfp" end

if word == "smp" then sgar = canon_hamza(sgar) local ret = (			sub(IN .. "$", UUN, "in$", "ūn") or -- ends in -in			-- See comments above for why we have two cases, one for UNU and			-- one for non-UNU			sub(UNU .. "$", UUN, "un?$", "ūn", "$", "ūn") or -- anything else + -u(n)			sub("$", UUN, "$", "ūn") -- anything else		) return ret, "smp" end

-- Color/defect plural; singular must be masculine or feminine -- color/defect adjective if word == "cdp" then local ret = (			sub(ELCD_START .. SK .. W .. A .. CONSPAR .. UOPT .. "$",				"%1" .. UU .. "%2", "ʔa(.)wa(.)u?", "%1ū%2") or -- ʾaswad			sub(ELCD_START .. SK .. Y .. A .. CONSPAR .. UOPT .. "$",				"%1" .. II .. "%2", "ʔa(.)ya(.)u?", "%1ī%2") or -- ʾabyaḍ			sub(ELCD_START .. SK .. CONSPAR .. A .. CONSPAR .. UOPT .. "$",				"%1" .. U .. "%2" .. SK .. "%3", "ʔa(.)(.)a(.)u?", "%1u%2%3") or -- ʾaḥmar			sub(ELCD_START .. A .. CONSPAR .. SH .. UOPT .. "$",				"%1" .. U .. "%2" .. SH, "ʔa(.)a(.)%2u?", "%1u%2%2") or -- ʾalaff			sub(ELCD_START .. SK .. CONSPAR .. AAMAQ .. "$",				"%1" .. U .. "%2" .. Y, "ʔa(.)(.)ā", "%1u%2y") or -- ʾaʿmā			sub("^" .. CONSPAR .. A .. W .. SKOPT .. CONSPAR .. AA .. HAMZA .. UOPT .. "$", "%1" .. UU .. "%2", "(.)aw(.)āʔu?", "%1ū%2") or -- sawdāʾ			sub("^" .. CONSPAR .. A .. Y .. SKOPT .. CONSPAR .. AA .. HAMZA .. UOPT .. "$", "%1" .. II .. "%2", "(.)ay(.)āʔu?", "%1ī%2") or -- bayḍāʾ			sub("^" .. CONSPAR .. A .. CONSPAR .. SK .. CONSPAR .. AA .. HAMZA .. UOPT .. "$", "%1" .. U .. "%2" .. SK .. "%3", "(.)a(.)(.)āʔu?", "%1u%2%3") or -- ʾḥamrāʾ/ʿamyāʾ			sub("^" .. CONSPAR .. A .. CONSPAR .. SH .. AA .. HAMZA .. UOPT .. "$", "%1" .. U .. "%2" .. SH, "(.)a(.)%2āʔu?", "%1u%2%2") -- laffāʾ		) if not ret then error("For 'cdp', singular must be masculine or feminine color/defect adjective: " .. sgar) end return ret, "tri" end

if word == "awnp" then local ret = (			sub(AN .. "[" .. ALIF .. AMAQ .. "]$", AWSK .. N, "an$", "awn") -- ends in -an		) if not ret then error("For 'awnp', singular must end in -an: " .. sgar) end return ret, "awnp" end

return artr, export.detect_type(ar, isfem, num, pos) end

-- local outersep = " or " -- need LRM here so multiple Arabic plurals end up agreeing in order with -- the transliteration local outersep = LRM .. "; " local innersep = LRM .. "/"

-- Subfunction of show_form, used to implement recursively generating -- all combinations of elements from FORM and from each of the items in -- LIST_OF_MODS, both of which are either arrays of strings or arrays of -- arrays of strings, where the strings are in the form ARABIC/TRANSLIT, -- as described in show_form. TRAILING_ARTRMODS is an array of ARTRMOD -- items, each of which is a two-element array of ARMOD (Arabic) and TRMOD -- (transliteration), accumulating all of the suffixes generated so far -- in the recursion process. Each time we recur we take the last MOD item -- off of LIST_OF_MODS, separate each element in MOD into its Arabic and -- Latin parts and to each Arabic/Latin pair we add all elements in -- TRAILING_ARTRMODS, passing the newly generated list of ARTRMOD items -- down the next recursion level with the shorter LIST_OF_MODS. We end up -- returning a string to insert into the Wiki-markup table. function show_form_1(form, list_of_mods, trailing_artrmods, use_parens) if #list_of_mods == 0 then local arabicvals = {} local latinvals = {} local parenvals = {} -- Accumulate separately the Arabic and transliteration into -- ARABICVALS and LATINVALS, then concatenate each down below. -- However, if USE_PARENS, we put each transliteration directly -- after the corresponding Arabic, in parens, and put the results -- in PARENVALS, which get concatenated below. (This is used in the		-- title of the declension table.) for _, artrmod in ipairs(trailing_artrmods) do			assert(#artrmod == 2) local armod = artrmod[1] local trmod = artrmod[2] for _, subform in ipairs(form) do				local ar_span, tr_span local ar_subspan, tr_subspan local ar_subspans = {} local tr_subspans = {} if type(subform) ~= "table" then subform = {subform} end for _, subsubform in ipairs(subform) do					local arabic, translit = split_arabic_tr(subsubform) if arabic == "-" then ar_subspan = "&mdash;" tr_subspan = "&mdash;" else tr_subspan = (rfind(translit, BOGUS_CHAR) or rfind(trmod, BOGUS_CHAR)) and "?" or require("Module:script utilities").tag_translit(translit .. trmod, lang, "default", 'style="color: #888;"') -- implement elision of al- after vowel tr_subspan = rsub(tr_subspan, "([aeiouāēīōū][ %-])a([sšṣtṯṭdḏḍzžẓnrḷl]%-)", "%1%2") tr_subspan = rsub(tr_subspan, "([aeiouāēīōū][ %-])a(llāh)", "%1%2")

ar_subspan = m_links.full_link({lang = lang, term = arabic .. armod, tr = "-"}) end

insert_if_not(ar_subspans, ar_subspan) insert_if_not(tr_subspans, tr_subspan) end

ar_span = table.concat(ar_subspans, innersep) tr_span = table.concat(tr_subspans, innersep)

if use_parens then table.insert(parenvals, ar_span .. " (" .. tr_span .. ")")				else table.insert(arabicvals, ar_span) table.insert(latinvals, tr_span) end end end

if use_parens then return table.concat(parenvals, outersep) else local arabic_span = table.concat(arabicvals, outersep) local latin_span = table.concat(latinvals, outersep) return arabic_span .. " " .. latin_span end else local last_mods = table.remove(list_of_mods) local artrmods = {} for _, mod in ipairs(last_mods) do			if type(mod) ~= "table" then mod = {mod} end for _, submod in ipairs(mod) do				local armod, trmod = split_arabic_tr(submod) -- If the value is -, we need to create a blank entry -- rather than skipping it; if we have no entries at any -- level, then there will be no overall entries at all -- because the inside of the loop at the next level will -- never be executed. if armod == "-" then armod = "" trmod = "" end if armod ~= "" then armod = ' ' .. armod end if trmod ~= "" then trmod = ' ' .. trmod end for _, trailing_artrmod in ipairs(trailing_artrmods) do					local trailing_armod = trailing_artrmod[1] local trailing_trmod = trailing_artrmod[2] armod = armod .. trailing_armod trmod = trmod .. trailing_trmod artrmod = {armod, trmod} table.insert(artrmods, artrmod) end end end return show_form_1(form, list_of_mods, artrmods, use_parens) end end

-- Generate a string to substitute into a particular form in a Wiki-markup -- table. FORM is the set of inflected forms corresponding to the base, -- either an array of strings (referring e.g. to different possible plurals) -- or an array of arrays of strings (the first level referring e.g. to -- different possible plurals and the inner level referring typically to -- hamza-spelling variants). LIST_OF_MODS is an array of MODS elements, one -- per modifier. Each MODS element is the set of inflected forms corresponding -- to the modifier and is of the same form as FORM, i.e. an array of strings -- or an array of arrays of strings. Each string is typically of the form -- "ARABIC/TRANSLIT", i.e. an Arabic string and a Latin string separated -- by a slash. We loop over all possible combinations of elements from -- each array; this requires recursion. function show_form(form, list_of_mods, use_parens) if not form then return "&mdash;" elseif type(form) ~= "table" then error("a non-table value was given in the list of inflected forms.") end if #form == 0 then return "&mdash;" end

-- We need to start the recursion with the third parameter containing -- one blank element rather than no elements, otherwise no elements -- will be propagated to the next recursion level. return show_form_1(form, list_of_mods,, use_parens) end

-- Create a Wiki-markup table using the values in DATA and the template in -- WIKICODE. function make_table(data, wikicode) -- Function used as replace arg of call to rsub. Replace the -- specified param with its (HTML) value. The param references appear -- as in the wikicode. local function repl(param) if param == "pos" then return data.pos elseif param == "info" then return data.title and " (" .. data.title .. ")" or "" elseif rfind(param, "type$") then return table.concat(data.forms[param] or {"&mdash;"}, outersep) else local list_of_mods = {} for _, mod in ipairs(mod_list) do local mods = data.forms[mod .. "_" .. param] if not mods or #mods == 0 then -- We need one blank element rather than no element, -- otherwise no elements will be propagated from one -- recursion level to the next. mods = {""} end table.insert(list_of_mods, mods) end return show_form(data.forms[param], list_of_mods, param == "lemma") end end -- For states not in the list of those to be displayed, clear out the -- corresponding inflections so they appear as a dash. for _, state in ipairs(data.allstates) do		if not contains(data.states, state) then for _, numgen in ipairs(data.numgens) do				for _, case in ipairs(data.allcases) do data.forms[case .. "_" .. numgen .. "_" .. state] = {} end end end end

return rsub(wikicode, "", repl) .. m_utilities.format_categories(data.categories, lang) end

-- Generate part of the noun table for a given number spec NUM (e.g. sg) function generate_noun_num(num) return [=[! style="background: #CDCDCD;" | Indefinite ! style="background: #CDCDCD;" | Definite ! style="background: #CDCDCD;" | Construct ! style="background: #EFEFEF;" | Informal ! style="background: #EFEFEF;" | Nominative ! style="background: #EFEFEF;" | Accusative ! style="background: #EFEFEF;" | Genitive ]=] end

-- Make the noun table function make_noun_table(data) local wikicode = [=[ Declension of  {| class="inflection-table" style="border-width: 1px; border-collapse: collapse; background:#F9F9F9; text-align:center; width:100%;" ]=]

for _, num in ipairs(data.numbers) do		if num == "du" then wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" | Dual ]=] .. generate_noun_num("du") else wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=2 | ]=] .. data.engnumberscap[num] .. "\n" .. [=[ ! style="background: #CDCDCD;" colspan=3 | ]=] .. generate_noun_num(num) end end wikicode = wikicode .. [=[|} ]=]

return make_table(data, wikicode) end

-- Generate part of the gendered-noun table for a given numgen spec -- NUM (e.g. m_sg) function generate_gendered_noun_num(num) return [=[|- ! style="background: #CDCDCD;" | Indefinite ! style="background: #CDCDCD;" | Definite ! style="background: #CDCDCD;" | Construct ! style="background: #CDCDCD;" | Indefinite ! style="background: #CDCDCD;" | Definite ! style="background: #CDCDCD;" | Construct ! style="background: #EFEFEF;" | Informal ! style="background: #EFEFEF;" | Nominative ! style="background: #EFEFEF;" | Accusative ! style="background: #EFEFEF;" | Genitive ]=] end

-- Make the gendered noun table function make_gendered_noun_table(data) local wikicode = [=[ Declension of  {| class="inflection-table" style="border-width: 1px; border-collapse: collapse; background:#F9F9F9; text-align:center; width:100%;" ]=]

for _, num in ipairs(data.numbers) do		if num == "du" then wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=2 | Dual ! style="background: #CDCDCD;" colspan=3 | Masculine ! style="background: #CDCDCD;" colspan=3 | Feminine ]=] .. generate_gendered_noun_num("du") else wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=3 | ]=] .. data.engnumberscap[num] .. "\n" .. [=[ ! style="background: #CDCDCD;" colspan=3 | Masculine ! style="background: #CDCDCD;" colspan=3 | Feminine ! style="background: #CDCDCD;" colspan=3 | ! style="background: #CDCDCD;" colspan=3 | ]=] .. generate_gendered_noun_num(num) end end wikicode = wikicode .. [=[|} ]=]

return make_table(data, wikicode) end

-- Generate part of the adjective table for a given numgen spec NUM (e.g. m_sg) function generate_adj_num(num) return [=[|- ! style="background: #CDCDCD;" | Indefinite ! style="background: #CDCDCD;" | Definite ! style="background: #CDCDCD;" | Indefinite ! style="background: #CDCDCD;" | Definite ! style="background: #EFEFEF;" | Informal ! style="background: #EFEFEF;" | Nominative ! style="background: #EFEFEF;" | Accusative ! style="background: #EFEFEF;" | Genitive ]=] end

-- Make the adjective table function make_adj_table(data) local wikicode = [=[ Declension of  {| class="inflection-table" style="border-width: 1px; border-collapse: collapse; background:#F9F9F9; text-align:center; width:100%;" ]=]

if contains(data.numbers, "sg") then wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=3 | Singular ! style="background: #CDCDCD;" colspan=2 | Masculine ! style="background: #CDCDCD;" colspan=2 | Feminine ! style="background: #CDCDCD;" colspan=2 | ! style="background: #CDCDCD;" colspan=2 | ]=] .. generate_adj_num("sg") end if contains(data.numbers, "du") then wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=2 | Dual ! style="background: #CDCDCD;" colspan=2 | Masculine ! style="background: #CDCDCD;" colspan=2 | Feminine ]=] .. generate_adj_num("du") end if contains(data.numbers, "pl") then wikicode = wikicode .. [=[|- ! style="background: #CDCDCD;" rowspan=3 | Plural ! style="background: #CDCDCD;" colspan=2 | Masculine ! style="background: #CDCDCD;" colspan=2 | Feminine ! style="background: #CDCDCD;" colspan=2 | ! style="background: #CDCDCD;" colspan=2 | ]=] .. generate_adj_num("pl") end wikicode = wikicode .. [=[|} ]=]

return make_table(data, wikicode) end

return export

-- For Vim, so we get 4-space tabs -- vim: set ts=4 sw=4 noet: