-- Module:ang-common

--[=[

Common utilities and definitions used by various Old English modules.

Author: Benwing ]=]

local m_table = require("Module:table")

local u = require("Module:string/char") local rsubn = mw.ustring.gsub

-- Version of rsubn that discards all but the first return value.
--
-- @param term  subject string
-- @param foo   pattern to search for
-- @param bar   replacement (anything rsubn accepts)
-- @param n     optional maximum number of substitutions
-- @return the substituted string only; the substitution count is dropped
local function rsub(term, foo, bar, n)
	-- The extra parentheses truncate rsubn's results to exactly one value.
	return (rsubn(term, foo, bar, n))
end

-- Table holding everything this module exposes.
local export = {}

-- Combining diacritics, produced by `u` from their code points.
export.ACUTE = u(0x0301) -- U+0301
export.GRAVE = u(0x0300) -- U+0300
export.CFLEX = u(0x0302) -- U+0302
export.MACRON = u(0x0304) -- U+0304
export.DOTABOVE = u(0x0307) -- U+0307
export.SYLLABIC = u(0x0329) -- U+0329
export.CEDILLA = u(0x0327) -- U+0327
export.DOUBLE_BREVE_BELOW = u(0x035C) -- U+035C

-- Macron, acute, grave and circumflex concatenated into one string; spliced
-- into pattern character classes further down in this file.
local accent = export.MACRON .. export.ACUTE .. export.GRAVE .. export.CFLEX

-- Lookup table used as the replacement argument in export.decompose().
-- Keys are a base letter followed by a combining mark; values are the
-- corresponding single precomposed character.
local recomposer = {
	["g" .. export.DOTABOVE] = "ġ",
	["G" .. export.DOTABOVE] = "Ġ",
	["c" .. export.DOTABOVE] = "ċ",
	["C" .. export.DOTABOVE] = "Ċ",
	-- used in "explicit allophone" notation in Module:ang-pron
	["c" .. export.CEDILLA] = "ç",
	["C" .. export.CEDILLA] = "Ç",
}

-- Decompose macron, acute, grave, circumflex, but leave alone ġ, ċ, ç and
-- their uppercase equivalents.
--
-- @param text  string to normalize
-- @return `text` in NFD, except that base+mark pairs which have an entry in
--         `recomposer` are put back into their single-character form
function export.decompose(text)
	text = mw.ustring.toNFD(text)
	-- Match any character followed by a dot-above or a cedilla and look the
	-- pair up in `recomposer`. Pairs without an entry stay decomposed, because
	-- a table replacement that yields nil keeps the original match. The cedilla
	-- must be in the class, otherwise the ç/Ç entries can never be reached.
	text = rsub(text, ".[" .. export.DOTABOVE .. export.CEDILLA .. "]", recomposer)
	return text
end

-- We use the following syllable-splitting algorithm.
-- (1) A single consonant goes with the following syllable.
-- (2) Two consonants are split down the middle.
-- (3) For three or more consonants, check for clusters ending in
--     onsets_3 then onsets_2, with at least one preceding consonant.
--     If so, split between the onset and the preceding consonant(s).
-- (4) Check similarly for secondary_onsets_2. If seen, then check
--     the preceding consonant; if it's not an l or r, split before
--     the onset.
-- (5) Otherwise, split before the last consonant (i.e. the last
--     consonant goes with the following syllable, and all preceding
--     consonants go with the preceding syllable).

-- Set of two-consonant onsets referred to in step (3) above.
export.onsets_2 = m_table.listToSet({
	"pr", "pl",
	"br", "bl",
	"tr", "tw",
	"dr", "dw",
	"cr", "cl", "cw", -- skip "cn"
	"kr", "kl", "kw", -- skip "kn"
	"gr", "gl", -- skip "gn"
	"sm", "sn", "sl", "sw",
	"sp",
	"st",
	"sc", "sk", "sċ",
	"fr", "fl", -- skip "fn"
	"þr", "þw",
	"ðr", "ðw",
	"hr", "hl", "hw", -- skip "hn"
	"wr", "wl",
})

-- Set of the consonant + "n" clusters that are left out of onsets_2; these
-- are the clusters consulted in step (4) of the syllable-splitting algorithm.
export.secondary_onsets_2 = m_table.listToSet({
	"cn",
	"kn",
	"gn",
	"fn",
	"hn",
})

-- Set of three-consonant onsets; step (3) of the syllable-splitting
-- algorithm checks these before the two-consonant onsets.
export.onsets_3 = m_table.listToSet({
	"spr",
	"spl",
	"str",
	"scr",
	"skr",
	"sċr",
})

-- Set of diphthong spellings. Each one is listed plain, with a macron on the
-- first vowel and with a macron on the second vowel; the macron forms are
-- run through export.decompose() before being stored.
export.diphthongs = m_table.listToSet({
	"ea",
	export.decompose("ēa"),
	export.decompose("eā"),
	"eo",
	export.decompose("ēo"),
	export.decompose("eō"),
	"io",
	export.decompose("īo"),
	export.decompose("iō"),
	"ie",
	export.decompose("īe"),
	export.decompose("iē"),
})

-- Ordered list of prefixes. Each entry is a two-element list:
--   [1] the prefix, as a string that may contain pattern character classes
--       (e.g. "for[þð]"); entries containing a long vowel are passed through
--       export.decompose() first;
--   [2] a table with any of the keys `verb`, `noun`, `verbal` (values
--       "unstressed", "stressed" or "secstressed") and an optional
--       `restriction` pattern.
-- NOTE(review): how these fields are interpreted is decided by the modules
-- that consume this list, not here. A longer prefix is always listed before a
-- shorter one that it begins with ("for[þð]" before "for", "ofer" before "of",
-- "under" before "un", ...), presumably so that the first match wins -- keep
-- the order intact.
export.prefixes = {
	{export.decompose("ā"), {verb = "unstressed", noun = "stressed"}},
	{"æt", {verb = "unstressed"}},
	{"æfter", {verb = "secstressed", noun = "stressed"}}, -- not very common
	{"and", {verb = "unstressed", noun = "stressed"}},
	{"an", {verb = "unstressed", noun = "stressed"}},
	{"be", {verb = "unstressed", noun = "unstressed", restriction = "^[^" .. accent .. "ao]"}},
	{export.decompose("bī"), {noun = "stressed"}},
	{"ed", {verb = "unstressed", noun = "stressed"}}, -- not very common
	{"fore", {verb = "unstressed", noun = "stressed", restriction = "^[^" .. accent .. "ao]"}},
	{"for[þð]", {verb = "unstressed", noun = "stressed"}},
	{"for", {verb = "unstressed", noun = "unstressed"}},
	{"fram", {verb = "unstressed", noun = "stressed"}}, -- not very common
	-- following is rare as a noun, mostly from verbal forms
	{"ġeond", {verb = "unstressed"}},
	{"ġe", {verb = "unstressed", noun = "unstressed", restriction = "^[^" .. accent .. "ao]"}},
	{"in", {verb = "unstressed", noun = "stressed"}}, -- not very common
	{"mis", {verb = "unstressed"}},
	{"ofer", {verb = "secstressed", noun = "stressed"}},
	{"of", {verb = "unstressed", noun = "stressed"}},
	{"on", {verb = "unstressed", noun = "stressed"}},
	{"or", {noun = "stressed"}},
	{"o[þð]", {verb = "unstressed"}},
	{export.decompose("stēop"), {noun = "stressed"}},
	{export.decompose("tō"), {verb = "unstressed", noun = "stressed"}},
	{"under", {verb = "secstressed", noun = "stressed"}},
	{"un", {verb = "unstressed", noun = "stressed", verbal = "stressed"}}, -- uncommon as verb
	{"up", {verb = "unstressed", noun = "stressed"}},
	{export.decompose("ūt"), {verb = "unstressed", noun = "stressed"}},
	{export.decompose("ū[þð]"), {noun = "stressed"}},
	{"[wƿ]i[þð]er", {verb = "secstressed", noun = "stressed"}},
	{"[wƿ]i[þð]", {verb = "unstressed"}},
	{"ymb", {verb = "unstressed", noun = "stressed"}},
	{"[þð]urh", {verb = "unstressed", noun = "stressed"}},
}

-- Ordered list of suffixes, in the same two-element entry format as
-- export.prefixes: [1] is the suffix as a string that may contain pattern
-- syntax (e.g. "full?"), [2] is a table keyed by `noun` and/or `verb` whose
-- value is "unstressed" or "secstressed".
-- NOTE(review): interpretation of these fields is up to the consuming modules.
export.suffixes = {
	{export.decompose("bǣre"), {noun = "secstressed"}},
	{"fæst", {noun = "secstressed"}},
	{"feald", {noun = "secstressed"}},
	{"full?", {noun = "unstressed"}},
	{export.decompose("lēas"), {noun = "secstressed"}},
	-- These can be "verbal" if following a verbal past participle or similar
	{export.decompose("līċe"), {noun = "secstressed", verb = "secstressed"}},
	-- ī is decomposed into two chars so can't combine into [īi]
	{export.decompose("li[ċc]"), {noun = "unstressed", verb = "unstressed"}},
	{export.decompose("lī[ċc]"), {noun = "unstressed", verb = "unstressed"}},
	{"n[eiy]ss?", {noun = "unstressed", verb = "unstressed"}},
	{"sum", {noun = "unstressed"}},
}

-- Hand the populated `export` table back to whoever require()s this module.
return export