-- Module:User:Φρύδια/pt-data

-- Portuguese composer instance: calls `new` on the required logic module with
-- a configuration table describing paradigms, constraints and endings.
-- NOTE(review): the logic module is not visible here; the per-field remarks
-- below describe only the data, not how the logic module consumes it.
local PTComposer = require('Module:User:Φρύδια/logic'):new {
	-- Groups of grammatical categories, one list of lists per paradigm family.
	paradigms = {
		{ { 'IND', 'SBJ', 'IMP' }, { 'PRS', 'PST', 'FUT' }, { 'NPRF', 'PRF' } },
		{ { 'INF' }, { 'PERS', 'IMPERS' } },
		{ { 'PTCP' }, { 'M', 'F' } },
		{ { 'GER' } }
	},
	-- Presumably restricts which moods a tense/aspect combines with — TODO confirm.
	constraints = {
		PST = { true, { 'IND', 'SBJ' } },
		FUT = { true, { 'IND', 'SBJ' } },
		PRF = { true, { 'IND' } }
	},
	-- Values look like '<endings_refs index>|<endings row digits>'; multi-digit
	-- row parts (e.g. '51') appear to concatenate rows — TODO confirm.
	endings_map = {
		IND_PRS_NPRF = '1|1',
		SBJ_PRS_NPRF = '1|1',
		IND_PRS_PRF  = '1|2',
		IND_FUT_NPRF = '1|3',
		IMP_PRS_NPRF = '1|4',
		SBJ_FUT_NPRF = '1|51',
		INF_PERS     = '1|51',
		IND_PST      = '1|61',
		IND_FUT_PRF  = '1|61',
		SBJ_PST_NPRF = '1|71',
		PTCP         = '2|8',
		INF_IMPERS   = '',
		GER          = ''
	},
	endings_refs = {
		{ '1SG', '2SG', '3SG', '1PL', '2PL', '3PL' },
		{ 'SG', 'PL' }
	},
	-- NOTE(review): the empty-string entries in rows 1, 4 and 5 were missing
	-- from the extracted text (leaving bare commas, which is a syntax error)
	-- and are restored here as ''. Confirm against the upstream page.
	endings = {
		{ '',   's',   '',  'mos',  's',    'm'   },
		{ 'i',  'ste', 'u', 'mos',  'stes', 'ram' },
		{ 'ei', 'ás',  'á', 'emos', 'eis',  'ão'  },
		{ nil,  '',    '',  nil,    '',     ''    },
		{ '',   'e',   '',  '',     'de',   'e'   },
		{ 'a',  'a',   'a', 'a',    'ei',   'a'   },
		{ 'e',  'e',   'e', 'e',    'ei',   'e'   },
		{ '',   's' }
	}
}

-- Default morphological data shared by every verb.
-- Keys are prefixed with `stm_` (stem), `vwl_` (vowel) or `affx_` (affix), or
-- are bare form identifiers. Values starting with '@' name another key;
-- NOTE(review): how '@' and '&' are resolved is up to the logic module, which
-- is not visible here.
local BASE_DATA = {
	-- stems
	stm_IND_FUT_NPRF      = '@inf',
	stm_IND_FUT_PRF       = '@stm_IND_FUT_NPRF',
	stm_IND_PST_PRF       = '@stm_IND_PRS_PRF_2SG',
	stm_SBJ_PST_NPRF      = '@stm_IND_PRS_PRF_2SG',
	stm_SBJ_FUT_NPRF      = '@stm_IND_PRS_PRF_2SG',
	stm_IND_PRS_NPRF_1SG  = '@stm_SBJ_PRS_NPRF_2SG',
	stm_IND_PRS_NPRF_3SG  = '@stm_IND_PRS_NPRF_2SG',
	stm_IND_PRS_NPRF_3PL  = '@stm_IND_PRS_NPRF_2SG',
	stm_SBJ_PRS_NPRF_3SG  = '@stm_SBJ_PRS_NPRF_2SG',
	stm_SBJ_PRS_NPRF_3PL  = '@stm_SBJ_PRS_NPRF_2SG',
	stm_IMP_PRS_NPRF_2PL  = '@stm_IND_PRS_NPRF_2PL',

	-- vowels
	vwl_IND_PRS_NPRF_1SG  = 'o',
	vwl_IND_FUT_NPRF      = '',
	vwl_IND_FUT_PRF       = 'i',
	vwl_IND_PST_PRF       = '@vwl_IND_PRS_PRF_2SG',
	vwl_SBJ_PST_NPRF      = '@vwl_IND_PRS_PRF_2SG',
	vwl_SBJ_FUT_NPRF      = '@vwl_IND_PRS_PRF_2SG',
	vwl_IND_PRS_NPRF_3SG  = '@vwl_IND_PRS_NPRF_2SG',
	vwl_IND_PRS_NPRF_3PL  = '@vwl_IND_PRS_NPRF_2SG',
	vwl_SBJ_PRS_NPRF_3SG  = '@vwl_SBJ_PRS_NPRF_2SG',
	vwl_SBJ_PRS_NPRF_3PL  = '@vwl_SBJ_PRS_NPRF_2SG',
	vwl_IMP_PRS_NPRF_2PL  = '@vwl_IND_PRS_NPRF_2PL',

	-- affixes
	affx_INF              = 'r',
	affx_GER              = 'ndo',
	affx_PTCP             = 'd',
	affx_PTCP_M           = '&o',
	affx_PTCP_F           = '&a',
	affx_IND_PST_PRF      = 'r',
	affx_SBJ_FUT_NPRF     = 'r',
	affx_SBJ_PST_NPRF     = 'ss',
	affx_PRS_NPRF_2PL     = 'i',
	affx_IMP_PRS_NPRF_2PL = '@affx_IND_PRS_NPRF_2PL',

	-- whole imperative forms borrowed from other paradigms
	IMP_PRS_NPRF_2SG      = '@IND_PRS_NPRF_3SG',
	IMP_PRS_NPRF_3SG      = '@SBJ_PRS_NPRF_3SG',
	IMP_PRS_NPRF_3PL      = '@SBJ_PRS_NPRF_3PL'
}

-- Overrides keyed by conjugation class: the two-letter infinitive endings
-- (`ar`, `er`, `ir`) and the three-letter extension classes looked up by
-- PTComposer:make_tables (`air`, `uir`, `oer`, `ear`).
local CLASS_DATA = {
	ar = {
		vwl_SBJ_PRS_NPRF    = 'e',
		vwl_IND_PRS_PRF_1SG = 'e',
		vwl_IND_PRS_PRF_3SG = 'o',
		vwl_IND_PRS_PRF_1PL = '{á:a}',
		affx_IND_PST_NPRF   = 'v'
	},
	er = {
		vwl_SBJ_PRS_NPRF    = 'a',
		vwl_IND_PST_NPRF    = 'i',
		vwl_PTCP            = 'i',
		vwl_IND_PRS_PRF_1SG = 'i'
	},
	ir = {
		vwl_SBJ_PRS_NPRF      = 'a',
		vwl_IND_PRS_NPRF_2SG  = 'e',
		affx_IND_PRS_NPRF_2PL = ''
	},

	-- three-letter extension classes
	air = {
		stm_SBJ_PRS_NPRF     = 'ai',
		vwl_IND_PRS_NPRF_2SG = 'i',
		vwl_IND_PRS_NPRF_3PL = 'e'
	},
	uir = {
		vwl_IND_PRS_NPRF_2SG = 'i',
		vwl_IND_PRS_NPRF_3PL = 'e'
	},
	oer = {
		stm_IND_PRS_NPRF_2SG = 'ó',
		stm_IND_PRS_NPRF_3PL = 'o',
		vwl_IND_PRS_NPRF_2SG = 'i',
		vwl_IND_PRS_NPRF_3PL = 'e'
	},
	ear = {
		stm_PRS_NPRF_2SG     = 'ei',
		stm_SBJ_PRS_NPRF_1SG = 'ei'
	}
}

-- Per-lemma overrides for irregular verbs, keyed by infinitive (with 'ô'
-- already replaced by 'o', see PTComposer:make_tables).
-- Bare keys (e.g. IND_PRS_NPRF_1SG) hold whole forms; `stm_`/`vwl_`/`affx_`
-- keys override individual pieces.
-- NOTE(review): markup such as '{a:b}', '/a|b\\' and '&x' inside values is
-- interpreted by the logic module, which is not visible here.
--
-- The trailing "is this a bad idea?" comments are kept at the end of their own
-- lines so that they no longer comment out the fields that follow them.
local LEMMA_DATA = {
	ser = {
		IND_PRS_NPRF_1SG     = 'sou',
		IND_PRS_NPRF_3PL     = 'são',
		IND_PRS_PRF_1SG      = 'fui',
		IND_PRS_PRF_3SG      = 'foi',
		IMP_PRS_NPRF_2SG     = 'sê',
		IMP_PRS_NPRF_2PL     = 'sede',

		stm_IND_PRS_PRF      = 'f',
		stm_IND_PST_NPRF     = '',
		stm_SBJ_PRS_NPRF     = 'sej',
		stm_IND_PRS_NPRF_2SG = '',
		vwl                  = 'o',
		vwl_IND_PST_NPRF     = 'e',
		vwl_IND_PRS_NPRF_2SG = 'é',
		affx_IND_PST_NPRF    = 'r'
	},
	ir = {
		IND_PRS_NPRF_1SG     = 'vou',
		IND_PRS_NPRF_1PL     = 'vamos',
		SBJ_PRS_NPRF_1PL     = 'vamos',
		SBJ_PRS_NPRF_2PL     = 'vades',
		IND_PRS_NPRF_3PL     = 'vão',
		SBJ_PRS_NPRF_3PL     = 'vão',
		IND_PRS_PRF_1SG      = 'fui',
		IND_PRS_PRF_3SG      = 'foi',

		stm_IND_PRS_NPRF     = 'va',
		stm_IND_PRS_NPRF_2PL = '',
		stm_SBJ_PRS_NPRF     = 'v',
		stm_IND_PRS_PRF      = 'f',

		vwl_IND_PRS_NPRF     = 'i',
		vwl_SBJ_PRS_NPRF     = 'á',
		vwl_IND_PRS_PRF      = 'o',

		affx_IND_PRS_NPRF_2PL = 'de'
	},

	dar = {
		IND_PRS_NPRF_1SG     = 'dou',
		IND_PRS_NPRF_3PL     = 'dão',
		SBJ_PRS_NPRF_1PL     = '{dêmos:demos}',
		SBJ_PRS_NPRF_3PL     = '/deem|dêem\\',
		IND_PRS_PRF_1SG      = 'dei',
		IND_PRS_PRF_3SG      = 'deu',
		vwl_IND_PRS_NPRF_2SG = 'á',
		vwl_SBJ_PRS_NPRF     = 'e',
		vwl_SBJ_PRS_NPRF_2SG = 'ê',
		vwl_IND_PRS_PRF      = 'e'
	},
	estar = {
		IND_PRS_NPRF_1SG     = 'estou',
		IND_PRS_NPRF_3PL     = 'estão',
		IND_PRS_PRF_3SG      = 'esteve',
		stm_IND_PRS_PRF      = 'estiv',
		vwl_SBJ_PRS_NPRF     = 'a',
		vwl_IND_PRS_PRF      = 'e',
		vwl_IND_PRS_NPRF_2SG = 'á',
		affx_SBJ_PRS_NPRF    = 'ja'
	},
	por = {
		IND_PRS_PRF_1SG      = 'pus',
		IND_PRS_PRF_3SG      = 'pôs',

		stm                  = 'p',
		stm_IND_PRS_PRF      = 'pus',
		stm_SBJ_PRS_NPRF     = 'ponh',
		vwl_IND_PRS_NPRF_2SG = 'õe', --is this a bad idea?
		vwl_IND_PRS_PRF      = 'e',
		vwl_SBJ_PRS_NPRF     = 'a',
		vwl_IND_PST_NPRF     = 'u',

		affx_IND_PRS_NPRF_2PL = 'nde',
		affx_IND_PST_NPRF    = 'nh',
		affx_PTCP            = 'st'
	},
	vir = {
		IND_PRS_NPRF_2SG     = 'vens',
		IND_PRS_NPRF_3SG     = 'vem',
		IND_PRS_NPRF_3PL     = 'vêm',
		IND_PRS_PRF_1SG      = 'vim',
		IND_PRS_PRF_3SG      = 'veio',
		stm_SBJ_PRS_NPRF     = 'venh',
		stm_IND_PRS_PRF      = 'vi',
		vwl_IND_PRS_PRF      = 'e',
		affx_IND_PRS_NPRF_2PL = 'nde',
		affx_IND_PST_NPRF    = 'nh',
		affx_PTCP            = 'nd'
	},
	ter = {
		IND_PRS_NPRF_2SG     = 'tens',
		IND_PRS_NPRF_3SG     = 'tem',
		IND_PRS_NPRF_3PL     = 'têm',
		IND_PRS_PRF_1SG      = 'tive',
		IND_PRS_PRF_3SG      = 'teve',
		stm_SBJ_PRS_NPRF     = 'tenh',
		stm_IND_PRS_PRF      = 'tiv',
		affx_IND_PRS_NPRF_2PL = 'nde',
		affx_IND_PST_NPRF    = 'nh'
	},
	fazer = {
		IND_PRS_NPRF_3SG     = 'faz',
		IND_PRS_PRF_1SG      = 'fiz',
		IND_PRS_PRF_3SG      = 'fez',
		stm_SBJ_PRS_NPRF     = 'faç',
		stm_IND_PRS_PRF      = 'fiz',
		stm_IND_FUT_NPRF     = 'far',
		stm_PTCP             = 'f',
		vwl_PTCP             = 'ei', --is this a bad idea?
		affx_PTCP            = 't'
	},
	dizer = {
		IND_PRS_NPRF_3SG     = 'diz',
		stm_SBJ_PRS_NPRF     = 'dig',
		stm_IND_PRS_PRF      = 'diss',
		stm_IND_FUT_NPRF     = 'dir',
		stm_PTCP             = 'd',
		vwl_IND_PRS_PRF_1SG  = 'e',
		affx_PTCP            = 't'
	},
	trazer = {
		IND_PRS_NPRF_3SG     = 'traz',
		stm_SBJ_PRS_NPRF     = 'trag',
		stm_IND_PRS_PRF      = 'troux',
		stm_IND_FUT_NPRF     = 'trar',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	prazer = {
		IND_PRS_NPRF_3SG     = 'praz',
		stm_IND_PRS_PRF      = 'prouv',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	jazer = {
		vwl_IND_PRS_NPRF_3SG = ''
	},

	aver = {
		stm_IND_PRS_PRF      = 'ouv',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	haver = {
		IND_PRS_NPRF_1SG     = 'hei',
		IND_PRS_NPRF_2SG     = 'hás',
		IND_PRS_NPRF_3SG     = 'há',
		IND_PRS_NPRF_3PL     = 'hão',
		stm_SBJ_PRS_NPRF     = 'haj',
		stm_IND_PRS_PRF      = 'houv',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	saber = {
		IND_PRS_NPRF_1SG     = 'sei',
		stm_SBJ_PRS_NPRF     = 'saib',
		stm_IND_PRS_PRF      = 'soub',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	caber = {
		stm_SBJ_PRS_NPRF     = 'caib',
		stm_IND_PRS_PRF      = 'coub',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},

	ver = {
		IND_PRS_NPRF_3PL     = 'veem',
		IND_PRS_PRF_3SG      = 'viu',
		stm_SBJ_PRS_NPRF     = 'vej',
		vwl_IND_PRS_NPRF_2SG = 'ê',
		vwl_IND_PRS_PRF      = 'i',
		affx_IND_PRS_NPRF_2PL = 'de',
		affx_PTCP            = 'st'
	},
	crer = {
		stm_SBJ_PRS_NPRF     = '&ei',
		vwl_IND_PRS_NPRF_2SG = 'ê',
		vwl_IND_PRS_NPRF_3PL = 'ee', --is this a bad idea?
		affx_IND_PRS_NPRF_2PL = 'de'
	},
	rir = {
		IND_PRS_NPRF_3PL     = 'riem',
		stm_SBJ_PRS_NPRF     = 'ri',
		vwl_IND_PRS_NPRF_2SG = 'i',
		affx_IND_PRS_NPRF_2PL = 'de'
	},
	querer = {
		IND_PRS_NPRF_1SG     = 'quero',
		stm_SBJ_PRS_NPRF     = 'queir',
		stm_IND_PRS_PRF      = 'quis',
		vwl_IND_PRS_PRF_1SG  = '',
		vwl_IND_PRS_3SG      = ''
	},
	requerer = {
		IND_PRS_NPRF_3SG     = 'requer',
		stm_SBJ_PRS_NPRF     = 'requeir'
	},
	poder = {
		IND_PRS_PRF_3SG      = 'pôde',
		stm_SBJ_PRS_NPRF     = 'poss',
		stm_IND_PRS_PRF      = 'pud',
		vwl_IND_PRS_PRF_1SG  = 'e'
	},
	-- NOTE(review): an empty subjunctive stem for `ouvir` looks suspicious
	-- (possibly lost in extraction); kept as found, confirm upstream.
	ouvir  = { stm_SBJ_PRS_NPRF = '' },
	medir  = { stm_SBJ_PRS_NPRF = 'meç' },
	pedir  = { stm_SBJ_PRS_NPRF = 'peç' },
	perder = { stm_SBJ_PRS_NPRF = 'perc' },
	valer  = { stm_SBJ_PRS_NPRF = 'valh' },
	abrir   = { stm_PTCP = 'ab',  vwl_PTCP = 'e', affx_PTCP = 'rt' },
	cobrir  = { stm_PTCP = 'cob', vwl_PTCP = 'e', affx_PTCP = 'rt' },
	screver = { stm_PTCP = 'scr',                 affx_PTCP = 't' }
}

-- Forms whose stem-final consonant spelling may need adjusting, mapped to the
-- offset (within a `changing_borders` row) to apply for each direction.
-- Direction -1 is used when the class vowel is 'a', direction 1 otherwise.
local changing_borders_forms = {
	SBJ_PRS_NPRF     = { [-1] = -1, [1] = 1 },
	IND_PRS_NPRF_1SG = { [-1] =  0 },
	IND_PRS_PRF_1SG  = { [-1] = -1 }
}

-- Rows of alternating stem-final graphemes; a shift moves along a row.
local changing_borders = {
	{ 'qu', 'c', 'ç' },
	{ 'gu', 'g', 'j' }
}

--- Record orthographic stem alternations (c/qu/ç, g/gu/j) in `data`.
-- For the first grapheme that ends a relevant stem, every form listed in
-- `changing_borders_forms` for the current direction gets a `stm_<form>` entry
-- with the stem-final grapheme replaced.
--
-- The original returned right after the first form it wrote while iterating
-- with `pairs`, so -ar verbs received only one of their three overrides, and
-- which one depended on unspecified iteration order. All applicable forms are
-- now written before returning.
-- @param stm  default stem of the verb
-- @param vwl  class vowel; 'a' selects direction -1, anything else 1
-- @param data table that receives the `stm_*` overrides (modified in place)
local function write_contact_assimilation_data(stm, vwl, data)
	local dir = (vwl == 'a') and -1 or 1
	for _, grphm_seq in ipairs(changing_borders) do
		for i, grphm in ipairs(grphm_seq) do
			-- A neighbour must exist in this direction; 'j' never shifts
			-- backwards.
			if grphm_seq[i + dir] and not (grphm == 'j' and dir == -1) then
				local pattern = grphm .. '$'
				local changed = false
				for id, shifts in pairs(changing_borders_forms) do
					local shift = shifts[dir]
					if shift then
						local full_id = 'stm_' .. id
						-- An earlier override (e.g. a stem change) wins over
						-- the default stem.
						local old_stm = data[full_id] or stm
						if old_stm:find(pattern) then
							-- Parentheses drop gsub's second return value.
							data[full_id] = (old_stm:gsub(pattern, grphm_seq[i + shift]))
							changed = true
						end
					end
				end
				if changed then
					return
				end
			end
		end
	end
end

-- Stem-vowel alternations. Each key is two characters: the first is the vowel
-- used for the SBJ_PRS_NPRF stem, the second the vowel used for the
-- IND_PRS_NPRF_2SG stem; '_' means "leave that stem alone". Entries ending in
-- '$' are Lua patterns matched against the stem, the others are exact stems.
local stem_change_map = {
	_e = { 'frig' },
	_o = { 'u[pbml]$', '^s?acud$', 'fug', 'cusp' },
	i_ = { 'e[rl][^rlaeiou]?$', 'en?[ptkfschx]+ir$', 'segu' },
	u_ = { 'dorm', 'cobr', 'engol', 'toss', 'plod' },
	ii = { 'denegr', 'ven', 'gred' },
	uu = { 'pol', 'sort' }
}

--- Record stem-vowel changes for a verb in `data`.
-- Does nothing for stem 'sum' with prefix 'as' or a prefix matching '^p?re$',
-- nor for stems ending in 'es'. Otherwise the first matching entry of
-- `stem_change_map` decides which `stm_*` keys receive the altered stem.
-- @param stm  verb stem
-- @param prfx prefix string
-- @param data table that receives the overrides (modified in place)
local function write_stem_change_data(stm, prfx, data)
	local exempt_sum = stm == 'sum' and (prfx == 'as' or prfx:find('^p?re$'))
	if exempt_sum or stm:sub(-2) == 'es' then
		return
	end

	for change, candidates in pairs(stem_change_map) do
		for _, candidate in ipairs(candidates) do
			local hit
			if candidate:sub(-1) == '$' then
				hit = stm:find(candidate) or stm == candidate
			else
				hit = stm == candidate
			end

			if hit then
				-- Swap the last stem vowel for the first letter of the key.
				local altered = (stm:gsub('[eiou](..?)$', change:match('%a') .. '%1'))
				if change:sub(1, 1) ~= '_' then
					data['stm_' .. 'SBJ_PRS_NPRF'] = altered
				end
				if change:sub(2, 2) ~= '_' then
					data['stm_' .. 'IND_PRS_NPRF_2SG'] = altered
				end
				return
			end
		end
	end
end

-- Lua patterns naming the forms that are missing for each defectivity type.
-- PTComposer:assemble_form matches them against "<paradigm>_<ref>".
local DEFECTIVITY_PATTERNS = {
	[1] = { 'IND_PRS_NPRF_1SG', 'SBJ_PRS_NPRF' },
	[2] = { 'IND_PRS_NPRF_[12]SG', 'IND_PRS_NPRF_3', 'SBJ_PRS_NPRF' },
	[3] = { '[12]' }
}

-- Stems of defective verbs, per conjugation class. The position of a stem
-- list within its class is the index into DEFECTIVITY_PATTERNS.
local DEFECTIVE_VERBS = {
	ar = {
		{ },
		{ },
		{ 'grass' }
	},
	er = {
		{ 'ra', 'so' },
		{ 'precav', 'reav' },
		{ 'do', 'lat', 'praz' }
	},
	ir = {
		{ 'abol', 'aborr', 'adur', 'brand', 'branqu', 'bu', 'carp', 'color',
		  'demol', 'demulc', 'emol', 'entangu', 'frem', 'langu', 'comed',
		  'mon', 'moqu', 'munqu', 'prem', 'pu', 'rebol', 'ru', 'torqu', 'vag' },
		{ 'ad', 'aduc', 'aguerr', 'as', 'combal', 'cond', 'conqu', 'del',
		  'emba', 'empedern', 'esbafor', 'espavor', 'estres', 'fal', 'flor',
		  'forn', 'freten', 'garr', 'inan', 'len', 'manuten', 'rem', 'renh',
		  'ressequ', 'suqu', 'susqu', 'trans' },
		{ 'concern', 'preclud', 'preclu' }
	}
}

--- Look up the defectivity patterns for an infinitive.
-- @param inf  infinitive; compared lower-cased against stem .. class ending
-- @param data unused, kept for interface compatibility
-- @return the matching DEFECTIVITY_PATTERNS entry, or nil when the verb is
--         not listed (including infinitives whose last two letters are not a
--         known class, e.g. 'por', which previously raised in ipairs)
local function get_defective_forms(inf, data)
	local cl = inf:sub(-2)
	local stem_groups = DEFECTIVE_VERBS[cl]
	if not stem_groups then
		return nil
	end
	local lowered = inf:lower()
	for i, stms in ipairs(stem_groups) do
		for _, stm in ipairs(stms) do
			if lowered == stm .. cl then
				return DEFECTIVITY_PATTERNS[i]
			end
		end
	end
end

-- Infinitives in -iar that use the `ear` class data.
local CLASS_DATA_REDIRECTS = {
	odiar     = CLASS_DATA.ear,
	mediar    = CLASS_DATA.ear,
	ansiar    = CLASS_DATA.ear,
	incendiar = CLASS_DATA.ear
}

-- Lemmas that reuse another lemma's irregular data.
local LEMMA_DATA_REDIRECTS = {
	ler   = LEMMA_DATA.crer,
	rer   = LEMMA_DATA.crer,
	luzir = LEMMA_DATA.jazer,
	duzir = LEMMA_DATA.jazer
}

-- State shared between make_tables and assemble_form for the current lemma.
local defective_forms
local irreg_PRS_PRF = false

--- Build the ordered list of data tables for a lemma.
-- Side effects: sets the module-level `defective_forms` and `irreg_PRS_PRF`,
-- may extend `self.prefix`, and attaches redirect metatables to CLASS_DATA
-- and LEMMA_DATA.
-- @param lemma infinitive of the verb
-- @return array { init_data, BASE_DATA, class_data, extns_data, lemma_data };
--         the last three entries may be nil
function PTComposer:make_tables(lemma)
	-- Reset per-lemma state so an irregular verb processed earlier does not
	-- leak into this one.
	irreg_PRS_PRF = false

	-- Parentheses keep only gsub's first return value. 'ü' and 'ï' become
	-- upper-case markers that assemble_form later lower-cases or accents.
	local _inf = (lemma:gsub('ô', 'o'):gsub('ü', 'U'):gsub('ï', 'I'))
	defective_forms = get_defective_forms(_inf) or { }

	local cl, ext_cl = _inf:sub(-2), _inf:sub(-3)
	setmetatable(CLASS_DATA, { __index = CLASS_DATA_REDIRECTS })
	local class_data = CLASS_DATA[cl]

	-- Three-letter extension class; skipped for -guir / -quir verbs.
	local extns_data
	if not lemma:find '[gq]uir$' then
		ext_cl = ext_cl:lower()
		-- -iar verbs are looked up by full infinitive (redirect table only).
		extns_data = CLASS_DATA[ext_cl == 'iar' and _inf or ext_cl]
	end
	if extns_data then
		-- Everything before the extension ending moves into the prefix.
		self.prefix = self.prefix .. _inf:sub(1, -4)
		_inf = ext_cl
	end

	local _stm, _vwl = _inf:sub(1, -3), _inf:sub(-2, -2)
	local init_data = { inf = _inf, stm = _stm, vwl = _vwl, affx = '' }
	if cl == 'ir' then
		write_stem_change_data(_stm, self.prefix, init_data)
	end
	write_contact_assimilation_data(_stm, _vwl, init_data)

	setmetatable(LEMMA_DATA, { __index = LEMMA_DATA_REDIRECTS })
	local lemma_data = LEMMA_DATA[_inf]
	if lemma_data then
		if self.prefix ~= '' and (_inf == 'ter' or _inf == 'vir') then
			-- Work on a shallow copy so the shared LEMMA_DATA entry is not
			-- permanently altered for later, unprefixed uses.
			local copy = { }
			for k, v in pairs(lemma_data) do
				copy[k] = v
			end
			copy.IND_PRS_NPRF_2SG = _stm .. 'éns'
			copy.IND_PRS_NPRF_3SG = _stm .. 'ém'
			lemma_data = copy
		end
		-- Any key mentioning IND_PRS_PRF marks the preterite as irregular.
		for k in pairs(lemma_data) do
			if k:find 'IND_PRS_PRF' then
				irreg_PRS_PRF = true
				break
			end
		end
	end

	return { init_data, BASE_DATA, class_data, extns_data, lemma_data }
end

-- Plain vowel -> accented vowel used when a written accent is required.
local diacritics_map = { a = 'á', e = 'ê', i = 'í', o = 'ô', u = 'ú' }

--- Assemble one inflected form from its pieces.
-- Reads the module-level `defective_forms` and `irreg_PRS_PRF` set by
-- make_tables, and rewrites `self.prefix` (upper-case markers are resolved in
-- place).
-- NOTE(review): because `self.prefix` is overwritten, the result may depend
-- on the order in which forms are assembled; confirm how the logic module
-- calls this.
-- @param stm  stem
-- @param vwl  vowel
-- @param affx affix
-- @param endg ending
-- @param pdgm paradigm identifier, e.g. 'IND_PRS_NPRF'
-- @param ref  person/number reference, e.g. '1SG'
-- @return the form, or nothing when it is listed as defective
function PTComposer:assemble_form(stm, vwl, affx, endg, pdgm, ref)
	-- Defective verbs: no form when "<paradigm>_<ref>" matches a pattern.
	for _, str in ipairs(defective_forms) do
		if (pdgm .. '_' .. ref):find(str) then
			return
		end
	end

	-- The ending is dropped when it would double an 'i', and for the 1SG/3SG
	-- preterite of verbs flagged as irregular there.
	local i_collision = vwl .. affx .. endg == 'ii'
	local rhizt_PRF = irreg_PRS_PRF and pdgm == 'IND_PRS_PRF' and ref:find '[13]SG'
	if not (i_collision or rhizt_PRF) then
		affx = affx .. endg
	end

	if pdgm:find 'PRS_NPRF' and not ref:find '[12]PL' then
		-- Upper-case letters become the accented vowel from diacritics_map;
		-- upper-case letters without a mapping are left untouched by gsub.
		local function shift_stress(vf)
			return (vf:gsub('%u', function(c)
				return diacritics_map[c:lower()]
			end))
		end
		self.prefix = shift_stress(self.prefix)
		stm = shift_stress(stm)
	else
		self.prefix = self.prefix:lower()
		stm = stm:lower()
		-- Count vowel groups in the affix (ignoring 's') to decide whether
		-- the vowel needs a written accent.
		local _affx = affx:gsub('s', '')
		local affx_sub, sub_count = _affx:gsub('[^aeiou]?[aeio]m?', '')
		local ext_stm = self.prefix .. stm
		local diaeresis = vwl == 'i'
			and (ext_stm:find '[aeio]$' or ext_stm:find '[^gq]u$')
		if sub_count > 1 or (diaeresis and affx_sub == '') then
			vwl = (irreg_PRS_PRF and vwl == 'e' and 'é')
				or (diacritics_map[vwl] or vwl)
		end
	end

	local form = self.prefix .. stm .. vwl .. affx
	if form == 'por' then
		form = 'pôr'
	end
	return form
end

-- Module result: the configured composer with make_tables and assemble_form
-- attached.
return PTComposer