-- Module:User:Erutuon/grc

-- Table that collects the functions this module exposes (for example
-- export.tokenize, export.tokenize_vowels and export.show below).
local export = {}

local add_dotted_circle = require("Module:Unicode data").add_dotted_circle local highlight = require("Module:debug").highlight { lang = "html" } local Latin_to_Greek = require("Module:User:Erutuon/grc/Latin to Greek") local tokenize = require("Module:grc-utilities").tokenize

-- Object that Module:languages returns for the language code "grc".
local grc = require("Module:languages").getByCode("grc")

-- Thin wrapper: forwards text to grc:makeEntryName and returns whatever that
-- method returns.
local make_entry_name = function(text)
	return grc:makeEntryName(text)
end

-- Information about the page on which the module is currently running.
-- mw.title.getCurrentTitle is a function and must be called to obtain the
-- title object; indexing the function itself raises an error.
local title = mw.title.getCurrentTitle()
-- Namespace name of the current page (compared against "" below).
local namespace = title.nsText
-- Page title as given by the title object's text field.
local pagename = title.text
-- True when the namespace text is not the empty string.
local nonmainspace = namespace ~= ""

-- Byte-oriented string library functions cached in locals for use below.
local str_find, str_gsub, str_gmatch = string.find, string.gsub, string.gmatch

-- Unicode-aware helpers taken from the mw.ustring library.
local ufind = mw.ustring.find
local umatch = mw.ustring.match
local decompose = mw.ustring.toNFD
local U = mw.ustring.char

-- Combining diacritics, each built from its code point.
local macron = U(0x304)
local breve = U(0x306)
local rough = U(0x314)
local smooth = U(0x313)
local diaeresis = U(0x308)
local acute = U(0x301)
local grave = U(0x300)
local circumflex = U(0x342)
local subscript = U(0x345)

-- Character class that matches any single one of the diacritics above.
local diacritic_patt = "[" .. table.concat({
	macron, breve, rough, smooth, diaeresis, acute, grave, circumflex, subscript,
}) .. "]"

-- Pattern for one UTF-8 encoded character: a single byte from the first set
-- followed by any number of bytes in the range \128-\191.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
-- Pattern for a two-byte sequence whose first byte is \206 or \207.
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ

-- Includes ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ
-- local diacritic = "[\204-\205][\128-\191]"

-- NOTE(review): `decompose` is already bound to mw.ustring.toNFD by an earlier
-- declaration in this file; this second declaration shadows it with the same
-- value.
local decompose = mw.ustring.toNFD

-- Wraps str in curly double quotation marks (“ and ”).
local function quote(str)
	local opening, closing = "“", "”"
	return opening .. str .. closing
end

-- Lookup table keyed by a single character (a UTF-8 string); the values are
-- the shared description tables assigned by add_info below.
local info = {}

-- Description tables assigned to characters in `info`.
--
-- Each declaration sits on its own line: a `--` comment runs to the end of
-- the physical line, so declarations that share a line with a comment are
-- never executed.
--
-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel = { vowel = true, diacritic_seat = true }
-- iota and upsilon have the same fields but are distinct tables, so that code
-- can tell them apart with ==.
local iota = { vowel = true, diacritic_seat = true, offglide = true }
local upsilon = { vowel = true, diacritic_seat = true, offglide = true }
-- Technically rho is only a seat for rough or smooth breathing.
local rho = { consonant = true, diacritic_seat = true }
local consonant = { consonant = true }
local diacritic = { diacritic = true }
-- Needed for equality comparisons.
local breathing = { diacritic = true }

-- Assigns the description table t to every character in `characters`.
--
-- characters: either a string, which is split into UTF-8 characters with the
--   UTF8_char pattern, or an array of single-character strings.
-- t: the table stored in info for each of those characters.
local function add_info(characters, t)
	if type(characters) == "string" then
		for char in string.gmatch(characters, UTF8_char) do
			info[char] = t
		end
		return
	end

	for _, char in ipairs(characters) do
		info[char] = t
	end
end

-- Register every known character in `info`. The entries are applied in order
-- and a later entry overwrites an earlier one, which is why rho comes after
-- the consonant list that also contains Ρ and ρ.
do
	local registrations = {
		{ { macron, breve, diaeresis, acute, grave, circumflex, subscript }, diacritic },
		{ { rough, smooth }, breathing },
		{ "ΑΕΗΟΩαεηοω", vowel },
		{ "Ιι", iota },
		{ "Υυ", upsilon },
		{ "ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant },
		{ "Ρρ", rho },
	}
	for _, registration in ipairs(registrations) do
		add_info(registration[1], registration[2])
	end
end

-- Sentinel table returned for every character that has no entry in info.
local not_recognized = {}

-- Looking up a missing key in info yields the sentinel instead of nil, so the
-- result can always be indexed.
setmetatable(info, {
	__index = function()
		return not_recognized
	end,
})

-- Equivalent of the current function.
--
-- Decomposes text to NFD and splits it into tokens, one character at a time:
--   * a vowel joins the previous token only when it is an offglide (ι, υ)
--     directly after a vowel, and it is not υ after ι or υ;
--   * a diacritic is appended to the current token; a diaeresis additionally
--     splits a two-vowel token in front of the second vowel;
--   * rho starts a new token unless it follows a breathing in a token that
--     itself starts with rho;
--   * every other character starts a new token.
--
-- Returns the array of tokens, or a message string when rho is followed by a
-- diacritic other than a breathing. Raises an error when a diacritic follows
-- any other character that cannot carry one, or begins the text.
function export.tokenize(text)
	local tokens, prev_info = {}, {}
	local token_i = 1
	local prev
	for character in str_gmatch(decompose(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and (not (curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(
						tokens[token_i],
						"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				if curr_info ~= breathing then
					return string.format("The character %s cannot have the accent %s on it.", prev, add_dotted_circle(character))
				end
			elseif prev then
				error("The character " .. quote(prev) .. " cannot have a diacritic on it.")
			else
				-- No previous character at all: report that directly instead
				-- of failing while building the message from a nil value.
				error("The text begins with a diacritic, which has no character to sit on.")
			end
		elseif curr_info == rho then
			-- Keep a second rho in the same token only after "rho + breathing".
			if prev and not (prev_info == breathing
					and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

-- Variant of export.tokenize that keeps runs of consonants and unrecognized
-- characters together in one token.
--
-- Decomposes text to NFD and walks it one character at a time:
--   * a vowel joins the previous token only when it is an offglide (ι, υ)
--     directly after a vowel, and it is not υ after ι or υ;
--   * a diacritic is appended to the current token; a diaeresis additionally
--     splits a two-vowel token in front of the second vowel;
--   * rho starts a new token unless it follows a breathing in a token that
--     itself starts with rho;
--   * any other character joins the current token when the previous
--     character was a consonant or unrecognized, else starts a new token.
--
-- Returns the array of tokens, or a message string when rho is followed by a
-- diacritic other than a breathing. Raises an error when a diacritic follows
-- any other character that cannot carry one, or begins the text.
function export.tokenize_vowels(text)
	text = decompose(text)
	local tokens, prev_info = {}, {}
	local token_i = 1
	local prev
	for character in str_gmatch(text, UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and not (curr_info.offglide and prev_info.vowel
					-- υυ → υ, υ
					-- ιυ → ι, υ
					and not (prev_info.offglide and curr_info == upsilon)) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(
						tokens[token_i],
						"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				if curr_info ~= breathing then
					return string.format("The character %s cannot have the accent %s on it.", prev, add_dotted_circle(character))
				end
			elseif prev then
				error("The character " .. quote(prev) .. " cannot have a diacritic on it.")
			else
				-- No previous character at all: report that directly instead
				-- of failing while building the message from a nil value.
				error("The text begins with a diacritic, which has no character to sit on.")
			end
		elseif curr_info == rho then
			-- NOTE(review): rho starts a new token after another consonant,
			-- whereas other consonants are grouped with the preceding
			-- consonant -- confirm this asymmetry is intended.
			if prev and not (prev_info == breathing
					and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			-- consonant or "not recognized"
			if prev and not (prev_info.consonant or prev_info == not_recognized) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

-- Runs export.tokenize over frame.args (through the `map` helper of
-- Module:User:Erutuon/functional), formats each result and joins everything
-- with single spaces.
--
-- frame: table with an `args` field holding the texts to tokenize.
function export.show(frame)
	local map = require("Module:User:Erutuon/functional").map

	-- Visible stand-ins for whitespace characters found inside a token.
	local whitespace_replacements = {
		["\n"] = "&para;",
		["\r"] = "&para;",
		[" "] = "&ensp;",
	}
	local wrapper = ' %s '

	local function render(result)
		-- export.tokenize can return a message string instead of a token
		-- array; such a string is passed through unchanged.
		if type(result) == "string" then
			return result
		end

		local pieces = {}
		for position, token in ipairs(result) do
			local visible = str_gsub(token, "%s", whitespace_replacements)
			pieces[position] = string.format(wrapper, visible)
		end
		return table.concat(pieces, " ")
	end

	return table.concat(map(render, map(export.tokenize, frame.args)), " ")
end

-- Assumes val is string or nil, as will be true if it is a template parameter.
--
-- Interprets a parameter value as a boolean where possible:
--   * nil (or false) gives false;
--   * "no", "false", or anything tonumber reads as 0 gives false;
--   * "yes", "true", or anything tonumber reads as 1 gives true;
--   * any other string is returned lowercased.
-- The comparison is case-insensitive because val is lowercased first.
local function boolean_or_string(val)
	if not val then
		return false
	end
	-- lower is a method and has to be called; a bare `val:lower` is not
	-- valid Lua.
	val = val:lower()

	local number = tonumber(val)
	if number == 0 or val == "no" or val == "false" then
		return false
	elseif number == 1 or val == "yes" or val == "true" then
		return true
	else
		return val
	end
end

-- Modification of findAmbig function in Module:grc-utilities that returns
-- boolean value.
--
-- Returns true when some token of text consists of α, ι or υ followed only by
-- diacritics, none of which is a macron, breve, circumflex or subscript;
-- returns false otherwise.
local function has_ambiguous_vowel(text)
	local lengthDiacritic = "[" .. macron .. breve .. circumflex .. subscript .. "]"
	local aiu_diacritic = "^([" .. "αιυ" .. "])(" .. diacritic_patt .. "*)$"

	-- breaks the word into units
	for _, token in ipairs(tokenize(text)) do
		-- Named `letter` so that it does not shadow the file-level `vowel`
		-- table.
		local letter, diacritics = umatch(token, aiu_diacritic)
		if letter and (diacritics == ""
				or not ufind(diacritics, lengthDiacritic)) then
			return true
		end
	end

	return false
end

-- Reports whether the NFD form of text contains a macron or a breve.
-- Returns the start position of the macron if one is found, otherwise the
-- start position of the breve, otherwise nil.
local function has_breve_or_macron(text)
	local decomposed = decompose(text)

	local macron_at = str_find(decomposed, macron)
	if macron_at then
		return macron_at
	end

	return (str_find(decomposed, breve))
end

-- Copies every key/value pair of t2 into t1 and returns t1.
-- Raises an error as soon as a key of t2 already has a non-nil value in t1;
-- pairs copied before that point stay in t1.
local function add_to_t(t1, t2)
	for key, value in pairs(t2) do
		if t1[key] ~= nil then
			error("Table 1 already has value for " .. quote(key) .. ".")
		end
		t1[key] = value
	end
	return t1
end

-- Searches array (traversed with ipairs) for val.
-- Returns the index of the first element equal to val, or false if there is
-- none.
local function in_array(array, val)
	for index, element in ipairs(array) do
		if element == val then
			return index
		end
	end
	return false
end

-- Builds a "|value" or "|name=value" fragment.
-- Returns nil when value is nil or false; name is optional.
local function make_param(value, name)
	if not value then
		return nil
	end

	local prefix = ""
	if name then
		prefix = name .. "="
	end
	return "|" .. prefix .. value
end

-- Replaces every match of the pattern in template with the corresponding
-- entry of content_table, looked up by the matched text. A missing entry is
-- replaced by the empty string; an entry that is neither a string nor nil
-- raises an error. Returns the results of str_gsub (new string and count).
--
-- NOTE(review): the pattern "}" matches a single closing brace only, so the
-- lookup key is always "}". This looks like a truncated pattern -- confirm
-- against the intended placeholder syntax.
local function add_content(template, content_table)
	local function substitute(code)
		local content = content_table[code]
		local content_type = type(content)
		if content_type == "string" then
			return content
		elseif content_type == "nil" then
			return ""
		end
		error("Invalid type for content variable " .. quote(tostring(code)) .. ": " .. type(content) .. ".")
	end

	return str_gsub(template, "}", substitute)
end

-- Strips the tone marks from form with Module:grc-accent's strip_tone, then
-- calls add_accent on the result with position -3 and returns what that gives.
local function accent_recessively(form)
	local m_accent = require "Module:grc-accent"
	local toneless = m_accent.strip_tone(form)
	return m_accent.add_accent(toneless, -3)
end

-- Derives declension information for a participle from its masculine form.
--
-- masculine: the masculine form; it is normalized to NFC first.
-- contraction_vowel: nil, "a", "e" or "o"; any other value raises an error.
--
-- Returns four values: decl ("1&2" for forms ending in ος, otherwise "1&3"),
-- feminine, neuter and lemma. feminine, neuter and lemma are nil whenever the
-- ending is not one of the cases handled below.
local function get_participle_information(masculine, contraction_vowel)
	local decl, feminine, neuter, lemma
	if not (not contraction_vowel or contraction_vowel == "a"
			or contraction_vowel == "e" or contraction_vowel == "o") then
		error("Invalid contraction vowel. Either omit or choose between a, e, o.")
	end

	masculine = mw.ustring.toNFC(masculine)
	if masculine:find("ος$") then
		decl = "1&2"
		feminine = accent_recessively(masculine:gsub("ος$", "η"))
		neuter = masculine:gsub("ς$", "ν")
		-- Warning! This simply assumes the participle is present mediopassive,
		-- like λεγόμενος, not second aorist, like λαβόμενος.
		if masculine:find("όμενος$") then
			lemma = accent_recessively(masculine:gsub("όμενος$", "ω"))
		end
	else
		decl = "1&3"
		local m_utilities_data = require "Module:grc-utilities/data"
		-- Warning! This pattern will not work if there's a vowel before the
		-- participle ending, as in τεθνεώς, perfect active participle of
		-- θνῄσκω.
		local ending = mw.ustring.match(mw.ustring.toNFD(masculine),
			m_utilities_data.vowel .. "?[ιυ]?[^" .. m_utilities_data.vowels .. "]+$")
		if ending then
			local m_accent = require "Module:grc-accent"
			ending = mw.ustring.toNFC(m_accent.strip_tone(ending))
			local accent_pos, accent_type = m_accent.detect_accent(masculine)
			local toneless_masculine = m_accent.strip_tone(masculine)
			-- Declared once for all branches so that no branch assigns to a
			-- global variable named `stem`.
			local stem
			if ending == "ων" then -- recessive, oxytone, properispomenon
				stem = mw.ustring.gsub(toneless_masculine, "ων$", "")
				if contraction_vowel == "a" then
					-- τῑμῶν, τῑμῶσᾰ, τῑμῶν
					feminine = stem .. "ωσᾰ"
					neuter = masculine
					lemma = stem .. "ᾰ́ω"
				else
					-- λέγων, λέγουσα, λέγον; λαβών, λαβοῦσᾰ, λαβόν;
					-- ποιῶν, ποιοῦσᾰ, ποιοῦν; δηλῶν, δηλοῦσᾰ, δηλοῦν
					feminine = stem .. "ουσᾰ"
					if contraction_vowel == "e" or contraction_vowel == "o" then
						neuter = stem .. "ουν"
						lemma = stem .. (contraction_vowel == "e" and "έω" or "όω")
					else
						-- If accent is paroxytone, then this is a present-tense
						-- verb.
						if m_accent.detect_accent(masculine, true) == 2 then
							lemma = accent_recessively(stem .. "ω")
						end
						neuter = stem .. "ον"
					end
				end
			elseif ending == "ᾱς" then -- recessive except in athematic verbs
				stem = mw.ustring.gsub(toneless_masculine,
					"α" .. m_utilities_data.diacritics.macron .. m_utilities_data.diacritics.acute .. "?ς",
					"")
				feminine = stem .. "ᾱσᾰ"
				neuter = stem .. "ᾰν"
			-- Aorist passive or athematic; always oxytone?
			elseif ending == "εις" then
				stem = masculine:gsub("είς", "")
				feminine = stem .. "εῖσᾰ"
				neuter = stem .. "έν"
			elseif ending == "ως" then -- Perfect active; always oxytone?
				stem = masculine:gsub("ώς", "")
				feminine = stem .. "υῖᾰ"
				neuter = stem .. "ός"
			-- Present active, δεικνῡ́ς and such
			elseif ending == "ῡς" then
				stem = masculine:gsub("ῡ́ς", "")
				feminine = stem .. "ῦσᾰ"
				neuter = stem .. "ῠ́ν"
			end

			-- Put the accent back at the position (and with the circumflex
			-- option) detected on the masculine form.
			if feminine and neuter then
				local options = { circumflex = accent_type == "circumflex" }
				feminine, neuter = m_accent.add_accent(feminine, accent_pos, options), m_accent.add_accent(neuter, accent_pos, options)
			end
		end
	end

	return decl, feminine, neuter, lemma
end

export.get_participle_information = get_participle_information

-- Decides whether an explicit head parameter is needed for pagename.
-- Returns a true value when the current page is outside the main namespace,
-- when pagename contains an apostrophe (the position of the apostrophe is
-- returned), or when has_ambiguous_vowel(pagename) is true.
local function needs_head_parameter(pagename)
	if nonmainspace then
		return nonmainspace
	end

	local apostrophe_at = pagename:find "'"
	if apostrophe_at then
		return apostrophe_at
	end

	return (has_ambiguous_vowel(pagename))
end

-- Three-letter dialect codes and the dialect name each one stands for.
local dialect_abbr = {
	aio = "Aeolic Greek",
	ark = "Arcadocypriot",
	att = "Attic",
	boi = "Boeotian",
	del = "Delphic",
	dor = "Doric",
	ele = "Elean",
	epi = "Epic",
	hom = "Homeric",
	ion = "Ionic",
	koi = "Koine",
	kre = "Cretan",
	lak = "Lacedaemonian",
	lok = "Locrian",
	lur = "Lyric",
	muk = "Mycenaean",
	pam = "Pamphylian",
	pho = "Phocian",
	poi = "poetic",
	the = "Thessalian",
}

-- Returns nil when no dialect code is given, a one-space string when the code
-- is a key of dialect_abbr, and raises an error for an unknown code.
--
-- NOTE(review): the looked-up dialect name is only tested for existence and
-- the returned string is a constant " ". The label text may have been lost
-- from this literal -- confirm against the intended output.
local function make_term_label(dialect_code)
	if not dialect_code then
		return nil
	end

	local dialect = dialect_abbr[dialect_code]
	if not dialect then
		error("The dialect code " .. dialect_code .. " was not recognized.")
	end
	return " "
end

local templates = { language_header = [=[

Ancient Greek
]=],	etymology = [=[

Etymology
]=],	pronunciation = [=[

Pronunciation
]=],	noun = [=[

Declension
]=],	adjective = [=[

Declension
]=],	participle = [=[

Declension
]=],	verb = [=[

Verb


]=],	verb_form = [=[

Verb
]=],	ref_header = [=[