-- Module:hit-translit

-- Table that collects this module's public functions; it is returned at the
-- end of the file.
local export = {}

local bit32 = require('bit32') local m_table = require('Module:table') local m_tag = require('Module:hit-translit/tag') local sign_list = mw.loadData('Module:hit-translit/data')

-- Short local aliases for the mw.ustring length and substring functions.
local ulen = mw.ustring.len local usub = mw.ustring.sub

--[=[
-- Lookup of every character that counts as a phonetic segment of a
-- transliterated syllable, mapped to its normalized form. Accented vowels
-- collapse to the plain vowel, "ḫ" becomes "h" and "š" becomes "s"; all
-- other listed characters map to themselves.
]=]
local segments = {
	-- vowels
	['a'] = 'a', ['á'] = 'a', ['à'] = 'a',
	['e'] = 'e', ['é'] = 'e', ['è'] = 'e',
	['i'] = 'i', ['í'] = 'i', ['ì'] = 'i',
	['u'] = 'u', ['ú'] = 'u', ['ù'] = 'u',
	-- consonants with voicing alternates
	['b'] = 'b', ['p'] = 'p',
	['d'] = 'd', ['t'] = 't',
	['g'] = 'g', ['k'] = 'k', ['q'] = 'q',
	-- single consonants
	['ḫ'] = 'h',
	['r'] = 'r',
	['l'] = 'l',
	['m'] = 'm',
	['n'] = 'n',
	['š'] = 's',
	['z'] = 'z',
	['y'] = 'y',
	['w'] = 'w',
	-- numbers
	['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4',
	['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9',
}

--[=[
-- Bit values used for marking which onsets and codas are available for
-- ambiguous characters. Each normalized segment owns one distinct bit
-- (2 ^ 0 for the first entry, 2 ^ 1 for the second, and so on), so a set of
-- segments can be stored as a single number and the numeric value doubles
-- as the sort rank of the segment.
]=]
local sort_order = {}
do
	local ranked_segments = {
		-- vowels ("i" is deliberately ranked before "e")
		'a', 'i', 'e', 'u',
		-- consonants with voicing alternates
		'p', 'b', 't', 'd', 'k', 'g', 'q',
		-- single consonants
		'h', 'r', 'l', 'm', 'n', 's', 'z', 'y', 'w',
		-- numbers
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	}
	for position, segment in ipairs(ranked_segments) do
		sort_order[segment] = 2 ^ (position - 1)
	end
end

local function inplace_multikey_sort(t)
	--[=[
	-- Sorts the array part of "t" in place and returns "t".
	-- Entries are ordered by the rank of their onset ("o" field) and, when
	-- the onsets are the same, by the rank of their coda ("c" field), with
	-- ranks taken from "sort_order".
	-- Entries sharing both onset and coda compare as equal; table.sort is
	-- not stable, so their relative order is unspecified.
	]=]
	local function precedes(left, right)
		if left.o == right.o then
			return sort_order[left.c] < sort_order[right.c]
		end
		return sort_order[left.o] < sort_order[right.o]
	end

	table.sort(t, precedes)
	return t
end

local function find_seg(syl, rev)
	--[=[
	-- Scans "syl" one character at a time, from the front, or from the back
	-- when "rev" is truthy, and returns the normalized form of the first
	-- character that is listed in "segments". Thus:
	-- 		find_seg("šaq") gives "s"
	-- 		find_seg("luḫ", true) gives "h"
	-- Raises an error when no character of "syl" is a known segment.
	]=]
	for offset = 1, ulen(syl) do
		-- A negative position counts from the end of the string.
		local position = rev and -offset or offset
		local normalized = segments[usub(syl, position, position)]
		if normalized then
			return normalized
		end
	end
	error('Could not find a ' .. (rev and 'coda' or 'onset') .. ' for the syllable "' .. syl .. '".')
end

function export.find_onset(syl)
	--[=[
	-- Returns the normalized onset of a syllable, i.e. the first character
	-- of "syl", scanning forwards, that "find_seg" recognises as a segment.
	]=]
	local onset = find_seg(syl, false)
	return onset
end

function export.find_coda(syl)
	--[=[
	-- Returns the normalized coda of a syllable, i.e. the first character
	-- of "syl", scanning backwards, that "find_seg" recognises as a segment.
	]=]
	local coda = find_seg(syl, true)
	return coda
end

function export.hash_sign(sign)
	--[=[
	-- Rewrites, in place, the list of Hittite syllables held in "sign".
	-- Every syllable string is replaced by a small table holding:
	-- 		[1] the syllable itself
	-- 		o   the normalized onset character of the syllable
	-- 		c   the normalized coda character of the syllable
	-- The bits of all onsets and of all codas are also OR-ed together and
	-- stored on the sign as "o_hash" and "c_hash". Thus
	-- 		{ "it", "id", "et", "ed", hit = true }
	-- becomes:
	-- 		{
	-- 			{ "it", o = "i", c = "t" },
	-- 			{ "id", o = "i", c = "d" },
	-- 			{ "et", o = "e", c = "t" },
	-- 			{ "ed", o = "e", c = "d" },
	-- 			o_hash = 6, c_hash = 192, hit = true
	-- 		}
	-- Nothing is returned.
	]=]
	sign.o_hash, sign.c_hash = 0, 0 -- start with empty onset and coda sets
	for index, reading in ipairs(sign) do
		local entry = {
			reading,
			o = export.find_onset(reading),
			c = export.find_coda(reading),
		}
		sign[index] = entry
		sign.o_hash = bit32.bor(sign.o_hash, sort_order[entry.o])
		sign.c_hash = bit32.bor(sign.c_hash, sort_order[entry.c])
	end
end

function export.copy_sign(sign)
	--[=[
	-- Returns a deep copy of the "sign_list" entry stored under the key
	-- "sign". When the entry is flagged "hit" (it has Hittite syllables) the
	-- copy is additionally hashed with "export.hash_sign" and sorted by
	-- onset and coda before being returned.
	]=]
	local copy = m_table.deepcopy(sign_list[sign], true)
	if not copy.hit then
		return copy
	end
	export.hash_sign(copy)
	return inplace_multikey_sort(copy)
end

local function remove_syls(first, second, mask)
	--[=[
	-- For two adjacent sets of Hittite syllables and a bit mask of the
	-- characters they share, builds and returns two new sets:
	-- 		* the syllables of "first" whose coda bit is in "mask"
	-- 		* the syllables of "second" whose onset bit is in "mask"
	-- Both results are flagged "hit" and carry updated hashes. The side
	-- facing the other sign gets "mask" itself as its hash; the far side gets
	-- the OR of the bits of the syllables that were kept.
	]=]
	local kept_first = { hit = true, o_hash = 0, c_hash = mask }
	for _, syl in ipairs(first) do
		if bit32.band(sort_order[syl.c], mask) ~= 0 then
			kept_first[#kept_first + 1] = syl
			-- Not strictly needed, but useful for tracking.
			kept_first.o_hash = bit32.bor(kept_first.o_hash, sort_order[syl.o])
		end
	end

	local kept_second = { hit = true, o_hash = mask, c_hash = 0 }
	for _, syl in ipairs(second) do
		if bit32.band(sort_order[syl.o], mask) ~= 0 then
			kept_second[#kept_second + 1] = syl
			kept_second.c_hash = bit32.bor(kept_second.c_hash, sort_order[syl.c])
		end
	end

	return kept_first, kept_second
end

--[=[
-- List of bit masks, each one combining the "sort_order" bits of a group of
-- characters that are treated as approximate matches of one another.
]=]
local related_character_masks
do
	-- OR together the bits of every (single-byte) character in "chars".
	local function mask_of(chars)
		local mask = 0
		for char in chars:gmatch('.') do
			mask = bit32.bor(mask, sort_order[char])
		end
		return mask
	end

	related_character_masks = {
		-- voicing alternates
		mask_of('pb'),
		mask_of('td'),
		mask_of('kgq'),
		-- "u" patterns next to "w"
		mask_of('uw'),
		-- numbers pattern together
		mask_of('0123456789'),
	}
end

local function approx_match(first_hash, second_hash)
	--[=[
	-- Builds a bit mask of all approximate matches between two hashes, such
	-- as "p" and "b", or "t" and "d". Every group in
	-- "related_character_masks" that has at least one bit set in
	-- "first_hash" and at least one bit set in "second_hash" is OR-ed, as a
	-- whole, into the result. Returns 0 when no group qualifies.
	]=]
	local combined = 0
	for index = 1, #related_character_masks do
		local group = related_character_masks[index]
		if bit32.band(group, first_hash) ~= 0 and bit32.band(group, second_hash) ~= 0 then
			combined = bit32.bor(combined, group)
		end
	end
	return combined
end

function export.fit_signs(first, second)
	--[=[
	-- Takes two adjacent signs and removes unlikely Hittite syllables.
	-- When both signs are present and flagged "hit", the codas of "first"
	-- are matched against the onsets of "second": exact matches are tried
	-- first, approximate matches (see "approx_match") second. If either
	-- yields a non-empty mask, the filtered pair from "remove_syls" is
	-- returned. In every other case the two arguments are returned unchanged.
	]=]
	if not (first and second) then
		-- Only "first" is given for the final sign of a word and only
		-- "second" for the initial sign; neither case is treated specially
		-- yet.
		return first, second
	end
	if not (first.hit and second.hit) then
		-- At least one of the signs has no Hittite syllables to filter.
		return first, second
	end

	local shared = bit32.band(first.c_hash, second.o_hash)
	if shared == 0 then
		-- No exactly matching characters; fall back to approximate matches.
		shared = approx_match(first.c_hash, second.o_hash)
	end
	if shared > 0 then
		return remove_syls(first, second, shared)
	end
	return first, second
end

local function assemble_word(signs)
	--[=[
	-- Picks one rendering for every sign, tags it where appropriate, and
	-- joins the results with "-". The flags of a sign are checked in this
	-- order, and only the first one that is set is used:
	-- 		hit          first syllable, untagged
	-- 		sum          first value, passed to m_tag.tag_sumerogram
	-- 		akk          first value, passed to m_tag.tag_akkadogram
	-- 		hurr         first value, passed to m_tag.tag_hurrian_tr
	-- 		hatt         first value, passed to m_tag.tag_hattic_tr
	-- 		glossenkeil  the value m_tag.glossenkeil
	-- A sign with none of these flags contributes nothing.
	]=]
	local pieces = {}
	local push = table.insert
	for _, current in ipairs(signs) do
		if current.hit then
			-- Hittite entries are tables; element 1 holds the syllable.
			push(pieces, current[1][1])
		elseif current.sum then
			push(pieces, m_tag.tag_sumerogram(current[1]))
		elseif current.akk then
			push(pieces, m_tag.tag_akkadogram(current[1]))
		elseif current.hurr then
			push(pieces, m_tag.tag_hurrian_tr(current[1]))
		elseif current.hatt then
			push(pieces, m_tag.tag_hattic_tr(current[1]))
		elseif current.glossenkeil then
			push(pieces, m_tag.glossenkeil)
		end
	end
	return table.concat(pieces, '-')
end

function export.transpose(text)
	--[=[
	-- Takes a continuous Cuneiform string and converts it to transliteration.
	--
	-- The string is consumed from the left. At each step the longest prefix
	-- (3, then 2, then 1 characters) that is a key of "sign_list" is taken
	-- as the next sign, copied with "export.copy_sign", and fitted against
	-- the sign before it with "export.fit_signs". The finished list is
	-- turned into a string by "assemble_word".
	--
	-- Raises an error when the text starts with a character for which
	-- "sign_list" has no entry. Without this check "text" would never get
	-- shorter and the loop would never terminate.
	]=]
	local signs = {}
	while ulen(text) > 0 do
		local matched = false
		for length = 3, 1, -1 do -- longest candidate first
			local key = usub(text, 1, length)
			if sign_list[key] then
				table.insert(signs, export.copy_sign(key)) -- add in new sign
				text = usub(text, length + 1) -- truncate string
				matched = true
				break
			end
		end
		if not matched then
			error('No transliteration data for the sign "' .. usub(text, 1, 1) .. '".')
		end

		-- Fit the new sign against its predecessor. For the very first sign
		-- the predecessor slot (index 0) is nil and stays nil.
		local count = #signs
		signs[count - 1], signs[count] = export.fit_signs(signs[count - 1], signs[count])
	end
	signs[#signs] = export.fit_signs(signs[#signs], nil) -- fit end of word
	return assemble_word(signs)
end

function export.tr(text, lang, sc)
	--[=[
	-- Transliterates "text" when the script code "sc" is "Xsux"; for any
	-- other script code nil is returned. Every run of characters in the
	-- range 𒀀-𒑱 is replaced by the result of "export.transpose", and the
	-- whole string is then passed through m_tag.tag_hittite_tr.
	-- "lang" is accepted but not used.
	]=]
	if sc == "Xsux" then
		-- Keep only the first result of gsub (the replaced string).
		local converted = mw.ustring.gsub(text, '[𒀀-𒑱]+', export.transpose)
		return m_tag.tag_hittite_tr(converted)
	end
	return nil
end

-- Hand the table of public functions to whoever loads this module.
return export