-- Module:ain-kana-conv

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Letters accepted as a syllable onset. The apostrophe is listed as an onset
-- as well (see the "'a" .. "'o" rows of CONVERSION_TABLE).
local CONSONANTS = {
	"p", "t", "c", "k",
	"m", "n", "s", "h",
	"w", "r", "y", "'",
}

-- Letters that form a syllable nucleus: the five plain vowels followed by
-- their acute-accented counterparts.
local VOWELS = {
	"a", "i", "u", "e", "o",
	"á", "í", "ú", "é", "ó",
}

-- Maps each accented vowel to its unaccented base letter.
local ACCENT_CONVERSION_TABLE = {
	["á"] = "a",
	["í"] = "i",
	["ú"] = "u",
	["é"] = "e",
	["ó"] = "o",
}

-- Katakana for every supported (onset +) vowel pair, one row per onset.
-- The t-row has no "ti" entry and the w-row has no "wu" entry.
-- "nn" and "tt" are not CV pairs; they hold the kana ン and ッ.
local CONVERSION_TABLE = {
	-- bare vowels
	["a"] = "ア", ["i"] = "イ", ["u"] = "ウ", ["e"] = "エ", ["o"] = "オ",
	-- apostrophe + vowel
	["'a"] = "ア", ["'i"] = "イ", ["'u"] = "ウ", ["'e"] = "エ", ["'o"] = "オ",
	-- k
	["ka"] = "カ", ["ki"] = "キ", ["ku"] = "ク", ["ke"] = "ケ", ["ko"] = "コ",
	-- s
	["sa"] = "サ", ["si"] = "シ", ["su"] = "ス", ["se"] = "セ", ["so"] = "ソ",
	-- t
	["ta"] = "タ", ["tu"] = "ト゚", ["te"] = "テ", ["to"] = "ト",
	-- c
	["ca"] = "チャ", ["ci"] = "チ", ["cu"] = "チュ", ["ce"] = "チェ", ["co"] = "チョ",
	-- n
	["na"] = "ナ", ["ni"] = "ニ", ["nu"] = "ヌ", ["ne"] = "ネ", ["no"] = "ノ",
	-- h
	["ha"] = "ハ", ["hi"] = "ヒ", ["hu"] = "フ", ["he"] = "ヘ", ["ho"] = "ホ",
	-- p
	["pa"] = "パ", ["pi"] = "ピ", ["pu"] = "プ", ["pe"] = "ペ", ["po"] = "ポ",
	-- m
	["ma"] = "マ", ["mi"] = "ミ", ["mu"] = "ム", ["me"] = "メ", ["mo"] = "モ",
	-- y
	["ya"] = "ヤ", ["yi"] = "イ", ["yu"] = "ユ", ["ye"] = "イェ", ["yo"] = "ヨ",
	-- r
	["ra"] = "ラ", ["ri"] = "リ", ["ru"] = "ル", ["re"] = "レ", ["ro"] = "ロ",
	-- w
	["wa"] = "ワ", ["wi"] = "ヰ", ["we"] = "ヱ", ["wo"] = "ヲ",
	-- special entries
	["nn"] = "ン",
	["tt"] = "ッ",
}

-- Kana for syllable-final consonants whose form does not depend on the
-- preceding vowel. Upper-case "N" maps to the full-size ン.
local CODA_CONS = {
	w = "ゥ",
	y = "ィ",
	m = "ㇺ",
	n = "ㇴ",
	N = "ン",
	s = "ㇱ",
	p = "ㇷ゚",
	t = "ㇳ",
	k = "ㇰ",
}

-- Kana for syllable-final "r" and "h". These codas are looked up by the
-- vowel that precedes them: CODA_VARA[coda][vowel].
local CODA_VARA = {
	r = {
		a = "ㇻ",
		i = "ㇼ",
		u = "ㇽ",
		e = "ㇾ",
		o = "ㇿ",
	},
	h = {
		a = "ㇵ",
		i = "ㇶ",
		u = "ㇷ",
		e = "ㇸ",
		o = "ㇹ",
	},
}

--- Report whether `item` occurs among the values of `items`.
-- @param item  value to search for (compared with ==)
-- @param items table whose values are scanned
-- @return true if some value equals `item`, false otherwise
-- NOTE(review): this is declared as a global, not a local; kept that way so
-- the module's visible interface is unchanged.
function in_values(item, items)
	local key, value = next(items)
	while key ~= nil do
		if value == item then
			return true
		end
		key, value = next(items, key)
	end
	return false
end

--- Report whether `item` is a key of `items`.
-- A direct raw lookup replaces the former linear scan over every key; the
-- result is the same because a key is present exactly when its raw value is
-- not nil.
-- @param item  candidate key
-- @param items table to inspect
-- @return true if `items` has the key `item`, false otherwise
-- NOTE(review): this is declared as a global, not a local; kept that way so
-- the module's visible interface is unchanged.
function in_keys(item, items)
	-- A nil key can never be stored in a table.
	if item == nil then
		return false
	end
	return rawget(items, item) ~= nil
end

--- Convert one romanized syllable, (onset +) vowel (+ coda), to katakana.
-- @param syllable  the romanized syllable
-- @param next_char first character of the following syllable; nil or ""
--                  when this is the last syllable of the word
-- @return the katakana string for the syllable
-- Raises an error when no katakana is known for the syllable.
local function convert_syllable(syllable, next_char)
	local l_syllable = mw.ustring.len(syllable)

	local remains = syllable
	local coda = ""

	local last_char = mw.ustring.sub(syllable, l_syllable, l_syllable)

	if CODA_CONS[last_char] ~= nil then
		remains = mw.ustring.sub(remains, 1, l_syllable - 1)
		coda = CODA_CONS[last_char]
		-- ruunpe: ルウㇴペ or ルウンペ? Tentatively the latter is used, i.e. an
		-- "n" coda that is followed by another syllable becomes full-size ン.
		if last_char == "n" and (next_char ~= nil and next_char ~= "") then
			coda = CONVERSION_TABLE['nn']
		end
	elseif CODA_VARA[last_char] ~= nil then
		remains = mw.ustring.sub(remains, 1, l_syllable - 1)
		-- The r/h coda is chosen by the preceding vowel. Fold case and strip
		-- the accent first: CODA_VARA is keyed by plain lower-case vowels, so
		-- an accented vowel would otherwise yield nil and break the
		-- concatenation below.
		local preceding = mw.ustring.lower(mw.ustring.sub(syllable, l_syllable - 1, l_syllable - 1))
		preceding = ACCENT_CONVERSION_TABLE[preceding] or preceding
		coda = CODA_VARA[last_char][preceding]
		if coda == nil then
			error("cannot find katakana for coda ‘" .. last_char .. "’ in syllable: ‘" .. syllable .. "’")
		end
	end

	-- Replace an accented nucleus with its base vowel and remember the accent.
	local accented_flag = false
	local l_remains = mw.ustring.len(remains)
	local nucleus = mw.ustring.sub(remains, l_remains, l_remains)
	local base_vowel = ACCENT_CONVERSION_TABLE[nucleus]
		or ACCENT_CONVERSION_TABLE[mw.ustring.lower(nucleus)]
	if base_vowel ~= nil then
		accented_flag = true
		remains = mw.ustring.sub(remains, 1, l_remains - 1) .. base_vowel
	end

	-- Look up the pair as written first, then case-folded.
	local kana = CONVERSION_TABLE[remains] or CONVERSION_TABLE[mw.ustring.lower(remains)]
	if kana == nil then
		error("cannot find katakana for CV pair: ‘" .. remains .. "’")
	end

	local converted = kana .. coda

	if accented_flag then
		-- NOTE(review): the empty prefix and single-space suffix look like
		-- placeholders for accent markup; confirm the intended strings.
		converted = "" .. converted .. " "
	end
	return converted
end

--- Convert one romanized Ainu word to katakana.
-- Input that contains anything other than letters, the apostrophe and the
-- ignored punctuation ("-", "=", ".") is returned unchanged; this is how
-- text that is already in kana passes through.
-- @param temp the word to convert
-- @return the katakana string, or `temp` itself when it is not convertible
-- Errors raised by convert_syllable propagate to the caller.
local function do_convert(temp)
	-- Extensibility has to be kept in mind:
	--   * "N" as the symbol for ン
	--   * proper nouns such as Pawci-Kamuy
	--   * accent marks and other special notation: %u and %l cover them;
	--     %a is unusable because it also matches hiragana and katakana.
	local ignore_chars = "%-=."
	-- Anchored so that the WHOLE word must consist of accepted characters.
	local valid_pattern = "^[%u%l'" .. ignore_chars .. "]+$"

	-- TODO: hotne = ホッネ or ホㇳネ?
	-- TODO: wan e-tu = ワㇴ エト゚ or ワネト゚?

	-- Checking only whether the first character is in CONVERSION_TABLE turned
	-- out to be too hasty, hence the pattern test.
	if not mw.ustring.match(temp, valid_pattern) then
		return temp
	end

	-- Normalize: drop "-", "." and "=". The word is deliberately NOT
	-- lower-cased here because upper-case "N" is significant.
	local stripped = mw.ustring.gsub(temp, "[" .. ignore_chars .. "]", "")
	if stripped == "" then
		-- Nothing but ignored punctuation: there is nothing to convert.
		return temp
	end

	-- Split into characters once so that later steps can index directly.
	local chars = {}
	for ch in mw.ustring.gmatch(stripped, ".") do
		chars[#chars + 1] = ch
	end
	local n_chars = #chars

	-- TODO: iyayiraykere = イヤィイラィケレ or イヤイラィケレ? Tentatively the latter.

	-- Number the syllables by their vowels; a consonant directly before a
	-- vowel is that syllable's onset. Case is folded so that capitalised
	-- words are segmented like lower-case ones.
	local group_ids = {}
	local syllable_count = 1
	for i = 1, n_chars do
		if in_values(mw.ustring.lower(chars[i]), VOWELS) then
			local before = chars[i - 1]
			if before ~= nil and in_values(mw.ustring.lower(before), CONSONANTS) then
				group_ids[i - 1] = syllable_count
			end
			group_ids[i] = syllable_count
			syllable_count = syllable_count + 1
		end
	end

	-- Fill codas: any character still unassigned joins the syllable before it.
	for i = 1, n_chars do
		if group_ids[i] == nil then
			group_ids[i] = group_ids[i - 1]
		end
	end

	-- Convert each run of characters that share a syllable number.
	local pieces = {}
	local current_group_id = 1
	local head = 1
	for i = 1, n_chars do
		if group_ids[i] ~= current_group_id then
			current_group_id = group_ids[i]
			local content = table.concat(chars, "", head, i - 1)
			pieces[#pieces + 1] = convert_syllable(content, chars[i])
			head = i
		end
	end
	-- The last syllable has no following character.
	pieces[#pieces + 1] = convert_syllable(table.concat(chars, "", head, n_chars))

	return table.concat(pieces)
end

-- local function valid_ainu_word(word) --    -- TODO: -- end

--- Convert a single word directly, without going through a frame.
-- @param word the romanized word
-- @return the result of do_convert for `word`
function export.debug(word)
	local kana = do_convert(word)
	return kana
end

--- Template entry point: convert every positional argument to katakana.
-- Each argument is split on spaces and converted word by word; the words are
-- rejoined with an ideographic space and the arguments with "／". A missing
-- or empty argument falls back to the current page title.
-- @param frame the frame object of the #invoke call; the arguments are read
--              from its parent frame
-- @return the joined katakana string
function export.convert(frame)
	-- Get args
	local params = {
		[1] = {list = true, allow_holes = true}
	}
	-- getParent is a method and must be called before reading .args.
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Do conversion
	local items = {}
	for i = 1, math.max(args[1].maxindex, 1) do
		local original_str = args[1][i]
		if not original_str or original_str == "" then
			-- getCurrentTitle is a function returning the title object.
			original_str = mw.title.getCurrentTitle().text
		end

		local converted_words = {}
		for word in mw.text.gsplit(original_str, " ") do
			table.insert(converted_words, do_convert(word))
		end
		table.insert(items, table.concat(converted_words, "　"))
	end

	return table.concat(items, "／")
end

-- Hand the table of public functions back to the caller of the module.
return export