-- Module:ain-pron

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

-- TODO: x /x/ ?
-- Romanized letters treated as consonants when splitting a word into
-- syllables. The apostrophe (') is included in this list.
local CONSONANTS = { "p", "t", "c", "k", "m", "n", "s", "h", "w", "r", "y", "'" }

-- Romanized letters treated as vowels (syllable nuclei): the five plain
-- vowels followed by their acute-accented forms.
local VOWELS = {
	"a", "i", "u", "e", "o",
	"á", "í", "ú", "é", "ó",
}

-- Maps each romanized letter to the string used for it in the phonemic
-- (/.../) transcription. The apostrophe maps to the empty string, so it
-- does not appear in the phonemic output.
local CONVERSION_TABLE = {
	-- Consonants
	["p"] = "p",
	["t"] = "t",
	["c"] = "t͡ʃ",
	["k"] = "k",
	["m"] = "m",
	["n"] = "n",
	["s"] = "s",
	["h"] = "h",
	["w"] = "w",
	["r"] = "ɾ",
	["y"] = "j",
	["'"] = "",
	-- Vowels
	["a"] = "a",
	["i"] = "i",
	["u"] = "u",
	["e"] = "e",
	["o"] = "o",
	["á"] = "á",
	["í"] = "í",
	["ú"] = "ú",
	["é"] = "é",
	["ó"] = "ó"
}

-- Append Glottal Stop ruunpe -> ruʔunpe / -> teʔeta
-- Maps each romanized letter to the string used for it in the phonetic
-- ([...]) transcription. Unlike the phonemic table, the apostrophe maps to
-- the glottal stop "ʔ".
local CONVERSION_TABLE_PHONETIC = {
	-- Consonants
	["p"] = "p",
	["t"] = "t",
	["c"] = "t͡ʃ",
	["k"] = "k",
	["m"] = "m",
	["n"] = "n",
	["s"] = "s",
	["h"] = "h",
	["w"] = "w",
	["r"] = "ɾ",
	["y"] = "j",
	["'"] = "ʔ",
	-- Vowels
	["a"] = "a",
	["i"] = "i",
	["u"] = "u",
	["e"] = "e",
	["o"] = "o",
	["á"] = "á",
	["í"] = "í",
	["ú"] = "ú",
	["é"] = "é",
	["ó"] = "ó"
}

-- as -> aʃ ( /e.ɾa.mus.ka.ɾe/ -> /e.ɾa.mus.ka.ɾe/ [/e.ɾa.muʃ.ka.ɾe/] )
-- Replacement used in the phonetic transcription when one of these
-- romanized letters is the last character of a syllable.
local SPECIAL_CODA = {
	["s"] = "ʃ",
	["p"] = "p̚",
	["k"] = "k̚",
	["t"] = "t̚",
}

--- Tells whether `item` occurs among the values of a table.
-- Note: this function is declared without `local`, so it is a global.
-- @param item  value to look for (compared with ==)
-- @param items table whose values are scanned with pairs()
-- @return boolean true when at least one value equals `item`
function in_values(item, items)
	local found = false
	for _, candidate in pairs(items) do
		if candidate == item then
			found = true
			break
		end
	end
	return found
end

--- Tells whether `item` is a key of a table.
-- Uses a direct raw lookup instead of scanning every key with pairs(); for
-- an ordinary table the result is the same, but the check is O(1).
-- Note: this function is declared without `local`, so it is a global.
-- @param item  key to look for
-- @param items table to inspect
-- @return boolean true when `items` has an entry stored under `item`
function in_keys(item, items)
	-- A nil key can never be stored in a table, so answer false directly
	-- (rawget would simply return nil for it as well).
	if item == nil then
		return false
	end
	return rawget(items, item) ~= nil
end

--- Converts one romanized syllable to its phonemic IPA string.
-- Every character is looked up in CONVERSION_TABLE and the results are
-- joined in order. Examples: yay > jaj, mur > muɾ, ka > ka. No glottal stop
-- is added here: the apostrophe maps to the empty string in that table.
-- @param syllable string romanized syllable (lower case)
-- @return string concatenated IPA for the syllable
-- Raises an error if a character has no entry in CONVERSION_TABLE.
local function convert_syllable(syllable)
	local pieces = {}
	for char in mw.ustring.gmatch(syllable, ".") do
		local ipa = CONVERSION_TABLE[char]
		if ipa == nil then
			-- Previously this surfaced as "attempt to concatenate a nil
			-- value"; report the offending character instead.
			error("ain-pron: cannot convert character '" .. char
				.. "' in syllable '" .. syllable .. "'")
		end
		pieces[#pieces + 1] = ipa
	end
	return table.concat(pieces)
end

--- Converts one romanized syllable to its phonetic IPA string.
-- Steps:
--   1. If the syllable does not start with a letter from CONSONANTS, an
--      apostrophe is prepended (it becomes "ʔ" via the phonetic table).
--   2. Every character is mapped through CONVERSION_TABLE_PHONETIC.
--   3. If the syllable has more than one character and its last character
--      is a key of SPECIAL_CODA, the IPA for that last character is replaced
--      by the SPECIAL_CODA value.
-- @param syllable string romanized syllable (lower case)
-- @return string IPA for the syllable
-- Raises an error if a character has no entry in CONVERSION_TABLE_PHONETIC.
local function convert_syllable_phonetic(syllable)
	if not in_values(mw.ustring.sub(syllable, 1, 1), CONSONANTS) then
		syllable = "'" .. syllable
	end

	-- Keep one converted piece per romanized character. The coda is then
	-- located by character position, not by the length of the IPA output:
	-- "c" expands to the multi-code-point "t͡ʃ", which used to shift the
	-- index so that e.g. "cip" never received its coda replacement.
	local pieces = {}
	local last_char
	for char in mw.ustring.gmatch(syllable, ".") do
		local ipa = CONVERSION_TABLE_PHONETIC[char]
		if ipa == nil then
			error("ain-pron: cannot convert character '" .. char
				.. "' in syllable '" .. syllable .. "'")
		end
		pieces[#pieces + 1] = ipa
		last_char = char
	end

	-- A single-character syllable has no coda to adjust.
	if #pieces > 1 then
		local special = SPECIAL_CODA[last_char]
		if special ~= nil then
			pieces[#pieces] = special
		end
	end

	return table.concat(pieces)
end

-- local function print_groups(group_ids, temp)
--    local str_buffer = ""
--    for i = 1, #temp do
--         if group_ids[i] ~= nil then
--            str_buffer = str_buffer .. group_ids[i]
--        else
--            str_buffer = str_buffer .. "X"
--        end
--    end
--    print("group_indicies: " .. str_buffer)
--    print("group_contents: " .. temp)
-- end

--- Produces the phonemic transcription of a romanized word.
-- The word is split into syllables, each syllable is passed to
-- convert_syllable, and the results are joined with ".".
--
-- Syllable assignment (one group id per character position):
--   1. Every vowel starts a new group; a consonant immediately before the
--      vowel is put in the same group.
--   2. Any position still unassigned copies the group of the position
--      before it.
--
-- @param romanized string normalized romanized word
-- @return string syllables converted to IPA, separated by "."
local function do_convert(romanized)
	local length = mw.ustring.len(romanized)
	local group_ids = {}

	-- Pass 1: mark each vowel and the consonant directly before it.
	local syllable_count = 1
	local index = 1
	local previous = "" -- character before `char`; empty at the first position
	for char in mw.ustring.gmatch(romanized, ".") do
		if in_values(char, VOWELS) then
			if in_values(previous, CONSONANTS) then
				group_ids[index - 1] = syllable_count
			end
			group_ids[index] = syllable_count
			syllable_count = syllable_count + 1
		end
		previous = char
		index = index + 1
	end

	-- Pass 2: fill codas with the group of the preceding position.
	for position = 1, length do
		if group_ids[position] == nil then
			group_ids[position] = group_ids[position - 1]
		end
	end

	-- Pass 3: cut the word wherever the group id changes.
	-- The scan includes the final position: the earlier `i < length` bound
	-- skipped it, so a word-final vowel forming its own syllable ("io")
	-- was glued to the syllable before it.
	local syllables = {}
	local current_group_id = 1
	local head = 1
	for position = 1, length do
		if group_ids[position] ~= current_group_id then
			current_group_id = group_ids[position]
			-- Skip the empty segment that arises when the very first
			-- position has no group (input not starting with a syllable).
			if position > head then
				local content = mw.ustring.sub(romanized, head, position - 1)
				syllables[#syllables + 1] = convert_syllable(content)
			end
			head = position
		end
	end
	syllables[#syllables + 1] = convert_syllable(mw.ustring.sub(romanized, head, length))

	return table.concat(syllables, ".")
end

-- Substitutions applied, in this order, to the "."-joined phonetic string.
-- Each entry is { pattern, replacement } for mw.ustring.gsub.
local PHONETIC_RULES = {
	{ "n%.k", "ŋ.k" },
	{ "m%.k", "ŋ.k" },
	{ "n%.m", "m.m" },
	{ "n%.p", "m.p" },
	{ "si", "ʃi" },
	{ "u%.ʔa", "u.wa" },
	{ "u%.ʔe", "u.we" },
	{ "u%.ʔo", "u.wo" },
	{ "i%.ʔa", "i.ja" },
	{ "i%.ʔe", "i.je" },
	{ "i%.ʔo", "i.jo" },
	{ "ɾ%.n", "n.n" },
	{ "ɾ%.t", "t.t" },
	{ "ɾ%.ɾ", "n.ɾ" },
}

--- Produces the phonetic transcription of a romanized word.
-- The word is split into syllables exactly as in the phonemic conversion
-- (every vowel starts a group, a directly preceding consonant joins it, and
-- remaining positions copy the previous position's group). Each syllable is
-- passed to convert_syllable_phonetic, the results are joined with ".", and
-- the PHONETIC_RULES substitutions are applied in order.
--
-- @param romanized string normalized romanized word
-- @return string phonetic IPA with "." between syllables
local function convert_phonetic(romanized)
	local length = mw.ustring.len(romanized)
	local group_ids = {}

	-- Pass 1: mark each vowel and the consonant directly before it.
	local syllable_count = 1
	local index = 1
	local previous = "" -- character before `char`; empty at the first position
	for char in mw.ustring.gmatch(romanized, ".") do
		if in_values(char, VOWELS) then
			if in_values(previous, CONSONANTS) then
				-- Char Before is Consonant
				group_ids[index - 1] = syllable_count
			end
			group_ids[index] = syllable_count
			syllable_count = syllable_count + 1
		end
		previous = char
		index = index + 1
	end

	-- Pass 2: fill codas with the group of the preceding position.
	for position = 1, length do
		if group_ids[position] == nil then
			group_ids[position] = group_ids[position - 1]
		end
	end

	-- Pass 3: cut the word wherever the group id changes.
	-- The scan includes the final position: the earlier `i < length` bound
	-- skipped it, so a word-final vowel forming its own syllable ("io")
	-- was glued to the syllable before it.
	local syllables = {}
	local current_group_id = 1
	local head = 1
	for position = 1, length do
		if group_ids[position] ~= current_group_id then
			current_group_id = group_ids[position]
			-- Skip the empty segment that arises when the very first
			-- position has no group (input not starting with a syllable).
			if position > head then
				local content = mw.ustring.sub(romanized, head, position - 1)
				syllables[#syllables + 1] = convert_syllable_phonetic(content)
			end
			head = position
		end
	end
	syllables[#syllables + 1] =
		convert_syllable_phonetic(mw.ustring.sub(romanized, head, length))

	-- TODO: Phonetic Transcription: konkane /kon.ka.ne/ [koŋ.ɡa.ne] / ʔ
	local transcription = table.concat(syllables, ".")
	for _, rule in ipairs(PHONETIC_RULES) do
		-- gsub also returns a match count; the single-target assignment
		-- keeps only the resulting string.
		transcription = mw.ustring.gsub(transcription, rule[1], rule[2])
	end
	return transcription
end

-- local function valid_ainu_word(word)
--    -- TODO:
-- end

--- Template entry point: builds the formatted IPA for one or more Ainu words.
-- Words are read from the positional arguments of the parent frame (the
-- template call). A missing or empty argument falls back to the current
-- page title. Each word is lower-cased, stripped of every character that is
-- not a letter or an apostrophe, and converted to a phonemic item "/.../".
-- A phonetic item "[...]" is added only when it differs from the phonemic
-- string.
-- @param frame Scribunto frame object of the #invoke call
-- @return the value returned by Module:IPA's format_IPA_full
function export.ain_IPA(frame)
	local params = {
		[1] = {list = true, allow_holes = true}
	}

	-- getParent is a method and must be called; its result carries the
	-- arguments given to the template.
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local items = {}

	-- FIXME: IPA(?): /hi.oj.oj/, [hi.oj.oj] → IPA(?): /hi.oj.oj/ [hi.oj.oj]
	for i = 1, math.max(args[1].maxindex, 1) do
		-- TODO: error("") if not valid Ainu word
		local romanized = args[1][i]
		if not romanized or romanized == "" then
			-- getCurrentTitle is a function returning the title object.
			romanized = mw.title.getCurrentTitle().text
		end

		-- Normalize
		-- # Lower
		romanized = mw.ustring.lower(romanized)
		-- # Clear special characters such as "-", ".", "="
		romanized = mw.ustring.gsub(romanized, "[^%u%l']", "")

		local phonemic = do_convert(romanized)
		local phonetic = convert_phonetic(romanized)

		table.insert(items, {pron = "/" .. phonemic .. "/"})
		if phonemic ~= phonetic then
			table.insert(items, {pron = "[" .. phonetic .. "]"})
		end
		-- for word in mw.text.gsplit(args[1][i], " ") do
		--     table.insert(items, {pron = "/" .. do_convert(word) .. "/"})
		-- end
	end

	local lang = require("Module:languages").getByCode("ain")
	return require("Module:IPA").format_IPA_full { lang = lang, items = items }
end

--- Entry point that returns the phonemic conversion of the first argument
-- passed directly on the frame (frame.args[1]), with no normalization.
-- @param frame Scribunto frame object
-- @return string result of do_convert for frame.args[1]
function export.convert(frame)
	return do_convert(frame.args[1])
end

-- Hand the table of exported functions back to whoever loads this module.
return export