-- Module:User:Kiril kovachev/tok-hyph

-- Primary module authorship: Chernorizets (original Bulgarian syllabification code)
-- Port to Lua: Kiril Kovachev
-- Adaptation to Toki Pona: Kiril Kovachev
-- 17 April 2024.

local export = {}

local substring = mw.ustring.sub local rsubn = mw.ustring.gsub local rsplit = mw.text.split local U = mw.ustring.char local lang = require("Module:languages").getByCode("tok") local script = require("Module:scripts").getByCode("Latn")

-- Character class matching the five lowercase vowels a, i, o, e, u.
local hvowels_c = "[aioeu]"

-- U+2027: the separator placed between the syllables of a word.
local HYPH = U(0x2027)

-- NOTE(review): not referenced anywhere in the visible part of this module.
local BREAK_MARKER = "."

-- Variant of rsubn that hands back only the substituted string and drops
-- every other return value (such as the substitution count).
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to its first result.
	return (rsubn(term, foo, bar))
end

-- Returns the portion of `str` that starts and ends at position `index`,
-- i.e. the single character found there.
local function char_at(str, index)
	local first, last = index, index
	return substring(str, first, last)
end

-- Counts the vowels in `word`, ignoring letter case.
-- word: the string to inspect
-- Returns the number of matches of hvowels_c in the lowercased word.
local function count_vowels(word)
	-- hvowels_c lists lowercase vowels only, whereas the syllable splitter
	-- lowercases each character before testing it. Fold case here as well so
	-- that capitalised words (e.g. "Inli") are not under-counted and skipped.
	local lowered = mw.ustring.lower(word)
	-- gsub's second return value is the number of substitutions performed.
	local _, vowel_count = mw.ustring.gsub(lowered, hvowels_c, "")
	return vowel_count
end

-- Reports whether `ch` is exactly one of the lowercase strings
-- "a", "e", "i", "o" or "u". The comparison is case-sensitive; any other
-- value yields false.
local function is_vowel(ch)
	return ch == "a" or ch == "e" or ch == "i" or ch == "o" or ch == "u"
end

-- Main syllabification code

-- Finds the position at which the syllable containing `right_vowel` begins.
-- word: the word being scanned (accepted for the caller's convenience; the
--       computation below only needs the two positions)
-- left_vowel/right_vowel: integer positions of two consecutive vowels
-- Returns the integer position of the first character of the next syllable.
local function find_next_syllable_onset(word, left_vowel, right_vowel)
	local n_cons = right_vowel - left_vowel - 1

	-- No consonants - syllable starts on right_vowel
	if n_cons == 0 then
		return right_vowel
	end

	-- Single consonant between two vowels - starts a syllable
	if n_cons == 1 then
		return left_vowel + 1
	end

	-- Two ("or more") consonants between the vowels.
	-- In Toki Pona, the phonotactics only allow this if the first syllable
	-- ends in a nasal and the second begins in a consonant, so there can only
	-- ever be two consonants, and the sonority break occurs between the two
	-- consonants (so just add 2 to the left vowel).
	local sonority_break = left_vowel + 2

	return sonority_break
end

-- Splits `word` into syllables by walking over its characters and cutting
-- between each pair of consecutive vowels.
-- Returns a table of strings (list), one entry per syllable.
local function syllabify_poly(word)
	local pieces = {}

	local last_vowel = -1 -- -1 means "no vowel encountered yet"
	local onset = 1       -- position where the current syllable starts

	for pos = 1, mw.ustring.len(word) do
		if is_vowel(mw.ustring.lower(char_at(word, pos))) then
			-- From the second vowel onwards there is a syllable break
			-- somewhere between the previous vowel and this one; the first
			-- character after the break opens a new syllable.
			if last_vowel ~= -1 then
				local next_onset = find_next_syllable_onset(word, last_vowel, pos)
				pieces[#pieces + 1] = substring(word, onset, next_onset - 1)
				onset = next_onset
			end
			last_vowel = pos
		end
	end

	-- Whatever remains after the last break is the final syllable.
	pieces[#pieces + 1] = substring(word, onset)

	return pieces
end

-- Syllabifies a single word.
-- word: the word to split
-- Returns the word's syllables joined with HYPH. A word with at most one
-- vowel is returned as-is. Note that an empty word yields an empty table
-- rather than a string.
function export.syllabify_word(word)
	if mw.ustring.len(word) == 0 then
		return {}
	end

	local syllables
	if count_vowels(word) > 1 then
		syllables = syllabify_poly(word)
	else
		-- Zero or one vowel: the whole word is a single syllable.
		syllables = { word }
	end

	return table.concat(syllables, HYPH)
end

-- Syllabifies every space-separated word of `term`.
-- term: a string of one or more words separated by single spaces
-- Returns the term with each word replaced by its syllabified form; the
-- words are re-joined with single spaces, so the original spacing is kept.
function export.syllabify(term)
	local words = rsplit(term, " ")

	local out = {}
	-- ipairs keeps the words in their original order.
	for i, word in ipairs(words) do
		if word == "" then
			-- Empty pieces arise from doubled, leading or trailing spaces.
			-- syllabify_word returns a table for them, which table.concat
			-- would reject, so pass them through as empty strings.
			out[i] = ""
		else
			out[i] = export.syllabify_word(word)
		end
	end
	return table.concat(out, " ")
end

-- Template entry point: syllabifies a term and formats the result.
-- frame: the Scribunto frame object; the arguments are read from its parent
--        frame (the template invocation).
-- The term is the first positional argument. When it is absent, the term
-- falls back to "sitelen" on pages in the Template namespace and to the
-- current page title everywhere else.
-- Returns whatever Module:hyphenation's format_hyphenations produces.
function export.show_syllabification(frame)
	local params = {
		[1] = {},
	}
	-- Both of these must be *called*: getCurrentTitle() yields the title
	-- object and getParent() yields the parent frame holding the arguments.
	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or title.nsText == "Template" and "sitelen" or title.text

	local syllabification = export.syllabify(term)
	-- Cut the syllabified string back into a list at each HYPH separator.
	local syllables = rsplit(syllabification, HYPH)
	return require("Module:hyphenation").format_hyphenations({
		lang = lang,
		hyphs = { { hyph = syllables } },
		sc = script,
		caption = "Syllabification",
	})
end

return export