-- Module:za-pron

local export = {}

local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find local gmatch = m_str_utils.gmatch local gsub = m_str_utils.gsub local lower = m_str_utils.lower local match = m_str_utils.match local reverse = m_str_utils.reverse local upper = m_str_utils.upper

local lang = require("Module:languages").getByCode("za")

-- FIXME: needs rewrite [3 February 2020 (UTC)]
-- FIXME: 老壯文 seems to omit tone marks from new Mandarin borrowings (新壮文)

-- References:
-- https://en.wikipedia.org/wiki/Standard_Zhuang
-- https://baike.baidu.com/item/壮语/7703463
-- 在线学壮文 ("Learn Zhuang writing online") https://web.archive.org/web/0/http://www.gxmyw.com.cn/plus/list.php?tid=21
-- 基础壮文学习系列：壮文标点符号与书写规则 ("Basic Zhuang writing series: Zhuang punctuation and writing rules") https://web.archive.org/web/0/http://www.gxmyw.com.cn/wsxzw/2013/1017/57.html

-- Orthographic syllable initial -> IPA.
-- Looked up by export.convert (IPA scheme) with the lower-cased initial.
-- The empty-string key supplies the value for syllables written without
-- an initial consonant.
-- Key shape as captured in export.convert: [bmfvdnslghrcy]?[gbd]?[vy]?
local initialConv = {
	b   = 'p',
	mb  = 'ɓ',
	m   = 'm',
	f   = 'f',
	v   = 'β',
	by  = 'pʲ',
	my  = 'mʲ',

	d   = 't',
	nd  = 'ɗ',
	n   = 'n',
	l   = 'l',
	s   = 'θ',

	ny  = 'ɲ',
	c   = 'ɕ',
	y   = 'j',

	g   = 'k',
	ng  = 'ŋ',
	r   = 'ɣ',
	gy  = 'kʲ',
	ngv = 'ŋʷ',
	gv  = 'kʷ',

	[''] = 'ʔ',
	h   = 'h',
}

-- Orthographic vowel nucleus -> IPA.
-- Each entry carries two readings, selected in export.convert:
--   alone  : used when the syllable has no written final consonant
--   wfinal : used when a final consonant follows
-- A reading of `false` means no value is given for that position.
--
-- Key shape: [aeiouw][ieu]?[uw]?
-- w/ final only: [aeiouw]e?
-- cannot be w/ final: ai, ei, oi, ui, wi, au, aeu, eu, iu, ou, aw // [aeiouw]e?[iuw]
-- cannot be w/o final: ie, oe, ue // [iou]e
local vowelConv
do
	-- Small constructor so every row reads as (alone, wfinal).
	local function readings(alone, wfinal)
		return { alone = alone, wfinal = wfinal }
	end

	vowelConv = {
		a   = readings('a', 'aː'),
		e   = readings('e', 'eː'),
		i   = readings('i', 'i'),
		o   = readings('o', 'oː'),
		u   = readings('u', 'u'),
		w   = readings('ɯ', 'ɯ'),

		ai  = readings('aːi', false),
		ei  = readings('ei', false),
		oi  = readings('oːi', false),
		ui  = readings('uːi', false),
		wi  = readings('ɯːi', false),

		ae  = readings('ai', 'a'),
		ie  = readings(false, 'iː'),
		oe  = readings(false, 'o'),
		ue  = readings(false, 'uː'),
		we  = readings(false, 'ɯː'),

		au  = readings('aːu', false),
		aeu = readings('au', false),
		eu  = readings('eːu', false),
		iu  = readings('iu', false),
		ou  = readings('ou', false),

		aw  = readings('aɯ', false),
	}
end

-- Orthographic syllable final -> IPA.
-- Looked up by export.convert (IPA scheme). The empty-string key covers open
-- syllables and must map to an empty string, because the result is
-- concatenated straight into the IPA output.
-- Key shape as captured in export.convert: [mnpbtdkg]?g?
local finalConv = {
	['']   = '',
	['m']  = 'm',

	['n']  = 'n',
	['ng'] = 'ŋ',
	['p']  = 'p',
	['b']  = 'p',
	['t']  = 't',
	['d']  = 't',
	['k']  = 'k',
	['g']  = 'k',
}

-- Tone number -> IPA tone letters (IPA scheme of export.convert).
-- The trailing note on each row gives the contour in digits and, where one
-- exists, the tone letter that toneConvToNumbers maps to that number.
-- '7:' is not a written tone: export.convert substitutes it for tone 7 when
-- the converted vowel contains a length mark.
local toneConv = {
	['1']  = '˨˦', -- 24
	['2']  = '˧˩', -- 31 z
	['3']  = '˥', -- 55 j
	['4']  = '˦˨', -- 42 x
	['5']  = '˧˥', -- 35 q
	['6']  = '˧', -- 33 h

	['7']  = '˥', -- 55
	['7:'] = '˧˥', -- 35
	['8']  = '˧', -- 33
}

-- Tone letter -> tone number.
-- Used by fix() as a gsub replacement table for the letters z, j, x and q;
-- the remaining keys are not reached through that call.
local toneConvToNumbers = {
	[''] = '1',
	z = '2',
	j = '3',
	x = '4',
	q = '5',
	h = '6',
}

-- Tone number -> tone letter, the inverse of toneConvToNumbers.
-- Used by the 'hyphenation' scheme of export.convert to restore the written
-- form. Tones 1, 7, 7: and 8 have no tone letter, so they map to the empty
-- string; the value must be a string because it is concatenated.
local toneConvFromNumbers = {
	['1']  = '',
	['2']  = 'z',
	['3']  = 'j',
	['4']  = 'x',
	['5']  = 'q',
	['6']  = 'h',

	['7']  = '',
	['7:'] = '',
	['8']  = '',
}

-- Consonant spellings that differ in the 'old' output scheme.
-- export.convert applies this to both the initial and the final and keeps
-- the modern spelling for anything not listed here.
local consonantConv_1957 = {
	mb  = 'ƃ',
	nd  = 'ƌ',
	ng  = 'ŋ',
	ngv = 'ŋv',
}

-- Vowel spellings that differ in the 'old' output scheme.
-- export.convert uses this as a gsub replacement table for "oe"/"ae" and "w".
local vowelConv_1957 = {
	oe = 'ɵ',
	ae = 'ə',
	w  = 'ɯ',
}

-- Tone number -> tone letter in the 'old' output scheme of export.convert.
-- Tones 1, 7, 7: and 8 are unmarked, so they map to the empty string; the
-- value must be a string because it is concatenated into the syllable.
local toneConv_1957 = {
	['1']  = '',
	['2']  = 'ƨ',
	['3']  = 'з',
	['4']  = 'ч',
	['5']  = 'ƽ',
	['6']  = 'ƅ',

	['7']  = '',
	['7:'] = '',
	['8']  = '',
}

-- Normalise romanised text so that every syllable ends in a tone digit.
--
-- The text is processed one chunk at a time, where a chunk is an optional
-- apostrophe, a run of ASCII letters, and any non-letters that follow. For
-- each chunk:
--   1. the tone letters z/j/x/q are replaced by their tone numbers;
--   2. "|" syllable-boundary markers are inserted (working on the reversed
--      string) and then adjusted for invalid initials and finals;
--   3. a syllable-final "h" becomes "6", and any syllable still ending in a
--      letter receives 7 (ends in p/t/k), 8 (ends in b/d/g but not ng) or 1;
--   4. the boundary markers are removed again.
-- Intermediate states are written to the debug log with mw.log.
--
-- @param text  string to normalise
-- @return      the normalised string; non-letter characters are kept as they
--              were, and anything before the first chunk is dropped
local function fix(text)
	local output = {}

	for chunk in gmatch(text, '\'?[A-Za-z]+[^A-Za-z]*') do
		local apostrophe, word, nonword = match(chunk, '(\'?)([A-Za-z]+)([^A-Za-z]*)')

		word = gsub(word, '[zjxq]', toneConvToNumbers) -- excludes h which is ambiguously tone or consonant

		-- /CV-CV/...=...
		-- /CVC-V/...=...
		-- regex (pattern?) wildcards are greedy from the beginning of the string
		-- so counteract this by reversing the string
		-- so if we look for "([CVC])" it will first match what was originally the last CVC sequence
		-- (or something)
		word = reverse(word)
		word = '|' .. gsub(word, '(g?[mnpbtdkg]?)([ieu]?[uw]?[aeiouwAEIUOUW]+)([vy]?[gbd]?[bmfvdnslghrcyBMFVDNSLGHRCY]?)', '%1%2%3|')
		-- "+" seems to be needed after "[aiueow]"
		-- correct: "daeuz"→"daeuz" wrong: "daeuz"→"da|euz"
		word = reverse(word)
		mw.log('za1＞' .. word)

		-- fix bad initial consonant: "|hya"→"h|ya", "|ngya"→"n|gya"
		word = gsub(word, '(|)([^aiueow])([^aiueow])([^aiueow]?)([aiueow])', function(x, a, b, c, d)
			if not initialConv[lower(a .. b .. c)] then
				return a .. x .. b .. c .. d
			end
		end)
		word = gsub(word, '([aiueow]+)([mnpbtdkg]g?)(|)', function(v, c, x)
			-- if there is a final consonant,
			if c ~= '' then
				-- and vowel sequence is not a sequence that only appears before finals,
				if not match(v, '^[aeiouw]e?$') then
					-- detect valid ...VC sequence at end of string
					return reverse(gsub(reverse(v .. c .. x), '(|)([^aiueow]+)(e?[aeiouw])', '%1%2%3|'))
				end
			end
		end)
		word = gsub(word, '|gvu', 'g|vu')
		mw.log('za2＞' .. word)

		-- A syllable-final "h" is the tone-6 letter.
		word = gsub(word, 'h|', '6|')
		-- Only syllables that still end in a letter (no tone digit yet) match here.
		word = gsub(word, '([A-Za-z]+)|', function(a)
			if match(a, '[ptk]$') then
				return a .. '7|'
			elseif match(a, '[bdg]$') and not match(a, 'ng$') then
				return a .. '8|'
			else
				return a .. '1|'
			end
		end)
		mw.log('za3＞' .. word)

		table.insert(output, apostrophe .. gsub(word, '|', '') .. nonword)
	end

	return table.concat(output)
end

-- Convert romanised Zhuang text into another representation.
--
-- @param text     the string to convert, or a frame-like table, in which case
--                 text, scheme and new_bor are read from text.args[1],
--                 text.args[2] and text.args['new_bor']
-- @param scheme   one of:
--                   'IPA'           IPA transcription wrapped in slashes,
--                                   syllables separated by spaces
--                   'old'           spelling using the *_1957 tables
--                   'hyphenation'   tone letters restored, apostrophes and
--                                   spaces removed, syllables joined by "‧"
--                   'tone_numbers'  syllables followed by their tone digit
--                   'raw_syllables' the syllables as produced by fix()
-- @param new_bor  when truthy, tone 1 is output as tone 5 (IPA and
--                 tone_numbers schemes only)
-- @return         a string, except for 'raw_syllables', which returns the
--                 table of syllable strings
-- Raises an error for an unknown scheme (only reached when the text has at
-- least one syllable), for a syllable that cannot be split into
-- initial/vowel/final/tone, and, in the IPA scheme, for a syllable part that
-- has no IPA mapping.
function export.convert(text, scheme, new_bor)
	if type(text) == "table" then
		text, scheme, new_bor = text.args[1], text.args[2], text.args['new_bor']
	end
	local converted = {}

	-- Leading non-letters are not part of any syllable; some schemes re-attach them.
	local extra_pre = match(text, '^[^A-Za-z]*')

	text = fix(text)

	mw.log('za4＞' .. text)

	for syllable in gmatch(text, '[A-Za-z]+%d[^A-Za-z]*') do
		local initial, vowel, final, tone, extra = match(syllable, '^([BMFVDNSLGHRCYbmfvdnslghrcy]?[gbd]?[vy]?)([AEIOUWaeiouw][ieu]?[uw]?)([mnpbtdkg]?g?)(%d)([^A-Za-z]*)$')
		if not initial then
			-- Without this check the log call below fails with an opaque
			-- "attempt to concatenate a nil value" error.
			error('Unable to parse syllable: ' .. syllable)
		end
		local caps = false
		mw.log('za5＞' .. initial, vowel, final, tone, extra)

		if find(initial .. vowel .. final, '[A-Z]') then
			caps = true
			initial, vowel, final = lower(initial), lower(vowel), lower(final)
		end

		if scheme == 'IPA' then
			local vowel_readings = vowelConv[vowel]
			local ipa_initial = initialConv[initial]
			-- The reading depends on whether a final is written; a false
			-- `alone` reading falls through to the `wfinal` reading.
			local ipa_vowel = vowel_readings and (final == '' and vowel_readings.alone or vowel_readings.wfinal)
			local ipa_final = finalConv[final]
			if not (ipa_initial and ipa_vowel and ipa_final) then
				error('Cannot convert syllable to IPA: ' .. syllable)
			end

			if tone == '7' and find(ipa_vowel, 'ː') then
				tone = '7:'
			elseif new_bor and tone == '1' then
				tone = '5'
			end

			-- Trailing non-letters (`extra`) are not carried into the IPA output.
			table.insert(converted, ipa_initial .. ipa_vowel .. ipa_final .. toneConv[tone])
		elseif scheme == 'old' then
			initial = consonantConv_1957[initial] or initial
			vowel = gsub(vowel, '[oa]e', vowelConv_1957)
			vowel = gsub(vowel, 'w', vowelConv_1957)
			final = consonantConv_1957[final] or final
			tone = toneConv_1957[tone]

			if vowel == 'ə' and final == '' then
				vowel = 'əi'
			elseif vowel == 'aɯ' and final == '' then
				vowel = 'əɯ'
			end

			local spelled = initial .. vowel .. final .. tone .. extra
			if caps then
				spelled = gsub(spelled, '^(.)', upper)
			end

			table.insert(converted, spelled)
		elseif scheme == 'hyphenation' then
			tone = toneConvFromNumbers[tone]

			-- Drop apostrophes from the trailing non-letters.
			extra = gsub(extra, '\'', '')
			local spelled = initial .. vowel .. final .. tone .. extra
			if caps then
				spelled = gsub(spelled, '^(.)', upper)
			end

			table.insert(converted, spelled)
		elseif scheme == 'tone_numbers' then
			if new_bor and tone == '1' then
				tone = '5'
			end

			-- Drop apostrophes from the trailing non-letters.
			extra = gsub(extra, '\'', '')
			-- NOTE(review): the empty literals around `tone` look like
			-- placeholders for wrapper markup that is not visible here — confirm.
			local spelled = initial .. vowel .. final .. '' .. tone .. '' .. extra
			if caps then
				spelled = gsub(spelled, '^(.)', upper)
			end

			table.insert(converted, spelled)
		elseif scheme == 'raw_syllables' then
			table.insert(converted, syllable)
		else
			error('Convert to what representation?')
		end
	end

	if scheme == 'IPA' then
		converted = '/' .. table.concat(converted, ' ') .. '/'
	elseif scheme == 'old' then
		converted = extra_pre .. table.concat(converted, '')
		-- Remove an apostrophe (literal or as the entity &#39;) that directly
		-- follows "6", "Ƅ" or "ƅ".
		converted = mw.ustring.gsub(converted, "([6Ƅƅ])'", "%1")
		converted = mw.ustring.gsub(converted, "([6Ƅƅ])&#39;", "%1")
	elseif scheme == 'hyphenation' then
		converted = gsub(extra_pre .. table.concat(converted, '‧'), ' ', '')
	elseif scheme == 'tone_numbers' then
		converted = extra_pre .. table.concat(converted, '')
	elseif scheme == 'raw_syllables' then
		-- (pass) the table of syllables is returned as is
	end

	return converted
end

-- Template entry point: build the pronunciation section for a Zhuang term.
--
-- Reads parameter 1 (the text) and the boolean `new_bor` from the parent
-- frame; when no text is given, the current page title is used. The result
-- is three items joined by "\n* ": the qualified IPA transcription, the
-- tone-number form and the hyphenation.
--
-- @param frame  the Scribunto frame object of the #invoke call
-- @return       wikitext string
function export.show(frame)
	local params = {
		[1] = { },
		['new_bor'] = { type = "boolean" },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local text, new_bor = args[1], args['new_bor']
	if not text then
		text = mw.title.getCurrentTitle().text
	end

	local ret = {}

	table.insert(
		ret,
		require("Module:accent qualifier").format_qualifiers(lang, {"Standard Zhuang"}) ..
		" " ..
		require("Module:IPA").format_IPA_full {
			lang = lang,
			-- NOTE(review): the value of `items` was missing; this assumes
			-- format_IPA_full takes a list of { pron = ... } tables — confirm
			-- against Module:IPA.
			items = { { pron = export.convert(text, 'IPA', new_bor) } },
		}
	)

	table.insert(
		ret,
		'Tone numbers: ' ..
		export.convert(text, 'tone_numbers', new_bor)
	)

	-- NOTE(review): the trailing empty literal looks like a placeholder for
	-- closing markup that is not visible here — confirm.
	table.insert(
		ret,
		'Hyphenation: ' ..
		export.convert(text, 'hyphenation', new_bor) ..
		''
	)

	return table.concat(ret, '\n* ')
end

-- Report whether the first frame argument is in the modern Latin spelling.
--
-- @param frame  frame object; the text is read from frame.args[1]
-- @return       'y' when the text contains an ASCII letter and none of the
--               special letters listed in the pattern below, otherwise ''
function export.is_latin(frame)
	local text = frame.args[1]

	local has_special_letter = find(text, '[ƂƃƋƌŊŋƏəƟɵƜɯƧƨЗзЧчƼƽƄƅ]')
	if not has_special_letter and find(text, '[A-Za-z]') then
		return 'y'
	end

	-- Either a special letter was found or there are no ASCII letters at all.
	-- CJK is too much of a pain to detect
	return ''
end

return export