-- Module:vi/sandbox

---Lexicographic tools for Vietnamese language text.

-- Language object for Vietnamese ("vi"), obtained from Module:languages.
-- It is passed as `lang` when p.allSpellings builds links.
local lang = require("Module:languages").getByCode("vi")

-- Local alias for mw.ustring.find.
local find = mw.ustring.find

-- Export table; returned at the end of the module.
local p = {}

---Converts the given text to traditional tone marks.
-- For every word, a final two-letter sequence such as "oá" or "uỷ" (tone mark
-- on the second vowel) is rewritten to "óa" or "ủy" (tone mark on the first).
-- @param text  The text to convert, or a frame object whose first positional
--              argument is the text.
-- @return The converted text.
function p.toTraditionalTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- Built once per call rather than once per word.
	local traditional = {
		["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa",
		["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe",
		["uý"] = "úy", ["uỳ"] = "ùy", ["uỷ"] = "ủy", ["uỹ"] = "ũy", ["uỵ"] = "ụy",
	}
	return (mw.ustring.gsub(text, "%a+", function (word)
		-- Words made of "qu" plus a (toned) "y" are left untouched. The
		-- initial may be capitalised, otherwise "Quý" would become "Qúy".
		if mw.ustring.match(word, "^[Qq]u[yýỳỷỹỵ]$") then
			return word
		end
		-- Pairs that are not keys of the table are kept unchanged by gsub.
		return (mw.ustring.gsub(word, "%a%a$", traditional))
	end))
end

---Converts the given text to reformed tone marks.
-- For every word, a final two-letter sequence such as "óa" or "ủy" (tone mark
-- on the first vowel) is rewritten to "oá" or "uỷ" (tone mark on the second).
-- @param text  The text to convert, or a frame object whose first positional
--              argument is the text.
-- @return The converted text.
function p.toReformedTones(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local reformed_pairs = {
		["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ",
		["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ",
		["úy"] = "uý", ["ùy"] = "uỳ", ["ủy"] = "uỷ", ["ũy"] = "uỹ", ["ụy"] = "uỵ",
	}

	-- Rewrites the last two letters of a single word; pairs that are not keys
	-- of the table are kept unchanged by gsub.
	local function reform_word(word)
		local rewritten = mw.ustring.gsub(word, "%a%a$", reformed_pairs)
		return rewritten
	end

	-- Only the first result of gsub (the string) is returned.
	local output = mw.ustring.gsub(text, "%a+", reform_word)
	return output
end

---Generate alternative orthographies.
-- Runs the spelling through p.toTraditionalTones and p.toReformedTones and
-- collects the distinct results in that order.
-- @param main_spelling  The spelling to transform, or a frame object whose
--                       first positional argument is the spelling and whose
--                       `link` argument plays the role of makeLinks.
-- @param makeLinks      If truthy, each spelling is replaced by the result of
--                       Module:links' full_link for Vietnamese.
-- @return When invoked with a frame, the spellings joined with "/";
--         otherwise a sequence (array) of spellings.
function p.allSpellings(main_spelling, makeLinks)
	local frame = nil
	if type(main_spelling) == "table" then
		frame = main_spelling
		main_spelling, makeLinks = frame.args[1], frame.args.link
	end

	local xformers = {
		p.toTraditionalTones,
		p.toReformedTones,
	}

	-- The "already seen" set is kept apart from the result so that the
	-- returned table is a clean sequence without extra string keys.
	local spellings = {}
	local seen = {}
	for _, xformer in ipairs(xformers) do
		local alt_spelling = xformer(main_spelling)
		if not seen[alt_spelling] then
			seen[alt_spelling] = true
			spellings[#spellings + 1] = alt_spelling
		end
	end

	if makeLinks then
		local m_links = require("Module:links")
		for k, term in ipairs(spellings) do
			spellings[k] = m_links.full_link({lang = lang, term = term})
		end
	end

	if frame then
		return table.concat(spellings, "/")
	end
	return spellings
end

---Unicode codepoints for combining Vietnamese tone marks.
-- A string made of the five combining characters below. p.removeDiacritics
-- places it inside a character class to strip tone marks from NFD text.
p.combiningToneMarks = mw.ustring.char(
	0x300,  -- à
	0x301,  -- á
	0x303,  -- ã
	0x309,  -- ả
	0x323   -- ạ
)

---Unicode codepoints for combining Vietnamese accent marks.
-- A string made of the three combining characters below. p.removeDiacritics
-- places it inside a character class to strip accent marks from NFD text.
p.combiningAccentMarks = mw.ustring.char(
	0x302,  -- â
	0x306,  -- ă
	0x31b   -- ơ
)

---Strips Vietnamese diacritical marks from the given text.
-- When called from Lua, the three flags are plain truthy/falsy values.
-- When called with a frame object, the text is the first positional argument
-- and the flags are derived from the frame arguments documented below; each
-- flag is on when its argument is absent or is numerically equal to 1.
-- @param tones    Set to “0” to leave tone marks intact.
-- @param accents  Set to “0” to leave accent marks intact.
-- @param đ        Set to “0” to leave “Đ” and “đ” intact.
-- @return The text, recomposed to NFC after the requested marks are removed.
function p.removeDiacritics(text, toneMarks, accentMarks, stroke)
	if type(text) == "table" then
		local args = text.args
		local function enabled(value)
			return not value or tonumber(value) == 1
		end
		text = args[1]
		toneMarks = enabled(args.tones)
		accentMarks = enabled(args.accents)
		stroke = enabled(args["đ"])
	end

	-- Decompose so that every mark becomes a separate combining character.
	local decomposed = mw.ustring.toNFD(text)

	if toneMarks then
		local tone_class = "[" .. p.combiningToneMarks .. "]"
		decomposed = mw.ustring.gsub(decomposed, tone_class, "")
	end

	if accentMarks then
		local accent_class = "[" .. p.combiningAccentMarks .. "]"
		decomposed = mw.ustring.gsub(decomposed, accent_class, "")
	end

	if stroke then
		-- The stroke of Đ/đ is not a combining mark, so map the letters directly.
		local plain_d = {["Đ"] = "D", ["đ"] = "d"}
		decomposed = mw.ustring.gsub(decomposed, "[Đđ]", plain_d)
	end

	return mw.ustring.toNFC(decomposed)
end

---Vietnamese letters for use in comp.
-- p.compWord ranks a character by its position in this string, so the order
-- of the letters here is the sort order. p.ruby and p.classifierCategories
-- also embed the string in character classes to recognise Vietnamese letters.
p.letters = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"

---Compare two syllables according to Vietnamese dictionary sorting order.
-- Letters are ranked by their position in p.letters; characters that are not
-- in p.letters are skipped.
-- @param word1  First word.
-- @param word2  Second word.
-- @return true if word1 sorts before word2, false otherwise.
function p.compWord(word1, word2)
	-- A word that starts with the other word sorts after it. Plain-text find
	-- returns a 1-based start index, so a prefix match is reported as 1
	-- (comparing against 0 could never succeed).
	if mw.ustring.find(word1, word2, 1, true) == 1 then
		return false
	end
	if mw.ustring.find(word2, word1, 1, true) == 1 then
		return true
	end

	local letter_class = "[" .. p.letters .. "]"
	local func1, static1, var1 = mw.ustring.gmatch(word1, letter_class)
	local func2, static2, var2 = mw.ustring.gmatch(word2, letter_class)
	-- Walk both words letter by letter in lockstep.
	while true do
		local c1 = func1(static1, var1)
		local c2 = func2(static2, var2)
		if c1 == nil or c2 == nil then
			break
		end
		local idx1 = mw.ustring.find(p.letters, c1, 1, true)
		local idx2 = mw.ustring.find(p.letters, c2, 1, true)
		if idx1 and idx2 then
			if idx1 < idx2 then
				return true
			end
			if idx1 > idx2 then
				return false
			end
		end
	end

	-- No ranked letter differed: fall back to plain string comparison.
	return word1 < word2
end

---Compare two strings according to Vietnamese dictionary sorting order.
-- The texts are compared word by word (runs of letters). The first pair of
-- differing words decides the result: first without tone marks, then
-- case-insensitively, then exactly.
-- @param text1  First text.
-- @param text2  Second text.
-- @return true if text1 sorts before text2, false otherwise. Equal texts
--         yield false, so the function can serve as a sort comparator.
function p.comp(text1, text2)
	if text1 == text2 then
		return false
	end

	local func1, static1, var1 = mw.ustring.gmatch(text1, "%a+")
	local func2, static2, var2 = mw.ustring.gmatch(text2, "%a+")
	while true do
		local word1 = func1(static1, var1)
		local word2 = func2(static2, var2)

		if word1 == nil or word2 == nil then
			if word1 == nil and word2 == nil then
				-- Both texts ran out of words together, i.e. they differ only
				-- in non-letter characters. Returning true here would make
				-- comp(a, b) and comp(b, a) both true; fall through to the
				-- plain comparison below instead.
				break
			end
			-- The text that runs out of words first sorts first.
			return word1 == nil
		end

		if word1 ~= word2 then
			local lower1 = mw.ustring.lower(word1)
			local lower2 = mw.ustring.lower(word2)
			local noTones1 = p.removeDiacritics(lower1, true, false, false)
			local noTones2 = p.removeDiacritics(lower2, true, false, false)

			-- Compare base letters.
			if noTones1 ~= noTones2 then
				return p.compWord(noTones1, noTones2)
			end

			-- Compare letters case-insensitively.
			if lower1 ~= lower2 then
				return p.compWord(lower1, lower2)
			end

			-- Compare letters including tones.
			return p.compWord(word1, word2)
		end
	end

	return text1 < text2
end

-- pruby variable for phien thiet hyperlinks (used by p.readings and p.ruby)
-- p.readings sets it to "link" or "nocolor" around its calls to p.ruby and
-- then resets it to an empty table. p.ruby compares it against those two
-- strings to pick an output format; any other value selects the
-- <rb>/<rt> ruby markup.
local pruby = {}

---Abbreviations and text for Han tu references (used by p.createRefTag)
---Beer parlour/2018/December
-- Keys are the abbreviations looked up by p.createRefTag; values are the
-- citation text substituted for them.
p.refAbbreviations = {
	tdcndg = "Nguyễn (2014)",
	tdcntd = "Nguyễn et al. (2009)",
	gdhn = "Trần (2004)",
	btcn = "Hồ (1976)",
	bonet = "Bonet (1899)",
	genibrel = "Génibrel (1898)",
	taberd = "Taberd & Pigneau de Béhaine (1838)",
}

---Creates a ref tag containing Template:vi-ref.
---Expands abbreviations using p.refAbbreviations.
-- @param ref  A reference abbreviation (a key of p.refAbbreviations) or any
--             other string, which is then used verbatim. It also becomes the
--             `name` attribute of the ref tag.
-- @return The result of frame:extensionTag for a named <ref>.
function p.createRefTag(ref)
	local refFullName = p.refAbbreviations[ref] or ref
	-- mw.getCurrentFrame is a function and has to be called to obtain the
	-- frame object that extensionTag is a method of.
	return mw.getCurrentFrame():extensionTag{
		name = "ref",
		args = {
			name = ref,
		},
		-- NOTE(review): the format string was empty, which dropped
		-- refFullName and produced a ref without content. The call to
		-- Template:vi-ref is reconstructed from the description above;
		-- confirm the template's parameter layout.
		content = mw.ustring.format("{{vi-ref|%s}}", refFullName),
	}
end

---Template:vi-readings
-- Builds one output line per reading style (Hán Việt, Nôm, unconfirmed Hán
-- Nôm) that has at least one reading.
-- Each list parameter is a comma-separated string. An individual reading may
-- be followed by " - " and a semicolon-separated list of reference names,
-- each of which is turned into a ref tag by p.createRefTag.
-- @param hanviet     Hán Việt readings, or a frame object; in the latter case
--                    all parameters are read from the parent frame's
--                    arguments (hanviet/hv, nom/n, rs/sort,
--                    phienthiet/phth/fanqie, reading/readings).
-- @param nom         Nôm readings.
-- @param rs          Sort key; defaults to the current page title.
-- @param phienthiet  Phiên thiết entries matched by position to the Hán Việt
--                    readings; each is "<characters> <readings>".
-- @param reading     Unconfirmed readings.
-- @return The generated lines joined with newlines.
function p.readings(hanviet, nom, rs, phienthiet, reading)
	-- getCurrentTitle and getParent are functions and must be called.
	local pagename = mw.title.getCurrentTitle().text
	if type(hanviet) == "table" then
		local args = hanviet:getParent().args
		hanviet, nom, rs, phienthiet, reading =
			args.hanviet or args.hv,
			args.nom or args.n,
			args.rs or args.sort,
			args.phienthiet or args.phth or args.fanqie,
			args.reading or args.readings
	end

	local lines = {}
	local styles = {
		{
			link = "Hán Việt",
			cat = "Vietnamese Han tu",
			list = hanviet and mw.text.split(hanviet, "%s*,%s*"),
			phienthiet = phienthiet and mw.text.split(phienthiet, "%s*,%s*")
		},
		{
			link = "chữ Nôm|Nôm",
			cat = "Vietnamese Nom",
			list = nom and mw.text.split(nom, "%s*,%s*"),
		},
		{
			link = "Hán Nôm",
			cat = "Vietnamese Han characters with unconfirmed readings",
			list = reading and mw.text.split(reading, "%s*,%s*")
		},
	}

	for _, style in ipairs(styles) do
		if style.list and #style.list > 0 and #style.list[1] > 0 then
			local readings = style.list
--			table.sort(readings, p.comp)
			for j, entry in ipairs(readings) do
				-- Split "reading - refs" into its two halves, if present.
				local term, refs = entry, nil
				local a, b = mw.ustring.match(entry, "(.-)%s*%-%s*(.+)")
				if a then
					term, refs = a, b
				end

				local spellings = p.allSpellings(term, true)
				readings[j] = table.concat(spellings, "/")

				-- Linking of "切" to "fanqie" for English explanation
				if style.phienthiet and style.phienthiet[j] then
					-- pruby selects the output format of p.ruby; it is reset
					-- to an empty table after every call.
					pruby = "link"
					local ruby = p.ruby(mw.ustring.match(mw.text.trim(style.phienthiet[j]), "(%a+) +(.+)"))
					pruby = {}
					if ruby then
						pruby = "nocolor"
						local suffix = p.ruby("切", "thiết")
						pruby = {}
						readings[j] = mw.ustring.format("%s (%s%s)",
							readings[j], ruby, suffix)
					end
				end

				-- References
				if refs then
					for ref_name in mw.text.gsplit(refs, "%s*;%s*") do
						readings[j] = readings[j] .. p.createRefTag(ref_name)
					end
				end
			end

			if #readings > 0 then
				local sortkey = rs or pagename
				local joined = table.concat(readings, ", ")
				-- NOTE(review): the format string had three placeholders for
				-- five arguments, so style.link was printed as bare text and
				-- style.cat / sortkey were silently discarded. The wikilink
				-- and category markup are reconstructed from the arguments
				-- being passed; any wrapper markup around the page name
				-- appears to be lost as well and is not restored here.
				table.insert(lines, mw.ustring.format(
					" %s : [[%s]] readings: %s [[Category:%s|%s]] ",
					pagename, style.link, joined, style.cat, sortkey))
			end
		end
	end

	return table.concat(lines, "\n")
end

---Template:vi-ruby
-- Pairs each annotatable character of `characters` with the next entry of
-- `readings` and formats the pair according to the module-level `pruby` mode.
-- A character is annotated when it is a letter for mw.ustring ("%a") but not
-- an ASCII alphanumeric, or when it is a "*" placeholder replaced from `alts`.
-- @param characters  The characters to annotate, or a frame object; in the
--                    latter case the parameters come from the parent frame
--                    (1, 2, mark, alts/ids) and `mark` defaults to the
--                    current page title.
-- @param readings    Readings separated by anything that is not in
--                    p.letters. If nil/false, `characters` is returned as is.
-- @param mark        Optional character to emphasise.
-- @param alts        Optional sequence of replacements for "*" placeholders.
-- @return The formatted string.
function p.ruby(characters, readings, mark, alts)
	if type(characters) == "table" then
		-- getParent and getCurrentTitle are functions and must be called.
		local args = characters:getParent().args
		characters, readings, mark, alts =
			args[1] or "",
			args[2] or "",
			args.mark or mw.title.getCurrentTitle().text,
			((args.alts and mw.text.split(args.alts, "%s+")) or
				(args.ids and mw.text.split(args.ids, "%s+")) or
				{})
	end
	if not readings then
		return characters
	end
	-- Lua callers (e.g. p.readings) pass no alts; without this default a "*"
	-- in `characters` would index nil.
	alts = alts or {}

	readings = mw.text.split(readings, "[^" .. p.letters .. "]+")
	local result = {}
	local character_idx = 1
	local alt_idx = 1
	for character in mw.ustring.gmatch(characters, ".") do
		local is_alt = false
		if character == "*" and alts[alt_idx] then
			character = alts[alt_idx]
			is_alt = true
			alt_idx = alt_idx + 1
		end

		if is_alt or (mw.ustring.match(character, "^%a$") and not character:match("^%w$")) then
			local reading = readings[character_idx]
			if mark and character == mark then
				character = mw.ustring.format(" %s ", character)
				reading = mw.ustring.format(" %s ", reading)
			end

			-- NOTE(review): the "link" format has two placeholders for four
			-- arguments; markup seems to have been lost from these strings.
			-- They are reproduced unchanged.
			if pruby == 'link' then
				character = mw.ustring.format(
					"  %s ( %s ) ",
					character, character, reading, reading)
			elseif pruby == 'nocolor' then
				character = mw.ustring.format(
					"  %s ( %s ) ",
					character, reading)
			else
				character = mw.ustring.format(
					" <rb> %s </rb><rp>(</rp><rt> %s </rt><rp>)</rp> ",
					character, reading)
			end
			character_idx = character_idx + 1
		end

		table.insert(result, character)
	end

	return mw.ustring.format(" %s ", table.concat(result))
end

---Strips everything but Han characters from the current page title.
-- NOTE(review): the function looks unfinished: both locals are computed and
-- then discarded, and nothing is returned. Only the syntax is repaired here
-- (parameter list on the definition, call parentheses on getCurrentTitle).
function p.hantutab()
	-- Every character outside the listed ranges is removed from the title.
	local hantu = mw.ustring.gsub(mw.title.getCurrentTitle().text, '[^一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏]', '')
	local table_head = ' '
end

---Returns the categories indicated by the given wikitext.
-- HTML-like tags are removed from the first frame argument, then every run of
-- characters from p.letters is treated as a classifier, except for the words
-- "l", "vi", "vi-l" and "Vietnamese".
-- @param frame  Frame object; frame.args[1] is the wikitext to scan.
-- @return The formatted entries for all classifiers, concatenated.
function p.classifierCategories(frame)
	local without_tags = mw.ustring.gsub(frame.args[1], "<[^>]->", "")
	local word_pattern = "[" .. p.letters .. "]+"
	local ignored = {
		["l"] = true,
		["vi"] = true,
		["vi-l"] = true,
		["Vietnamese"] = true,
	}

	local cats = {}
	for classifier in mw.ustring.gmatch(without_tags, word_pattern) do
		if not ignored[classifier] then
			-- NOTE(review): the format string is empty, so every entry is ""
			-- and the classifier is ignored; the category markup appears to
			-- be missing.
			cats[#cats + 1] = mw.ustring.format("",
				classifier)
		end
	end
	return table.concat(cats)
end

---Generates the wikitext skeleton of a new Vietnamese entry.
-- Reads the parent frame's arguments: 1/2, 3/4 and 5/6 are up to three
-- (part of speech, definition) pairs; `e` is the etymology ("-" suppresses
-- it); `alt`, `syn`, `ant`, `der` and `also` (plus numbered variants such as
-- `syn2`) add the corresponding sections; `wp`, `pic` and `cat` add a line
-- each.
-- NOTE(review): several string literals look as if template markup was lost
-- from them (empty lines added for `wp`/`pic`/`cat`, bare "* " bullets), and
-- `head`, `reg`, `cls`, `rdp`, `picc`, `nom` and genHead are computed but
-- never used. They are kept unchanged so the markup can be restored later.
-- @param frame  Frame object of the #invoke call.
-- @return The generated wikitext.
function p.new(frame)
	-- getCurrentTitle and getParent are functions and must be called.
	local title = mw.title.getCurrentTitle().subpageText
	local args = frame:getParent().args

	local pos = args[1] or ""
	local def = args[2] or ""
	-- A definition without a part of speech still opens a section ("" maps
	-- to "Noun" in genTitle).
	local pos2 = args[3] or (args[4] and "" or false)
	local def2 = args[4] or ""
	local pos3 = args[5] or (args[6] and "" or false)
	local def3 = args[6] or ""

	local etym = args["e"] or false
	local head = args["head"] or false
	local cat = args["cat"] or false
	local reg = args["reg"] or false
	local cls = args["cls"] or false
	local rdp = args["rdp"] or false
	local nom = args["nom"] or false
	local pic = args["pic"] or false
	local picc = args["picc"] or false

	-- Separate the characters of `nom` with ", " and drop the trailing one.
	nom = nom and mw.ustring.gsub(nom, "(.)", "%1, ") or false
	nom = nom and mw.ustring.gsub(nom, ", $", "") or false

	if args["h"] then
		etym = "."
	end
	if not etym and mw.ustring.match(title, " ") then
		etym = "🇰🇲."
	end
	if etym == "-" then
		etym = false
	end
	if etym then
		etym = mw.ustring.gsub(etym, "^%<", "From")
	end

	-- Maps a part-of-speech code to its section heading; unknown codes are
	-- capitalised.
	local function genTitle(text)
		local pos_title = {
			[""] = "Noun", ["n"] = "Noun", ["pn"] = "Proper noun", ["propn"] = "Proper noun",
			["pron"] = "Pronoun", ["v"] = "Verb", ["vf"] = "Verb", ["a"] = "Adjective",
			["adj"] = "Adjective", ["adv"] = "Adverb", ["prep"] = "Preposition",
			["postp"] = "Postposition", ["conj"] = "Conjunction", ["part"] = "Particle",
			["suf"] = "Suffix", ["prov"] = "Proverb", ["id"] = "Idiom", ["ph"] = "Phrase",
			["intj"] = "Interjection", ["interj"] = "Interjection", ["cl"] = "Classifier",
			["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation",
			["deter"] = "Determiner"
		};
		-- `sub` was an undefined global here; mw.ustring.sub is the
		-- Unicode-aware substring function.
		return pos_title[text] or mw.ustring.upper(mw.ustring.sub(text, 1, 1)) ..
			mw.ustring.sub(text, 2, -1)
	end

	-- Maps a part-of-speech code to a headword-line code.
	local function genHead(text)
		local pos_head = {
			[""] = "noun", ["n"] = "noun", ["pn"] = "proper noun", ["propn"] = "proper noun",
			["v"] = "verb", ["vf"] = "verb form", ["a"] = "adj", ["postp"] = "post",
			["conj"] = "conj", ["part"] = "particle", ["pron"] = "pronoun",
			["prov"] = "proverb", ["id"] = "idiom", ["ph"] = "phrase", ["intj"] = "interj",
			["abb"] = "abbr", ["cl"] = "classifier", ["deter"] = "det"
		};
		return pos_head[text] or text
	end

	-- Builds an optional section: a heading, and for everything but "der"
	-- one bullet per argument class, class2, class3, …
	local function other(class, title, args)
		local code = ""
		if class == "der" and args[class] then
			code = code .. "\n\n===" .. title .. "===\n"
		elseif args[class] then
			code = code .. "\n\n===" .. title .. "===\n* "
			-- `i` used to leak into the global environment.
			local i = 2
			while args[class .. i] do
				code = code .. "\n* "
				i = i + 1
			end
		end
		return code
	end

	local parts = { "==Vietnamese==" }
	local function add(text)
		parts[#parts + 1] = text
	end

	if args["wp"] then
		add("\n")
	end
	if pic then
		add("\n")
	end
	add(other("alt", "Alternative forms", args))
	if etym then
		add("\n\n===Etymology===\n" .. etym)
	end
	add("\n\n===Pronunciation===\n")
	add("\n\n===" .. genTitle(pos) .. "===\n\n\n# " .. def)
	add(other("syn", "=Synonyms=", args))
	add(other("ant", "=Antonyms=", args))
	add(other("der", "=Derived terms=", args))
	add(other("also", "=See also=", args))
	if pos2 then
		add("\n\n===" .. genTitle(pos2) .. "===\n\n\n# " .. def2)
	end
	if pos3 then
		add("\n\n===" .. genTitle(pos3) .. "===\n\n\n# " .. def3)
	end
	if cat then
		add("\n\n")
	end

	return table.concat(parts)
end

---Collects derived-term candidates for the current page.
-- Starts from the positional arguments of the parent frame, adds every entry
-- of Module:vi/vocab-list that contains the page title delimited by spaces
-- (or by the start/end of the entry), removes duplicates and sorts the list
-- with Module:vi-sortkey.
-- NOTE(review): the sorted list is never used; the function returns an empty
-- string. The markup that consumed the list appears to be missing.
-- @param frame  Frame object of the #invoke call.
-- @return An empty string.
function p.new_der(frame)
	-- getCurrentTitle and getParent are functions and must be called.
	local title = mw.title.getCurrentTitle().subpageText
	local data_module = require("Module:vi/vocab-list")
	local args = frame:getParent().args

	-- The title is embedded in patterns below, so its magic characters must
	-- be escaped; otherwise a title containing e.g. "-" or "." would match
	-- the wrong entries or none at all.
	local title_pattern = mw.ustring.gsub(title, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")

	local result = {}
	for _, arg in ipairs(args) do
		table.insert(result, arg)
	end
	for _, word in ipairs(data_module) do
		if find(word, title_pattern)
			and word ~= title
			and not find(word, title_pattern .. "[^ ]")
			and not find(word, "[^ ]" .. title_pattern)
		then
			table.insert(result, word)
		end
	end

	-- Remove duplicates while keeping the first occurrence of each term.
	local hash, res = {}, {}
	for _, element in ipairs(result) do
		if not hash[element] then
			res[#res + 1] = element
			hash[element] = true
		end
	end

	local vi_sort_module = require("Module:vi-sortkey")
	local makeSortKey = require("Module:fun").memoize(vi_sort_module.makeSortKey)
	table.sort(res, function(term1, term2)
		return makeSortKey(term1) < makeSortKey(term2)
	end)

	return ""
end

---Renders the positional arguments of the parent frame as a table of links.
-- Each argument has the form "term" or "term:gloss"; a ":tl" marker anywhere
-- in the argument appends the "(từ láy)" note to the entry.
-- Recognised named arguments: `unfold` (any value) and `title` (defaults to
-- "Derived terms").
-- @param frame  Frame object of the #invoke call.
-- @return The result of Module:columns' create_table.
function p.derived(frame)
	-- NOTE(review): both <span> elements are left unclosed; closing tags may
	-- have been lost. The literal is reproduced unchanged.
	local tu_lay_note = "<span style=\"padding-left:4px; padding-right:4px\"> <span style=\"background:#ffffe0\">(từ láy) "
	local m_columns = require("Module:columns")
	local lang = require("Module:languages").getByCode("vi")
	local m_links = require("Module:links")
	-- getParent is a function and must be called.
	local args = frame:getParent().args
	local result = {}
	local length = 0
	-- These used to be assigned without `local` and leaked into the global
	-- environment.
	local unfold = args["unfold"] and true or false
	local title_text = args["title"] or "Derived terms"

	for _, word in ipairs(args) do
		local is_tu_lay
		word, is_tu_lay = mw.ustring.gsub(word, "%:tl", "")
		local tu_lay = is_tu_lay > 0 and tu_lay_note or ""
		-- Parentheses keep only the string result of gsub.
		local word_parts = mw.text.split((mw.ustring.gsub(word, "\n", "")), ":")
		table.insert(result, m_links.full_link({
			lang = lang,
			term = word_parts[1],
			gloss = word_parts[2] or nil
		}) .. tu_lay)
		length = math.max(mw.ustring.len(word), length)
	end

	return m_columns.create_table(
		-- Fewer columns when the longest entry is wide.
		(length > 15 and 2 or 3),
		result,
		1,
		"#F5F5FF",
		-- The previous "(cond) and false or true" always evaluated to true,
		-- because `false or true` is true. The intended value is the
		-- negation of the condition. Presumably this is the collapse flag;
		-- confirm against Module:columns.
		not (unfold or #result < 7),
		"Derived terms",
		title_text,
		nil,
		nil,
		lang
	)
end

-- Export the table of public functions and constants.
return p