-- Module:Unicode data/testcases

local tests = require("Module:UnitTests")

local m_str_utils = require("Module:string utilities") local m_Unicode_data = require("Module:Unicode data")

local cp = m_str_utils.codepoint local len = m_str_utils.len local toNFC = mw.ustring.toNFC local u = m_str_utils.char

--- Wrap a whitespace string in one leading and one trailing space for display.
-- NOTE(review): as written the gsub swaps a space for what looks like the same
-- space, i.e. a no-op; presumably the replacement (and the wrapper strings)
-- once carried markup or a non-breaking space -- confirm against the wiki page.
-- @param whitespace string to display
-- @return the padded string (a single value; the gsub count is discarded)
local function show_whitespace(whitespace)
	local substituted = whitespace:gsub(" ", " ")
	return ' ' .. substituted .. ' '
end

--- Build the label used to identify a code point in test output.
-- Code points that m_Unicode_data.is_printable rejects are shown as "U+XXXX"
-- only. Printable ones are shown as "U+XXXX: <character>", where the character
-- is replaced by a hexadecimal character reference if NFC normalization would
-- change it, passed through show_whitespace if it is whitespace, and prefixed
-- with a dotted circle if it is combining.
-- @param codepoint numeric code point
-- @return display string
local function show(codepoint)
	if not m_Unicode_data.is_printable(codepoint) then
		return ("U+%04X"):format(codepoint)
	end

	local glyph = u(codepoint)
	if toNFC(glyph) ~= glyph then
		-- The literal character would not survive normalization; use a reference.
		glyph = ("&#x%X;"):format(codepoint)
	end
	if m_Unicode_data.is_whitespace(codepoint) then
		glyph = show_whitespace(glyph)
	end
	if m_Unicode_data.is_combining(codepoint) then
		glyph = "◌" .. glyph
	end

	return ("U+%04X: %s"):format(codepoint, glyph)
end

--- Format a script code surrounded by single spaces.
-- NOTE(review): `char` is passed to format, but the template has only one
-- placeholder, so only `sc` appears in the result; presumably markup that
-- used both values was lost -- confirm.
-- @param char character (currently not rendered)
-- @param sc   script code
-- @return formatted string
local function tag(char, sc)
	local template = ' %s '
	return template:format(sc, char)
end

--- Identity function: hands back every argument it receives, unchanged.
-- Used as the default `display` transform in test_lookup.
local function return_all(...)
	return ...
end

--- Register a test for one `lookup_*` function of Module:Unicode data.
-- The test is stored in `tests` under "test_lookup_<what>". For every example
-- it compares the function's result with the expected value, both passed
-- through `display`, labelling the row with show(codepoint).
-- @param what     suffix of the function name, e.g. "category"
-- @param examples list handed to self:iterate; each entry supplies the
--                 code point and the expected result
-- @param display  optional transform applied to actual and expected values;
--                 defaults to return_all
local function test_lookup(what, examples, display)
	local lookup_name = "lookup_" .. what
	-- Resolved once, at registration time.
	local lookup = m_Unicode_data[lookup_name]
	local render = display or return_all

	local function check_example(self, codepoint, expected)
		self:equals(show(codepoint), render(lookup(codepoint)), render(expected))
	end

	tests["test_" .. lookup_name] = function (self)
		self:iterate(examples, check_example)
	end
end

-- Examples for lookup_category: each entry pairs a code point with the
-- two-letter General Category string the lookup is expected to return.
-- Entries given as hex literals rather than `cp "..."` are code points that
-- are awkward to type literally (controls, combining marks, separators,
-- surrogates, unassigned and private-use code points).
test_lookup(	"category",	{		{ cp "\t", "Cc" },		{ cp " ", "Zs" },		{ cp "[",  "Ps" },		{ cp "]",  "Pe" },		{ cp "^",  "Sk" },		{ cp "A",  "Lu" },		{ 0x00AD,             "Cf" },		{ cp "¾",  "No" },		{ cp "«",  "Pi" },		{ cp "»",  "Pf" },		{ 0x0300,             "Mn" },		{ 0x0488,             "Me" },		{ cp "٣",  "Nd" },		{ cp "子", "Lo" },		{ cp "ᾮ",  "Lt" },		{ 0x1B44,             "Mc" },		{ cp "∈",  "Sm" },		{ cp "‿",  "Pc" },		{ cp "↹",  "So" },		{ cp "⸗",  "Pd" },		{ cp "Ⅷ", "Nl" },		{ 0x2028,              "Zl" },		{ 0x2029,              "Zp" },		{ cp "ゞ",  "Lm" },		{ 0xD800,              "Cs" },		{ cp "￡",  "Sc" },		{ 0xFFFF,              "Cn" },		{ 0x100000,            "Co" },	})

-- Examples for lookup_name: code point paired with the expected name string.
-- Several entries expect the empty string.
test_lookup(
	"name",
	{
		{ 0x0000, "" },
		{ 0x007F, "" },
		{ 0x00C1, "LATIN CAPITAL LETTER A WITH ACUTE" },
		{ 0x0300, "COMBINING GRAVE ACCENT" },
		{ 0x0378, "" },
		{ 0x1B44, "BALINESE ADEG ADEG" },
		{ 0x1F71, "GREEK SMALL LETTER ALPHA WITH OXIA" },
		{ 0x3555, "CJK UNIFIED IDEOGRAPH-3555" },
		{ 0xAC01, "HANGUL SYLLABLE GAG" },
		{ 0xD5FF, "HANGUL SYLLABLE HEH" },
		{ 0xDC00, "" },
		{ 0xEEEE, "" },
		{ 0xFDD1, "" },
		{ 0xFFFD, "REPLACEMENT CHARACTER" },
		{ 0xFFFF, "" },
		{ 0x1F4A9, "PILE OF POO" },
		{ 0xE0000, "" },
		{ 0xF0F0F, "" },
		{ 0x10FFFF, "" },
	}
)

-- Examples for lookup_script: each entry pairs a code point with the
-- four-letter script code the lookup is expected to return.
test_lookup(	"script",	{		{ cp "[", "Zyyy" },		{ cp "A", "Latn" },		{ cp "一", "Hani" },		{ 0x0300,            "Zinh" },		{ cp "ώ", "Grek" },		{ cp "ὦ", "Grek" },		{ cp "Ж", "Cyrl" },		{ cp "Ѹ", "Cyrl" },		{ cp "ꙑ", "Cyrl" },		{ cp "ა", "Geor" },		{ cp "Ⴀ", "Geor" },		{ cp "ⴀ", "Geor" },		{ cp "!", "Zyyy" },		{ 0x2F82B,           "Hani" },	})

-- Examples for lookup_block: code point paired with the expected block name.
test_lookup(
	"block",
	{
		{ 0x0064, "Basic Latin" },
		{ 0x030B, "Combining Diacritical Marks" },
		{ 0x03A3, "Greek and Coptic" },
		{ 0x3175, "Hangul Compatibility Jamo" },
		{ 0xAC01, "Hangul Syllables" },
		{ 0x10FFFF, "Supplementary Private Use Area-B" },
	}
)

-- Examples for lookup_image: code point paired with the expected file title.
-- The display callback must keep the title in its output. Returning a
-- constant (the previous "" result) makes actual and expected always equal,
-- so the test could never fail.
-- NOTE(review): the callback presumably once wrapped the title in file-link
-- wikitext to show a thumbnail; the bare title is the minimal form that keeps
-- the comparison meaningful -- restore the markup if desired.
test_lookup(
	"image",
	{
		{ 0x203D, "Interrobang.svg" },
		{ 0x30A2, "Japanese Katakana A.svg" },
		{ 0x0B85, "Tamil-alphabet-அஅ.svg" },
	},
	function (image_title)
		return image_title
	end)

--- Register a test for one `is_*` predicate of Module:Unicode data.
-- The test is stored in `tests` under "test_is_<what>". For every example it
-- compares the predicate's result with the expected value, labelling the row
-- with show(codepoint).
-- @param what     suffix of the predicate name, e.g. "combining"
-- @param examples list handed to self:iterate; each entry supplies the
--                 code point and the expected result
local function test_is(what, examples)
	local predicate_name = "is_" .. what
	-- Resolved once, at registration time.
	local predicate = m_Unicode_data[predicate_name]

	local function check_example(self, codepoint, expected)
		self:equals(show(codepoint), predicate(codepoint), expected)
	end

	tests["test_" .. predicate_name] = function (self)
		self:iterate(examples, check_example)
	end
end

-- Examples for is_assigned.
test_is(
	"assigned",
	{
		{ 0x0061, true },
		{ 0x0378, false },
		{ 0x40000, false },
	}
)

-- Examples for is_combining.
test_is(
	"combining",
	{
		{ cp "`", false },
		{ 0x0300, true },
		{ 0x0378, false },
		{ 0xDC00, false },
	}
)

-- Examples for is_printable.
test_is(
	"printable",
	{
		{ 0x0000, false },
		{ 0x0020, true },
		{ 0x0061, true },
	}
)

-- Examples for is_whitespace.
test_is(
	"whitespace",
	{
		{ 0x0020, true },
		{ 0x0061, false },
	}
)

--- Test get_block_range: a known block name yields its first and last code
-- points, and an unknown name yields nothing.
-- The method definition previously had no parameter list, which does not parse.
function tests:test_get_block_range()
	-- Render a (low, high) pair as "U+XXXX–U+XXXX", or "nil" when no range
	-- was returned.
	local function display_block_range(low, high)
		if low == nil then
			return "nil"
		end
		return ("U+%04X–U+%04X"):format(low, high)
	end

	self:iterate(
		{
			{ "Basic Latin", { 0x0000, 0x007F } },
			-- { nil, nil } is an empty table, so unpack yields no values.
			{ "blah", { nil, nil } },
		},
		function (self, block_name, block_range)
			self:equals(
				block_name,
				display_block_range(m_Unicode_data.get_block_range(block_name)),
				display_block_range(unpack(block_range)))
		end)
end

--- Test is_valid_pagename against a few page names.
-- The method definition previously had no parameter list, which does not parse.
function tests:test_is_valid_pagename()
	self:iterate(
		{
			{ "#", false },
			{ " ", false },
			{ "word", true },
		},
		function (self, pagename, validity)
			-- A lone whitespace character goes through show_whitespace for
			-- the row label; anything else is shown as-is.
			local displayed_pagename
			if len(pagename) == 1
					and m_Unicode_data.is_whitespace(cp(pagename)) then
				displayed_pagename = show_whitespace(pagename)
			else
				displayed_pagename = pagename
			end
			self:equals(
				displayed_pagename,
				m_Unicode_data.is_valid_pagename(pagename),
				validity)
		end)
end

--- Test get_entry_title for code points whose entries live under
-- "Unsupported titles/".
-- The method definition previously had no parameter list, which does not parse.
function tests:test_get_entry_title()
	self:iterate(
		{
			{ cp "#", "Number sign" },
			{ cp " ", "Space" },
		},
		function (self, codepoint, entry_title)
			-- NOTE(review): the empty-string concatenations are kept as found;
			-- presumably they once held link markup around both values -- confirm.
			self:equals(
				show(codepoint),
				"" .. m_Unicode_data.get_entry_title(codepoint) .. "",
				"Unsupported titles/" .. entry_title .. "")
		end)
end

-- Log names of functions that don't have testcases yet.
-- The loop must start on its own line: when it shared a line with the comment
-- above, the whole loop was part of the comment and never ran.
-- This has to run before the test keys are renamed, because it looks tests up
-- by their "test_<function name>" key.
for k, v in pairs(m_Unicode_data) do
	if type(k) == "string" and type(v) == "function" and not tests["test_" .. k] then
		mw.log(k)
	end
end

-- Give every test a more readable key: "test_<name>" becomes "test <name>".
-- NOTE(review): assumes sortedPairs iterates over a snapshot of the keys, so
-- adding and removing keys inside the loop is safe -- confirm in Module:table.
for k, v in require "Module:table".sortedPairs(tests) do
	if type(k) == "string" then
		-- %1 re-inserts the captured name. Without it every key was rewritten
		-- to the same string "test ", so the tests overwrote one another and
		-- only one survived.
		local new_k = k:gsub("^test_(.+)$", "test %1")
		if new_k ~= k then
			tests[k] = nil
			tests[new_k] = v
		end
	end
end

return tests