-- Module:character list

local m_unicode = require("Module:Unicode data") local m_uni_alias = mw.loadData("Module:Unicode data/aliases") local general_category_data = mw.loadData("Module:Unicode data/category") local script_data = mw.loadData("Module:Unicode data/scripts") local Array = require("Module:array")

local char_to_script = require("Module:scripts").charToScript local u = require("Module:string utilities").char

-- Lookup table taken from the `long_names` field of the category data module;
-- indexed below by a General Category value to obtain its long name.
local general_category_aliases = general_category_data.long_names

-- Table of functions returned at the end of this module.
local export = {}

-- Version string interpolated into the sentence built by export.show_header.
local Unicode_version = "15.0"

-- Gathers per-character data for every code point in the inclusive range
-- block_start..block_end.
--
-- filterer is optional; when given, a code point is only included if
-- filterer(cp) returns a truthy value.
--
-- Returns an array of tables, in ascending code point order, each holding:
--   cp       the code point itself
--   aliases  m_uni_alias[cp] (may be nil)
--   name, script, category, image
--            results of m_unicode.lookup_<field>(cp)
local function get_data_for_code_point_range(block_start, block_end, filterer)
	local properties = { "name", "script", "category", "image" }
	local entries = {}
	for code_point = block_start, block_end do
		if not filterer or filterer(code_point) then
			local entry = { aliases = m_uni_alias[code_point] }
			for _, property in ipairs(properties) do
				entry[property] = m_unicode["lookup_" .. property](code_point)
			end
			entry.cp = code_point
			entries[#entries + 1] = entry
		end
	end
	return entries
end

-- Copied from Module:Unicode data.
--
-- Binary search for the range containing `codepoint`.
--
-- `ranges` is an array of tables whose first two elements are the inclusive
-- lower and upper bounds of a range. The search assumes the array is sorted by
-- bound and non-overlapping (not checked here). The element count is read
-- from `ranges.length` when present, otherwise obtained from Module:table.
--
-- Returns the matching range table and its index, or nil plus the last index
-- probed (nil when `ranges` is empty).
local function binary_range_search(codepoint, ranges)
	local low, mid, high
	low, high = 1, ranges.length or require("Module:table").length(ranges)
	while low <= high do
		mid = math.floor((low + high) / 2)
		local range = ranges[mid]
		if codepoint < range[1] then
			high = mid - 1
		elseif codepoint <= range[2] then
			return range, mid
		else
			low = mid + 1
		end
	end
	return nil, mid
end

-- The data_module argument must be a table
-- with the fields "ranges" and "singles".
-- If all code points in the inclusive range between start_code_point and
-- end_code_point have the same property value in the data module, or no value,
-- return this value. Returns nil as soon as two different values are seen, and
-- also when no code point in the interval has a value.
local function get_shared_value(start_code_point, end_code_point, data_module)
	local code_point = start_code_point
	local previous_value
	local singles, ranges = data_module.singles, data_module.ranges
	while code_point <= end_code_point do
		local singles_value = singles[code_point]
		if singles_value then
			if previous_value then
				if singles_value ~= previous_value then
					return nil
				end
			else
				previous_value = singles_value
			end
			code_point = code_point + 1
		else
			local range = binary_range_search(code_point, ranges)
			if range then
				-- The third element of a range entry is its property value.
				if previous_value then
					if range[3] ~= previous_value then
						return nil
					end
				else
					previous_value = range[3]
				end
				-- Every code point in this range shares the value, so jump
				-- straight past its upper bound.
				code_point = range[2] + 1
			else
				-- No value recorded for this code point; it does not affect
				-- the result.
				code_point = code_point + 1
			end
		end
	end
	return previous_value
end

-- Builds the wikitext header for the page describing one Unicode block.
--
-- frame.args.block must be the exact name of a block yielded by
-- m_unicode.enum_blocks(); otherwise an error is raised.
--
-- The returned text contains a three-cell navigation table (previous block,
-- current block name, next block), a sentence giving the Unicode version and
-- the block's code point span, and, when every assigned character in the block
-- shares one General Category and/or Script value, a sentence saying so.
--
-- NOTE(review): the wikilink and external-link markup in the format strings
-- below was rebuilt from the argument lists each string is formatted with;
-- confirm the link targets against the deployed module.
function export.show_header(frame)
	local block_name = frame.args.block
	local names = {}
	local i, block_start, block_end
	-- enum_blocks has to be called: it is the call that supplies the iterator
	-- for the generic for loop.
	for j, name, start, ending in m_unicode.enum_blocks() do
		names[j] = name
		if block_name == name then
			i, block_start, block_end = j, start, ending
		end
	end

	if not i then
		-- tostring keeps the message intact when no block argument was given.
		error('Invalid Unicode block name: ' .. tostring(block_name))
	end

	-- Link to a neighbouring block's page, with the arrow on the outer side.
	-- Returns "" when there is no neighbour (first or last block).
	local function appendix_link(name, left_arrow)
		if not name then
			return ""
		end
		return ("[[Appendix:Unicode/%s|%s %s]]"):format(
			name,
			left_arrow and "⟵" or name,
			left_arrow and name or "⟶")
	end

	local general_category = get_shared_value(block_start, block_end, general_category_data)
	local script = get_shared_value(block_start, block_end, script_data)

	-- NOTE(review): the last two arguments are not consumed by any
	-- placeholder (string.format ignores surplus arguments). Presumably they
	-- fed trailing markup that is missing from this copy — confirm and restore.
	local text = (
		'{| style="width: 100%%;"\n' ..
		' | style="width: 30%%; text-align: left;"  | %s\n' ..
		' | style="text-align: center;" | %s \n' ..
		' | style="width: 30%%; text-align: right;"  | %s\n' ..
		' |}\nThis page lists the characters in the “[https://www.unicode.org/charts/PDF/U%04X.pdf %s]” block of the Unicode standard, version ' ..
		Unicode_version .. '. This block covers code points from U+%04X to U+%04X.\n'
	):format(
		appendix_link(names[i - 1], true),
		names[i],
		appendix_link(names[i + 1], false),
		block_start, names[i],
		block_start, block_end,
		names[i],
		names[i]
	)

	if general_category or script then
		text = text .. (' All assigned characters in this block ')
		local items = {}

		if general_category then
			-- gsub returns two values; the local keeps only the string.
			local long_name = general_category_aliases[general_category]:gsub('_', ' ')
			-- No trailing period here: the sentence is terminated once, after
			-- the items have been joined.
			table.insert(items, ('belong to the General Category %s (%s)')
				:format(general_category, long_name))
		end

		if script then
			local alias = script_data.aliases[script]
			local Wikipedia_article
			local script_obj = require("Module:scripts").getByCode(script)
			if script_obj then
				Wikipedia_article = script_obj:getWikipediaArticle()
			else
				Wikipedia_article = alias .. ' script'
			end
			table.insert(items, ('have the Script value %s ([[w:%s|%s]])')
				:format(script, Wikipedia_article, alias))
		end

		text = text .. table.concat(items, " and ") .. "."
	end

	return text
end

-- Builds a wikitable listing every Unicode block that lies entirely inside a
-- code point interval.
--
-- frame.args[1] and frame.args[2] are the inclusive lower and upper bounds,
-- in any form accepted by tonumber. An error is raised when either one is
-- not a number.
--
-- Each row shows the block's first and last code point and a link to the
-- block's page.
-- NOTE(review): the link target prefix "Appendix:Unicode/" was rebuilt from
-- the duplicated `name` argument; confirm against the deployed module.
function export.show_blocks(frame)
	local start_codepoint = tonumber(frame.args[1])
	local end_codepoint = tonumber(frame.args[2])
	if not (start_codepoint and end_codepoint) then
		error("Invalid number specified")
	end

	local result = {
		'{| class="wikitable" style="width: 100%;"\n! width="10%;" | Start\n! width="10%;" | End\n ! Block name\n',
	}

	-- enum_blocks has to be called: it is the call that supplies the iterator
	-- for the generic for loop.
	for _, name, block_start, block_end in m_unicode.enum_blocks() do
		if block_start >= start_codepoint and block_end <= end_codepoint then
			result[#result + 1] = ('|-\n|U+%04X\n|U+%04X\n|[[Appendix:Unicode/%s|%s]]\n')
				:format(block_start, block_end, name, name)
		end
	end

	result[#result + 1] = "|}"
	return table.concat(result)
end

-- Builds a sortable wikitable with one row per assigned code point.
--
-- The interval is chosen either by block name (frame.args.block) or by two
-- numeric bounds (frame.args[1] and frame.args[2]). An error is raised when
-- the block is unknown, a bound is not a number, or neither form is given.
--
-- Columns: code point, image, character, then General Category and Script
-- (each only when the rows do not all share one value), then the name
-- followed by any name aliases.
--
-- NOTE(review): the link, image and span markup in this function was rebuilt
-- from the values each string is formatted with; confirm targets, image size
-- and class names against the deployed module.
function export.show(frame)
	local result = {}
	local args = frame.args
	local block_start, block_end

	if args.block then
		block_start, block_end = m_unicode.get_block_range(args.block)
		if not block_start then
			error("Invalid Unicode block specified")
		end
	elseif args[1] and args[2] then
		block_start, block_end = tonumber(args[1]), tonumber(args[2])
		if not (block_start and block_end) then
			error("Invalid number specified")
		end
	else
		error("Must give a Unicode block or character range")
	end

	-- Wikitext for the "Character" cell of one row.
	local function present_codepoint(codepoint)
		if not m_unicode.is_printable(codepoint) then
			local character = u(codepoint)
			local text = ' (unprintable) '
			-- Only link when the character can form a page title.
			if mw.title.new(character) then
				return "[[" .. character .. "|" .. text .. "]]"
			end
			return text
		end

		local link_target = m_unicode.get_entry_title(codepoint)

		-- Combining characters are shown on a dotted circle (U+25CC).
		local display = ("%s&#%u;"):format(
			m_unicode.is_combining(codepoint) and "&#x25cc;" or "", codepoint)
		if m_unicode.is_whitespace(codepoint) then
			display = "]" .. display .. "["
		end

		local script_code = char_to_script(codepoint)
		if link_target then
			-- Leading colon so that titles in the File or Category namespace
			-- produce a link rather than an embed or a categorisation.
			return ('[[:%s|<span class="%s">%s</span>]]')
				:format(link_target, script_code, display)
		end
		return ('<span class="%s">%s</span>'):format(script_code, display)
	end

	local cps = get_data_for_code_point_range(block_start, block_end, m_unicode.is_assigned)

	-- Image data module covering this interval; linked from the header so the
	-- image assignments can be edited.
	local image_module = ("Module:Unicode data/images/%03X"):format(math.floor(block_start / 0x1000))

	-- Each header cell has to start on its own line to be valid table markup.
	table.insert(result,
		'{| class="wikitable sortable"\n' ..
		'! width="12%" data-sort-type="number" | Code point\n' ..
		'! width="5%" | Image [[' .. image_module .. '|edit]]\n' ..
		'! width="5%"  | Character\n')

	local all_with_same_general_category = Array(cps)
		:all(function(data) return data.category == cps[1].category end)
	local all_with_same_script = Array(cps)
		:all(function(data) return data.script == cps[1].script end)

	if not all_with_same_general_category then
		table.insert(result, ' ! General Category\n')
	end
	if not all_with_same_script then
		table.insert(result, ' ! Script\n')
	end
	table.insert(result, ' ! Name\n')

	for _, data in ipairs(cps) do
		local cp = data.cp

		-- Pieces of the alias text, joined without a separator at the end.
		local alt_parts = {}
		if data.aliases then
			local aliases = {
				correction = {},
				control = {},
				alternate = {},
				figment = {},
				abbreviation = {},
			}
			for _, info in ipairs(data.aliases) do
				-- info[1] is the alias type, info[2] the alias text. A type
				-- without a bucket is skipped instead of raising an error.
				local bucket = aliases[info[1]]
				if bucket then
					bucket[#bucket + 1] = " " .. info[2] .. " "
				end
			end

			for _, name in ipairs(aliases.alternate) do
				alt_parts[#alt_parts + 1] = (' aka %s'):format(name)
			end
			if #aliases.control > 0 then
				alt_parts[#alt_parts + 1] = '; control character name: '
					.. table.concat(aliases.control, " or ")
			end
			for _, name in ipairs(aliases.correction) do
				alt_parts[#alt_parts + 1] = (' Corrected name: %s'):format(name)
			end
			for _, name in ipairs(aliases.figment) do
				alt_parts[#alt_parts + 1] = (' Figment name: %s'):format(name)
			end
			if #aliases.abbreviation > 0 then
				alt_parts[#alt_parts + 1] = ' ('
					.. table.concat(aliases.abbreviation, ", ") .. ')'
			end
		end
		local alt_names = table.concat(alt_parts)

		local current_image = ''
		if data.image then
			-- presumably data.image is a bare file name — TODO confirm
			current_image = ('[[File:%s|50x50px]]'):format(data.image)
		end

		table.insert(result, (
			' |- id="U-%04X"\n' ..
			' | data-sort-value="%u" | U+%04X (%u) \n' ..
			' | %s \n' ..
			' | %s \n'
		):format(cp, cp, cp, cp, current_image, present_codepoint(cp)))

		if not all_with_same_general_category then
			-- gsub returns two values; the local keeps only the string.
			local long_name = general_category_aliases[data.category]:gsub('_', ' ')
			table.insert(result, (' | %s (%s) \n'):format(data.category, long_name))
		end
		if not all_with_same_script then
			table.insert(result, (' | %s (%s) \n')
				:format(data.script, script_data.aliases[data.script]))
		end

		table.insert(result, (' | %s %s\n')
			:format(mw.text.nowiki(data.name), alt_names))
	end

	table.insert(result, ' |}')
	table.insert(result, require("Module:TemplateStyles")("Template:character info/style.css"))

	return table.concat(result)
end

-- Hand the table of entry points (show_header, show_blocks, show) to whoever
-- loads this module.
return export