-- Module:Hani-sortkey/data/core

local u = mw.ustring.char

-- Table returned by this module; every section below adds one field to it.
local data = {}

-- Codepoint ranges (start, end).
-- Note: does not use subtables to save memory.
-- The list is flat: odd positions hold a range start and the following even
-- position holds its end. A lone codepoint is written as a pair with equal
-- start and end (see 0x3007).
data.ranges = {
	0x3007, 0x3007,
	0x3400, 0x4DBF,
	0x4E00, 0x9FFF,
	0xF900, 0xFA6D,
	0xFA70, 0xFAD9,
	0x20000, 0x2A6DF,
	0x2A700, 0x2B739,
	0x2B740, 0x2B81D,
	0x2B820, 0x2CEA1,
	0x2CEB0, 0x2EBE0,
	0x2EBF0, 0x2EE5D,
	0x2F800, 0x2FA1D,
	0x30000, 0x3134A,
	0x31350, 0x323AF
}
-- Cache the number of array entries (twice the number of ranges) so that
-- consumers do not need to recompute the length.
data.ranges.n = #data.ranges

-- Characters not included in Unicode, which must be described using IDS.
-- Keys are the IDS strings; values are a radical followed by a two-digit
-- number (presumably the residual stroke count used in the sort key --
-- confirm against the consuming module).
data.unsupported = {
	["⿰丿丨"] = "丿01",
	["⿱𠆢𬼽"] = "人03",
	["⿱⿻丅⿱冖⿰丶丶双"] = "冖08",
	["⿱⿻丅⿱冖⿰丶丶令"] = "冖09",
	["⿰十⿺专丶"] = "十05",
	["⿰土肅"] = "土13",
	["⿰⿸声耳殳"] = "士14",
	["⿻㇒夫"] = "大02",
	["⿰女人"] = "女02",
	["⿱女子"] = "女03",
	["⿱𡩧⿺進⿰貝招"] = "宀37",
	["⿰扌幸"] = "手08",
	["⿰扌𦍒"] = "手09",
	-- NOTE(review): this value looks like two keys run together ("手10" and
	-- "車06"); every other entry is a single radical plus two digits. Left
	-- unchanged here -- confirm the intended value.
	["⿰车匡"] = "手10車06",
	["⿱犬一"] = "犬01",
	["⿰男也"] = "田05",
	["⿰纟恋"] = "糹10",
	["⿱䒑合"] = "艸06",
	["⿰⿳⿰SIR木阝"] = "邑11",
	["⿳⻗人𰆊"] = "雨04",
	["⿱成龙"] = "龍06",
	["⿱成龍"] = "龍06",
}

-- IDS characters paired to the number of characters which must follow them.
data.ids = {
	["⿰"] = 2,	-- left-to-right
	["⿱"] = 2,	-- above-to-below
	["⿲"] = 3,	-- left-to-middle and right
	["⿳"] = 3,	-- above-to-middle and below
	["⿴"] = 2,	-- full surround
	["⿵"] = 2,	-- surround from above
	["⿶"] = 2,	-- surround from below
	["⿷"] = 2,	-- surround from left
	["⿸"] = 2,	-- surround from upper left
	["⿹"] = 2,	-- surround from upper right
	["⿺"] = 2,	-- surround from lower left
	["⿻"] = 2,	-- overlaid
	["⿼"] = 2,	-- surround from right
	["⿽"] = 2,	-- surround from lower right
	["⿾"] = 1,	-- horizontal reflection
	["⿿"] = 1,	-- rotation
	-- Subtraction is a binary operator in the Unicode IDS grammar (only
	-- reflection and rotation are unary), so two operands follow it.
	["㇯"] = 2	-- subtraction
}

-- Replacement strings for enclosed/compatibility symbols: each key is a single
-- symbol and each value is the plain character(s) it stands for. Further
-- entries are added to this table by add_sequences and add_radicals.
-- NOTE(review): presumably applied to input before sort keys are generated;
-- the consumer is not visible in this file.
data.preconvert = {
	-- Enclosed CJK Letters and Months
	["㈠"] = "一", ["㈡"] = "二", ["㈢"] = "三", ["㈣"] = "四", ["㈤"] = "五",
	["㈥"] = "六", ["㈦"] = "七", ["㈧"] = "八", ["㈨"] = "九", ["㈩"] = "十",
	["㈪"] = "月", ["㈫"] = "火", ["㈬"] = "水", ["㈭"] = "木", ["㈮"] = "金",
	["㈯"] = "土", ["㈰"] = "日", ["㈱"] = "株", ["㈲"] = "有", ["㈳"] = "社",
	["㈴"] = "名", ["㈵"] = "特", ["㈶"] = "財", ["㈷"] = "祝", ["㈸"] = "労",
	["㈹"] = "代", ["㈺"] = "呼", ["㈻"] = "学", ["㈼"] = "監", ["㈽"] = "企",
	["㈾"] = "資", ["㈿"] = "協", ["㉀"] = "祭", ["㉁"] = "休", ["㉂"] = "自",
	["㉃"] = "至", ["㉄"] = "問", ["㉅"] = "幼", ["㉆"] = "文", ["㉇"] = "箏",
	["㊀"] = "一", ["㊁"] = "二", ["㊂"] = "三", ["㊃"] = "四", ["㊄"] = "五",
	["㊅"] = "六", ["㊆"] = "七", ["㊇"] = "八", ["㊈"] = "九", ["㊉"] = "十",
	["㊊"] = "月", ["㊋"] = "火", ["㊌"] = "水", ["㊍"] = "木", ["㊎"] = "金",
	["㊏"] = "土", ["㊐"] = "日", ["㊑"] = "株", ["㊒"] = "有", ["㊓"] = "社",
	["㊔"] = "名", ["㊕"] = "特", ["㊖"] = "財", ["㊗"] = "祝", ["㊘"] = "労",
	["㊙"] = "秘", ["㊚"] = "男", ["㊛"] = "女", ["㊜"] = "適", ["㊝"] = "優",
	["㊞"] = "印", ["㊟"] = "注", ["㊠"] = "項", ["㊡"] = "休", ["㊢"] = "写",
	["㊣"] = "正", ["㊤"] = "上", ["㊥"] = "中", ["㊦"] = "下", ["㊧"] = "左",
	["㊨"] = "右", ["㊩"] = "医", ["㊪"] = "宗", ["㊫"] = "学", ["㊬"] = "監",
	["㊭"] = "企", ["㊮"] = "資", ["㊯"] = "協", ["㊰"] = "夜", ["㋿"] = "令和",
	-- CJK Compatibility
	["㍻"] = "平成", ["㍼"] = "昭和", ["㍽"] = "大正", ["㍾"] = "明治",
	["㍿"] = "株式会社",
	-- Enclosed Ideographic Supplement
	["🈐"] = "手", ["🈑"] = "字", ["🈒"] = "双", ["🈔"] = "二", ["🈕"] = "多",
	["🈖"] = "解", ["🈗"] = "天", ["🈘"] = "交", ["🈙"] = "映", ["🈚"] = "無",
	["🈛"] = "料", ["🈜"] = "前", ["🈝"] = "後", ["🈞"] = "再", ["🈟"] = "新",
	["🈠"] = "初", ["🈡"] = "終", ["🈢"] = "生", ["🈣"] = "販", ["🈤"] = "声",
	["🈥"] = "吹", ["🈦"] = "演", ["🈧"] = "投", ["🈨"] = "捕", ["🈩"] = "一",
	["🈪"] = "三", ["🈫"] = "遊", ["🈬"] = "左", ["🈭"] = "中", ["🈮"] = "右",
	["🈯"] = "指", ["🈰"] = "走", ["🈱"] = "打", ["🈲"] = "禁", ["🈳"] = "空",
	["🈴"] = "合", ["🈵"] = "満", ["🈶"] = "有", ["🈷"] = "月", ["🈸"] = "申",
	["🈹"] = "割", ["🈺"] = "営", ["🈻"] = "配", ["🉀"] = "本", ["🉁"] = "三",
	["🉂"] = "二", ["🉃"] = "安", ["🉄"] = "点", ["🉅"] = "打", ["🉆"] = "盗",
	["🉇"] = "勝", ["🉈"] = "敗", ["🉐"] = "得", ["🉑"] = "可", ["🉠"] = "福",
	["🉡"] = "祿", ["🉢"] = "壽", ["🉣"] = "喜", ["🉤"] = "囍", ["🉥"] = "財",
}

-- Adds one data.preconvert entry per codepoint in the inclusive range
-- from..to. The first codepoint maps to `offset .. char`, and the number
-- goes up by one for each following codepoint.
--   from, to: first and last codepoint of the run
--   offset:   number assigned to the first codepoint
--   char:     suffix appended after the number
local function add_sequences(from, to, offset, char)
	local number = offset
	for codepoint = from, to do
		data.preconvert[u(codepoint)] = number .. char
		number = number + 1
	end
end

-- Numbered symbol runs, registered in order as { from, to, offset, char }:
--   U+32C0..U+32CB become "1月" .. "12月"
--   U+3358..U+3370 become "0点" .. "24点"
--   U+33E0..U+33FE become "1日" .. "31日"
local numbered_runs = {
	{ 0x32C0, 0x32CB, 1, "月" },
	{ 0x3358, 0x3370, 0, "点" },
	{ 0x33E0, 0x33FE, 1, "日" },
}
for index = 1, #numbered_runs do
	local run = numbered_runs[index]
	add_sequences(run[1], run[2], run[3], run[4])
end

-- Radical characters as a sequence; add_radicals looks entries up here by
-- numeric index. The trailing comment on each row gives the index range.
data.radicals = {
	"一", "丨", "丶", "丿", "乙", "亅", "二", "亠", "人", "儿", -- 1-10
	"入", "八", "冂", "冖", "冫", "几", "凵", "刀", "力", "勹", -- 11-20
	"匕", "匚", "匸", "十", "卜", "卩", "厂", "厶", "又", "口", -- 21-30
	"囗", "土", "士", "夂", "夊", "夕", "大", "女", "子", "宀", -- 31-40
	"寸", "小", "尢", "尸", "屮", "山", "巛", "工", "己", "巾", -- 41-50
	"干", "幺", "广", "廴", "廾", "弋", "弓", "彐", "彡", "彳", -- 51-60
	"心", "戈", "戶", "手", "支", "攴", "文", "斗", "斤", "方", -- 61-70
	"无", "日", "曰", "月", "木", "欠", "止", "歹", "殳", "毋", -- 71-80
	"比", "毛", "氏", "气", "水", "火", "爪", "父", "爻", "爿", -- 81-90
	"片", "牙", "牛", "犬", "玄", "玉", "瓜", "瓦", "甘", "生", -- 91-100
	"用", "田", "疋", "疒", "癶", "白", "皮", "皿", "目", "矛", -- 101-110
	"矢", "石", "示", "禸", "禾", "穴", "立", "竹", "米", "糸", -- 111-120
	"缶", "网", "羊", "羽", "老", "而", "耒", "耳", "聿", "肉", -- 121-130
	"臣", "自", "至", "臼", "舌", "舛", "舟", "艮", "色", "艸", -- 131-140
	"虍", "虫", "血", "行", "衣", "襾", "見", "角", "言", "谷", -- 141-150
	"豆", "豕", "豸", "貝", "赤", "走", "足", "身", "車", "辛", -- 151-160
	"辰", "辵", "邑", "酉", "釆", "里", "金", "長", "門", "阜", -- 161-170
	"隶", "隹", "雨", "靑", "非", "面", "革", "韋", "韭", "音", -- 171-180
	"頁", "風", "飛", "食", "首", "香", "馬", "骨", "高", "髟", -- 181-190
	"鬥", "鬯", "鬲", "鬼", "魚", "鳥", "鹵", "鹿", "麥", "麻", -- 191-200
	"黃", "黍", "黑", "黹", "黽", "鼎", "鼓", "鼠", "鼻", "齊", -- 201-210
	"齒", "龍", "龜", "龠" -- 211-214
}

-- Registers radical variant forms in data.preconvert.
--   radicals: table mapping a variant character to an index into
--             data.radicals; each variant is stored with the character found
--             at that index as its replacement.
local function add_radicals(radicals)
	local preconvert, canonical = data.preconvert, data.radicals
	for variant, number in pairs(radicals) do
		preconvert[variant] = canonical[number]
	end
end

-- Kangxi radicals
-- The Kangxi Radicals block is contiguous (U+2F00..U+2FD5) and ordered by
-- radical number, so the codepoint at offset (n - 1) from U+2F00 is radical n.
-- The mapping is generated from that rule; it yields the same
-- ["⼀"] = 1 .. ["⿕"] = 214 pairs as listing them by hand.
do
	local KANGXI_FIRST = 0x2F00
	local kangxi = {}
	for number = 1, #data.radicals do
		kangxi[u(KANGXI_FIRST + number - 1)] = number
	end
	add_radicals(kangxi)
end

-- CJK Radicals Supplement
-- Each variant form maps to the index in data.radicals of the radical that
-- replaces it.
add_radicals{
	["⺀"] = 3, ["⺁"] = 27, ["⺂"] = 5, ["⺃"] = 5, ["⺄"] = 5,
	["⺅"] = 9, ["⺆"] = 13, ["⺇"] = 16, ["⺈"] = 18, ["⺉"] = 18,
	["⺊"] = 25, ["⺋"] = 26, ["⺌"] = 42, ["⺍"] = 42, ["⺎"] = 43,
	["⺏"] = 43, ["⺐"] = 43, ["⺑"] = 43, ["⺒"] = 49, ["⺓"] = 52,
	["⺔"] = 58, ["⺕"] = 58, ["⺖"] = 61, ["⺗"] = 61, ["⺘"] = 64,
	["⺙"] = 66, ["⺛"] = 71, ["⺜"] = 72, ["⺝"] = 74, ["⺞"] = 78,
	["⺟"] = 80, ["⺠"] = 83, ["⺡"] = 85, ["⺢"] = 85, ["⺣"] = 86,
	["⺤"] = 87, ["⺥"] = 87, ["⺦"] = 90, ["⺧"] = 93, ["⺨"] = 94,
	["⺩"] = 96, ["⺪"] = 103, ["⺫"] = 109, ["⺬"] = 113, ["⺭"] = 113,
	-- ⺲ (CJK RADICAL NET TWO) is a form of 网, radical 122, like the
	-- other NET/MESH forms next to it; it is not a form of 目 (109).
	["⺮"] = 118, ["⺯"] = 120, ["⺰"] = 120, ["⺱"] = 122, ["⺲"] = 122,
	["⺳"] = 122, ["⺴"] = 122, ["⺵"] = 122, ["⺶"] = 123, ["⺷"] = 123,
	["⺸"] = 123, ["⺹"] = 125, ["⺺"] = 129, ["⺻"] = 129, ["⺼"] = 130,
	["⺽"] = 134, ["⺾"] = 140, ["⺿"] = 140, ["⻀"] = 140, ["⻁"] = 141,
	["⻂"] = 145, ["⻃"] = 146, ["⻄"] = 146, ["⻅"] = 147, ["⻆"] = 148,
	["⻇"] = 148, ["⻈"] = 149, ["⻉"] = 154, ["⻊"] = 157, ["⻋"] = 159,
	["⻌"] = 162, ["⻍"] = 162, ["⻎"] = 162, ["⻏"] = 163, ["⻐"] = 167,
	["⻑"] = 168, ["⻒"] = 168, ["⻓"] = 168, ["⻔"] = 169, ["⻕"] = 170,
	["⻖"] = 170, ["⻗"] = 173, ["⻘"] = 174, ["⻙"] = 178, ["⻚"] = 181,
	["⻛"] = 182, ["⻜"] = 183, ["⻝"] = 184, ["⻞"] = 184, ["⻟"] = 184,
	["⻠"] = 184, ["⻡"] = 185, ["⻢"] = 187, ["⻣"] = 188, ["⻤"] = 194,
	["⻥"] = 195, ["⻦"] = 196, ["⻧"] = 197, ["⻨"] = 199, ["⻩"] = 201,
	["⻪"] = 205, ["⻫"] = 210, ["⻬"] = 210, ["⻭"] = 211, ["⻮"] = 211,
	["⻯"] = 212, ["⻰"] = 212, ["⻱"] = 213, ["⻲"] = 213, ["⻳"] = 213
}

-- Return the assembled table as this module's value.
return data