-- Module:Jpan-sortkey

-- Table of functions exposed by this module.
local export = {}

-- Cached library functions.
local concat = table.concat
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local insert = table.insert
local match = mw.ustring.match
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC

-- Character-class fragments loaded from the shared range data.
local range = mw.loadData("Module:ja/data/range")
local kanji_pattern = range.kanji
local ideograph_pattern = range.ideograph
local kana_graph_pattern = range.kana_graph
local latin_pattern = range.latin

-- Functions borrowed from other modules.
local get_by_code = require("Module:languages").getByCode
local Hani_sort = require("Module:Hani-sortkey").makeSortKey
local track = require("Module:debug/track")

-- Remove spaces, hyphens, periods, carets and percent signs from a reading
-- taken from a headword template parameter.
local function strip_reading_markup(reading)
	-- Parentheses drop gsub's second return value (the replacement count).
	return (reading:gsub("[ %-%.^%%]", ""))
end

-- Build a reading for `text` from the arguments of a "-kanjitab" template.
--
-- Positional argument i holds a reading optionally followed by digits giving
-- the number of kanji it spans (default 1). `k<i>` replaces that reading and
-- `o<i>` is appended after it. A reading of "-" contributes nothing.
-- Returns the assembled string, which may be empty.
local function reading_from_kanjitab(text, kanjitab)
	-- Split `text` around every kanji (or 々): non_kanji[1] is whatever
	-- precedes the first kanji, non_kanji[k + 1] is whatever follows the
	-- k-th kanji.
	local non_kanji = {}
	local kanji_border = 1
	-- The empty captures "()" yield the positions just before and just after
	-- the matched character; the callback returns nothing, so gsub is used
	-- purely for iteration.
	gsub(text, "()[" .. kanji_pattern .. "々]()", function(p1, p2)
		insert(non_kanji, sub(text, kanji_border, p1 - 1))
		kanji_border = p2
	end)
	insert(non_kanji, sub(text, kanji_border))

	-- Process readings: one {kana, span} pair per positional argument.
	local readings = {}
	for i in ipairs(kanjitab) do
		local kana, span = match(kanjitab[i] or "", "^([^0-9]*)([0-9]*)$")
		kana = kana ~= "" and kana or nil
		if kana then
			local actual_reading = kanjitab["k" .. i]
			local okurigana = kanjitab["o" .. i]
			readings[i] = {(actual_reading or kana) .. (okurigana or ""), tonumber(span) or 1}
		else
			readings[i] = {nil, 1}
		end
	end

	-- Interleave readings with the non-kanji text that follows the kanji
	-- each reading covers.
	local pieces = {non_kanji[1]}
	local id = 1
	for _, reading in ipairs(readings) do
		id = id + reading[2]
		local kana = reading[1]
		if kana == "-" then
			kana = nil
		end
		insert(pieces, (kana or "") .. (non_kanji[id] or ""))
	end
	return concat(pieces)
end

--- Generate a sort key for `text`.
--
-- Unless `lang` is "mul", the page named by `text` is read and the section
-- for the language's canonical name is scanned for templates that supply a
-- reading, which then replaces `text`. This repeats while the current text
-- still contains a digit or a character matched by the kanji, ideograph,
-- kana_graph or latin ranges, and has not already been looked up. The result
-- is passed through the hiragana sort and then the Hani sort.
--
-- @param text  the term to sort
-- @param lang  language code (compared against "mul" and used to build
--              template names such as lang .. "-head")
-- @param sc    passed through unchanged to the Hira and Hani sort functions
-- @return the value returned by the Hani sort key function
function export.makeSortKey(text, lang, sc)
	local lookup_chars = "[0-9" .. kanji_pattern .. ideograph_pattern .. kana_graph_pattern .. latin_pattern .. "]"

	-- Determine reading.
	local seen_pages, langname = {}
	while lang ~= "mul" and (not seen_pages[text]) and find(text, lookup_chars) do
		-- `repeat ... until true` lets `break` act as "continue" for the
		-- enclosing while loop.
		repeat
			langname = langname or get_by_code(lang):getCanonicalName()
			seen_pages[text] = true

			-- mw.title.new returns nil for an invalid title and getContent
			-- returns nil when there is no page; neither yields a reading.
			local title = mw.title.new(toNFC(text))
			local content = title and title:getContent()
			if not content then
				break
			end
			content = require("Module:utilities").get_section(content, langname, 2)
			if not content then
				break
			end

			local findTemplates = require("Module:template parser").findTemplates

			-- Templates whose second positional argument is the reading.
			local reading_in_arg2 = {
				[lang .. "-head"] = true,
				[lang .. "-pos"] = true,
			}
			-- Templates whose first positional argument is the reading.
			local reading_in_arg1 = {
				[lang .. "-noun"] = true,
				[lang .. "-verb"] = true,
				[lang .. "-adj"] = true,
				[lang .. "-phrase"] = true,
				[lang .. "-verb form"] = true,
				[lang .. "-verb-suru"] = true,
				[lang .. "-see"] = true,
				[lang .. "-see-kango"] = true,
				[lang .. "-gv"] = true,
			}
			local kanjitab_name = lang .. "-kanjitab"

			local kanjitab, br
			for template, args in findTemplates(content) do
				if reading_in_arg2[template] and args[2] then
					text = strip_reading_markup(args[2])
					br = true
					break
				elseif (template == "head" or template == "head-lite") and args[1] == lang then
					-- Use the argument that follows a literal "kana" argument.
					-- NOTE(review): this `break` only leaves the argument
					-- loop, so later templates can still override `text` --
					-- confirm whether that is intended.
					for i, arg in ipairs(args) do
						if arg == "kana" then
							local kana = args[i + 1]
							if kana then
								text = kana
								br = true
								break
							end
						end
					end
				end
				if reading_in_arg1[template] and args[1] then
					text = strip_reading_markup(args[1])
					br = true
					break
				elseif template == kanjitab_name then
					-- Only the first kanjitab on the page is kept.
					kanjitab = kanjitab or args
				end
			end

			-- Fall back to the kanjitab when no headword template gave a
			-- reading.
			if (not br) and kanjitab then
				track{"Jpan-sortkey/kanjitab", "Jpan-sortkey/kanjitab/" .. lang}
				if kanjitab.sortkey then
					text = kanjitab.sortkey
					break
				end
				local sortkey = reading_from_kanjitab(text, kanjitab)
				if sortkey ~= "" then
					text = sortkey
				end
			end
		until true
	end

	-- Use hiragana sort.
	text = require("Module:Hira-sortkey").makeSortKey(text, lang, sc)

	-- Run through Hani sort, to catch any stray kanji. This shouldn't happen
	-- but often does, and we still want to handle them sensibly in the time
	-- before the entry is fixed.
	local ret = Hani_sort(text, lang, sc)
	if not (lang == "mul" or ret == text) then
		track{"Jpan-sortkey/fallback", "Jpan-sortkey/fallback/" .. lang}
	end
	return ret
end

-- Hand the module table back to whoever loads this module.
return export