-- Module:User:Nyarukoseijin/ja-see

local export = {}

local find = mw.ustring.find local len = mw.ustring.len local gsub = mw.ustring.gsub local match = mw.ustring.match local gmatch = mw.ustring.gmatch local split = mw.text.split

local m_ja = require('Module:ja')

--- Collects the values produced by iterating `gmatch(s, pattern)` into an array.
-- Only the first value of each match is kept (the first capture, or the whole
-- match when the pattern has no captures).
local function gmatch_array(s, pattern)
	local result = {}
	for e in gmatch(s, pattern) do
		table.insert(result, e)
	end
	return result
end

--- Applies `f` to each element of the array `arr`, in order.
-- Results that are nil are dropped, so the returned array may be shorter than `arr`.
local function map(arr, f)
	local result = {}
	for _, e in ipairs(arr) do
		local fe = f(e)
		if fe ~= nil then
			table.insert(result, fe)
		end
	end
	return result
end

--- Returns a new array with the elements of `arr` for which `f` returns a truthy value.
local function filter(arr, f)
	local result = {}
	for _, e in ipairs(arr) do
		if f(e) then
			table.insert(result, e)
		end
	end
	return result
end

--- Returns true if some element of the array `arr` equals `item` (compared with `==`).
local function contains(arr, item)
	for _, e in ipairs(arr) do
		if e == item then
			return true
		end
	end
	return false
end

--- Concatenates an array of arrays into a single flat array (one level only).
local function flatten(arrs)
	local result = {}
	for _, arr in ipairs(arrs) do
		for _, e in ipairs(arr) do
			table.insert(result, e)
		end
	end
	return result
end

--- Wraps the one-argument function `f` so that each distinct argument is computed once.
-- `f` should be str->str. A nil result is never cached, so `f` is called again
-- for that argument on the next lookup.
local function memoize(f)
	local results = {}
	return function(s)
		local cached = results[s]
		if cached == nil then
			cached = f(s)
			results[s] = cached
		end
		return cached
	end
end

--- Memoized lookup of the raw content of the page called `title`.
-- Returns '' when the title object cannot be created or the page has no content.
local getContent_memo = memoize(function(title)
	local title_object = mw.title.new(title)
	if not title_object then
		return ''
	end
	return title_object:getContent() or ''
end)

--- Splits `arr` into runs of consecutive elements that share the same `f(e)`.
-- Each run is an array of the elements with an extra `key` field holding the shared value.
local function group(arr, f)
	local r = {}
	for _, e in ipairs(arr) do
		local fe = f(e)
		local last = r[#r]
		if last and last.key == fe then
			table.insert(last, e)
		else
			table.insert(r, { e, key = fe })
		end
	end
	return r
end

--- Parses the text following `==Japanese==` in `wikicode` into definition records.
-- Returns an array of definitions, each having the format
--   { def = , kanji_spellings = , kana_spellings = , historical_kana_spellings = ,
--     header = , headword_line = }
-- where `def` is the raw definition line, `header` is the text of the most recent
-- level-3+ heading and `headword_line` is the most recent ja-* headword template line.
-- Returns an empty array when `wikicode` has no `==Japanese==` heading.
local function get_definitions_from_wikicode(wikicode)
	-- Line prefixes that identify a headword template line.
	local headword_line_patterns = {
		'^{{ja%-noun[|}]',
		'^{{ja%-adj[|}]',
		'^{{ja%-pos[|}]',
		'^{{ja%-phrase[|}]',
		'^{{ja%-verb[|}]',
		'^{{ja%-verb form[|}]',
		'^{{ja%-verb%-suru[|}]',
	}

	local function is_headword_line(line)
		for _, pattern in ipairs(headword_line_patterns) do
			if find(line, pattern) then
				return true
			end
		end
		return false
	end

	local current_kanji_spellings = {}
	local current_kana_spellings = {}
	local current_historical_kana_spellings = {}
	local current_header
	local current_headword_line
	local currently_under_headword_line = false
	local result = {}

	local japanese_section = match(wikicode, '==Japanese==\n(.*)') or ''
	-- '[^\n]+' never yields an empty string, so blank lines are skipped implicitly.
	for line in gmatch(japanese_section, '[^\n]+') do
		-- the following branches are ordered by frequency; read backwards
		if currently_under_headword_line and find(line, '^#+[^:*]') then
			-- A definition line. Its kanji spellings come from an explicit {{ja-def|...}},
			-- are empty for a "kana only" marker, and otherwise default to the
			-- spellings of the enclosing kanjitab.
			local kanji_spellings
			-- Match once and test the capture: {{ja-def|}} with no arguments
			-- yields nil here and falls through to the other cases.
			local ja_def_arguments = match(line, '{{ja%-def|([^}]+)')
			if ja_def_arguments then
				kanji_spellings = split(ja_def_arguments, '|')
			elseif find(line, '<!%-%- kana only %-%->') then
				kanji_spellings = {}
			else
				kanji_spellings = current_kanji_spellings
			end
			table.insert(result, {
				def = line,
				kanji_spellings = kanji_spellings,
				kana_spellings = current_kana_spellings,
				historical_kana_spellings = current_historical_kana_spellings,
				header = current_header,
				headword_line = current_headword_line,
			})
		elseif is_headword_line(line) then
			-- Replace the pipe of piped wikilinks with a backtick so it is not
			-- mistaken for a template argument separator, and fold |hkata= into |hhira=.
			local unlinked_line = gsub(line, '%[%[([^%[%]|]-)|([^%[%]|]-)%]%]', '%1`%2')
			local escaped_line = gsub(unlinked_line, '|hkata=', '|hhira=')
			-- NOTE(review): in the reviewed text this character class was split by a
			-- line break after the space; it is reproduced with the single space only.
			-- Confirm against the original module.
			current_kana_spellings = map(gmatch_array(escaped_line, '|([ぁ-ゖァ-ヺー%^%-%. %%]+)'), m_ja.remove_ruby_markup)
			current_historical_kana_spellings = gmatch_array(escaped_line, '|hhira=([ぁ-ゖァ-ヺー]+)')
			current_headword_line = line
			currently_under_headword_line = true
		elseif find(line, '^===+[^=]+===+$') then
			current_header = match(line, '^===+([^=]+)===+$')
			-- Definitions are only collected again after the next headword line.
			currently_under_headword_line = false
		elseif find(line, '^{{ja%-kanjitab[|}]') then
			-- |alt= is a comma-separated list; anything from ':' up to the next comma is dropped.
			local alt_argument = match(line, '|alt=([^|}]*)')
			current_kanji_spellings = alt_argument and split(gsub(alt_argument, ':[^,]*', ''), ',') or {}
		end
	end
	return result
end

--- Reads the page `title` and parses its definitions.
-- Same record format as get_definitions_from_wikicode, except that every record
-- additionally carries `title`, the name of the page it was read from.
local function get_definitions_from_entry(title)
	local parsed = get_definitions_from_wikicode(getContent_memo(title))
	for _, record in ipairs(parsed) do
		record.title = title
	end
	return parsed
end

--- Gathers the definitions of every page in `titles` into one flat array,
-- keeping the order of `titles` and, within a page, the order of its definitions.
local function get_definitions_from_entries(titles)
	local per_entry = map(titles, get_definitions_from_entry)
	return flatten(per_entry)
end

--- Renders definition records as a wikitext table.
-- Consecutive records sharing the same page title and first kana spelling form
-- one row: the first cell shows the headword, the second lists the definitions.
-- @param defs  array of records as produced by get_definitions_from_entries
-- @param frame frame object; its `preprocess` method is used to expand wikitext
-- @return the wikitext of the table, as a string
local function format_definitions(defs, frame)
	-- Pads `text` with one space on each side.
	local function ja(text)
		return ' ' .. text .. ' '
	end

	-- Wraps the display text (falling back to `lemma`) in 【】 brackets.
	local function link_bracket(lemma, display)
		return ja('【' .. (display or lemma) .. '】')
	end

	-- Indexed by the value returned by m_ja.kanji_grade.
	local kanji_grade_labels = { ' 1 ', ' 2 ', ' 3 ', ' 4 ', ' 5 ', ' 6 ', 'S ', 'J ', 'H ' }

	-- Combines a kanji spelling with its kana reading.
	-- this function ought to be in Module:ja
	local function ruby(kanji, kana)
		-- Delimit every run of kanji/alphanumeric characters with backticks.
		local kanji_segments = gsub(kanji, "([A-Za-z0-9々㐀-䶵一-鿌" .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. "𠀀-𯨟０-９Ａ-Ｚａ-ｚ]+)", "`%1`")

		-- returns possible matches between kanji and kana
		-- for example, match_readings('`物`の`哀`れ', 'もののあわれ') returns
		-- { '[物](も)の[哀](のあわ)れ', '[物](もの)の[哀](あわ)れ' }
		local function match_readings(segments, reading)
			if not segments:find('`') then
				return (segments == reading) and { reading } or {}
			end
			local kana_portion, kanji_portion, rest = mw.ustring.match(segments, '(.-)`(.-)`(.*)')
			-- The literal text before the kanji run must prefix the reading. A plain
			-- comparison is used so characters of the title are never read as pattern magic.
			local prefix_length = mw.ustring.len(kana_portion)
			if mw.ustring.sub(reading, 1, prefix_length) ~= kana_portion then
				return {}
			end
			local remaining = mw.ustring.sub(reading, prefix_length + 1)
			local candidates = {}
			-- Try every non-empty prefix of the remaining reading for this kanji run.
			for i = 1, mw.ustring.len(remaining) do
				local furigana = mw.ustring.sub(remaining, 1, i)
				for _, candidate in ipairs(match_readings(rest, mw.ustring.sub(remaining, i + 1))) do
					table.insert(candidates, kana_portion .. '[' .. kanji_portion .. '](' .. furigana .. ')' .. candidate)
				end
			end
			return candidates
		end

		local matches = match_readings(kanji_segments, kana)
		-- Use the segmentation only when it is unambiguous; otherwise annotate the whole word.
		local result = #matches == 1 and matches[1] or ('[' .. kanji .. '](' .. kana .. ')')
		return (gsub(result, "%[([^%[%]]+)%]%(([^%(%)]+)%)", " %1%2 "))
	end

	-- Formats the headword cell for one group of definitions.
	local function format_headword(defs)
		local title = defs[1].title
		local kana = defs[1].kana_spellings[1]
		-- Show the bare title when no reading is known or when the current page
		-- is the reading itself.
		local display
		if not kana or mw.title.getCurrentTitle().text == kana then
			display = title
		else
			display = ruby(title, kana)
		end
		local headword = link_bracket(title, display)
		local kanji_grade = len(title) == 1 and m_ja.kanji_grade(title)
		return ' ' .. headword .. ' ' .. (kanji_grade and kanji_grade_labels[kanji_grade] or '')
	end

	local preprocess_memo = memoize(function(s) return frame:preprocess(s) end)

	local kana_scripts = { Hira = true, Kana = true, ['Hira+Kana'] = true }

	-- Formats the definition cell for one group: the category links extracted from
	-- the expanded headword lines, a newline, then one line per definition.
	local function format_definition_list(defs)
		local headword_line_categories = {}
		local seen_headword_lines = {}

		local function format_definition(def)
			local def_text = find(def.def, '{{rfdef[|}]') and "This term needs a translation to English." or preprocess_memo((gsub(def.def, '^#+ *', '')))
			-- Turn the leading '#' list markers into ':' indentation.
			local def_prefix = gsub(match(def.def, '^#+'), '#', ':')
			local def_pos_label = ' [' .. mw.ustring.lower(def.header) .. '] '
			local headword_line = def.headword_line
			if kana_scripts[m_ja.script(def.title)] then
				headword_line = gsub(headword_line, '}}$', '|hira=' .. def.title .. '}}')
			end
			-- Collect the category links once per distinct headword line.
			if not seen_headword_lines[headword_line] then
				seen_headword_lines[headword_line] = true
				table.insert(headword_line_categories, table.concat(gmatch_array(preprocess_memo(headword_line), '%[%[Category:.-%]%]')))
			end
			return def_prefix .. def_pos_label .. def_text
		end

		-- Format the definitions first: doing so is what fills headword_line_categories.
		local formatted_defs = map(defs, format_definition)
		return table.concat(headword_line_categories) .. '\n' .. table.concat(formatted_defs, '\n')
	end

	local is_first_row = true

	-- Formats one table row; every row after the first gets a top border.
	local function format_row(defs)
		local border = is_first_row and '' or 'border-top:1px solid lightgray;'
		local result = '|-\n| style="white-space:nowrap;vertical-align:top;' .. border .. '" | ' .. format_headword(defs) .. '\n| style="' .. border .. '" |\n' .. format_definition_list(defs) .. '\n'
		is_first_row = false
		return result
	end

	local def_groups = group(defs, function(def)
		-- A record without a kana spelling groups under the title alone.
		return def.title .. ',' .. (def.kana_spellings[1] or '')
	end)
	local rows = map(def_groups, format_row)
	return '{| style="width: 100%"\n' .. table.concat(rows) .. '|}'
end

--- Template entry point.
-- Reads the parent frame's arguments: positional arguments are the titles of the
-- pages to read definitions from, and `key` (defaulting to the current page title)
-- is the spelling to look up. Only definitions whose kanji, kana or historical
-- kana spellings contain `key` are rendered.
-- Raises an error for any argument reported as unrecognized by Module:parameters.
-- @param frame the invoking frame
-- @return the wikitext produced by format_definitions
function export.show(frame)
	local params = {
		[1] = { list = true },
		['key'] = {},
	}
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)
	for name in pairs(unrecognized_args) do
		error("“" .. tostring(name) .. "” is not a recognized parameter.")
	end

	local title = mw.title.getCurrentTitle().text
	local key = args.key or title
	local defs = get_definitions_from_entries(args[1])
	local matching_defs = filter(defs, function(def)
		return contains(def.kanji_spellings, key)
			or contains(def.kana_spellings, key)
			or contains(def.historical_kana_spellings, key)
	end)
	-- Render the filtered list; passing `defs` here would make `key` have no effect.
	return format_definitions(matching_defs, frame)
end

return export