-- Module:ja-see/furigana

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Body of a character class (the text that goes between '[' and ']' in a
-- pattern) listing the characters this module treats as kanji.
local kanji_pattern = "々一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏"

-- Returns an array of every possible alignment between `kanji` and `kana`,
-- written as '[base](reading)' groups with the literal kana left in place.
-- For example, simple_match('物の哀れ', 'もののあわれ') returns
-- { '[物](も)の[哀](のあわ)れ', '[物](もの)の[哀](あわ)れ' }.
-- An empty array means the two strings cannot be aligned at all.
local function simple_match(kanji, kana)
	-- Wrap every run of kanji / alphanumerics in backticks, so the text becomes
	-- an alternation of literal segments and segments that need a reading.
	local kanji_segments = mw.ustring.gsub(kanji, "([A-Za-z0-9々一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏０-９Ａ-Ｚａ-ｚ]+)", "`%1`")

	local function simple_match_rec(segments, remaining_kana)
		if not segments:find('`', 1, true) then
			-- Nothing left that needs a reading: the tails must be identical.
			return (segments == remaining_kana) and { remaining_kana } or {}
		end

		local kana_portion, kanji_portion, rest = mw.ustring.match(segments, '(.-)`(.-)`(.*)')
		if not kana_portion then
			-- Unbalanced backticks (the input itself contained one): no alignment.
			return {}
		end

		-- The literal text in front of the kanji run must be a prefix of the
		-- reading. It is compared as plain text rather than spliced into a
		-- pattern, so characters such as '(', '.', '-' or '%' are harmless.
		local prefix_length = mw.ustring.len(kana_portion)
		if mw.ustring.sub(remaining_kana, 1, prefix_length) ~= kana_portion then
			return {}
		end
		local tail = mw.ustring.sub(remaining_kana, prefix_length + 1)

		-- Try every non-empty prefix of the tail as the reading of this kanji
		-- run and keep the splits for which the rest can still be aligned.
		local candidates = {}
		for i = 1, mw.ustring.len(tail) do
			local reading = mw.ustring.sub(tail, 1, i)
			for _, candidate in ipairs(simple_match_rec(rest, mw.ustring.sub(tail, i + 1))) do
				table.insert(candidates, kana_portion .. '[' .. kanji_portion .. '](' .. reading .. ')' .. candidate)
			end
		end
		return candidates
	end

	return simple_match_rec(kanji_segments, kana)
end

-- Public wrapper around simple_match.
-- Returns the alignment when exactly one exists; otherwise (none, or more
-- than one) returns the whole of `kanji` annotated with the whole of `kana`.
function export.simple_match(kanji, kana)
	local candidates = simple_match(kanji, kana)
	if #candidates ~= 1 then
		return '[' .. kanji .. '](' .. kana .. ')'
	end
	return candidates[1]
end

-- Reads the wikitext of the entry `entry_title` and returns an array with one
-- furigana string per kanjitab found there, in the '[kanji](kana)' format
-- used elsewhere in this module.
-- For example, extract_kanjitab_from_entry('書留') returns { '[書](かき)[留](とめ)' }
-- if the 書留 entry contains a kanjitab whose readings are かき and とめ.
-- Returns an empty array when the title is invalid, the page has no content,
-- or no kanjitab is found.
-- NOTE(review): in the reviewed text the kanjitab search pattern and the
-- merge marker were empty strings (apparently stripped as markup). Both are
-- reconstructed here from the surrounding logic -- confirm against the live
-- module and the ja-kanjitab template.
local function extract_kanjitab_from_entry(entry_title)
	-- Private marker put in front of a kanji that shares the reading of the
	-- preceding kanji. '<' and '>' have no special meaning in Lua patterns.
	local merge_marker = '<merge>'

	-- mw.title.new returns nil for an invalid title; getContent returns nil
	-- when the page does not exist.
	local title = mw.title.new(entry_title)
	local entry_wikicode = title and title:getContent() or ''

	local results = {}
	for kanjitab in mw.ustring.gmatch(entry_wikicode, '{{ja%-kanjitab|([^{}]*)}}') do
		-- Rewrite [[target|label]] as target`label so the pipe inside a link
		-- is not taken for a parameter separator.
		kanjitab = mw.ustring.gsub(kanjitab, '%[%[([^%[%]|]-)|([^%[%]|]-)%]%]', '%1`%2')

		-- Collect positional and named parameters.
		local args, counter = {}, 1
		for arg in mw.text.gsplit(kanjitab, '|') do
			local k, v = mw.ustring.match(arg, '^(.-)=(.*)$')
			if k then
				-- 'k' and 'o' are aliases of 'k1' and 'o1'; numeric names are
				-- stored under number keys like positional parameters.
				k = ({ k = 'k1', o = 'o1' })[k] or tonumber(k) or k
				args[k] = v
			else
				args[counter] = arg
				counter = counter + 1
			end
		end

		-- Annotate each kanji of the title with its reading.
		local argpos, skip = 1, 0
		local result = mw.ustring.gsub(entry_title, '[' .. kanji_pattern .. ']', function(kanji)
			if skip > 0 then
				-- Covered by the previous reading: mark it for merging below.
				skip = skip - 1
				return merge_marker .. kanji
			end

			-- A positional reading may end in digits giving the number of
			-- kanji it spans.
			local reading_kana, reading_length = '', nil
			if args[argpos] then
				reading_kana, reading_length = mw.ustring.match(args[argpos], '^([^0-9]*)([0-9]*)$')
			end
			if args['k' .. argpos] then
				reading_kana = args['k' .. argpos]
			end
			if args['o' .. argpos] then
				reading_kana = (reading_kana or '') .. args['o' .. argpos]
			end
			-- An unparsable positional value leaves the reading empty instead
			-- of raising an error on concatenation.
			reading_kana = reading_kana or ''

			skip = (tonumber(reading_length) or 1) - 1
			argpos = argpos + 1
			return '[' .. kanji .. '](' .. reading_kana .. ')'
		end)

		-- Fold marked kanji into the preceding group. Each pass absorbs one
		-- kanji per group, so a reading may span at most eleven kanji.
		for _ = 1, 10 do
			if not mw.ustring.find(result, merge_marker, 1, true) then
				break
			end
			result = mw.ustring.gsub(result, '%[([^%[%]]+)%]%(([^%(%)]+)%)' .. merge_marker .. '(.)', '[%1%3](%2)')
		end

		table.insert(results, result)
	end
	return results
end

-- Returns furigana markup for `kanji` read as `kana`.
-- The simple match is tried first. Its result is treated as inaccurate when
-- there are zero results, more than one result, or the single result contains
-- consecutive kanji like [書留](かきとめ). In that case the kanjitabs of the
-- entry named `kanji` are consulted, and the first one whose readings
-- concatenate to `kana` is returned. If that fails too, the whole of `kanji`
-- is annotated with the whole of `kana`.
function export.accurate_match(kanji, kana)
	local candidates = simple_match(kanji, kana)

	local unambiguous = #candidates == 1
		and not mw.ustring.find(candidates[1], '[' .. kanji_pattern .. '][' .. kanji_pattern .. ']')
	if unambiguous then
		return candidates[1]
	end

	for _, markup in ipairs(extract_kanjitab_from_entry(kanji)) do
		-- Replace each [base](reading) group by its reading to recover the kana.
		local reading = mw.ustring.gsub(markup, '%[([^%[%]]+)%]%(([^%(%)]+)%)', '%2')
		if reading == kana then
			return markup
		end
	end

	-- if all fails
	return '[' .. kanji .. '](' .. kana .. ')'
end

-- Hand the table of public functions (simple_match, accurate_match) back to
-- whatever loads this module.
return export