-- Module:User:Ssvb/ru-accentdict

-- A Russian dictionary. Based on the https://en.wikipedia.org/wiki/Bloom_filter algorithm.
-- Capacity: 870005 words (~7.0 bytes per word). Expected false positive rate: 1 in 600 million.
-- Automatically generated from "enwiktionary-20240401-pages-articles-multistream.xml.bz2"
-- via https://kaikki.org/dictionary/Russian/kaikki.org-dictionary-Russian.json

local export = {}

-- Number of bit probes performed by a single lookup.
local bloom_filter_k = 29

-- The filter's bit buffer, split across several data pages. Each chunk is
-- indexed character by character, six bits per character, by lookup_word
-- (presumably base64 text, judging by the decode table below).
local bloom_filter_bitbuf = {
	require("Module:User:Ssvb/ru-accentdict/data1"),
	require("Module:User:Ssvb/ru-accentdict/data2"),
	require("Module:User:Ssvb/ru-accentdict/data3"),
}

-- bloom_filter_base64dec_lut[b][c] == 1 exactly when bit (b - 1), counted from
-- the least significant bit, is set in the 6-bit value that the base64
-- character c stands for. Characters whose bit is clear are simply absent.
local bloom_filter_base64dec_lut = {}
do
	-- Standard base64 alphabet: the character at position v + 1 encodes value v.
	local alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	for bit_pos = 1, 6 do
		local weight = 2 ^ (bit_pos - 1)
		local chars_with_bit = {}
		for value = 0, 63 do
			-- The bit of this weight is set iff the value, reduced modulo the
			-- next power of two, still reaches the weight.
			if value % (2 * weight) >= weight then
				chars_with_bit[alphabet:sub(value + 1, value + 1)] = 1
			end
		end
		bloom_filter_base64dec_lut[bit_pos] = chars_with_bit
	end
end

if not mw then
	-- for local testing using something like:
	--  https://github.com/Egor-Skriptunoff/pure_lua_SHA/blob/master/sha2.lua
	--  https://stackoverflow.com/questions/51559181/sha512-pure-lua-5-1-adaptation/51561685#51561685
	local sha2 = require("sha2")
	-- Deliberately a global: it stands in for the `mw` table that the wiki
	-- environment would otherwise provide. The stand-in always computes
	-- SHA-512, whatever algorithm name is passed.
	mw = {
		["hash"] = {
			["hashValue"] = function(algo, text)
				return sha2.sha512(text)
			end,
		},
	}
end

-- Reports whether the bit at zero-based position `bit_index` of the chunked
-- bit buffer is set. Every character of a chunk carries six bits; the decode
-- table says which characters have a given bit set. If the position lies
-- beyond every chunk, the probe is treated as passing.
local function bloom_filter_bit_is_set(bit_index)
	local bit_in_char = bit_index % 6
	local char_pos = (bit_index - bit_in_char) / 6 + 1
	for _, chunk in ipairs(bloom_filter_bitbuf) do
		local chunk_len = #chunk
		if char_pos <= chunk_len then
			local ch = chunk:sub(char_pos, char_pos)
			return (bloom_filter_base64dec_lut[bit_in_char + 1][ch] and true) or false
		end
		-- Not in this chunk: make the position relative to the next one.
		char_pos = char_pos - chunk_len
	end
	return true
end

-- Returns true if the word is found in the dictionary and false otherwise.
-- The dictionary is a Bloom filter, so a true result can (rarely) be a false
-- positive; see the expected rate stated in the file header.
function export.lookup_word(word)
	-- Total number of bits in the filter, summed over all chunks.
	local total_bits = 0
	for _, chunk in ipairs(bloom_filter_bitbuf) do
		total_bits = total_bits + #chunk * 6
	end

	local digest = word
	local probes_done = 0
	repeat
		-- Chain the hash: each round hashes the previous round's digest.
		digest = mw.hash.hashValue("sha512", digest)
		-- Each 8-hex-digit slice of the 128-digit digest yields one probe.
		for offset = 1, 128, 8 do
			local bit_index = tonumber(digest:sub(offset, offset + 7), 16) % total_bits
			if not bloom_filter_bit_is_set(bit_index) then
				return false
			end
			probes_done = probes_done + 1
			if probes_done >= bloom_filter_k then
				return true
			end
		end
	until false
end

-- Returns a freshly built table of metadata about the dictionary: the URL of
-- the data source, the recorded maximum step counts for the stress and "jo"
-- searches together with the word given as the worst case for each, and the
-- list of words containing more than one "ё".
-- Takes no arguments. (The empty parameter list is required by Lua syntax;
-- without it the definition does not parse.)
function export.query_extra_info()
	return {
		["data_source"] = "https://kaikki.org/dictionary/Russian/kaikki.org-dictionary-Russian.json",
		["max_stress_search_steps"] = 7,
		["worst_stress_search_word"] = "благоустра́ивающаяся",
		["max_jo_search_steps"] = 4,
		["worst_jo_search_word"] = "посерьёзневшее",
		["words_with_double_jo"] = {
			"Бёрёлё́х",
			"Бёрёлё́ха",
			"Бёрёлё́хе",
			"Бёрёлё́хом",
			"Бёрёлё́ху",
			"трёхколё́сная",
			"трёхколё́сного",
			"трёхколё́сное",
			"трёхколё́сной",
			"трёхколё́сном",
			"трёхколё́сному",
			"трёхколё́сною",
			"трёхколё́сную",
			"трёхколё́сные",
			"трёхколё́сный",
			"трёхколё́сным",
			"трёхколё́сными",
			"трёхколё́сных",
			"четырёхзвё́здная",
			"четырёхзвё́здного",
			"четырёхзвё́здное",
			"четырёхзвё́здной",
			"четырёхзвё́здном",
			"четырёхзвё́здному",
			"четырёхзвё́здною",
			"четырёхзвё́здную",
			"четырёхзвё́здные",
			"четырёхзвё́здный",
			"четырёхзвё́здным",
			"четырёхзвё́здными",
			"четырёхзвё́здных",
		},
	}
end

-- Hand the table of public functions back to whoever loads this module.
return export