-- Module:User:Suzukaze-c/zh-extract

-- Table of functions exported by this module.
local export = {}

-- Short aliases for the Unicode-aware string helpers used below.
local replace, match, itermatch =
	mw.ustring.gsub, mw.ustring.match, mw.ustring.gmatch
local split = mw.text.split

-- Idea: add simplified too? We are pulling stuff from the page anyway.

-- Separator placed between values when several sets are combined and the
-- caller does not supply one of its own.
local default_set_separator = "//"

--- Extract the parameters of the zh-pron template invocation(s) on a page.
--
-- Can be called from Lua with positional arguments, or with a single
-- frame-like table whose `args[1]`, `args[2]`, `args[3]` hold the same three
-- values; in the latter case the result is returned as dumped text rather
-- than as a table.
--
-- @param word           page title to read (or a frame-like table, see above)
-- @param combine        if truthy, merge every instance found on the page;
--                       otherwise only the first instance is returned
-- @param set_separator  text placed between merged values; falls back to
--                       `default_set_separator` when nil
-- @return with `combine`: a table mapping each parameter name to its
--         non-empty values joined by the separator; without `combine`: the
--         parameter table of the first instance (nil when none was found).
--         In frame mode the value goes through
--         `require('module:debug').dump` before being returned.
-- Raises an error when the page cannot be read, or when a start marker is
-- present but no end marker could be placed.
function export.extract_roman(word, combine, set_separator)
	local plaintext = false
	if type(word) == 'table' then
		plaintext = true
		word, combine, set_separator = word.args[1], word.args[2], word.args[3]
	end

	mw.log('PROCESSING: ' .. word)

	-- mw.title.new yields nil for an invalid title and getContent yields nil
	-- for a page that does not exist; both cases end in the same error.
	local title = mw.title.new(word)
	local content = title and title:getContent()
		or error('the ' .. word .. ' entry does not exist!?')
	local each = {}
	local roman_final = {}

	-- Bracket every zh-pron invocation with sentinel runs so its body can be
	-- captured by the loop below.
	-- NOTE(review): both patterns are reconstructed. They assume an invocation
	-- opens with "{{zh-pron", lists one "|param=value" per line, and closes
	-- with a "|cat=" line followed by "}}" on its own line. Confirm against
	-- real entries before relying on this.
	content = replace(content, "{{zh%-pron", "ⓐⓐⓐⓐⓐ")
	content = replace(content, "(\n|cat=[^\n]*)\n}}", "%1ⓩⓩⓩⓩⓩ") -- making assumptions about formatting

	if match(content, "ⓐ") and not match(content, "ⓩ") then
		error("please add the cat param to zh-pron at " .. word)
	end

	-- Convert each instance to a table subsumed in $each
	for innards in itermatch(content, "ⓐⓐⓐⓐⓐ([^ⓩ]+)ⓩⓩⓩⓩⓩ") do
		local params = {}
		local items = split(innards, "\n|")
		-- Whatever precedes the first "\n|" is not a parameter.
		table.remove(items, 1)
		for _, item in ipairs(items) do
			local param, value = match(item, "^([^=]+)=(.*)$")
			-- Pieces without a "name=value" shape are skipped: using a nil
			-- key as a table index would raise an error.
			if param then
				params[param] = value
			end
		end
		each[#each + 1] = params
	end

	-- If told to combine tables, then combine each $each sub-table into a
	-- mega-table, otherwise return the data of the first instance
	if combine then
		local separator = set_separator or default_set_separator

		-- Gather every non-empty value seen for each parameter. A parameter
		-- that only ever had empty values still gets an (empty) entry.
		local collected = {}
		for _, etable in ipairs(each) do
			for param, value in pairs(etable) do
				if not collected[param] then
					collected[param] = {}
				end
				if value ~= '' then
					table.insert(collected[param], value)
				end
			end
		end

		-- Flatten each list of values into text.
		for param, values in pairs(collected) do
			roman_final[param] = table.concat(values, separator)
		end
	else
		roman_final = each[1]
	end

	if plaintext then
		return require('module:debug').dump(roman_final)
	end
	return roman_final
end

-- Hand the table of public functions back to whoever loads this module.
return export