-- Module:User:Suzukaze-c/zh-l

local export = {}

-- Project modules used by this module.
local M = require("Module:zh")
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_script_utilities = require("Module:script utilities")
local m_test1 = require("Module:User:Suzukaze-c/zh-extract")

-- Language object for the code "zh", and the per-variety info table
-- (the `data` field of the info data module, loaded with mw.loadData).
local lang = m_languages.getByCode("zh")
local varinfo = mw.loadData("Module:User:Suzukaze-c/zh/data/info").data

-- Short aliases for the Unicode-aware string helpers used throughout.
local match, gsub, split = mw.ustring.match, mw.ustring.gsub, mw.text.split

-- Character class used to detect whether a string contains a Han character.
local match_Han = '[㐀-鿕𠀀-𬺡]'

-- Builds the short label that precedes a romanisation for one variety.
--
-- abbr: variety code; must be a key of `varinfo`.
-- Returns the code upper-cased with mw.ustring.upper, followed by '. '.
-- Raises an error naming the code when `varinfo` has no entry for it
-- (previously this surfaced as an opaque "attempt to index a nil value").
--
-- NOTE(review): earlier code also read the entry's 'rom_w' and 'var' fields
-- into locals that were never used in the returned string; those dead locals
-- are dropped here. Possibly markup that used them was lost -- confirm against
-- the intended output.
local function abbr_gen(abbr)
	if abbr == nil or varinfo[abbr] == nil then
		error('unknown variety code "' .. tostring(abbr) .. '"')
	end
	return mw.ustring.upper(abbr) .. '. '
end

-- Derives the page titles whose pronunciations should be looked up for `word`.
-- Returns a sequence of strings (always at least one element).
local function collect_lookup_targets(word)
	-- filter out things like punctuation
	local filtered = gsub(word, '[^㐀-鿕𠀀-𬺡A-Za-z0-9|%[%]／-]', '')

	if match(word, "／") then
		-- allow roman to be picked up even with explicit alternate forms:
		-- only the first form is looked up
		return { split(filtered, "／", true)[1] }
	elseif match(word, "%[%[") then
		-- we have been given multiple linked terms
		filtered = gsub(filtered, "|[^%]]+", "") -- remove link titles if present
		filtered = gsub(filtered, "[%[%]]", " ") -- replace all square brackets with spaces
		filtered = gsub(filtered, " +", " ") -- reduce consecutive spaces
		filtered = mw.text.trim(filtered) -- remove excess spaces
		return split(filtered, " ", true) -- one entry per linked item (theoretically)
	end

	return { filtered }
end

-- Returns true when every title in `titles` names an existing page.
-- mw.title.new returns nil for an invalid title; that is treated as
-- "page does not exist" instead of raising an indexing error.
local function all_pages_exist(titles)
	for _, title_text in ipairs(titles) do
		local title = mw.title.new(title_text)
		if not (title and title.exists) then
			return false
		end
	end
	return true
end

-- Template entry point: builds a formatted link to a Chinese term.
--
-- Arguments are read from the parent frame (frame:getParent().args):
--   1, 2, 3  either "word, gloss" (when 1 contains a Han character, or when
--            only 1 is given) or "varieties, word, gloss", where varieties is
--            a comma-separated list of variety codes
--   pos, lit passed through to the link annotations
--   tr       manual romanisation, "variety:roman" segments separated by "/"
--   s        when set, forces the "word／M.ts(word)" form
-- A "@" in the word is removed and suppresses linking; a "*" in the word
-- suppresses linking as well.
--
-- Returns the formatted text produced by Module:links / Module:script utilities.
-- Raises an error when parameter 1 is missing, when a requested variety has no
-- extracted pronunciation, or when a tr= segment is not "variety:roman".
function export.link(frame)
	local args = frame:getParent().args

	local first = args[1]
	if not first then
		error('Parameter 1 (the variety list or the term) is required.')
	end

	local varieties, word, gloss
	if match(first, match_Han) then
		-- variety specification has been left out; $1 is definitely a word
		-- here as it is in the Han script
		varieties = 'm'
		word = first
		gloss = args[2] or false
	elseif not args[2] then
		-- we have been given only a word, POSSIBLY in the Latin script,
		-- and nothing else
		varieties = 'm'
		word = first
		gloss = false
	else
		varieties = first
		word = args[2]
		gloss = args[3] or false
	end

	local pos = args["pos"] or false
	local lit = args["lit"] or false
	local manual_roman = args["tr"] or false
	local force_simp = args["s"] or false

	varieties = split(varieties, ",", true)

	-- link repression; kept local so the flag cannot leak between calls
	local no_link = false
	if match(word, "@") then
		word = gsub(word, "@", "")
		no_link = true
	end
	if match(word, "%*") then
		-- the usual linguistic *
		no_link = true
	end

	-- cleanup: ASCII slash becomes the fullwidth slash used as form separator
	word = gsub(word, "%/", "／")

	local lookup_targets = collect_lookup_targets(word)
	local pages_exist = all_pages_exist(lookup_targets)

	-- extract every requested pronunciation for every word
	local roman_for_each_word = {}
	if not manual_roman and pages_exist and varieties[1] ~= '' then
		for i, target in ipairs(lookup_targets) do
			local roman_all = m_test1.extract_roman(target, 1)
			local per_variety = {}
			for _, variety in ipairs(varieties) do
				per_variety[variety] = roman_all[variety]
					or error('"' .. variety .. '" pronunciation not found for ' .. target .. '!')
			end
			roman_for_each_word[i] = per_variety
		end
	end

	-- assemble the transliteration: one "<label> <romanisations>" part per
	-- variety, parts joined with "; "
	local tr
	if roman_for_each_word[1] then
		local parts = {}
		for j, variety in ipairs(varieties) do
			local romans = {}
			for i in ipairs(lookup_targets) do
				romans[i] = roman_for_each_word[i][variety]
			end
			parts[j] = abbr_gen(variety) .. ' ' .. table.concat(romans, ' ')
		end
		tr = table.concat(parts, '; ')
	elseif manual_roman then
		local parts = {}
		for _, set in ipairs(split(manual_roman, "/", true)) do
			local variety, roman = match(set, "(.+):(.+)")
			if not variety then
				error('Invalid tr= segment "' .. set .. '"; expected "variety:romanisation".')
			end
			parts[#parts + 1] = abbr_gen(variety) .. ' ' .. roman
		end
		tr = table.concat(parts, '; ')
	else
		tr = false
	end

	-- finalize link
	-- NOTE(review): the "[[" / "]]" literals below are restored; the previous
	-- text had dangling ".." operators and a no-op gsub where they belong.
	if match(word, "%[%[") then
		-- already-linked input: append the M.ts() form after a fullwidth slash
		word = word .. "／" .. M.ts(word)
	elseif match(word, "／") then
		-- "臺灣話／台灣話／台湾话" -> each alternate form becomes its own link
		word = "[[" .. gsub(word, "／", "]]／[[") .. "]]"
	elseif M.ts_determ(word) == "trad" or force_simp then
		-- "附著" -> link to the word and to its M.ts() form
		-- (presumably the simplified form; confirm in Module:zh)
		word = "[[" .. word .. "]]／[[" .. M.ts(word) .. "]]"
	end

	-- build the link
	local terminfo = { lang = lang, term = word, tr = tr, gloss = gloss, pos = pos, lit = lit }
	if no_link then
		-- "easier to destroy than create": strip the links just built, tag
		-- the plain text, and append the annotations separately
		local plain = m_links.remove_links(word)
		return m_script_utilities.tag_text(plain, lang) .. m_links.format_link_annotations(terminfo)
	end

	return m_links.full_link(terminfo)
end

return export