-- Module:chemical formula

-- w:Module:Chem2
-- Dependencies. Each statement needs its own line: when they share a line with
-- the comment above, everything after the leading "--" is commented out.
local decode_entities = require("Module:string utilities").decode_entities
local getArgs = require('Module:Arguments').getArgs

local export = {} -- module's table

-- Elements with wiki links.
-- Keys are the symbols recognised for auto-linking; values are article-style
-- names. The auto-link code visible in this module only tests whether a key is
-- present (am[x]); NOTE(review): confirm whether the values are read elsewhere.
local am = {
	H = "Hydrogen", He = "Helium",
	Li = "Lithium", Be = "Beryllium", B = "Boron", C = "Carbon", N = "Nitrogen",
	O = "Oxygen", F = "Fluorine", Ne = "Neon",
	Na = "Sodium", Mg = "Magnesium", Al = "Aluminium", Si = "Silicon",
	P = "Phosphorus", S = "Sulfur", Cl = "Chlorine", Ar = "Argon",
	K = "Potassium", Ca = "Calcium", Sc = "Scandium", Ti = "Titanium",
	V = "Vanadium", Cr = "Chromium", Mn = "Manganese", Fe = "Iron",
	Co = "Cobalt", Ni = "Nickel", Cu = "Copper", Zn = "Zinc", Ga = "Gallium",
	Ge = "Germanium", As = "Arsenic", Se = "Selenium", Br = "Bromine",
	Kr = "Krypton",
	Rb = "Rubidium", Sr = "Strontium", Y = "Yttrium", Zr = "Zirconium",
	Nb = "Niobium", Mo = "Molybdenum", Tc = "Technetium", Ru = "Ruthenium",
	Rh = "Rhodium", Pd = "Palladium", Ag = "Silver", Cd = "Cadmium",
	In = "Indium", Sn = "Tin", Sb = "Antimony", Te = "Tellurium", I = "Iodine",
	Xe = "Xenon",
	Cs = "Caesium", Ba = "Barium", La = "Lanthanum", Ce = "Cerium",
	Pr = "Praseodymium", Nd = "Neodymium", Pm = "Promethium", Sm = "Samarium",
	Eu = "Europium", Gd = "Gadolinium", Tb = "Terbium", Dy = "Dysprosium",
	Ho = "Holmium", Er = "Erbium", Tm = "Thulium", Yb = "Ytterbium",
	Lu = "Lutetium", Hf = "Hafnium", Ta = "Tantalum", W = "Tungsten",
	Re = "Rhenium", Os = "Osmium", Ir = "Iridium", Pt = "Platinum",
	Au = "Gold", Hg = "Mercury (element)", Tl = "Thallium", Pb = "Lead",
	Bi = "Bismuth", Po = "Polonium", At = "Astatine", Rn = "Radon",
	Fr = "Francium", Ra = "Radium", Ac = "Actinium", Th = "Thorium",
	Pa = "Protactinium", U = "Uranium", Np = "Neptunium", Pu = "Plutonium",
	Am = "Americium", Cm = "Curium", Bk = "Berkelium", Cf = "Californium",
	Es = "Einsteinium", Fm = "Fermium", Md = "Mendelevium", No = "Nobelium",
	Lr = "Lawrencium", Rf = "Rutherfordium", Db = "Dubnium",
	Sg = "Seaborgium", Bh = "Bohrium", Hs = "Hassium", Mt = "Meitnerium",
	Ds = "Darmstadtium", Rg = "Roentgenium",
	Cn = "Copernicium", -- IUPAC symbol is Cn; this entry was previously keyed "Cp"
	Nh = "Nihonium", Fl = "Flerovium", Mc = "Moscovium", Lv = "Livermorium",
	Ts = "Tennessine", Og = "Oganesson",

	-- Groups etc with element-like names
	Bn = 'Benzyl group',
	Bz = 'Benzoyl group',
	D = 'Deuterium',
	Et = 'Ethyl group',
	Ln = 'Lanthanide',
	Nu = 'Nucleophile',
	Ph = 'Phenyl group',
	R = 'Substituent',
	T = 'Tritium',
	Tf = 'Trifluoromethylsulfonyl group',
	X = 'Halogen',
}

-- Token types produced by the tokenizer and consumed by the renderer.
-- The numeric values are arbitrary but distinct; they are kept as they were.
local T_ELEM = 0         -- symbol: an uppercase letter followed by lowercase letters
local T_NUM = 1          -- number
local T_OPEN = 2         -- open '('
local T_CLOSE = 3        -- close ')'
local T_PM_CHARGE = 4    -- + or −
local T_WATER = 6        -- .xH2O x number
local T_CRYSTAL = 9      -- .x
local T_CHARGE = 8       -- charge (x+), (x-)
local T_SUF_CHARGE = 10  -- suffix and charge e.g. 2+ from H2+
local T_SUF_CHARGE2 = 12 -- suffix and (charge) e.g. 2(2+) from He2(2+)
local T_SPECIAL = 14     -- starting with \ e.g. \d for double bond (=)
local T_SPECIAL2 = 16    -- starting with \y{x} e.g. \i{12} for isotope with mass number 12
local T_ARROW_R = 17     -- match: ->
local T_ARROW_EQ = 18    -- match: <->
local T_UNDERSCORE = 19  -- _{ ... }
local T_CARET = 20       -- ^{ ... }
local T_LINKOPEN = 21    -- Opening of link, always like "[[target|" even if the source wasn't
local T_NOCHANGE = 30    -- Anything else like ☃

--- Format an upper and/or lower annotation (e.g. a charge and a count).
-- @param up    string placed first; "" when there is no upper part
-- @param down  string placed second; "" when there is no lower part
-- @return the formatted string: only `down` when `up` is empty, only `up`
--         when `down` is empty, otherwise both.
-- Declared local so the helper no longer leaks into the global environment.
-- NOTE(review): the format strings below contain only spaces around the
-- values; presumably sub/sup markup was lost from this copy. Confirm against
-- the upstream module before relying on the exact output.
local function su(up, down)
	if up == "" then
		return ('%s '):format(down)
	end
	if down == "" then
		return ('%s '):format(up)
	end
	return (' %s %s  '):format(up, down)
end

--- Return the separator placed before crystal water / crystal counts.
-- The parameter list "()" is required by the grammar; without it the
-- definition does not parse. Declared local to avoid creating a global.
-- @return the HTML entity string '&middot;'
local function DotIt()
	return '&middot;'
end

--- Tokenizer: returns an iterator over the formula string `f`.
-- Each call of the iterator yields one token as (type, value), where type is
-- one of the T_* constants; it yields nil once the whole string is consumed,
-- which ends a generic `for` loop.
-- Rules are tried strictly in the order listed; the first pattern that
-- matches at the current position wins.
-- @param f  the formula source string
-- @return iterator function
local function item(f)
	local i = 1 -- byte index of the next unread character of f

	-- Rules tried before the link / bracket-escape special cases.
	-- Each entry is { pattern anchored at i, token type }.
	local leading_rules = {
		{ '^%s+[%d.]+', T_NOCHANGE },          -- coefficient (needs whitespace first)
		{ '^%s[+]', T_NOCHANGE },              -- + between species (H2O + H2O)
		{ '^%&%#[%w%d]+%;', T_NOCHANGE },      -- numeric entity
		{ '^%<%-%>', T_ARROW_EQ },             -- <->
		{ '^%-%>', T_ARROW_R },                -- ->
		{ '^%u%l*', T_ELEM },                  -- symbols like Aaaaa
		{ '^%d+[+-]', T_SUF_CHARGE },          -- x+, x-
		{ '^%d+%(%d*[+-]%)', T_SUF_CHARGE2 },  -- x(y+/-), x(+/-)
		{ '^%(%d*[+-]%)', T_CHARGE },          -- (x+) (xx+), (x-) (xx-)
		{ '^[%d.]+', T_NUM },                  -- number
	}

	-- Rules tried after the link / bracket-escape special cases.
	local trailing_rules = {
		{ '^[(|{|%[]', T_OPEN },               -- ({[
		{ '^[)|}|%]]', T_CLOSE },              -- )}]
		{ '^[+-]', T_PM_CHARGE },              -- + or -
		{ '^%*[%d.]*H2O', T_WATER },           -- crystal water
		{ '^%*[%d.]*', T_CRYSTAL },            -- crystal
		{ '^[\\].{%d+}', T_SPECIAL2 },         -- \y{x}
		{ '^[\\].', T_SPECIAL },               -- \x
		{ '^_{[^}]*}', T_UNDERSCORE },         -- _{...}
		{ '^^{[^}]*}', T_CARET },              -- ^{...}
		{ '^.', T_NOCHANGE },                  -- the rest, one character at a time
	}

	-- Return (type, text) for the first rule matching at position i, or nothing.
	local function first_match(rules)
		for _, rule in ipairs(rules) do
			local text = f:match(rule[1], i)
			if text then
				return rule[2], text
			end
		end
	end

	return function()
		if i > #f then
			return nil, nil -- input exhausted
		end

		-- A coefficient at the very start of the formula needs no leading space.
		if i == 1 and f:match('^[0-9]', i) then
			local coefficient = f:match('^[%d.]+', i)
			i = i + #coefficient
			return T_NOCHANGE, coefficient
		end

		local t, x = first_match(leading_rules)

		if not x then
			-- Escape [[[X or [X (relevant to auto-linking): emit one bracket as
			-- an entity and advance by a single character.
			if f:match('^%[%[%[[^[]', i) or f:match('^%[[^[]', i) then
				i = i + 1
				return T_OPEN, '&#91;'
			end

			if f:sub(i, i + 1) == '[[' then
				local target = f:match('^%[%[([^]|]*)', i) -- link target
				local len = #target + 3
				if f:sub(len + i, len + i) == ']' then
					-- Skip only the "[[": the link text is then read a second
					-- time as chemical markup, so it serves as both the target
					-- and the label.
					i = i + 2
				else
					i = i + len
				end
				return T_LINKOPEN, '[[' .. target .. '|'
			end

			t, x = first_match(trailing_rules)
		end

		if not x then
			-- Defensive only: the final '^.' rule matches any character.
			error("Invalid character in formula! : " .. f)
		end

		i = i + #x
		return t, x
	end
end

-- Output for the single-character escapes \x (token type T_SPECIAL).
-- Characters not listed here produce no output.
local SPECIAL_SYMBOLS = {
	s = "−",           -- single bond
	d = "=",           -- double bond
	t = "≡",           -- triple bond
	q = "≣",           -- quadruple bond
	h = "η",           -- η, hapticity
	["*"] = "*",       -- *, normal *
	["-"] = "-",       -- -
	["\\"] = "\\",     -- \
	["'"] = "&#39;",   -- html-code for '
}

--- Render a formula written in the module's shorthand as wikitext.
-- @param args  table of arguments:
--   args[1]     formula source (defaults to '')
--   args.link   optional page title; when non-empty the whole formula is
--               wrapped as [[link|formula]]
--   args.auto   when non-empty, the first occurrence of every symbol listed
--               in `am` is turned into a link
-- @return string: a templatestyles extension tag followed by the formula
-- NOTE(review): several literals below (" " around charges, "H2O", the
-- spaces around the final result) look like they lost HTML markup in this
-- copy; they are reproduced as found. Confirm against the upstream module.
function export._chem(args)
	local f = args[1] or ''

	f = decode_entities(f) -- handle entity input (like &minus;): decode right away
	f = string.gsub(f, "–", "-") -- replace – (en dash) with - (hyphen)
	f = string.gsub(f, "−", "-") -- replace − (minus sign) with - (hyphen)

	local link = args['link'] or ""
	local auto = args['auto'] or ""
	local seen = {} -- symbols that have already been auto-linked
	local _debug = false

	-- Pieces are collected and joined once at the end.
	local out = {}
	local function add(piece)
		out[#out + 1] = piece
	end

	-- Replace ASCII hyphens by the minus sign (parentheses drop gsub's count).
	local function to_minus(s)
		return (s:gsub("-", "−"))
	end

	if link ~= '' then
		add("[[" .. link .. "|") -- wikilink start [[link|
	end

	for t, x in item(f) do
		if _debug then
			add(("\n* %d %s"):format(t, x))
		elseif t == T_ELEM then
			if auto == '' or not am[x] or seen[x] then
				add(x)
			else
				-- Link the first occurrence only. The link must be closed
				-- here, otherwise the rest of the formula becomes its label.
				-- NOTE(review): the target is the symbol itself, as in the
				-- code this replaces; the names stored in `am` are not used.
				add(("[[%s|%s]]"):format(x, x))
				seen[x] = true
			end
		elseif t == T_NUM then
			add(su("", x))
		elseif t == T_LINKOPEN or t == T_OPEN or t == T_CLOSE or t == T_NOCHANGE then
			-- [[Link| , ([{ , )]} and everything the tokenizer passes through
			add(x)
		elseif t == T_PM_CHARGE then
			add(su(to_minus(x), ""))
		elseif t == T_SUF_CHARGE then
			add(su(to_minus(x:match("[+-]")), x:match("%d+")))
		elseif t == T_SUF_CHARGE2 then
			-- x is like 2(2+): the first digit run is the suffix, the part
			-- after "(" is the charge.
			add(su(to_minus(x:match("%(%d*[+-]")):sub(2, -1), x:match("%d+")))
		elseif t == T_CHARGE then
			add(" ")
			local digits = x:match("%d+")
			if digits then
				add(digits) -- (+) and (-) have no digits; nil cannot be concatenated
			end
			add(to_minus(x:match("[%+-]")) .. " ")
		elseif t == T_CRYSTAL then
			-- x always starts with "*" (see the tokenizer rule); drop it.
			add(DotIt() .. x:sub(2))
		elseif t == T_SPECIAL then
			local symbol = SPECIAL_SYMBOLS[x:sub(2, 2)] -- x from \x
			if symbol then
				add(symbol)
			end
		elseif t == T_SPECIAL2 then -- \y{x}
			local kind = x:sub(2, 2) -- y from \y{x}
			if kind == "h" then
				add("η" .. x:match('%d+') .. "-") -- hapticity
			elseif kind == "m" then
				add("μ" .. x:match('%d+') .. "-") -- mu (bridging ligand)
			end
		elseif t == T_WATER then
			if x:match("^%*[%d.]") then
				add(DotIt() .. x:match("%f[%.%d]%d*%.?%d*%f[^%.%d%]]") .. "H2O")
			else
				add(DotIt() .. "H2O")
			end
		elseif t == T_UNDERSCORE then
			add(su("", to_minus(x):sub(3, -2))) -- x contains _{string}
		elseif t == T_CARET then
			add(su(to_minus(x):sub(3, -2), "")) -- x contains ^{string}
		elseif t == T_ARROW_R then
			add(" → ")
		elseif t == T_ARROW_EQ then
			add(" ⇌ ")
		else
			error('unreachable - ???') -- in fact, unreachable
		end
	end

	if link ~= '' then
		add("]]") -- wikilink closing ]]
	end

	local formula = table.concat(out)
	return mw.getCurrentFrame():extensionTag("templatestyles", "", {src = "chemical formula/styles.css"})
		.. ' ' .. formula .. ' '
end

--- Frame-based entry point.
-- Extracts the arguments from `frame` with getArgs and hands them to
-- export._chem, returning whatever that function returns.
-- @param frame  the frame object passed in by the caller
function export.chem(frame)
	return export._chem(getArgs(frame))
end

return export