-- Module:IPA/X-SAMPA

local decode_entities = require("Module:string utilities").decode_entities

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- X-SAMPA data table, loaded read-only through mw.loadData.
-- As used in this file, each key is an X-SAMPA symbol and each value is a
-- table holding the IPA symbol at [1], plus the optional fields
-- with_descender, has_descender and is_diacritic.
local m_XSAMPA = mw.loadData('Module:IPA/data/X-SAMPA')

-- IPA <-> XSAMPA lookup tables
-- Maps an IPA symbol (and its alternative "with_descender" form, when the
-- data provides one) to the X-SAMPA symbol it came from. Filled lazily.
local i2x_lookup = {}

--- Fill i2x_lookup from m_XSAMPA on first use and return it.
-- Later calls return the already-built table without iterating the data again.
-- @return the IPA -> X-SAMPA lookup table (string keys, string values)
local function Populate_IPA_XSAMPA_LookupTables()
	-- i2x_lookup only ever receives string keys, so the length operator (#)
	-- reports 0 for it even when populated; next() is the reliable
	-- emptiness test for a hash-style table.
	if next(i2x_lookup) == nil then
		for XSAMPA_symbol, data in pairs(m_XSAMPA) do
			local IPA_symbol = data[1]
			i2x_lookup[IPA_symbol] = XSAMPA_symbol

			-- Some symbols have a second IPA form; map it to the same
			-- X-SAMPA symbol.
			local with_descender = data.with_descender
			if with_descender then
				i2x_lookup[with_descender] = XSAMPA_symbol
			end
		end
	end

	return i2x_lookup
end

--- Convert IPA text to X-SAMPA.
-- @param text  either a string of IPA, or a frame-like table whose args[1]
--              holds the text (the #invoke entry point)
-- @return the converted string. When called with a frame, the result is
--         additionally passed through Module:string/nowiki.
-- Raises an error if a frame is passed without a first argument.
function export.IPA_to_XSAMPA(text)
	local lookup = Populate_IPA_XSAMPA_LookupTables()

	local escape = false
	if type(text) == 'table' then -- a frame, extract args
		text = text.args[1]
		if text == nil then
			error("IPA_to_XSAMPA: parameter 1 (the text to convert) is missing")
		end
		-- "=" and "|" have to be written as numeric character references
		-- inside template arguments; turn them back into the literal
		-- characters before further processing.
		text = text:gsub('&#61;', '='):gsub('&#124;', '|')
		text = decode_entities(text) -- XXX
		escape = true
	end

	text = text:gsub('ːː', ':') -- this basically sums up m_symbols[2].XSAMPA
	-- Replace every character that has an entry in the lookup table;
	-- characters without an entry are left unchanged by gsub.
	text = mw.ustring.gsub(text, '.', lookup)

	if escape then
		text = require("Module:string/nowiki")(text)
	end
	return text
end

--- Convert X-SAMPA text to IPA.
-- @param text  either a string of X-SAMPA, or a frame-like table whose
--              args[1] holds the text (the #invoke entry point)
-- @return the converted string. HTML tags and character references found in
--         the input are passed through unchanged, and text wrapped in
--         "<" ... ">" is returned wrapped in "⟨" ... "⟩".
-- Raises an error if a frame is passed without a first argument.
function export.XSAMPA_to_IPA(text)
	local data = m_XSAMPA

	if type(text) == 'table' then -- a frame, extract args
		text = text.args[1]
		if text == nil then
			error("XSAMPA_to_IPA: parameter 1 (the text to convert) is missing")
		end
		text = decode_entities(text) -- XXX
	end

	-- Simpler function adapted from w:Module:Sandbox/Erutuon/X-SAMPA
	local output, characteristics = {}, {}
	local angle_bracket
	if text:sub(1, 1) == "<" and text:sub(-1) == ">" then
		table.insert(output, "⟨")
		angle_bracket = "⟩"
		text = text:sub(2, -2)
	end

	-- Placeholder character -> original text it stands for.
	local escaped = {}
	local next_placeholder = 0x1F600

	-- Replace every match of `pattern` in `str` with its own placeholder
	-- character and remember what it replaced. Each match needs a distinct
	-- placeholder; sharing one would restore every match as the last one seen.
	local function protect(str, pattern)
		return (str:gsub(
			pattern,
			function(match)
				local placeholder = mw.ustring.char(next_placeholder)
				next_placeholder = next_placeholder + 1
				escaped[placeholder] = match
				return placeholder
			end))
	end

	--[[	Replace
			-- HTML tags
			-- character entity references
			-- numeric character references
	]]
	text = protect(text, '<[^>]+>')
	text = protect(text, '&%a+;')
	text = protect(text, '&#%d+;')
	text = protect(text, '&#x%x+;')

	while #text > 0 do
		-- skip non-ASCII bytes (that is, multi-byte characters)
		text = text:gsub(
			'^[\128-\255]+',
			function (nonASCII)
				table.insert(output, nonASCII)
				return ""
			end)

		-- Nothing left after the non-ASCII run: done.
		if #text == 0 then
			break
		end

		-- Try the longest candidate first (4 bytes down to 1).
		for i = 4, 1, -1 do
			local potential_XSAMPA = text:sub(1, i)
			local result = data[potential_XSAMPA]
			local IPA, with_descender, has_descender, is_diacritic

			if result then
				IPA = result[1]
				with_descender = result.with_descender
				has_descender = result.has_descender
				is_diacritic = result.is_diacritic

				if with_descender then
					-- Go backwards through the transcription, skipping any
					-- diacritics. Stop at index 0 so that a symbol with no
					-- preceding base symbol does not index a nil entry.
					local k = #characteristics
					while k > 0 and characteristics[k].is_diacritic do
						k = k - 1
					end
					--[[	Look at the first non-diacritic symbol before the current symbol.
							If it has a descender, use the descender form of the current symbol. ]]
					local base = characteristics[k]
					if base and base.has_descender then
						IPA = with_descender
					end
				end
			end

			-- A single character with no usable mapping is copied as is;
			-- this also guarantees the loop always consumes input.
			if IPA == nil and i == 1 then
				IPA = potential_XSAMPA
			end

			if IPA then
				text = text:sub(i + 1)
				table.insert(output, IPA)
				table.insert(characteristics, { has_descender = has_descender, is_diacritic = is_diacritic } )
				break
			end
		end
	end

	if angle_bracket then
		table.insert(output, angle_bracket)
	end

	output = table.concat(output)
	-- Put the protected tags and references back: each multi-byte character
	-- that is a key of `escaped` is replaced by the text it stood for, and
	-- every other character is left alone.
	output = output:gsub("[\194-\244][\128-\191]+", escaped)

	return output
end

-- Hand the table of public functions (IPA_to_XSAMPA, XSAMPA_to_IPA) to the caller.
return export