-- Module:sa-convert/testcases

-- Test framework and the conversion function under test.
local tests = require('Module:UnitTests')
local tr = require('Module:sa-convert').tr

-- Language and script objects used to romanise test input and output.
local m_languages = require("Module:languages")
local lang = m_languages.getByCode("sa")
local m_scripts = require("Module:scripts")
local deva_sc = m_scripts.getByCode("Deva")

-- Fallback transliterators; each is loaded on first use by sc_xlit.
-- Each declaration sits on its own line so that the trailing comment cannot
-- swallow the next declaration and turn it into an accidental global.
local pali_fallback -- for transliteration.
local taml_fallback -- for transliteration.

-- The cases are defined by the following fields:
-- Deva: The Devanagari form of the word; this is the input to the transliteration.
-- Beng etc.: The result of transliterating to that script code. The following special script codes are
--           used: as-Beng. The special value "except" may be used to suppress the use of this test when
--           the field all is supplied. The special value "fakeit" will result in a test for that
--           script as though the field all had been supplied.
-- all: If this field is defined, the test is performed for all 'supported' scripts. In this case, if the
--     test result is not defined for a script, the test is the usually weaker test that the Devanagari
--     and the transliteration both transliterate to the same Latin-script text. Note that this fallback
--     test may wrongly fail for the Bengali and Lao scripts, so the required value of "except"
--     exceptionally disables the test.
-- Beng_why etc.: Justification for the required result being what it is.
-- Beng_whynot etc.: Counter-argument. This is intended for setting out an argument for the test being
--                    wrong until some form of consensus is arrived at.
-- link: Whether the non-Latin forms should be made into links.
-- aborts: Whether there is a significant risk of a conversion error causing the test as a whole
--        to halt. Such cases are processed after the others.

-- Table of test cases, one table per word. Every entry has a Deva field (the
-- conversion input); the remaining fields select which scripts are tested and
-- what result is required. Comments are kept on their own lines so that they
-- cannot swallow the entries that follow them.
local cases = {
	-- The first case, which may get commented out, is an example of how the test cases are set out.
	{
		Deva="निर्वाण", all=1, link = true,
		Beng="নির্ব্বাণ",
		Beng_why="Commenting on Mason's citation of the spelling of the word as निरव्वान on p10 of "..
			"Kaccayano's Pali Gramar, Mazard comments in a footnote in his edition of the work, "..
			'"Mason here follows the Bangladeshi convention, doubling the v in nirvana. This is not '..
			'commonly found today in either Romanized or Devanagari Sanskrit, but remains the norm '..
			'in the classical Bengali typeset (Sadhubasa)--a relatively recent development in the '..
			"presses of Mason's day (owed to Sir Charles Wilkins)."..'"',
		Beng_whynot="Modern Bengali writes নির্বাণ.",
	},
	-- Burmese spelling of the above is rare and highly variable on the web!
	{ Deva = "गङ्गा", all=1, Mymr="ဂင်္ဂါ", link=true},
	{ Deva="वीर", ["as-Beng"]="ৱীৰ", Beng="বীর", all=1, link=true},
	{ Deva="आचार्यैः", all=1, link=true},
	{ Deva="व्यञ्जन", ["as-Beng"]="fakeit", Beng="except", Mymr="fakeit", link=true},
	{ Deva="गोपन", all=1, link=true, Mymr="ဂေါပန"},
	{ Deva="प्राक्", all=1, link=true, Mymr="ပြာက်"},
	{
		Deva="क्रोध", all=1, link=true, Thai="โกฺรธ",
		Thai_why='RID gives etymology of โกรธ as "ส. โกฺรธ".'
	},
	{ Deva="आस्ये", all=1, link=true},
	{ Deva='सऋक्ष', all=1, link=true},
	{
		Deva="संस्कृतम्", all=1, Java="ꦱꦁꦱ꧀ꦏꦽꦠꦩ꧀", link=true,
		Java_why="See side panel at https://jv.wikipedia.org/wiki/Basa_Sangsekerta"
	},
	{ Deva="नीळ", all=1, link=true},
	{ Deva="विद्वांस्", link=true, Sinh="විද්‍වාංස්"},
	{ Deva="आक्रोशति", link=true, Sinh="ආක්‍රොශති"},
	{ Deva="अवोचत्", link=true, Sinh="අවොචත්"},
	{ Deva="अत्र", link=true, Sinh="අත්‍ර"},
	{ Deva="उपनह्यन्ते", link=true, Sinh="උපනහ්‍යන‍්තෙ"},
	{ Deva="प्रशाम्यति", link=true, Sinh="ප්‍රශාම්‍යති"},
	{ Deva="क्षान्त्या", link=true, Sinh="ක්‍ෂාන‍්ත්‍යා"},
	{ Deva="प्रज्ञा", all=1, link=true, Sinh="ප්‍රඥා"},
	{ Deva="प्रभङ्गुर", link=true, Sinh="ප්‍රභඞ‍්ගුර"},
	{ Deva="पण्डित", link=true, Sinh="පණ‍්ඩිත"},
	{ Deva="स्पन्दन", link=true, Sinh="ස‍්පන්‍දන"},
	{
		Deva="तम्बुद्धमनन्तगोचरं", link=true, Sinh="තම‍්බුද‍්ධමනන‍්තගොචරං",
		Sinh_why="See quotation for බුද‍්ධ"
	},
	{ Deva="मांस", all=1, link=true},
	{ Deva="अंहु", all=1, link=true},
	{ Deva="दुःख", Taml="fakeit", link=true},
	{ Deva="जिघांसा", Taml="fakeit", link=true},
	{ Deva="हिंस", all=1, link=true},
	{ Deva="शत", all=1, link=true},
	{ all=1, link=true, Deva="दान"}, -- fields Taml and Taml_why would be useful.
	{ Deva="झञ्झा", all=1, link=true},
	{ Deva="यौवन", all=1, link=true},
	{ Deva="गौतम", all=1, link=true},
	{
		Deva="भावम्", link=true, Taml="ப⁴ாவம்",
		Taml_why="This is the form displayed on p3 of https://www.unicode.org/L2/L2010/10379--extended-tamil.pdf "..
			"and in https://www.unicode.org/L2/L2010/10407-ext-tamil-follow2.pdf we have the statement "..
			'"in most forms of Extended Tamil (including the Gita book mentioned previously running to almost '..
			"420,000 copies) "..
			'the diacritics are placed between the consonant and any vowel signs placed to the right".',
		Taml_whynot="Google search only finds பா⁴வம். -"..
			"https://corp.unicode.org/pipermail/unicode/2024-January/010740.html. Moroever, the form with "..
			"right matra last doesn't render properly."
	},
	{ Deva="धर्म", Taml="த⁴ர்ம", link=true},
	-- NOTE(review): the Taml_why texts below that end in "at ." or "at " look as
	-- though the reference they pointed to has been lost; restore it if known.
	{
		Deva="एकं", link=true, Taml="ஏகம்²",
		Taml_why="See quotation at .",
		Taml_whynot="Or use Grantha anusvara!",
	},
	{Deva="पापेभ्यो", Taml="பாபேப்⁴யோ", link=false, Taml_why="See quotation at ."},
	{
		Deva="शुचः", all=1, link=false, Taml="ஶுச𑌃",
		Taml_why="See injunctive form in quotation at "
	},
	{ Deva="सर्व", Taml="ஸர்வ", link=true},
--	{ Deva="", all=1, link=true},

}

-- here be the tests

--- Romanise a converted string so that it can be compared with the romanised Devanagari.
-- The language's own transliteration is tried first; if that yields nothing,
-- Module:pi-translit is used instead. For the script code "Taml" the result is
-- always replaced by that of Module:sa-Taml-translit.
-- @param text     the string to romanise
-- @param sc_obj   script object of the target script
-- @param sc_name  script code of the target script
-- @return the romanisation, or "" if none could be produced
local function sc_xlit(text, sc_obj, sc_name)
	-- Use the arguments actually passed in, not whatever globals happen to be set.
	local sc_romn = (lang:transliterate(text, sc_obj)) or ""
	if sc_romn == "" then
		pali_fallback = pali_fallback or require("Module:pi-translit").tr
		sc_romn = pali_fallback(text, lang, sc_name) or ""
	end
	if sc_name == "Taml" then
		taml_fallback = taml_fallback or require("Module:sa-Taml-translit").tr
		sc_romn = taml_fallback(text, lang, sc_name) or ""
	end
	return sc_romn
end

--- Run the applicable test cases for one target script.
-- A case is tested when it names a required result for the script, when it
-- carries the value "fakeit" for the script, or when its `all` field is set;
-- the value "except" suppresses it. Where no required result is given, the
-- romanisation of the conversion is compared with the romanisation of the
-- Devanagari instead.
-- @param sc_name   script code to convert to
-- @param risk_end  truthy to run only the cases flagged `aborts`, falsy to run only the others
function tests:one_script(sc_name, risk_end)
	local sc = m_scripts.getByCode(sc_name)
	risk_end = not not risk_end -- Canonicalise
	-- cases is a plain sequence; ipairs keeps the report in definition order.
	for _, case in ipairs(cases) do
		if risk_end == not not case.aborts then
			local name, should, doit, just, counter
			should = case[sc_name]
			if should then
				if should == "except" then
					doit = false
				elseif should == "fakeit" then
					-- Test as though `all` had been set: no explicit expected value.
					doit = true
					should = nil
				else
					doit = true
				end
			else
				doit = case.all
			end
			if doit then
				local lp
				just = case[sc_name.."_why"]
				counter = case[sc_name.."_whynot"]
				-- Local, so that the conversion result does not leak into the global table.
				local res = tr(case.Deva, sc_name)
				-- NOTE(review): the source text between the assignment to lp and the
				-- wrapping of res was damaged. The two template prefixes and the
				-- `if should` head with its name/should assignments are a
				-- reconstruction -- confirm against the page history.
				if case.link then
					lp = "{{l|sa|"
				else
					lp = "{{lang|sa|"
				end
				if should then
					name = self.frame:preprocess(lp..case.Deva.."}}")
					should = self.frame:preprocess(lp..should.."}}")
					res = self.frame:preprocess(lp..res.."}}")
				else
					local deva_romn = (lang:transliterate(case.Deva, deva_sc)) or ""
					local sc_romn  = (deva_romn ~= "") and sc_xlit(res, sc, sc_name)
										or ""
					if sc_romn == "" and deva_romn ~= "" then
						doit = false; -- silently don't apply a test.
					else
						name = self.frame:preprocess(
							lp..case.Deva.."}} "..sc_name..": "..lp..res.."}}")
						res = sc_romn
						should = deva_romn
					end
				end
				if doit then
					if res ~= should then
						-- On a mismatch, show the arguments for and against beside the values.
						if just then
							should = should.." ("..self.frame:preprocess(just)..")"
						end
						if counter then
							res = res.." ("..self.frame:preprocess(counter)..")"
						end
					end
					tests:equals(name, res, should)
				end
			end
		end
	end
end

--- Run the test cases for every script that the language object lists, except Devanagari.
-- Presumably picked up by Module:UnitTests as a test method -- confirm against
-- that module's conventions.
function tests:test_all()
	local availableScripts = lang:getScripts()
	local scripts = {}
	for _, script in pairs(availableScripts) do
		scripts[script:getCode()] = 1;
	end
	-- New scripts can be added here.
	--	{Beng = 1, Mymr = 1, Thai = 1, ["as-Beng"] = 1}
	scripts.Deva = nil; -- Pointless and unsupported to boot.
	-- ipairs guarantees that the ordinary pass (false) runs before the pass over
	-- the cases flagged `aborts` (true); pairs gives no such ordering guarantee.
	for _, risk_all in ipairs({false, true}) do
		for sc, _ in pairs(scripts) do
			tests:one_script(sc, risk_all)
		end
	end
end

return tests