User:Flubot/Adding DEFAULTSORT key to Greek words

This script is an adaptation of the one used on el.wiktionary to add sorting keys to Greek words. It inserts the DEFAULTSORT key immediately before the interwiki links.

defaultsort.py
# -*- coding: utf-8  -*-
u"""
Much of this code was stolen from cosmetic_changes.py.

"""
__version__ = '$Id: default_kleidaTaksinomhshs.py 4260 2007-09-12 22:12:11Z wikipedian $'

# Standard library.
import re
import string
import sys

# pywikipedia framework modules.
import pagegenerators
import wikipedia

# Caution text; main() prepends it to its "Do you want to continue?" prompt.
warning = u"""If you are running this bot outside en.wiktionary, please reconsider"""

# Placeholder -> replacement text pairs.
# NOTE(review): nothing in this file reads this dict; presumably the
# framework's help output substitutes these into the module docstring - confirm.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}

# Summary message when using this module as a stand-alone script
msg_standalone = {
    'en': u'Bot: defaultsort key',
}

class EpiloghKleidaTaksinomhshsToolkit:
    """Compute and apply the ``{{DEFAULTSORT:...}}`` key for a single page.

    The key is derived from the page title: lower-cased, stripped of
    diacritics, final sigma folded to a regular sigma, and reduced to the
    24 plain lower-case Greek letters.
    """

    def __init__(self, site, title, exceptions=None, debug=False):
        """Store the page context.

        site       -- site object; used for interwiki handling and for
                      ``site.family.category_text_separator``.
        title      -- page title (unicode) the sort key is derived from.
        exceptions -- stored on the instance; not used by this class.
        debug      -- when true, allagh() shows a diff of its changes.
        """
        self.site = site
        self.debug = debug
        self.title = title
        # A fresh list per instance avoids sharing one mutable default.
        self.exceptions = [] if exceptions is None else exceptions

    def allagh(self, keimeno):
        """Return *keimeno* with an up-to-date DEFAULTSORT template.

        If the text already carries a DEFAULTSORT whose key equals the one
        generated from the title, the text is returned untouched.
        Otherwise every existing DEFAULTSORT template is removed and the
        generated one is inserted before the interwiki links.
        """
        defaultsort_templ = re.compile(u'\\{\\{DEFAULTSORT\\:([^\\}]*)\\}\\}')
        protypo = self.paragwghKleidaTaksinomhshsProtypo()

        yparxon = defaultsort_templ.search(keimeno)
        if yparxon:
            kleida = defaultsort_templ.search(protypo).group(1)
            if kleida == yparxon.group(1):
                return keimeno

        palioKeimeno = keimeno
        # Remove the old template together with up to two line breaks that
        # follow it (i.e. the template's own line end plus one blank line).
        # Accepting both CRLF and LF, and a template that is followed by
        # other text, prevents a stale key from surviving next to the new one.
        protyporegexp = u'\\{\\{DEFAULTSORT\\:[^\\}]*\\}\\}(?:\\r?\\n){0,2}'
        keimeno = self.removeKleidaTaksinomhshsProtypo(keimeno, protyporegexp)
        keimeno = self.addKleidaTaksinomhshsProtypo(keimeno, protypo)
        if self.debug:
            wikipedia.showDiff(palioKeimeno, keimeno)
        return keimeno

    def paragwghKleidaTaksinomhshsProtypo(self):
        """Return the ``{{DEFAULTSORT:key}}`` template for ``self.title``.

        The key may be empty when the title contains no Greek letters.
        """
        # Local import: the file's import block does not provide unicodedata.
        import unicodedata

        parametros = self.title.lower()
        # Canonical decomposition splits every accented/breathed letter into
        # its base letter plus combining marks; dropping the marks covers all
        # monotonic and polytonic combinations without a hand-kept table.
        aposynthesh = unicodedata.normalize('NFD', parametros)
        parametros = u''.join(
            ch for ch in aposynthesh if not unicodedata.combining(ch))
        # Final sigma sorts as a regular sigma.
        parametros = parametros.replace(u'ς', u'σ')
        # Keep only the plain lower-case Greek letters.
        parametros = re.sub(u'[^αβγδεζηθικλμνξοπρστυφχψω]', u'', parametros)

        return u'{{DEFAULTSORT:%s}}' % parametros

    def removeKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""Return *keimeno* with every match of the regex *protypo* removed."""
        return re.sub(protypo, u'', keimeno)

    def addKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""Insert *protypo* into the wikitext right before interwiki links.

        An empty *protypo* means "generate the template from the title".
        The interwiki links are taken out, the template is appended between
        two category separators, and the links are put back after it.
        """
        site = self.site
        if not protypo:
            protypo = self.paragwghKleidaTaksinomhshsProtypo()

        diaxwristiko = site.family.category_text_separator
        interwiki = wikipedia.getLanguageLinks(keimeno, insite=site)
        textnoiws = (wikipedia.removeLanguageLinks(keimeno.strip(), site=site)
                     + diaxwristiko + protypo + diaxwristiko)
        return wikipedia.replaceLanguageLinks(textnoiws, interwiki, site=site)

class EpiloghKleidaTaksinomhshsBot:
    """Run the DEFAULTSORT toolkit over every page a generator yields."""

    def __init__(self, generator, exceptions=None, acceptall=False):
        """Store the run configuration and set the edit summary.

        generator  -- iterable of page objects to process.
        exceptions -- compiled regexes; a page whose text matches any of
                      them is skipped.
        acceptall  -- when true, changes are saved without asking.
        """
        self.generator = generator
        self.acceptall = acceptall
        # A fresh list per instance avoids sharing one mutable default.
        self.exceptions = [] if exceptions is None else exceptions
        # Load default summary message.
        wikipedia.setAction(
            wikipedia.translate(wikipedia.getSite(), msg_standalone))

    def checkExceptions(self, original_text):
        """Return the first substring matching an exception, else None.

        The exceptions are tried in order; the matched text of the first
        one that hits is returned.
        """
        for exception in self.exceptions:
            hit = exception.search(original_text)
            if hit:
                return hit.group(0)
        return None

    def treat(self, page):
        """Process one page: compute the change, confirm it, and save it.

        Missing, redirect and locked pages are reported and skipped.
        """
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            ccToolkit = EpiloghKleidaTaksinomhshsToolkit(
                page.site(), page.title(), debug=True)
            keimeno = page.get()
            # skip all pages that contain certain texts
            match = self.checkExceptions(keimeno)
            if match:
                wikipedia.output(u'Skipping %s because it contains %s'
                                 % (page.aslink(), match))
                return

            allages = ccToolkit.allagh(keimeno)
            if allages == keimeno:
                wikipedia.output(u"No changes for %s" % page.title())
                return

            # Defaults to "No" so the name is always bound below.
            choice = 'N'
            if not self.acceptall:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    # "All": stop asking for the rest of the run.
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                page.put(allages)
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist;!" % page.aslink())
        except wikipedia.IsRedirectPage:
            wikipedia.output("Page %s is a redirect, skipping." % page.aslink())
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked?!" % page.aslink())

    def run(self):
        """Treat every page produced by the generator.

        No page text is fetched here: treat() does its own fetch inside a
        try block, so a missing or redirect page cannot abort the run.
        """
        for page in self.generator:
            self.treat(page)

def main():
    """Parse the command line, build the page generator and run the bot.

    Options handled here:
      -except:TEXT   skip pages whose text contains TEXT (repeatable)
      -regex         treat the -except values as regular expressions
      -nocase        match the -except values case-insensitively
      -namespace:N   restrict to namespace N, a number or a name (repeatable)
      -page[:TITLE]  work on one page; without a title it is asked for
    Every other argument is handed to the pagegenerators factory.
    """
    # page generator
    gen = None
    PageTitles = []
    exceptions = []
    namespaces = []
    regex = False
    caseInsensitive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg == '-regex':
            regex = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
            else:
                wikipedia.showHelp()

    # Compile only after all arguments are read: -regex and -nocase may
    # appear after the -except options they apply to.
    flags = re.UNICODE
    if caseInsensitive:
        flags |= re.IGNORECASE
    exceptions = [
        re.compile(exception if regex else re.escape(exception), flags)
        for exception in exceptions
    ]

    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        wikipedia.showHelp()
    elif wikipedia.inputChoice(warning + u'\nDo you want to continue?',
                               ['yes', 'no'], ['y', 'N'], 'N') == 'y':
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = EpiloghKleidaTaksinomhshsBot(preloadingGen, exceptions)
        bot.run()

# Script entry point; wikipedia.stopme() is called even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()