# User:Surjection/probotector.py

from datetime import datetime, timedelta

from pywikibot import Site, Page, Category, Timestamp

# Site handle for English Wiktionary; every page lookup and protection
# request in this script goes through it.
enwikt = Site('en', fam='wiktionary')
# login is a method: merely referencing it (without parentheses) does nothing,
# so it has to be called for the bot account to be authenticated up front.
enwikt.login()

# Namespace ids whose pages are scanned as protection candidates.
NAMESPACES = (
    828,  # Module
    10,  # Template
)

# Weighted transclusion score at which a page gets protected.
SCORE_TO_PROTECT = 1000

# Weight of a transcluding page, keyed by its namespace id. Content pages
# (mainspace, reconstruction) count two-fold; the scan loop falls back to a
# weight of 1 for namespaces not listed here.
TRANSCLUDED_MULTIPLIER = dict.fromkeys((0, 118), 2)

# Run-mode switches. While DEBUG or LOG_ONLY is set, do_protect() only prints
# the protection it would apply; LOG_ALL makes the scan print every page.
DEBUG = False
LOG_ONLY = True
LOG_ALL = False

# Known protection levels. The protection checks compare positions in this
# list, so a lower index is treated as a weaker level.
LEVELS = [
    "all",
    "autoconfirmed",
    "editautopatrolprotected",
    "templateeditor",
    "sysop",
]

# Minimum level enforced for editing and for moving, respectively.
PROTECT_EDIT_AT = PROTECT_MOVE_AT = "autoconfirmed"

def insufficient_protection(page):
    """Return True when *page* is edit-protected below PROTECT_EDIT_AT.

    Args:
        page: a pywikibot page whose current protection is inspected.

    Returns:
        True if the page has no edit protection, if its edit level is 'all',
        or if its edit level is listed in LEVELS before PROTECT_EDIT_AT.
        False otherwise, including for levels that are not in LEVELS.
    """
    # protection() is a method and must be called to obtain the mapping.
    entry = page.protection().get('edit')
    if not entry:
        return True
    # pywikibot reports each protection as a (level, expiry) tuple; only the
    # level takes part in the comparison. Plain strings are accepted as well.
    edit_level = entry[0] if isinstance(entry, tuple) else entry
    if edit_level == 'all':
        return True
    return edit_level in LEVELS and LEVELS.index(edit_level) < LEVELS.index(PROTECT_EDIT_AT)

def do_protect(page, score):
    """Raise the edit/move protection of *page* to the configured minimums.

    A level that is already at or above its minimum, or that is not listed
    in LEVELS, is passed through unchanged. While DEBUG or LOG_ONLY is set
    the intended protection is only printed, not applied.

    Args:
        page: the pywikibot page to protect.
        score: the transclusion score that triggered the protection; it is
            only used in the protection reason.
    """
    reason = (
        '(bot) automatically protect highly visible templates/modules '
        f'(reference score: {score}+ >= {SCORE_TO_PROTECT})'
    )

    # protection() is a method; call it once and reuse the mapping.
    current = page.protection()

    protections = {}
    for kind, minimum in (('edit', PROTECT_EDIT_AT), ('move', PROTECT_MOVE_AT)):
        entry = current.get(kind)
        # pywikibot reports each protection as a (level, expiry) tuple; an
        # absent or empty entry means the action is unrestricted ('all').
        level = (entry[0] if isinstance(entry, tuple) else entry) or 'all'
        if level in LEVELS and LEVELS.index(level) < LEVELS.index(minimum):
            level = minimum
        protections[kind] = level

    # NOTE(review): existing expiry values are not forwarded to protect();
    # confirm that the default expiry is what is wanted here.
    if DEBUG or LOG_ONLY:
        print('\t', dict(page=page, reason=reason, protections=protections))
    else:
        page.protect(reason=reason, protections=protections)

def ignore_page_by_title(title):
    """Return True when the page named *title* must be left alone.

    Args:
        title: full page title including its namespace prefix,
            e.g. 'Template:foo/documentation'.

    Returns:
        True for documentation/sandbox/testcases subpages, user
        templates/modules, the sandbox pages and their subpages,
        ':zh/data/' pages and ':User xx-N' style pages; False otherwise.
    """
    # Only the subpage-suffix check depends on the title containing a slash.
    # Applying that guard to the whole function would make the slash-less
    # cases below (e.g. 'Template:sandbox') unreachable.
    if '/' in title and title.rsplit('/', 1)[-1] in {'documentation', 'sandbox', 'testcases'}:
        # don't mess with documentations, sandboxes or testcases
        return True
    if title.startswith(('Template:User:', 'Module:User:')):
        # don't mess with user templates/modules
        return True
    if title in ('Template:sandbox', 'Module:sandbox'):
        # don't mess with sandboxes
        return True
    if title.startswith(('Template:sandbox/', 'Module:sandbox/')):
        # don't mess with sandboxes
        return True
    if ':zh/data/' in title:
        # literally thousands of these - just skip them all
        return True
    if ':User ' in title and '-' in title:
        # literally thousands of these - just skip them all
        return True
    return False

# Walk every page of each configured namespace and protect those whose
# weighted transclusion score reaches SCORE_TO_PROTECT.
# NOTE: allpages() also accepts a start= title, which can be used to resume
# a scan that was interrupted part-way through a namespace.
for namespace in NAMESPACES:
    print(namespace)
    for page in enwikt.allpages(namespace=namespace):
        try:
            # title() is a method; the helpers below expect the title string.
            title = page.title()
            if LOG_ALL:
                print(title)
            if ignore_page_by_title(title) or not insufficient_protection(page):
                continue

            # Fetch at most SCORE_TO_PROTECT transcluding pages: that many
            # backlinks are always enough to reach the threshold.
            backlink_iterator = page.getReferences(
                follow_redirects=True,
                with_template_inclusion=True,
                only_template_inclusion=True,
                filter_redirects=False,
                total=SCORE_TO_PROTECT,
                content=False,
            )
            # namespace() is a method returning the namespace object, whose
            # id selects the weight; unlisted namespaces count once.
            score = sum(
                TRANSCLUDED_MULTIPLIER.get(backlinker.namespace().id, 1)
                for backlinker in backlink_iterator
            )
            if score >= SCORE_TO_PROTECT:
                do_protect(page, score)
        except KeyboardInterrupt:
            # Show which page the scan was on when it was interrupted.
            print(page.title())
            raise