User:Flubot/Adding sort key to Greek nouns

This script is tested.

Its function is adding the "sort=" parameter to Greek nouns' inflexion line. For example see this, and this diff.

A second function of this script is to replace the {{infl}} template in the inflection line with the {{el-noun}} template (e.g. this diff). It also generates the romanization and adds a note if the gender is missing (e.g. this diff).

We need:
 * 1) pywikipedia
 * 2) a list of entries to modify, named list-sort and in the same directory (pywikipedia). The headwords must be written without brackets and each line of text must have one headword.
 * 3) to run it, just type: python taxinomos.py

taxinomos.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import codecs
import re
import sys

import wikipedia

# Compiled patterns used by the script.  All patterns are raw strings so that
# the regex escapes (\{, \|, \= ...) are not interpreted as string escapes.

# Runs of punctuation/space characters that are stripped from a sort key.
spaces_tag = re.compile(r"([ \*\.,\'\-\(\)]*)")
# Typographic apostrophe, also stripped from a sort key.
apost_tag = re.compile(u'\\’')
# One line of the input list (everything up to the line break).
page_tag = re.compile(r'(.*)')

# {{el-noun...|sort=...}}: template that already carries a sort key.
# Group 'kleidi' is the existing "|sort=..." parameter.
# NOTE(review): the group names were lost when this page was extracted.
# 'kleidi' is the name read by the main loop; 'params' is a placeholder for
# the lost name of the leading-parameters group -- confirm against the
# original script.
pos2_tag = re.compile(
    r'\{\{el-noun(?P<params>.*)(?P<kleidi>\|sort\=[^\}]*)\}\}')
# {{el-noun|...}}: template with parameters but (when pos2_tag did not match)
# no sort key.  The second group swallows trailing empty pipes.
pos1_tag = re.compile(r'\{\{el-noun\|(?P<params>[^\}]*)(\|*)\}\}')
# Any line containing an el-noun template.
pos3_tag = re.compile(r'\{\{el-noun')
# Two or more consecutive pipes (collapsed to a single one).
pos4_tag = re.compile(r'\|(\|+)')
# "sc=Grek|" script parameter (one or more 'e').
sc_tag = re.compile(r'sc=Gr(e+)k\|')
# Start of a generic {{infl|el|noun|...}} headword template.
infl_tag = re.compile(r'\{\{infl\|el\|noun\|')
# Value of a "tr=" parameter.
tr_tag = re.compile(r'tr\=([^\|\}]*)')
# Value of a "g=" (gender) parameter.
genos_tag = re.compile(r'g=([^\|\}]*)')
# Disabled pattern fragment kept from the original script (its group name was
# also lost in extraction): ((?P<...>\|αριθ=[0-9]+)

# While True, the main loop asks for confirmation before saving each changed
# page; answering "all" at that prompt sets this flag to False.
debug_bul = True

# Mapping tables: the n-th space-separated entry of ch_apo (Greek) is
# romanized as the n-th entry of ch_se.  Multi-letter sequences are listed
# before the single letters they contain, because roman() applies the
# replacements strictly in table order.
# NOTE(review): the entry for 'αύ' is 'áφ' (with a Greek phi); the later
# 'φ' -> 'f' entry turns it into 'áf', so the final output is unaffected.
ch_apo = u'γγ γχ γξ αύω αύο αύε αυγ αυδ αυλ αυμ αυν αυρ αύγ αύδ αύζ αύλ αύμ αύν αύρ αυ αύ ευη ευή ευα ευά εύω εύε ευό ευο ευί ευβ ευγ ευδ ευζ ευλ ευμ ευν ευρ εύβ εύγ εύδ εύζ εύλ εύμ εύν εύρ ευ εύ ϊ α ά β γ δ ε έ ζ η ή θ ι ί κ λ μ ν ξ ου ού ο π ρ σ ς τ υ ύ ϋ φ χ ψ ω ώ ΐ ΰ'
ch_se = u'ng nch nx ávo ávo áve avg avd avl avm avn avr ávg ávd ávz ávl ávm ávn ávr af áφ evi eví eva evá évo éve evó evo eví ev evg evd evz evl evm evn evr év évg évd évz évl évm évn évr ef éf ï a á v g d e é z i í th i í k l m n x ou οú o p r s s t y ý ÿ f ch ps o ó ḯ ÿ́'

trapo = ch_apo.split(u' ')
trse = ch_se.split(u' ')


def roman(trnsl):
    """Return *trnsl* with Greek letters replaced by their romanization.

    Every entry of ``trapo`` found in the text is replaced by the entry of
    ``trse`` at the same position.  The replacements run one after another in
    table order, so each pass sees the output of the previous ones.
    Characters that appear in no table entry are left unchanged.
    """
    for greek, latin in zip(trapo, trse):
        trnsl = trnsl.replace(greek, latin)
    return trnsl

# Folding table used when building sort keys: each character of mtg_apo
# (accented vowels, vowels with diaeresis, final sigma) is mapped to the
# character of mtg_se at the same position.
mtg_apo = u'ά έ ή ί ϊ ΐ ό ύ ϋ ΰ ώ ς'
mtg_se = u'α ε η ι ι ι ο υ υ υ ω σ'

# The translate() method of a unicode string expects a dict keyed by code
# point, so the source characters are converted with ord().
trkeys = [ord(ch) for ch in mtg_apo.split(u' ')]
trvals = mtg_se.split(u' ')
trtable = dict(zip(trkeys, trvals))

def _sort_key(title):
    """Return the ``|sort=...`` template parameter for *title*.

    The key is the lower-cased title passed through ``trtable`` with every
    match of ``spaces_tag`` and ``apost_tag`` removed.  When that key is
    identical to the title no explicit sort key is needed and an empty
    string is returned.
    """
    key = title.lower().translate(trtable)
    key = spaces_tag.sub('', key)
    key = apost_tag.sub('', key)
    if key == title:
        return u''
    return u'|sort=' + key


def _rewrite_line(seira, title, kleida):
    """Return the page line *seira* with its headword template updated.

    *title* is the entry name and *kleida* the ``|sort=...`` parameter to
    put into the template (empty to drop an existing sort key).  Lines that
    contain neither an ``{{infl|el|noun|`` nor an ``{{el-noun`` template are
    returned unchanged.
    """
    if infl_tag.search(seira):
        g1 = genos_tag.search(seira)
        gen = g1.group(1) if g1 else "gender missing"
        # NOTE(review): the replacement text was lost when this page was
        # extracted (only '' survived).  It is rebuilt here from the values
        # computed just above (gender, romanized title); confirm the exact
        # parameter layout against the original script before running.
        seira = u'{{el-noun|g=' + gen + u'|tr=' + roman(title) + u'}}'

    # NOTE(review): the two replacement strings below were lost in the same
    # way; they are rebuilt as "template + captured parameters + new key".
    # Groups are read by position so that this does not depend on the
    # (also lost) name of the parameters group.
    result = pos2_tag.search(seira)
    if result:
        if result.group('kleidi'):
            # Replace (or drop) the sort key that is already present.
            seira = pos2_tag.sub(
                lambda m: u'{{el-noun' + m.group(1) + kleida + u'}}', seira)
    else:
        # No sort key yet: append the new one after the parameters.
        seira = pos1_tag.sub(
            lambda m: u'{{el-noun|' + m.group(1) + kleida + u'}}', seira)

    if pos3_tag.search(seira):
        # Drop the "sc=Grek|" parameter and collapse runs of pipes.
        seira = sc_tag.sub(u'|', seira)
        seira = pos4_tag.sub(u'|', seira)
    return seira


# Main loop: read one headword per line from 'list-sort', rewrite the
# headword template of that entry and save the page if anything changed.
with codecs.open('list-sort', 'r', 'utf-8') as fin:
    for line in fin:
        t = page_tag.search(line).group(1)
        if not t:
            # Blank line in the list: there is no entry to process.
            continue
        print(t)

        kleida = _sort_key(t)
        if kleida:
            wikipedia.setAction('Adding sort key')
        else:
            wikipedia.setAction(u'remove wrong or reduntant sort key')

        # Which entry?
        page = wikipedia.Page(wikipedia.getSite(), t)
        arxiko = page.get(get_redirect=True)

        # Replacements
        keimeno = "\n".join(
            [_rewrite_line(seira, t, kleida) for seira in arxiko.split("\n")])

        # Compare with the text fetched above instead of calling page.get()
        # a second time.
        if keimeno == arxiko:
            continue

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # Show what was changed.
        wikipedia.showDiff(arxiko, keimeno)

        if debug_bul:
            choice = wikipedia.inputChoice(
                u'Do you want to accept these changes?',
                ['yes', 'No', 'all'], ['y', 'N', 'a'], 'N')
            if choice == 'a':
                # Accept this and every following change without asking.
                choice = 'y'
                debug_bul = False
            elif choice != 'y':
                choice = 'n'
        else:
            # Confirmations are switched off: accept automatically.
            choice = 'y'

        # Saving changes
        if choice == 'y':
            try:
                page.put(keimeno)
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Skipping %s because it is a redirect'
                                 % (page.title()))
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict'
                                 % (page.title()))
            except wikipedia.SpamfilterError as error:
                wikipedia.output(
                    u'Cannot change %s because of spam blacklist entry %s'
                    % (page.title(), error.url))