User:Interwicket/code/iwlinks


#!/usr/bin/python
# -*- coding: utf-8  -*-
# wikipath en wiktionary User:Interwicket/code/iwlinks

import re

import wikipedia

# Spans whose content must not be scanned for links: <nowiki>, HTML comments
# and <math>.  Matching is case-insensitive and may cross line boundaries.
# NOTE(review): the tag names were lost when this file was extracted from the
# wiki page (the pattern had degraded to ' .*? ||$$.*?$$', which contains an
# empty alternative and also eats ordinary space-delimited text).  The three
# alternatives below are reconstructed from that residue -- confirm against
# the original source.
renotags = re.compile(
    r'<nowiki>.*?</nowiki>|<!--.*?-->|<math>.*?</math>',
    re.IGNORECASE | re.DOTALL)

# Interwiki link: [[code:title]]
#  - code is 2 to 10 characters of a-z or '-'
#  - title is any non-empty run without brackets or newline (a '|' and
#    whatever follows it is therefore included in the title, to be removed)
#  - various other errors are ignored
reiwiki = re.compile(r'\[\[([a-z-]{2,10}):([^\[\]\n]+)\]\]')


# routine to get iwiki links from entry text
# return dict of code->title
# ignores unknown codes; ignores duplicate codes (returns last found)
# explicit deletes are returned so we can remove them and reflect that in edit summary

def getiwlinks(text, flws):
    """Return the interwiki links found in entry text as a dict code -> title.

    text -- wikitext of the entry
    flws -- mapping keyed by language code; each value is read for its
            `lockedwikt` and `deletecode` attributes

    Spans matched by `renotags` are dropped before scanning.  Codes that are
    not keys of `flws` are ignored.  When a code occurs more than once the
    last title found wins.  A link whose code has `lockedwikt` set is skipped
    unless `deletecode` is also set.
    """
    mt = renotags.sub('', text)

    links = {}
    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        if flws[code].lockedwikt and not flws[code].deletecode:
            continue
        links[code] = title

    return links

def replaceiwlinks(text, links, flw, flws):
    """Return text with its interwiki links replaced by the given set.

    text  -- wikitext of the entry
    links -- dict code -> title of the links wanted; not modified
    flw   -- object for the wiki being edited; read for `site`, `oneline`
             and `attop`
    flws  -- mapping keyed by language code; each value is read for its
             `lockedwikt` and `deletecode` attributes

    Every existing [[code:title]] link whose code is a key of `flws` is
    removed from the text, then the wanted links are re-emitted as one block,
    either before the text (`flw.attop`) or after it.
    """
    links = links.copy()  # private copy (shallow, okay)

    # same scan as getiwlinks for finding the old links, but different action
    # duplicate codes are silently elided (probably not best, but as before)
    mt = renotags.sub('', text)

    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        text = re.sub(r'\[\[' + code + ':' + re.escape(title) + r'\]\]\s*',
                      '', text)

        # no add or remove links to locked wikts (mostly harmless, but not
        # worth it); do remove explicit deletes
        if (flws[code].lockedwikt and not flws[code].deletecode
                and code not in links):
            links[code] = title

    # strip WS at bottom (and top for pl.wikt)
    text = text.strip('\n ')

    # sort if needed
    pf = flw.site.interwiki_putfirst
    # NOTE(review): the extraction of this file dropped empty call
    # parentheses, so this may originally have been a method call.
    # Accept both an attribute and a method -- confirm against the site class.
    if callable(pf):
        pf = pf()

    ordered = []
    if pf:
        for code in pf:
            if code in links:
                ordered.append(code)
                del links[code]
    # remaining, or all in code order:
    ordered.extend(sorted(links))

    # restore the wanted title for every code, including the put-first ones
    # that were deleted from `links` above
    titles = dict(links)
    linklist = []
    for code in ordered:
        # put-first codes were removed from `links`; look them up in the
        # caller-independent private copy taken before deletion
        linklist.append('[[' + code + ':' + titles.get(code, '') + ']]')

    if flw.oneline:
        ls = ' '.join(linklist)
    else:
        ls = '\n'.join(linklist)

    if flw.attop:
        newt = ls + '\n' + text
    else:
        newt = text + '\n\n' + ls

    return newt


# test code

if __name__ == "__main__":
    # Manual test driver: fetches one live page, rewrites its interwiki
    # links and SAVES the result back with page.put().

    from reciprocal import flws

    # init all the flws:
    for code in flws['en'].site.family.langs:
        foo = flws[code]  # value unused; the lookup itself is the point

    code = 'sw'
    title = 'cat'

    # single-argument print(...) gives the same output as the print
    # statement and parses on both Python 2 and 3
    print("sh status %s locked %s" % (flws['sh'].status,
                                      flws['sh'].lockedwikt))

    # get some page, try a few things
    page = wikipedia.Page(flws[code].site, title)
    text = page.get()

    links = getiwlinks(text, flws)
    print("%s : %r" % (title, links))

    # if 'ta' in links: del links['ta']
    # links['sw'] = title

    # so following should be no-op
    if 'sh' in links:
        del links['sh']

    newt = replaceiwlinks(text, links, flws[code], flws)

    wikipedia.showDiff(text, newt)

    page.put(newt)