# User:Commander Keane/Audio workflow/xFileGenBot10.py

from __future__ import absolute_import, print_function, unicode_literals

import io
import os
import os.path
import re

import pywikibot
from pywikibot import pagegenerators
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Dumps lists from Categories into C:\Users\jim\pywikitest
# Dumps lists from Categories into C:\Users\jim\pywikitest

# Module-level settings read by fileGenBot and by main().
recurseGlobal = 0  # passed as recurse= to CategorizedPageGenerator (was 5)
lines_per_file = 400  # lines written to each split file (was 300)
wikiCheck = False  # look up each article to see if audio already exists

# Audio-file extensions that checkArticle() searches for in page text.
_AUDIO_EXT_RE = re.compile(r'\.og[ga]')


class fileGenBot:
    """Collect page titles from a wiki category and write them to text files.

    The methods are used in sequence by ``main()``: ``visitCat2`` builds the
    title list, ``checkArticle`` optionally filters it, ``printListToFile``
    writes it to disk and ``splitBigFile`` cuts that file into smaller ones.
    """

    def __init__(self):
        """Announce on stdout that the bot has been created."""
        print()
        print("fileGenBot Initialsed")
        print()

    def visitCat2(self, catNameOnly):
        """Return the titles of the pages in a category on English Wiktionary.

        Args:
            catNameOnly: category title handed to ``pywikibot.Category``.

        Returns:
            A list of page titles in the order the generator yields them.
        """
        # Original author's note: leave the arguments blank for en.wp.
        site = pywikibot.Site("en", "wiktionary")
        print("Cat name only: " + str(catNameOnly))
        cat = pywikibot.Category(site, catNameOnly)
        members = pagegenerators.CategorizedPageGenerator(
            cat, recurse=recurseGlobal)
        # page.title() replaces slicing str(page) at fixed offsets, which only
        # worked while the "[[wiktionary:en:" prefix was exactly 16 characters.
        return [page.title() for page in members]

    def printListToFile(self, listGenerated, baseDir='C:\\Users\\jim\\pywikitest\\',
                        title=None):
        """Write one title per line into a new directory and return the path.

        The file is ``<baseDir>/<title>/<title>.txt``.  If that directory
        already exists, a number starting at 2 is appended to both the
        directory and the file name until an unused directory is found.

        Args:
            listGenerated: iterable of text lines to write.
            baseDir: directory under which the new directory is created.
            title: name used for the directory and file; when omitted the
                module-level ``fileTitle`` (set by ``main()``) is used.

        Returns:
            The path of the file that was written.
        """
        if title is None:
            title = fileTitle
        baseName = str(title)
        name = baseName
        newDir = os.path.join(baseDir, name)
        dirNum = 1
        while os.path.isdir(newDir):
            dirNum += 1
            name = baseName + str(dirNum)
            newDir = os.path.join(baseDir, name)
        os.makedirs(newDir)

        newFileName = os.path.join(newDir, name + '.txt')
        # io.open does the UTF-8 encoding, so lines are written as text.
        with io.open(newFileName, "a", encoding='utf8') as outFile:
            for line in listGenerated:
                outFile.write(line + '\n')
        return newFileName

    def checkArticle(self, bigList, torF):
        """Optionally drop titles whose page text already mentions audio.

        Args:
            bigList: list of page titles.
            torF: when false, ``bigList`` is returned untouched; when true,
                each page's text is read and titles whose text contains
                ``.ogg`` or ``.oga`` are left out.

        Returns:
            ``bigList`` itself, or a new filtered list.
        """
        if not torF:
            return bigList
        # Same site visitCat2() reads the titles from; the original assigned
        # the pywikibot.Site factory itself instead of calling it.
        site = pywikibot.Site("en", "wiktionary")
        return [line for line in bigList
                if not _AUDIO_EXT_RE.search(pywikibot.Page(site, line).text)]

    def petScan2simpleListFile(self, petScanPath=None):
        """Print every line of a PetScan export that starts with ``| [[``.

        Args:
            petScanPath: file to read; when omitted the module-level
                ``petScanFile`` is used, which must then have been defined.
        """
        if petScanPath is None:
            petScanPath = petScanFile
        with io.open(petScanPath, "r", encoding='utf8') as longFile:
            for line in longFile:
                # The original compared a 3-character slice with this
                # 4-character prefix, so no line could ever match.
                if line.startswith("| [["):
                    print(line.rstrip('\n'))

    def splitBigFile(self, newFileName, shortcutsFile=None):
        """Split a text file into files of ``lines_per_file`` lines each.

        Each piece is written next to the input as ``<stem>_sf_<N>.txt``,
        where ``N`` is the line offset of the piece plus ``lines_per_file``.
        The name of the last piece, without directory or extension, is then
        appended to the shortcuts file.

        Args:
            newFileName: path of the UTF-8 text file to split.
            shortcutsFile: file that receives the name of the last piece;
                defaults to the author's "Generated lists.txt".
        """
        if shortcutsFile is None:
            shortcutsFile = (r'C:\Users\jim\pywikibot\Command shortcuts'
                             r'\Generated lists.txt')
        startOfFileName = os.path.splitext(newFileName)[0]
        smallfile = None
        small_filename = None
        try:
            with io.open(newFileName, encoding='utf8') as bigfile:
                for lineno, line in enumerate(bigfile):
                    if lineno % lines_per_file == 0:
                        if smallfile:
                            smallfile.close()
                        small_filename = startOfFileName + '_sf_{}.txt'.format(
                            lineno + lines_per_file)
                        smallfile = io.open(small_filename, "w",
                                            encoding='utf8')
                    smallfile.write(line)
        finally:
            # The original referenced .close without calling it, so the
            # pieces were never explicitly closed.
            if smallfile:
                smallfile.close()

        if small_filename:
            folderNameStripped = os.path.splitext(
                os.path.basename(small_filename))[0]
            with io.open(shortcutsFile, "a", encoding='utf8') as folderNames:
                folderNames.write(folderNameStripped + ' \n')

def main(*args):
    """Generate the word-list files for the category named on the command line.

    The first non-global argument left over by ``pywikibot.handle_args`` is
    used as the category name and stored in the module-level ``fileTitle``,
    which ``printListToFile`` reads to name its output.

    Args:
        *args: command-line arguments; passed to ``pywikibot.handle_args``.
    """
    local_args = pywikibot.handle_args(args)
    if not local_args:
        # The original indexed args[0] unconditionally and raised IndexError.
        print("No category name given; pass it as the first argument.")
        return

    argCatName = local_args[0]
    print("arg cat name: " + str(argCatName))
    global fileTitle
    fileTitle = argCatName

    # Instantiate the bot; the original bound the class itself, so the method
    # calls below were made without an instance.
    bot = fileGenBot()
    listFromCat = bot.visitCat2(argCatName)

    # wikiCheck False skips the lookup for existing audio.
    list2 = bot.checkArticle(listFromCat, wikiCheck)
    fileName1 = bot.printListToFile(list2)
    bot.splitBigFile(fileName1)


if __name__ == '__main__':
    main()