# User:Scsbot/wikised

# Settings, all initially empty unless noted.  The command line, the login
# configuration and the master script fill them in, in that order of
# precedence; anything still empty afterwards is defaulted or reported
# during argument checking.

# Wiki location.  $login is appended to $baseurl to fetch the login form.
host=
baseurl=
login="?title=Special:Userlogin"

# Driver/data file, edit summary, and minor-edit flag.
driverscript=
reason=
minoredit=

# Page-creation policy flags.
okaytocreate=
mustcreate=

delay=			# sleep between edits (seconds)

# Check/edit scripts; $filter says whether the edit script is a pure filter.
checkscript=
editscript=
filter=
postcheckscript=

# Diff checking.  checkdiffs is deliberately left empty here: the master
# script only fills in settings that are still empty, so pre-setting "no"
# made its "checkdiffs" key impossible to honor.  The "no" default is
# applied later, together with the other defaults.
checkdiffs=
expecteddelete=
expectedinsert=

# Expanded unquoted so that the -f flag becomes a separate word.
rm="/bin/rm -f"

username=
password=

loginconfig=

masterscript=

# File searched by dbgrep for named login configurations.
configs="wikised.configs"


 * 1) argument parse

while expr "$1" : '^-' &gt; /dev/null do	case $1 in		-\?|-h|-help|--help)			echo $usage			echo "options:"			echo "	-host h	set host of wiki"			echo "	-url u	set full base URL of wiki"			echo "	-why m	set edit summary message"			echo "	-m	mark edits as minor"			echo "	-d f	set driver/data file"			echo "	-cre	okay to create new pages"			echo "	-new	don't overwrite old pages (must create)"			echo "	-s s	sleep for s seconds between edits"			echo "	-pre s	set pre-check script"			echo "	-post s	set post-check script"			echo "	-edit s	set main edit script"			echo "	-filter	edit script is a pure filter"			echo "	-chkd	check diffs"			echo "	-ins n	expected insert"			echo "	-del n	expected delete"			echo "	-user u	set username"			echo "	-pass p	set password"			echo "	-cfg c	login configuration c"			echo "	-f s	master setup script s"			echo "	-?,-h	print this help"			printedhelp=yes;;

-host)	host=$2			shift;;		-u|-url|-baseurl) baseurl=$2 shift;; -why|-reason|-message|-summary|-editsummary)			reason=$2			shift;;		-m|-minor) minoredit=yes;; -d|-driver|-driverscript|-data)			driverscript=$2			shift;;		-cre|-create|-okaytocreate) okaytocreate=yes;; -new|-mustcreate)			mustcreate=yes;;		-s|-sleep|-delay) delay=$2 shift;; -pre|-precheck|-precheckscript|-checkscript)			checkscript=$2			shift;;		-post|-postcheckscript) postcheckscript=$2 shift;; -edit|-editscript)			editscript=$2			shift;;		-filter) filter=yes;; -chkd|-checkdiff|-checkdiffs)			checkdiffs=yes;;		-ins|-nins|-ninsert|-expectedinsert) expectedinsert=$2 shift;; -del|-ndel|-ndelete|-expecteddelete)			expecteddelete=$2			shift;;		-user|-username) username=$2 shift;; -pass|-password)			password=$2			shift;;		-cfg|-config) loginconfig=$2 shift;; -f|-masterscript)			masterscript=$2			shift;;		*)	echo "$0: unrecognized option $1" 1&gt;&amp;2;; esac shift done

if test $# -ge 1 then if test -n "$masterscript" then	echo "master script specified twice (use -f or argument)" &gt;&amp;2 exit 1 fi masterscript=$1 shift fi


 * 1) login config shortcut (but don't override cmd line)

if test -n "$loginconfig" then ent=`dbgrep -i -k name $loginconfig $configs` if test -z "$ent" then echo "no such configuration $loginconfig" &gt;&amp;2 # exit? else nl=`echo "$ent" | wc -l` i=1 while test $i -le $nl do line=`echo "$ent" | line $i` k=`expr "$line" : '\([^ 	]*\).*'` v=`expr "$line" : '[^ 	]*[ 	]*\(.*\)'` v=`echo "$v" | sed 's/ *$//'` case $k in				host)	if test -z "$host"; then host=$v; fi;;				baseurl) if test -z "$baseurl"; then baseurl=$v; fi;; defaultlogin)					defaultlogin=$v;;				confirm) confirm=$v;; esac i=`expr $i + 1` done fi fi


 * 1) "master script" (but don't override cmd line)

if test -n "$masterscript" then if test ! -r "$masterscript" then echo "master script $masterscript does not exist or is not readable" &gt;&amp;2 exit fi

# would like to use "while read k v &lt; $masterscript", # but that sets vars in subshell and so is useless

nl=`wc -l &lt; $masterscript` i=1 while test $i -le $nl do line=`line $i $masterscript` k=`expr "$line" : '\([^ 	]*\).*'` v=`expr "$line" : '[^ 	]*[ 	]*\(.*\)'` v=`echo "$v" | sed 's/ *$//'` case $k in			host)	if test -z "$host"; then host=$v; fi;;			baseurl) if test -z "$baseurl"; then baseurl=$v; fi;; driverscript)	if test -z "$driverscript"; then driverscript=$v; fi;;			reason)	if test -z "$reason"; then reason=$v; fi;; minoredit)				if test -z "$minoredit"				then					 if test "$v" = "0" -o "$v" = "no" -o "$v" = "false"					then	minoredit=no					else	minoredit=yes					fi				fi;;			okaytocreate) if test -z "$okaytocreate" then if test "$v" = "0" -o "$v" = "no" -o "$v" = "false" then	okaytocreate=no else	okaytocreate=yes fi fi;; mustcreate)				if test -z "$mustcreate"				then					 if test "$v" = "0" -o "$v" = "no" -o "$v" = "false"					then	mustcreate=no					else	mustcreate=yes					fi				fi;;			delay)	if test -z "$delay"; then delay=$v; fi;; checkscript) if test -z "$checkscript"; then checkscript=$v; fi;;			editscript) if test -z "$editscript"; then editscript=$v; fi;; filter)				if test -z "$filter"				then					 if test "$v" = "0" -o "$v" = "no" -o "$v" = "false"					then	filter=no					else	filter=yes					fi				fi;;			postcheckscript) if test -z "$postcheckscript"; then postcheckscript=$v; fi;; checkdiffs)				if test -z "$checkdiffs"				then					 if test "$v" = "0" -o "$v" = "no" -o "$v" = "false"					then	checkdiffs=no					else	checkdiffs=yes					fi				fi;;			expecteddelete) if test -z "$expecteddelete"; then expecteddelete=$v; fi;; expectedinsert)	if test -z "$expectedinsert"; then expectedinsert=$v; fi;;			username)	if test -z "$username"; then username=$v; fi;; password)	if test -z "$password"; then password=$v; fi;;		esac		i=`expr $i + 1`	done fi


 * 1) Most args/flags are required.  Check them all.

errs=0

if test -z "$baseurl" then if test -n "$host" then	baseurl="http://$host/w/index.php" fi fi

if test -z "$baseurl" then echo "base url not specified; use -url, or -h to specify host" &gt;&amp;2 errs=`expr $errs + 1` fi

if test -z "$driverscript" then echo "driver script not specified; use -d to specify" &gt;&amp;2 errs=`expr $errs + 1` fi

# Optional settings that are still empty get their defaults here; a
# missing edit script is counted as an error.
: "${minoredit:=no}"
: "${okaytocreate:=no}"
: "${mustcreate:=no}"
: "${delay:=60}"

if test -z "$editscript"; then
  echo "edit script not specified; use -edit to specify" >&2
  errs=$((errs + 1))
fi

: "${filter:=no}"
: "${checkdiffs:=no}"

# Fall back to the login name given by the login configuration, if any.
if test -z "$username"; then
  username=$defaultlogin
fi

if test -z "$username"; then
  echo "login user name not specified; use -user to specify" >&2
  errs=$((${errs:-0} + 1))
fi

# Placeholder: nothing is looked up yet, the password is prompted for later.
if test -z "$password"; then
  : attempt to look up
fi

# Give up only now, so that every missing setting has been reported.
if test "${errs:-0}" -gt 0; then
  exit 1
fi

# readpassword: read one line from standard input into $password, exactly
# as typed (no backslash processing, no trimming of blanks).  When standard
# input is a terminal, echo is switched off while the password is typed and
# restored afterwards, also if the script is interrupted.
readpassword() {
  if test -t 0; then
    saved_tty=$(stty -g)
    trap 'stty "$saved_tty"; exit 1' 1 2 15
    stty -echo
    IFS= read -r password
    stty "$saved_tty"
    trap - 1 2 15
    echo
  else
    IFS= read -r password
  fi
}

# With no password yet, prompt for one; typing it doubles as the
# confirmation when the login configuration asks for one ($confirm = yes).
# With a password already given, ask for an explicit confirmation instead
# and stop unless the answer starts with "y" or "Y".
if test -z "$password"; then
  if test "$confirm" = yes; then
    printf '%s' "enter password for $username to edit live wiki: "
  else
    printf '%s' "enter password for $username: "
  fi
  readpassword
elif test "$confirm" = yes; then
  printf '%s' "Confirm editing live wiki: "
  read -r ans
  case $ans in
    [Yy]*) ;;
    *) exit 1;;
  esac
fi

# Per-run log files, named after the driver script plus a timestamp.
# (The format used to read %Y$m$d, which expanded the unset shell variables
# $m and $d and so dropped month and day from the name.)
logfilebase=$driverscript.$(date +%Y%m%d.%H%M)
acceptlog=$logfilebase.accept
rejectlog=$logfilebase.reject

# Scratch files: $tf receives fetched pages, $tf4 receives the replies to
# form submissions, and $tf.cook is the cookie file.
# NOTE(review): these names are predictable; consider mktemp if it is
# available everywhere this script has to run.
tf=/tmp/tmpfile$$
tf4=/tmp/tmpfile$$.4

# Remove the scratch files however the script ends; the many early exits
# would otherwise leave them (including the cookie file) behind.
trap '${rm:-/bin/rm -f} "$tf" "$tf4" "$tf.cook"' 0
trap 'exit 1' 1 2 15

# Log in: fetch the login page, fill in the userlogin form and submit it.
# Cookies are kept in $tf.cook for the requests that follow.

printf '%s' "Fetching login page..."
httpget -cookies -cookiefile "$tf.cook" "$baseurl$login" > "$tf"
echo " got it."

formsetup formtag$$ "$tf" || exit

formselect formtag$$ userlogin || exit

formcheckfields formtag$$ wpName wpPassword || exit

# Field values are supplied by writing them to the per-field files named
# by formgettmpfile.  They are written with printf and quoted so that
# blanks, wildcard characters or a leading "-" in the credentials are
# passed through unchanged.
namefile=$(formgettmpfile formtag$$ wpName)
passfile=$(formgettmpfile formtag$$ wpPassword)
printf '%s\n' "$username" > "$namefile"
printf '%s\n' "$password" > "$passfile"

formsubmitbutton formtag$$ wpLoginattempt || exit

printf '%s' "Submitting login information..."
formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
stat=$?
echo " got it."

if test $stat -ne 0; then
  exit 1
fi

# The reply page is the only indication of a rejected password.
if grep -q "Incorrect password entered" "$tf4"; then
  echo "bad password; login failed" >&2
  exit 1
fi

formfinish formtag$$



# Main loop: one edit per line of the driver script.  Each line holds an
# article title followed by optional parameters for the edit script.
# Articles that are skipped are appended to $rejectlog with the reason,
# successful edits to $acceptlog.

# Entity decoding applied to the textarea contents via "filter".
# XXX questionable who/where should do this unescaping, and how aggressively
# XXX it has to happen after both places where the form is (re)loaded; one
# XXX was missed at first, leading to a bad edit *not* caught by the
# XXX checkdiffs bad-edit check :-(
unescape="sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' -e 's/&quot;/\"/g'"

# logreject WHY: record the current article in the reject log.
logreject() {
  printf '%s\t%s\t%s\n' "$article" "$editscriptparms" "$1" >> "$rejectlog"
}

# The loop runs on the receiving end of a pipe, i.e. in a subshell: an
# "exit" inside it only ends the loop, and its status is picked up below.
cat "$driverscript" | while read -r article editscriptparms
do
  sleep "$delay"

  printf 'Fetching "%s" to edit...' "$article"
  httpget -cookies -cookiefile "$tf.cook" "$baseurl?title=$article&action=edit" > "$tf"
  echo " got it."

  if grep -q "<title>User is blocked" "$tf" ||
     grep -q "Your user name or IP address has been blocked" "$tf"
  then
    echo "bot user $username blocked!" >&2
    exit 1
  fi

  formsetup formtag$$ "$tf" || exit

  formselect formtag$$ editform || exit

  formcheckfields formtag$$ wpTextbox1 || exit

  tftextarea=$(formgettmpfile formtag$$ wpTextbox1)

  # Imperfect test for missing page, since it depends on text that can
  # be and typically is customized per wiki.  I'm using two different
  # strings from each of generic mediawiki, Wikipedia, and Wiktionary.
  # The failure mode if it fails to notice that the page doesn't exist
  # isn't too bad, because the check and/or edit scripts will usually
  # fail on a blank textarea.

  missing=no

  if grep -q "does not yet have an entry for $article" "$tf" ||
     grep -q "start .* entry.* type .* box .* click .*[Ss]ave.*changes .* visible immediately" "$tf" ||
     grep -q "followed a link to a page that doesn't exist yet" "$tf" ||
     grep -q "To create the page, start typing in the box below" "$tf" ||
     grep -q "does not have an article with this exact name" "$tf" ||
     grep -q "To start a page called $article, type .* box .* and .* [Ss]ave .* changes .* visible immediately" "$tf"
  then
    missing=yes
  fi

  if test "$mustcreate" = yes && test "$missing" = no; then
    echo "$article: page already exists" >&2
    formfinish formtag$$
    logreject "already exists"
    continue
  elif test "$okaytocreate" = no && test "$missing" = yes; then
    echo "$article: no such page yet" >&2
    formfinish formtag$$
    logreject "no such page"
    continue
  fi

  if grep -i -q '^#redirect' < "$tftextarea"; then
    formfinish formtag$$
    echo "$article is a redirect" >&2
    logreject "is redirect"
    continue
  fi

  # Optional pre-check: the script reads the current page text on stdin
  # and vetoes the edit by failing.  $checkscript is left unquoted as
  # before, so it is still split into words.
  # NOTE(review): $checkscriptparms is not set anywhere in this script --
  # possibly $editscriptparms was meant; confirm before changing.
  if test -n "$checkscript"; then
    if sh $checkscript $checkscriptparms < "$tftextarea"; then
      :
    else
      formfinish formtag$$
      logreject "check script failure"
      continue
    fi
  fi

  filter "$unescape" "$tftextarea"

  # Run the edit script, either as a pure filter over the textarea file or
  # with the file name as its first argument (its stdout is then captured).
  edit_script_output=""

  if test "$filter" = "yes"; then
    filter "sh $editscript $editscriptparms" "$tftextarea"
    editstat=$?
  else
    edit_script_output=$(sh $editscript "$tftextarea" $editscriptparms)
    editstat=$?
  fi

  if test $editstat -ne 0; then
    printf '%s\n' "$edit_script_output" >&2
    formfinish formtag$$
    logreject "edit script failure"
    continue
  fi

  # The edit script may announce "expect N insertions" for this article.
  # The override is kept in a per-article variable so that it cannot leak
  # into the articles that follow.
  wantinsert=$expectedinsert

  if test -n "$edit_script_output"; then
    if printf '%s\n' "$edit_script_output" | grep -q 'expect.*insertions'; then
      wantinsert=$(expr "X$edit_script_output" : 'Xexpect \([0-9]*\) insertions')
    fi
  fi

  if test "$checkdiffs" = "yes"; then
    formsubmitbutton formtag$$ wpDiff || exit

    printf '%s' "Fetching diffs..."
    formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
    stat=$?
    echo " done."

    if test $stat -ne 0; then
      exit 1
    fi

    formfinish formtag$$

    ndel=$(grep -c "td class=['\"]diff-deletedline['\"]" "$tf4")
    nadd=$(grep -c "td class=['\"]diff-addedline['\"]" "$tf4")

    # Fail safe: go on only if both expectations are set, numeric and met.
    # An unset or malformed expectation used to make "test" itself fail,
    # which let the edit through unchecked.
    if test -n "$expecteddelete" && test -n "$wantinsert" &&
       test "$ndel" -eq "$expecteddelete" && test "$nadd" -eq "$wantinsert"
    then
      :
    else
      echo "edit would cause $nadd/$ndel insertions/deletions, but expected $wantinsert/$expecteddelete" >&2
      echo "Canceling." >&2
      logreject "diff check failure"
      continue
    fi

    # Continue from the form on the diff page.
    formsetup formtag$$ "$tf4" || exit

    formselect formtag$$ editform || exit

    formcheckfields formtag$$ wpTextbox1 || exit

    tftextarea=$(formgettmpfile formtag$$ wpTextbox1)

    filter "$unescape" "$tftextarea"
  fi

  # The watch-this field's file is removed.
  tfwatch=$(formgettmpfile formtag$$ wpWatchthis)
  if test -n "$tfwatch"; then
    $rm "$tfwatch"
  fi

  tfminor=$(formgettmpfile formtag$$ wpMinoredit)
  if test -n "$tfminor"; then
    if test "$minoredit" = yes; then
      echo 1 > "$tfminor"
    else
      $rm "$tfminor"
    fi
  fi

  # Edit summary: "edited by robot", or "edited by robot:" followed by the
  # given reason on a second line.
  tfsummary=$(formgettmpfile formtag$$ wpSummary)
  if test -n "$tfsummary"; then
    if test -n "$reason"; then
      printf 'edited by robot:\n%s\n' "$reason" > "$tfsummary"
    else
      echo "edited by robot" > "$tfsummary"
    fi
  fi

  formsubmitbutton formtag$$ wpSave || exit

  printf '%s' "Submitting edits..."
  formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
  stat=$?
  echo " done."

  if test $stat -ne 0; then
    exit 1
  fi

  formfinish formtag$$

  if grep -q "<title>.*[Ee]dit [Cc]onflict" "$tf4"; then
    echo "edit conflict" >&2
    logreject "edit conflict"
    continue
  fi

  printf '%s\t%s\n' "$article" "$editscriptparms" >> "$acceptlog"

done
loopstat=$?

$rm "$tf" "$tf4" "$tf.cook"

# Report an aborted run to the caller instead of the status of the cleanup.
exit $loopstat