infocalypse

(djk)
2010-05-02: run_wikibot.py fixes.

run_wikibot.py fixes.

diff --git a/infocalypse/fmsbot.py b/infocalypse/fmsbot.py
--- a/infocalypse/fmsbot.py
+++ b/infocalypse/fmsbot.py
@@ -28,6 +28,11 @@ import time
 import fms
 from fms import IFmsMessageSink
 
+def make_bot_path(storage_dir, bot_name, file_name):
+    """ Helper function to make a bot instance specific file name. """
+    assert file_name.find(os.path.sep) == -1
+    return os.path.join(storage_dir, "%s_%s" %(bot_name, file_name))
+
 class FMSBotRunner(IFmsMessageSink):
     """ Container class which owns and runs one or more FMSBots. """
     def __init__(self, params):
@@ -166,9 +171,9 @@ class FMSBotRunner(IFmsMessageSink):
 
     def get_path(self, bot, fname):
         """ Get a bot specific path. """
-        assert fname.find(os.path.sep) == -1
-        return os.path.join(self.params['BOT_STORAGE_DIR'],
-                            "%s_%s" %(bot.name, fname))
+        return make_bot_path(self.params['BOT_STORAGE_DIR'],
+                             bot.name,
+                             fname)
 
     def queue_msg(self, msg_tuple):
         """ Queue an outgoing message.
diff --git a/infocalypse/run_wikibot.py b/infocalypse/run_wikibot.py
--- a/infocalypse/run_wikibot.py
+++ b/infocalypse/run_wikibot.py
@@ -1,6 +1,8 @@
 """ Set up and run a single wikibot instance.
 
-    Copyright (C) 2009 Darrell Karbott
+    Uses *nix specific apis! Only tested on Linux.
+
+    Copyright (C) 2009, 2010 Darrell Karbott
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public
@@ -19,7 +21,11 @@
     Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks
 """
 
+import errno
 import os
+import signal
+import sys
+
 from ConfigParser import ConfigParser
 
 from fcpclient import FCPClient, get_usk_hash
@@ -29,16 +35,58 @@ from bundlecache import is_writable
 
 from fmsstub import FMSStub
 
-from fmsbot import FMSBotRunner, run_event_loops
+from fmsbot import FMSBotRunner, run_event_loops, make_bot_path
 from wikibot import WikiBot
 
+############################################################
+# FCP info
+FCP_HOST = '127.0.0.1'
+FCP_PORT = 19481
+
+# FMS info
+FMS_HOST = '127.0.0.1'
+FMS_PORT = 11119
+# NOTE: fms id for bot is read from fnwiki.cfg.
+
+# Latest known repo usk index
+# MUST set this when starting for the first time or re-bootstrapping.
+INDEX_HINT = 0
+
+# verbosity of logging output (NOT FCP 'Verbosity')
+VERBOSITY = 5
+
+# Root directory for temporary files.
+BASE_DIR = '/tmp/fnikibot'
+
+# File containing the private SSK key.
+# String is filled in with the usk hash for the wikitext repo.
+#
+# MUST match the public key for the wiki_repo_usk in the
+# fnwiki.cfg file.
+KEY_FILE_FMT = '~/wikibot_key_%s.txt'
+
+# Set this True to post repo update notifications to
+# infocalypse.notify.  You MUST set this for users to
+# be able to see the bot's update notifications with
+# the default configuration of fn-fmsread.
+#
+# BUT please don't set it True when testing, to avoid
+# spewing garbage into the infocalypse.notify group.
+POST_TO_INFOCALYPSE_NOTIFY = False
+
+# Usually, you won't need to tweak parameters below this line.
+#
+# Additional configuration info is read from the fnwiki.cfg
+# file for the wiki. See read_fnwiki_cfg.
+#----------------------------------------------------------#
 
 def read_fnwiki_cfg(cfg_file):
     """ Quick and dirty helper w/o hg deps. to read cfg file."""
     parser = ConfigParser()
     parser.read(cfg_file)
     if not parser.has_section('default'):
-        raise IOError("Can't read default section of config file?")
+        raise IOError("Can't read default section of config file: %s"
+                      % cfg_file)
 
     # Hmmm some param key strings are different than config.py.
     return {'WIKI_ROOT':parser.get('default', 'wiki_root'),
@@ -49,46 +97,50 @@ def read_fnwiki_cfg(cfg_file):
             'FMS_ID':parser.get('default', 'wiki_server_id').split('@')[0],
             'WIKI_REPO_USK':parser.get('default', 'wiki_repo_usk')}
 
+def get_dirs(base_dir, create=False):
+    " Get, and optionally create the required working directories."
+    ret = (os.path.join(base_dir, '__wikibot_tmp__'),
+            os.path.join(base_dir, 'hgrepo'),
+            os.path.join(base_dir, 'bot_storage'),
+            os.path.join(os.path.join(base_dir, 'bot_storage'), # required?
+                         '.hg'))
+
+    if create:
+        for value in ret:
+            if os.path.exists(value):
+                raise IOError("Directory already exists: %s" % value)
+        print
+        for value in ret:
+            os.makedirs(value)
+            if not is_writable(value):
+                raise IOError("Couldn't write to: %s" % value)
+            print "Created: %s" % value
+
+        print
+        print "You need to MANUALLY fn-pull the wikitext repo into:"
+        print ret[1]
+
+    else:
+        for value in ret:
+            if not is_writable(value):
+                raise IOError("Directory doesn't exist or isn't writable: %s"
+                              % value)
+    return ret[:3]
+
 # LATER: load from a config file
-def get_params():
+def get_params(base_dir):
     """ Return the parameters to run a WikiBot. """
 
-    # Directory containing all bot related stuff.
-    base_dir = '/tmp/wikibots'
-
-    # File containing the private SSK key.
-    key_file_fmt = key_file = '~/wikibot_key_%s.txt'
-
-    # FCP info
-    fcp_host = '127.0.0.1'
-    fcp_port = 9481
-
-    # FMS info
-    fms_host = '127.0.0.1'
-    fms_port = 1119
-    # NOTE: fms id for bot is read from fnwiki.cfg.
-
-    # Latest known repo usk index
-    index_hint = 0
-
-    # vebosity of logging output (NOT FCP 'Verbosity')
-    verbosity = 5
-
-    # MUST exist
-    tmp_dir = os.path.join(base_dir, '__wikibot_tmp__')
-    # MUST exist and contain wikitext hg repo.
-    repo_dir = os.path.join(base_dir, 'hgrepo')
-    # MUST exist
-    bot_storage_dir = os.path.join(base_dir, 'bot_storage')
-
-    #----------------------------------------------------------#
-    assert is_writable(tmp_dir)
-    assert os.path.exists(os.path.join(repo_dir, '.hg'))
+    # Get working directories.
+    (tmp_dir,         # MUST exist
+     repo_dir,        # MUST exist and contain wikitext hg repo.
+     bot_storage_dir, # MUST exist
+     )  = get_dirs(base_dir)
 
     params = read_fnwiki_cfg(os.path.join(repo_dir, 'fnwiki.cfg'))
 
     # MUST contain SSK private key
-    key_file = key_file_fmt % get_usk_hash(params['WIKI_REPO_USK'])
+    key_file = KEY_FILE_FMT % get_usk_hash(params['WIKI_REPO_USK'])
     print "Read insert key from: %s" % key_file
 
     # Load private key for the repo from a file..
@@ -103,7 +155,7 @@ def get_params():
 
     # Then invert the request_uri from it.
     print "Inverting public key from private one..."
-    request_uri = FCPClient.connect(fcp_host, fcp_port). \
+    request_uri = FCPClient.connect(FCP_HOST, FCP_PORT). \
                   get_request_uri(insert_uri)
     print request_uri
     if get_usk_hash(request_uri) != get_usk_hash(params['WIKI_REPO_USK']):
@@ -131,25 +183,26 @@ def get_params():
         'Verbosity':1023, # MUST set this to get progress messages.
 
         # FCPConnection / RequestRunner
-        'FCP_HOST':fcp_host,
-        'FCP_PORT':fcp_port,
+        'FCP_HOST':FCP_HOST,
+        'FCP_PORT':FCP_PORT,
         'FCP_POLL_SECS':0.25,
         'N_CONCURRENT':4,
-        'CANCEL_TIME_SECS': 7 * 60,
+        'CANCEL_TIME_SECS': 15 * 60,
 
         # FMSBotRunner
-        'FMS_HOST':fms_host,
-        'FMS_PORT':fms_port,
+        'FMS_HOST':FMS_HOST,
+        'FMS_PORT':FMS_PORT,
         'FMS_POLL_SECS': 3 * 60,
         'BOT_STORAGE_DIR':bot_storage_dir,
 
         # WikiBot
-        'FMS_NOTIFY_GROUP':'infocalypse.notify', # extra group to notify.
-        'LATEST_INDEX':index_hint, # Just a hint, it is also stored in shelve db
+        'FMS_NOTIFY_GROUP': ('infocalypse.notify' if POST_TO_INFOCALYPSE_NOTIFY
+                             else ''),  # extra group to notify.
+        'LATEST_INDEX':INDEX_HINT, # Just a hint, it is also stored in shelve db
         'SITE_KEY':insert_ssk,
         'INSERT_URI':insert_uri,
         'REQUEST_URI':request_uri,
-        'VERBOSITY':verbosity,
+        'VERBOSITY':VERBOSITY,
         'TMP_DIR':tmp_dir,
         'NO_SEARCH':False, # REQUIRED
         'USK_HASH':get_usk_hash(request_uri),
@@ -206,5 +259,106 @@ def run_wikibot(params):
 
 
 
+############################################################
+# Use explicit dispatch table in order to avoid conditional
+# gook.
+def cmd_setup(dummy):
+    """ Setup the working directories used by the wikibot."""
+    get_dirs(BASE_DIR, True)
+
+def cmd_start(params):
+    """ Start the bot. REQUIRES already setup."""
+    run_wikibot(params)
+
+def cmd_stop(params):
+    """ Stop the bot."""
+    try:
+        pid = int(open(make_bot_path(params['BOT_STORAGE_DIR'],
+                                 'wikibot_' + params['USK_HASH'],
+                                 'pid'), 'rb').read().strip())
+
+        print "Stopping, pid: %i..." % pid
+        os.kill(pid, signal.SIGINT)
+        os.waitpid(pid, 0)
+        print "Stopped."
+    except IOError: # no pid file
+        print "Not running."
+    except OSError, err:
+        if err.errno ==  errno.ECHILD:
+            # Process died before waitpid.
+            print "Stopped."
+        else:
+            print "Failed: ", err
+
+def cmd_status(params):
+    """ Check if the bot is running."""
+
+    print "wikibot_%s:" % params['USK_HASH']
+    print "storage: %s" % params['BOT_STORAGE_DIR']
+
+    # Attribution:
+    # http://stackoverflow.com/questions/38056/how-do-you-check-in-linux-with- \
+    #       python-if-a-process-is-still-running
+    try:
+        pid = int(open(make_bot_path(params['BOT_STORAGE_DIR'],
+                                 'wikibot_' + params['USK_HASH'],
+                                 'pid'), 'rb').read().strip())
+
+        print "pid: %i" % pid
+        os.kill(pid, 0)
+        print "STATUS: Running"
+    except IOError: # no pid file
+        print "STATUS: Stopped"
+    except OSError, err:
+        if err.errno == errno.ESRCH:
+            print "STATUS: Crashed!"
+        elif err.errno == errno.EPERM:
+            print "No permission to signal this process! Maybe run whoami?"
+        else:
+            print "Unknown error checking pid!"
+
+def cmd_catchup(params):
+    """ Rebuild local working files, IGNORING all
+        submission messages.
+
+        This is used to re-bootstrap the bot when the local database
+        files have been lost or deleted.  e.g. moving the bot to
+        a different machine.
+
+        BUG: Doesn't restore the processed submission CHK list.
+"""
+    params['CATCH_UP'] = True
+    params['FMS_POLL_SECS'] = 1
+    run_wikibot(params)
+
+def cmd_help(dummy):
+    """ Print a help message."""
+
+    print """USAGE:
+run_wikibot.py <cmd>
+
+where <cmd> is %s""" % (', '.join(DISPATCH_TABLE.keys()))
+
+DISPATCH_TABLE = {"setup":cmd_setup,
+                  "start":cmd_start,
+                  "stop":cmd_stop,
+                  "status":cmd_status,
+                  "catchup":cmd_catchup,
+                  "help":cmd_help}
+
+############################################################
+
+def main():
+    """ CLI entry point."""
+    cmd = sys.argv[1] if len(sys.argv) == 2 else 'help'
+    try:
+        parameters = (None if cmd == 'setup' or cmd == 'help'
+                      else get_params(BASE_DIR))
+    except IOError, err:
+        print "FAILED: %s" % str(err)
+        return
+
+    DISPATCH_TABLE[cmd](parameters)
+
 if __name__ == "__main__":
-    run_wikibot(get_params())
+    main()
diff --git a/infocalypse/wikibot.py b/infocalypse/wikibot.py
--- a/infocalypse/wikibot.py
+++ b/infocalypse/wikibot.py
@@ -245,6 +245,11 @@ class WikiBot(FMSBot, RequestQueue):
         """
         self.trace(context_to_str(self.ctx))
         self.ctx.synch_dbs()
+        if self.params.get('CATCH_UP', False):
+            self.debug("Exiting because CATCH_UP was set.")
+            self.warn("REQUESTING BOT SHUTDOWN!")
+            self.exit = True
+            return
 
         if self.ctx.should_notify():
             self._send_update_notification()
@@ -290,6 +295,10 @@ class WikiBot(FMSBot, RequestQueue):
         # Hmmm... accessor? ctx.mark_recvd() or put in ctx.wants() ???
         self.ctx.store_handled_ids[msg_id] = "" # (ab)use as hashset
 
+        if self.params.get('CATCH_UP', False):
+            # Ignore all messages.
+            return
+
         sender_fms_id = items[2]
         submission = parse_submission(sender_fms_id, lines,
                                       self.params['USK_HASH'])