Implemented pseudo-anonymous wiki submission via wikibot: overlayed local page edits (fileoverlay.py), UTF-8 wikitext handling in piki.py, and new fn-fmsnotify --submitwiki / fn-wiki --apply options.
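
The heart of the change is the new fniki/fileoverlay.py below: page writes are redirected into a parallel OVERLAY directory, so the original wikitext stays untouched until the edits are explicitly submitted (hg fn-fmsnotify --submitwiki) or applied (hg fn-wiki --apply). A minimal usage sketch of the overlay API follows; the paths and page name are made up purely for illustration, and it assumes the base directory and its OVERLAY/wikitext counterpart already exist (reset_root_dir() in piki.py creates the latter):

    import fileoverlay

    # is_overlayed=True returns OverlayedFiles; False returns DirectFiles.
    filefuncs = fileoverlay.get_file_funcs('/foo/bar/baz', True)

    page = '/foo/bar/baz/wikitext/FrontPage'
    # Writes land in the parallel OVERLAY tree, not in wikitext/.
    print(filefuncs.overlay_path(page))      # /foo/bar/OVERLAY/wikitext/FrontPage

    filefuncs.write(page, u'Describe FrontPage here.\n')
    print(filefuncs.has_overlay(page))       # True: the page now has local edits
    print(filefuncs.read(page))              # overlayed text
    print(filefuncs.read(page, 'rb', True))  # pristine text (assumes the original exists)
    filefuncs.remove_overlay(page)           # discard the local edit

A zero-length overlay file marks a page as locally deleted, and servepiki.py calls remove_redundant_files() when the local server starts so overlay files identical to the originals are dropped.
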
diff --git a/fniki/default_files/wikitext/EditingTips b/fniki/default_files/wikitext/EditingTips --- a/fniki/default_files/wikitext/EditingTips +++ b/fniki/default_files/wikitext/EditingTips @@ -1,41 +1,41 @@ -Names of pages have to LookLikeThis. -It's called a WikiName. -If you write a word that LookLikeThis, it will be automatically turned into a link. - -Be aware that PikiPiki does not lock pages during editing, so if you pause for a long time on the edit screen you may overwrite somebody else's changes. - -Leave blank lines between paragraphs. - -You can also write ''italics'' and '''bold'''. -To write italics, enclose the text in double single quotes. -To write bold, enclose the text in triple single quotes. -If you can't figure out what I mean, click EditText to look at the source for this page. - -If you indent text - - like this, then it is indented in the output - - you can have multiple levels of indent - -And if you put asterisks at the start of the line - - * you get a - * bulleted - * list - * which can also be indented - * to several levels - -To insert program source without reformatting in a monospace font, use three curly braces: - -{{{ - public static void main(String[] args) { - out.println("Hello"); - } -}}} - -Just like that. - -Now click EditText in the footer to see the markup for this page, and try it yourself in PikiSandBox. - ----- +Names of pages have to LookLikeThis. +It's called a WikiName. +If you write a word that LookLikeThis, it will be automatically turned into a link. + +Be aware that PikiPiki does not lock pages during editing, so if you pause for a long time on the edit screen you may overwrite somebody else's changes. + +Leave blank lines between paragraphs. + +You can also write ''italics'' and '''bold'''. +To write italics, enclose the text in double single quotes. +To write bold, enclose the text in triple single quotes. +If you can't figure out what I mean, click EditText to look at the source for this page. + +If you indent text + + like this, then it is indented in the output + + you can have multiple levels of indent + +And if you put asterisks at the start of the line + + * you get a + * bulleted + * list + * which can also be indented + * to several levels + +To insert program source without reformatting in a monospace font, use three curly braces: + +{{{ + public static void main(String[] args) { + out.println("Hello"); + } +}}} + +Just like that. + +Now click EditText in the footer to see the markup for this page, and try it yourself in PikiSandBox. + +---- Contributors and PikiGurus: JohnFarrell, MartinPool \ No newline at end of file diff --git a/fniki/default_files/wikitext/FindPage b/fniki/default_files/wikitext/FindPage --- a/fniki/default_files/wikitext/FindPage +++ b/fniki/default_files/wikitext/FindPage @@ -1,28 +1,28 @@ -Here are some good starting points for browsing: - - * FrontPage -- top of this system - * RecentChanges recorded automatically - * TitleIndex -- all pages in the database - * WordIndex -- all words in all titles - ----- -The following interface only works if you are running the wiki -locally. No in Freenet search ;-) ----- - -You can use this page to search all entries in PikiPiki. Searches are not case sensitive. - -Here's a title search. Try something like ''manager'': - - [[TitleSearch]] - -Here's a full-text search. 
- - [[FullSearch]] - -You can also use regular expressions, such as - -{{{ seriali[sz]e}}} - -Or go direct to a page, or create a new page by entering its name here: +Here are some good starting points for browsing: + + * FrontPage -- top of this system + * RecentChanges recorded automatically + * TitleIndex -- all pages in the database + * WordIndex -- all words in all titles + +---- +The following interface only works if you are running the wiki +locally. No in Freenet search ;-) +---- + +You can use this page to search all entries in PikiPiki. Searches are not case sensitive. + +Here's a title search. Try something like ''manager'': + + [[TitleSearch]] + +Here's a full-text search. + + [[FullSearch]] + +You can also use regular expressions, such as + +{{{ seriali[sz]e}}} + +Or go direct to a page, or create a new page by entering its name here: [[GoTo]] \ No newline at end of file diff --git a/fniki/default_files/wikitext/FreenetOnlyFeatures b/fniki/default_files/wikitext/FreenetOnlyFeatures --- a/fniki/default_files/wikitext/FreenetOnlyFeatures +++ b/fniki/default_files/wikitext/FreenetOnlyFeatures @@ -1,27 +1,27 @@ -Links to freenet URIs are automatically hypertexted but they only work when the wiki is inserted into Freenet. - -freenet:USK@qd-hk0vHYg7YvK2BQsJMcUD5QSF0tDkgnnF6lnWUH0g,xTFOV9ddCQQk6vQ6G~jfL6IzRUgmfMcZJ6nuySu~NUc,AQACAAE/activelink-index-text/81/ - -Normal 'http:' links are also hypertexted. They will automatically be converted into the _CHECKED_HTTP_ links when the wiki is inserted into freenet. - -Images with links to Freenet URIs are supported, but you can only see them when the wiki is inserted into Freenet. - -''Remember'' to use 'freenet:' at the start of the URI. - -[[[freenet:USK@qd-hk0vHYg7YvK2BQsJMcUD5QSF0tDkgnnF6lnWUH0g,xTFOV9ddCQQk6vQ6G~jfL6IzRUgmfMcZJ6nuySu~NUc,AQACAAE/activelink-index-text/81/activelink.png|(alt tag for image)|(title tag for image]]] - -The file: - -{{{ -wiki_root/www/activelink.png -}}} - -Is used for the Freenet active link image. - -There's a markup macro that allows you to embed it in any wiki page, like this: - -[[ActiveLink]] - - - +Links to freenet URIs are automatically hypertexted but they only work when the wiki is inserted into Freenet. + +freenet:USK@qd-hk0vHYg7YvK2BQsJMcUD5QSF0tDkgnnF6lnWUH0g,xTFOV9ddCQQk6vQ6G~jfL6IzRUgmfMcZJ6nuySu~NUc,AQACAAE/activelink-index-text/81/ + +Normal 'http:' links are also hypertexted. They will automatically be converted into the _CHECKED_HTTP_ links when the wiki is inserted into freenet. + +Images with links to Freenet URIs are supported, but you can only see them when the wiki is inserted into Freenet. + +''Remember'' to use 'freenet:' at the start of the URI. + +[[[freenet:USK@qd-hk0vHYg7YvK2BQsJMcUD5QSF0tDkgnnF6lnWUH0g,xTFOV9ddCQQk6vQ6G~jfL6IzRUgmfMcZJ6nuySu~NUc,AQACAAE/activelink-index-text/81/activelink.png|(alt tag for image)|(title tag for image]]] + +The file: + +{{{ +wiki_root/www/activelink.png +}}} + +Is used for the Freenet active link image. + +There's a markup macro that allows you to embed it in any wiki page, like this: + +[[ActiveLink]] + + + Use the edit text link below to view the markup for these features. \ No newline at end of file diff --git a/fniki/default_files/wikitext/FrontPage b/fniki/default_files/wikitext/FrontPage --- a/fniki/default_files/wikitext/FrontPage +++ b/fniki/default_files/wikitext/FrontPage @@ -1,24 +1,24 @@ -'''Minimal wiki skeleton directory to start your wiki from''' - -fniki is based on PikiPiki. - -TitleIndex has a list of all pages. 
- -These pages have examples of wiki markup: - * EditingTips, HorizontalRules - * FreenetOnlyFeatures - - -Removing pages isn't supported though the wiki interface (yet!). -You can manually remove pages by removing files from the: -{{{ -<repo root>/wiki_root/wikitext -}}} -directory. - -These pages have embedded macros. You might want to take a look at -the document source before you delete them. - - * FindPage,RecentChanges,TitleIndex, WordIndex - - +'''Minimal wiki skeleton directory to start your wiki from''' + +fniki is based on PikiPiki. + +TitleIndex has a list of all pages. + +These pages have examples of wiki markup: + * EditingTips, HorizontalRules + * FreenetOnlyFeatures + + +Removing pages isn't supported though the wiki interface (yet!). +You can manually remove pages by removing files from the: +{{{ +<repo root>/wiki_root/wikitext +}}} +directory. + +These pages have embedded macros. You might want to take a look at +the document source before you delete them. + + * FindPage,RecentChanges,TitleIndex, WordIndex + + diff --git a/fniki/default_files/wikitext/HorizontalRules b/fniki/default_files/wikitext/HorizontalRules --- a/fniki/default_files/wikitext/HorizontalRules +++ b/fniki/default_files/wikitext/HorizontalRules @@ -1,7 +1,7 @@ -You can rule a line across the page by typing four or more dashes. In PikiPiki the number of dashes in a horizontal rule markup determine how thick it is: use them in moderation. (A suggestion from JohnFarrell.) - ---- ----- ------ ------- +You can rule a line across the page by typing four or more dashes. In PikiPiki the number of dashes in a horizontal rule markup determine how thick it is: use them in moderation. (A suggestion from JohnFarrell.) + +--- +---- +----- +------ ------- \ No newline at end of file diff --git a/fniki/default_files/wikitext/LocalChanges b/fniki/default_files/wikitext/LocalChanges new file mode 100644 --- /dev/null +++ b/fniki/default_files/wikitext/LocalChanges @@ -0,0 +1,2 @@ +The following pages have overlayed local changes: [[LocalChanges]] + diff --git a/fniki/default_files/wikitext/PikiPiki b/fniki/default_files/wikitext/PikiPiki --- a/fniki/default_files/wikitext/PikiPiki +++ b/fniki/default_files/wikitext/PikiPiki @@ -1,7 +1,7 @@ -I'm using piki as the wiki engine for this project, mostly since it is so small and easy to understand / modify even if it is a little old. Thanks to MartinPool for making this code available. I started hacking from: - -sha1:e71779f4f8fea7dc851ffb1e74f18c613a94b086 piki-1.62.zip - -which I got here: - -http://sourcefrog.net/projects/piki/ +I'm using piki as the wiki engine for this project, mostly since it is so small and easy to understand / modify even if it is a little old. Thanks to MartinPool for making this code available. I started hacking from: + +sha1:e71779f4f8fea7dc851ffb1e74f18c613a94b086 piki-1.62.zip + +which I got here: + +http://sourcefrog.net/projects/piki/ diff --git a/fniki/default_files/wikitext/RemoteChanges b/fniki/default_files/wikitext/RemoteChanges new file mode 100644 --- /dev/null +++ b/fniki/default_files/wikitext/RemoteChanges @@ -0,0 +1,4 @@ +Edits recently committed by the server in Freenet. + +[[RemoteChanges]] + diff --git a/fniki/default_files/wikitext/TitleIndex b/fniki/default_files/wikitext/TitleIndex --- a/fniki/default_files/wikitext/TitleIndex +++ b/fniki/default_files/wikitext/TitleIndex @@ -1,10 +1,10 @@ -This is an index of all pages in PikiPiki. 
- -See also: - - * WordIndex -- a permuted index of all words occuring in titles - * RecentChanges - ----- - +This is an index of all pages in PikiPiki. + +See also: + + * WordIndex -- a permuted index of all words occuring in titles + * RecentChanges + +---- + [[TitleIndex]] \ No newline at end of file diff --git a/fniki/default_files/wikitext/WordIndex b/fniki/default_files/wikitext/WordIndex --- a/fniki/default_files/wikitext/WordIndex +++ b/fniki/default_files/wikitext/WordIndex @@ -1,10 +1,10 @@ -This is an index of all words occuring in page titles. - -See also: - - * TitleIndex -- a shorter index - * RecentChanges - ----- - +This is an index of all words occuring in page titles. + +See also: + + * TitleIndex -- a shorter index + * RecentChanges + +---- + [[WordIndex]] \ No newline at end of file diff --git a/fniki/default_files/www/piki.css b/fniki/default_files/www/piki.css --- a/fniki/default_files/www/piki.css +++ b/fniki/default_files/www/piki.css @@ -1,3 +1,6 @@ BODY { background-color: #FFFFFF; color: #000000 } A { color: #1f6b9e } -A.nonexistent { background-color: #CCCCCC } \ No newline at end of file +A.nonexistent { background-color: #CCCCCC } +A.forkedtitle {color: #585858} +A.removedfork {color: #00FF00} +A.existingfork {color: #FF0000} diff --git a/fniki/fileoverlay.py b/fniki/fileoverlay.py new file mode 100644 --- /dev/null +++ b/fniki/fileoverlay.py @@ -0,0 +1,265 @@ +""" Classes to support overlayed file writing so that that piki can edit + without modifying the original copy. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" +import codecs +import os +import stat + +# NOTE: There are hard coded references to utf8 in piki.py, submission.py +# and hgoverlay.py. Look there before changing this value. +WIKITEXT_ENCODING = "utf8" + +class IFileFunctions: + """ An ABC for file system operations. """ + def __init__(self, base_path): + self.base_path = base_path + def overlay_path(self, path): + """ Return the path that writes should be written to. """ + def write(self, path, bytes, mode='wb'): + """ Write a file. """ + raise NotImplementedError() + def read(self, path, mode='rb', dummy_non_overlayed=False): + """ Read a file. """ + raise NotImplementedError() + def exists(self, path, dummy_non_overlayed=False): + """ Return True if the file exists, False otherwise. """ + raise NotImplementedError() + def modtime(self, path, dummy_non_overlayed=False): + """ Return the modtime for the file.""" + raise NotImplementedError() + def list_pages(self, path, dummy_non_overlayed=False): + """ Return a list of all pages. """ + raise NotImplementedError() + def has_overlay(self, path): + """ Return True if there's an overlay for the file, False otherwise. 
""" + raise NotImplementedError() + def remove_overlay(self, path): + """ Remove the overlayed version of the file. """ + raise NotImplementedError + def is_overlayed(self): + """ Return True if the instance supports overlaying, False + otherwise. """ + raise NotImplementedError + +class DirectFiles(IFileFunctions): + """ An IFileFunctions implementation which writes directly to + the file system. """ + def __init__(self, base_path): + IFileFunctions.__init__(self, base_path) + + def overlay_path(self, path): + """ IFileFunctions implementation. """ + return path + + def write(self, path, bytes, mode='wb'): + """ IFileFunctions implementation. """ + # There were hacks in the original piki code + # to handle nt refusing to rename to an existing + # file name. Not sure if it is a problem on + # modern windows. + tmp_name = path + '.__%s__' % str(os.getpid()) + try: + out_file = codecs.open(tmp_name, mode, WIKITEXT_ENCODING) + try: + if len(bytes) > 0: # Truncating is allowed. + out_file.write(bytes) + finally: + out_file.close() + + if os.path.exists(path): + os.remove(path) + + os.rename(tmp_name, path) + finally: + if os.path.exists(tmp_name): + os.remove(tmp_name) + + def read(self, path, mode='rb', dummy_non_overlayed=False): + """ IFileFunctions implementation. """ + + in_file = codecs.open(path, mode, WIKITEXT_ENCODING) + try: + return in_file.read() + finally: + in_file.close() + + def exists(self, path, dummy_non_overlayed=False): + """ IFileFunctions implementation. """ + return os.path.exists(path) + + def modtime(self, path, dummy_non_overlayed=False): + """ IFileFunctions implementation. """ + return os.stat(path)[stat.ST_MTIME] + + def list_pages(self, path, dummy_non_overlayed=False): + """ IFileFunctions implementation. """ + return [name for name in os.listdir(path) + if (os.path.isfile(os.path.join(path, name)) and + not os.path.islink(os.path.join(path, name)))] + + def has_overlay(self, dummy_path): + """ IFileFunctions implementation. """ + return False + + def remove_overlay(self, dummy_path): + """ IFileFunctions implementation. """ + assert False + + def is_overlayed(self): + """ IFileFunctions implementation. """ + return False + +OVERLAY_DIR = 'OVERLAY' + +class OverlayedFiles(DirectFiles): + """ An IFileFunctions implementation which overlays writes into a separate + parallel OVERLAY directory. + + e.g. if: + base_dir == /foo/bar/baz + then, + path == /foo/bar/baz/snafu.txt + maps to, + overlay == /foo/bar/OVERLAY/snafu.txt + """ + def __init__(self, base_path): + DirectFiles.__init__(self, base_path) + + def overlay_path(self, path): + """ Return the path that overlayed writes should be written to. """ + path = os.path.abspath(path) + assert path.startswith(self.base_path) + rest = path[len(self.base_path):] + if rest.startswith(os.sep): + rest = rest[len(os.sep):] + + overlay_base = os.path.split(self.base_path)[0] # Hmmm... errors? + + overlayed = os.path.join(os.path.join(overlay_base, OVERLAY_DIR), + rest) + return overlayed + + # You cannot write to the non-overlayed files. + def write(self, path, bytes, mode='wb'): + """ IFileFunctions implementation. """ + DirectFiles.write(self, self.overlay_path(path), bytes, mode) + + def read(self, path, mode='rb', non_overlayed=False): + """ IFileFunctions implementation. 
""" + if non_overlayed: + return DirectFiles.read(self, path, mode) + + overlayed = self.overlay_path(path) + if os.path.exists(overlayed): + return DirectFiles.read(self, overlayed, mode) + + return DirectFiles.read(self, path, mode) + + # Zero length file means delete. + def exists(self, path, non_overlayed=False): + """ IFileFunctions implementation. """ + if non_overlayed: + return DirectFiles.exists(self, path) + + overlay = self.overlay_path(path) + if os.path.exists(overlay): + if os.path.getsize(overlay) == 0: + return False + else: + return True + + return os.path.exists(path) + + def modtime(self, path, non_overlayed=False): + """ IFileFunctions implementation. """ + if non_overlayed: + return DirectFiles.modtime(self, path) + + overlay = self.overlay_path(path) + if os.path.exists(overlay) and os.path.getsize(overlay) > 0: + return DirectFiles.modtime(self, overlay) + + return DirectFiles.modtime(self, path) + + def list_pages(self, path, non_overlayed=False): + """ IFileFunctions implementation. """ + if non_overlayed: + return DirectFiles.list_pages(self, path) + + overlay = self.overlay_path(path) + overlay_pages = set([]) + if os.path.exists(overlay): + overlay_pages = set(DirectFiles.list_pages(self, overlay)) + + deleted = set([]) + for name in overlay_pages: + if os.path.getsize(os.path.join(overlay, name)) == 0: + deleted.add(name) + + return list(overlay_pages.union( + set(DirectFiles.list_pages(self, path)) - deleted)) + + # Hmmmm... Returns True for zero length file. i.e. "mark to delete" + def has_overlay(self, path): + """ IFileFunctions implementation. """ + return os.path.exists(self.overlay_path(path)) + + def remove_overlay(self, path): + """ IFileFunctions implementation. """ + overlay = self.overlay_path(path) + if os.path.exists(overlay): + os.remove(overlay) + + def is_overlayed(self): + """ IFileFunctions implementation. """ + return True + +def get_file_funcs(base_path, is_overlayed=False): + """ Returns an overlayed IFileFunctions implementation if + is_overlayed is True, and a direct implementation otherwise. """ + if not is_overlayed: + return DirectFiles(base_path) + + return OverlayedFiles(base_path) + +def remove_redundant_files(overlay, wikitext_dir, out_func=lambda msg:None): + """ Removes files which are identical in the overlayed and non-overlayed + directories. + + Also removes empty deletion marker files for files which have + been deleted from the non-overlayed directory. 
""" + + assert overlay.is_overlayed() + for name in overlay.list_pages(wikitext_dir): + full_path = os.path.join(wikitext_dir, name) + if overlay.has_overlay(full_path): + if not overlay.exists(full_path, True): + if len(overlay.read(full_path, 'rb', False)) == 0: + overlay.remove_overlay(full_path) + out_func("Removed redundant overlayed file: %s" % name) + continue + + if (overlay.read(full_path, 'rb', False) == + overlay.read(full_path, 'rb', True)): + overlay.remove_overlay(full_path) + out_func("Removed redundant overlayed file: %s" % name) + continue + diff --git a/fniki/piki.py b/fniki/piki.py --- a/fniki/piki.py +++ b/fniki/piki.py @@ -21,15 +21,22 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA +#pylint: disable-msg=C0111, W0331, W0106, C0103 __version__ = '$Revision: 1.62 $'[11:-2]; -import cgi, sys, string, os, re, errno, time, stat -from cgi import log +import cgi, codecs, sys, string, os, re, errno, time from os import path, environ from socket import gethostbyaddr from time import localtime, strftime -from cStringIO import StringIO +# NOTE: cStringIO doesn't work for unicode. +from StringIO import StringIO +import fileoverlay +filefuncs = None + +# File to redirect sys.stderr to. +#STDERR_FILE = '/tmp/piki_err' # REDFLAG: Comment out this line +STDERR_FILE = None # Silently drop all output to stderr PIKI_PNG = 'pikipiki-logo.png' PIKI_CSS = 'piki.css' @@ -39,15 +46,29 @@ ACTIVELINK = 'activelink.png' # HTTP server doesn't need to serve any other files to make piki work. PIKI_REQUIRED_FILES = (PIKI_PNG, PIKI_CSS, ACTIVELINK) +# class UnicodeStringIO(StringIO): +# """ Debugging hack fails early when non low-ASCII 8-bit strings are +# printed. """ +# def __init__(self, arg=u''): +# StringIO.__init__(self, arg) +# def write(self, bytes): +# if not isinstance(bytes, unicode): +# # Non-unicode strings should be 7-bit ASCII. +# # This will raise if the are not. +# bytes = bytes.decode('ascii') +# return StringIO.write(self,bytes) + scrub_links = False -def scrub(link_text, ss_class=None): +LINKS_DISABLED_PAGE = "LinksDisabledWhileEditing" + +def scrub(link_text, ss_class=None, force=False): """ Cleanup href values so the work with Freenet. """ - if not scrub_links: + if (not scrub_links) and (not force): return link_text # Handle pages which haven't been written yet gracefully. - if ss_class == 'nonexistent' : - return "NotWrittenYet"; + if ss_class == 'nonexistent': + return "NotWrittenYet" if link_text.startswith('/'): link_text = link_text[1:] @@ -60,53 +81,18 @@ def scrub(link_text, ss_class=None): return link_text def emit_header(): - print "Content-type: text/html" + print "Content-type: text/html; charset=utf-8" print # Regular expression defining a WikiWord (but this definition # is also assumed in other places. word_re_str = r"\b([A-Z][a-z]+){2,}\b" word_anchored_re = re.compile('^' + word_re_str + '$') +versioned_page_re_str = (r"\b(?P<wikiword>([A-Z][a-z]+){2,})" + + r"(_(?P<version>([a-f0-9]{40,40})))?\b") +versioned_page_re = re.compile('^' + versioned_page_re_str + '$') command_re_str = "(search|edit|fullsearch|titlesearch)\=(.*)" -# Editlog ----------------------------------------------------------- - -# Functions to keep track of when people have changed pages, so we can -# do the recent changes page and so on. 
-# The editlog is stored with one record per line, as tab-separated -# words: page_name, host, time - -# TODO: Check values written in are reasonable - -def editlog_add(page_name, host): - if editlog_name is None: - return - editlog = open(editlog_name, 'a+') - try: - # fcntl.flock(editlog.fileno(), fcntl.LOCK_EX) - editlog.seek(0, 2) # to end - editlog.write(string.join((page_name, host, `time.time()`), "\t") + "\n") - finally: - # fcntl.flock(editlog.fileno(), fcntl.LOCK_UN) - editlog.close() - - -def editlog_raw_lines(): - if editlog_name is None: - return [] - - editlog = open(editlog_name, 'rt') - try: - # fcntl.flock(editlog.fileno(), fcntl.LOCK_SH) - return editlog.readlines() - finally: - # fcntl.flock(editlog.fileno(), fcntl.LOCK_UN) - editlog.close() - - - - - # Formatting stuff -------------------------------------------------- @@ -114,16 +100,20 @@ def get_scriptname(): return environ.get('SCRIPT_NAME', '') -def send_title(text, link=None, msg=None): +def send_title(text, link=None, msg=None, is_forked=False): print "<head><title>%s</title>" % text if css_url: - print '<link rel="stylesheet" type="text/css" href="%s">' % scrub(css_url) + print '<link rel="stylesheet" type="text/css" href="%s">' % \ + scrub(css_url) print "</head>" print '<body><h1>' if get_logo_string(): - print link_tag('RecentChanges', get_logo_string()) + print link_tag('RemoteChanges', get_logo_string()) if link: - print '<a href="%s">%s</a>' % (scrub(link), text) + classattr = '' + if is_forked: + classattr = ' class="forkedtitle" ' + print '<a%s href="%s">%s</a>' % (classattr, scrub(link), text) else: print text print '</h1>' @@ -141,7 +131,8 @@ def link_tag(params, text=None, ss_class classattr = '' return '<a %s href="%s">%s</a>' % (classattr, - scrub("%s/%s" % (get_scriptname(),params), ss_class), + scrub("%s/%s" % (get_scriptname(), + params), ss_class), text) # Search --------------------------------------------------- @@ -199,16 +190,70 @@ def print_search_stats(hits, searched): def do_edit(pagename): Page(pagename).send_editor() +def do_viewsource(pagename): + Page(pagename).send_editor(True) + +def do_viewunmodifiedsource(pagename): + assert not is_versioned(pagename) + if not filefuncs.exists(Page(pagename)._text_filename(), + True): + # Doesn't exist! + send_title("Page Doesn't Exist!", None, + "The original version doesn't have a %s page." % + pagename) + else: + Page(pagename).send_editor(True, True) def do_savepage(pagename): global form pg = Page(pagename) - pg.save_text(form['savetext'].value) - msg = """<b>Thankyou for your changes. Your attention to - detail is appreciated.</b>""" - + text = '' + if 'savetext' in form: + text = form['savetext'].value + if text.strip() == '': + text = '' + msg = """<b>Locally deleting blank page.</b>""" + else: + msg = """<b>Saved local changes. They won't be applied to the + wiki in Freenet until you explictly <em>submit</em> them. </b>""" + + # Decode the utf8 text from the browser into unicode. + text = text.decode('utf8') + + # Normalize to UNIX line terminators. + pg.save_text(text.replace('\r\n', '\n')) + pg.send_page(msg=msg) +def do_removepage(page_name): + if not is_versioned(page_name): + msg = """<b>Locally removed page.</b>""" + else: + msg = """<b>Locally marked fork as resolved.</b>""" + pg = Page(page_name) + pg.save_text('') + pg.send_page(msg=msg) + +def do_unmodified(pagename): + if not filefuncs.exists(Page(pagename)._text_filename(), + True): + # Doesn't exist! 
+ send_title("Page Doesn't Exist!", None, + "The original version doesn't have a %s page." % + pagename) + else: + # Send original. + Page(pagename).send_page('Original Version', True) + +def do_deletelocal(pagename): + filefuncs.remove_overlay(Page(pagename)._text_filename()) + + send_title("Removed Local Edits", None, + "Removed local edits to %s page." % + pagename) + print "Local changes to %s have been deleted. <p>" % Page( + pagename).link_to() + print "Here's a link to the %s." % Page('FrontPage').link_to() def make_index_key(): s = '<p><center>' @@ -219,23 +264,12 @@ def make_index_key(): return s -def page_list(): - return filter(word_anchored_re.match, os.listdir(text_dir)) - - -def print_footer(name, editable=1, mod_string=None): - base = get_scriptname() - print '<hr>' - if editable: - print link_tag('?edit='+name, 'EditText') - print "of this page" - if mod_string: - print "(last modified %s)" % mod_string - print '<br>' - print link_tag('FindPage?value='+name, 'FindPage') - print " by browsing, searching, or an index" - print "<p>" - print "<em>Editing dir: %s </em>" % data_dir +def page_list(include_versioned=False): + if include_versioned: + return filter(versioned_page_re.match, + filefuncs.list_pages(text_dir)) + return filter(word_anchored_re.match, + filefuncs.list_pages(text_dir)) # ---------------------------------------------------------- # Macros @@ -247,16 +281,16 @@ def _macro_FullSearch(): def _macro_search(type): if form.has_key('value'): - default = form["value"].value + default = form["value"].value.encode('utf8') else: default = '' - return """<form method=get> + return """<form method=get accept-charset="UTF-8"> <input name=%s size=30 value="%s"> <input type=submit value="Go"> </form>""" % (type, default) def _macro_GoTo(): - return """<form method=get><input name=goto size=30> + return """<form method=get accept-charset="UTF-8"><input name=goto size=30> <input type=submit value="Go"> </form>""" # isindex is deprecated, but it gives the right result here @@ -281,9 +315,12 @@ def _macro_WordIndex(): if letter <> last_letter: s = s + '<a name="%s"><h3>%s</h3></a>' % (letter, letter) last_letter = letter - + s = s + '<b>%s</b><ul>' % word - links = map[word] + #links = map[word] # <-- Has duplicate links! + # + # Quick and dirty fix for muliple link BUG. Revisit. + links = list(set(map[word])) links.sort() last_page = None for name in links: @@ -309,43 +346,166 @@ def _macro_TitleIndex(): return s -def _macro_RecentChanges(): - lines = editlog_raw_lines() - lines.reverse() - - ratchet_day = None - done_words = {} - buf = StringIO() - for line in lines: - page_name, addr, ed_time = string.split(line, '\t') - # year, month, day, DoW - time_tuple = localtime(float(ed_time)) - day = tuple(time_tuple[0:3]) - if day <> ratchet_day: - buf.write('<h3>%s</h3>' % strftime(date_fmt, time_tuple)) - ratchet_day = day - - if done_words.has_key(page_name): - continue - - done_words[page_name] = 1 - buf.write(Page(page_name).link_to()) - if show_hosts: - buf.write(' . . . . ') - try: - buf.write(gethostbyaddr(addr)[0]) - except: - buf.write("(unknown)") - if changed_time_fmt: - buf.write(time.strftime(changed_time_fmt, time_tuple)) - buf.write('<br>') - - return buf.getvalue() - def _macro_ActiveLink(): return '<img src="%s" />' % scrub('/' + ACTIVELINK) + +def get_unmerged_versions(overlay, wikitext_dir, page_names): + # name -> version list + ret = {} + for name in page_names: + ret[name] = set([]) # hmmm paranoia? list ok? + + # REDFLAG: O(N) in total number of pages. 
hmmmm... + for name in overlay.list_pages(wikitext_dir): + fields = name.split('_') + if len(fields) < 2: + continue + if not fields[0].strip() in page_names: + continue + if len(fields[1].strip()) != 40: + continue + # hmmmm... validate? + ret[fields[0].strip()].add(fields[1].strip()) + + for name in ret.keys()[:]: # hmmm copy required? + ret[name] = list(ret[name]) + ret[name].sort() + + return ret + +def fork_link(overlay, text_dir_, name, version): + full_name = '%s_%s' % (name, version) + css_class = "removedfork" + if overlay.exists(os.path.join(text_dir_, + full_name)): + css_class = "existingfork" + + if scrub_links and css_class == "removedfork": + # Prevent broken linx when dumping freesite. + full_name = 'AlreadyResolved' + + return link_tag(full_name, '(' + version[:12] + ')', + css_class) + +def get_fork_html(overlay, text_dir_, name, table): + + return ''.join([fork_link(overlay, text_dir_, name, ver) + for ver in table[name]]) + +def _macro_LocalChanges(): + if not filefuncs.is_overlayed(): + return "<br>Not using overlayed editing!<br>" + + local = set([]) + for name in filefuncs.list_pages(text_dir): + if filefuncs.has_overlay(os.path.join(text_dir, + name)): + local.add(name.split('_')[0]) + local = list(local) + local.sort() + if len(local) == 0: + return "<br>No locally edited pages.<br>" + + fork_table = get_unmerged_versions(filefuncs, text_dir, local) + return '<br>' + '<br>'.join(["%s %s" % + (Page(name).link_to(), + get_fork_html(filefuncs, text_dir, + name, fork_table)) + for name in local]) + '<br>' + +def get_page_ref(page_name): + match = versioned_page_re.match(page_name) + if not match: + return "ILLEGAL_NAME" + name = match.group('wikiword') + ver = match.group('version') + if not ver: + return Page(page_name).link_to() + + return "<em>%s(%s)</em>" % (name, ver[:12]) + +def _macro_RemoteChanges(): + words = ('created', 'modified', 'removed', 'forked') + reject_reasons = { + 0:"Unknown", + 1:"Server couldn't read submission CHK", + 2:"Insufficient trust", + 3:"Already applied", # Fully applied + 4:"Already applied", # Partially applied. (not used anymore) + 5:"Conflict", + 6:"Illegal or Malformed submission" + } + + def file_changes(changes): + tmps = [] + + for index, change in enumerate(changes): + if len(change) == 0: + continue + if index == len(words) - 1: + # Special case forked files. + wiki_names = change.keys() + wiki_names.sort() + + tmps.append("%s:%s" % (words[index], + ','.join([(Page(name).link_to() + " " + + get_fork_html(filefuncs, + text_dir, + name, + change)) + for name in wiki_names]))) + continue + + tmps.append("%s:%s" % (words[index], + ','.join([get_page_ref(name) + for name in change]))) + return ','.join(tmps) + + def accept_summary(entry, time_tuple): + return ('<strong>%s accepted from %s</strong><br>%s</br>' % + (time.strftime(changed_time_fmt, time_tuple), + entry[2], + file_changes(entry[4]))) + + def reject_summary(entry, time_tuple): + return ('<strong>%s rejected from %s</strong><br>%s</br>' % + (time.strftime(changed_time_fmt, time_tuple), + entry[2], + reject_reasons.get(int(entry[4]), + "unknown code:%i" % int(entry[4])))) + accepted, rejected = read_log_file_entries(data_dir, 20) + by_time = [(entry[1], True, entry) for entry in accepted] + for entry in rejected: + by_time.append((entry[1], False, entry)) + by_time.sort(reverse=True) # Since 2.4. Ok. 
+ + buf = StringIO() + ratchet_day = None + for sort_tuple in by_time: + entry = sort_tuple[2] + # year, month, day, DoW + time_tuple = time.gmtime(float(entry[1])) + day = tuple(time_tuple[0:3]) + if day <> ratchet_day: + #buf.write('<h3>%s</h3>' % strftime(date_fmt, time_tuple)) + buf.write('<h3>%s</h3>' % strftime(date_fmt, time_tuple)) + ratchet_day = day + if sort_tuple[1]: + buf.write(accept_summary(entry, time_tuple)) + else: + buf.write(reject_summary(entry, time_tuple)) + return buf.getvalue() # ---------------------------------------------------------- + +# REDFLAG: faster way to do this? does it matter? +def has_illegal_chars(value): + """ Catch illegal characters in image macros. """ + for char in ('<', '>', '&', '\\', "'", '"'): + if value.find(char) != -1: + return True + return False + class PageFormatter: """Object that turns Wiki markup into HTML. @@ -380,6 +540,9 @@ class PageFormatter: def _url_repl(self, word): + if not scrub_links: + return '<a href="%s">%s</a>' % (LINKS_DISABLED_PAGE, word) + return '<a href="%s">%s</a>' % (scrub(word), word) def _img_repl(self, word): @@ -387,25 +550,32 @@ class PageFormatter: # [[[freenet:keyvalue|alt text|title text]]] word = word[3:-3] # grrrrr... _macro_repl is doing this too. fields = word.strip().split('|') - uri = fields[0] - alt_attrib = "" + if has_illegal_chars(word): + return (" <br>[ILLEGAL IMAGE MACRO IN WIKITEXT: " + + " illegal characters! ]<br> ") + + uri = scrub(fields[0], None, True) + + # ONLY static images are allowed! + if not (uri.startswith("/CHK@") or uri.startswith("/SSK@")): + return (" <br>[ILLEGAL IMAGE MACRO IN WIKITEXT: " + + " only CHK@ and SSK@ images allowed! ]<br> ") + + if not scrub_links: + uri = "" # Images disabled when editing. + alt_attrib = ' alt="[WIKITEXT IMAGE MACRO MISSING ALT TAG!]" ' title_attrib = "" - if len(fields) > 1: + if len(fields) > 1 and len(fields[1].strip()) > 0: alt_attrib = ' alt="%s" ' % fields[1] - if len(fields) > 2: + if len(fields) > 2 and len(fields[2].strip()) > 0: title_attrib = ' title="%s" ' % fields[2] - return ' <img src="%s"%s%s/> ' % (scrub(uri), alt_attrib, title_attrib) - - def _email_repl(self, word): - return '<a href="mailto:%s">%s</a>' % (scrub(word), word) - + return ' <img src="%s"%s%s/> ' % (uri, alt_attrib, title_attrib) def _ent_repl(self, s): return {'&': '&', '<': '<', '>': '>'}[s] - def _li_repl(self, match): return '<li>' @@ -452,7 +622,6 @@ class PageFormatter: return apply(getattr(self, '_' + type + '_repl'), (hit,)) else: raise "Can't handle match " + `match` - def print_html(self): # For each line, we scan through looking for magic @@ -463,13 +632,13 @@ class PageFormatter: + r"|(?P<word>\b(?:[A-Z][a-z]+){2,}\b)" + r"|(?P<rule>-{4,})" + r"|(?P<img>\[\[\[(freenet\:[^\]]+)\]\]\])" - + r"|(?P<url>(freenet|http|ftp|nntp|news|mailto)\:[^\s'\"]+\S)" - #+ r"|(?P<email>[-\w._+]+\@[\w.-]+)" + + r"|(?P<url>(freenet|http)\:[^\s'\"]+\S)" + r"|(?P<li>^\s+\*)" + r"|(?P<pre>(\{\{\{|\}\}\}))" + r"|(?P<macro>\[\[(TitleSearch|FullSearch|WordIndex" - + r"|TitleIndex|RecentChanges|ActiveLink|GoTo)\]\])" + + r"|TitleIndex|ActiveLink" + + r"|LocalChanges|RemoteChanges|GoTo)\]\])" + r")") blank_re = re.compile("^\s*$") bullet_re = re.compile("^\s+\*") @@ -487,37 +656,38 @@ class PageFormatter: print re.sub(scan_re, self.replace, line) if self.in_pre: print '</pre>' print self._undent() - # ---------------------------------------------------------- class Page: def __init__(self, page_name): self.page_name = page_name + def wiki_name(self): + return 
self.page_name.split('_')[0] + + def version(self): + fields = self.page_name.split('_') + if len(fields) < 2: + return '' + return fields[1] + def split_title(self): # look for the end of words and the start of a new word, # and insert a space there - return re.sub('([a-z])([A-Z])', r'\1 \2', self.page_name) + fields = self.page_name.split('_') + version = "" + if len(fields) > 1: + version = "(" + fields[1][:12] + ")" + + return re.sub('([a-z])([A-Z])', r'\1 \2', fields[0]) + version def _text_filename(self): return path.join(text_dir, self.page_name) - def _tmp_filename(self): - return path.join(text_dir, ('#' + self.page_name + '.' + `os.getpid()` + '#')) - - def exists(self): - try: - os.stat(self._text_filename()) - return 1 - except OSError, er: - if er.errno == errno.ENOENT: - return 0 - else: - raise er - + return filefuncs.exists(self._text_filename()) def link_to(self): word = self.page_name @@ -530,79 +700,181 @@ class Page: return link_tag(word, word, 'nonexistent') - def get_raw_body(self): + def get_raw_body(self, unmodified=False): try: - return open(self._text_filename(), 'rt').read() + return filefuncs.read(self._text_filename(), 'rb', unmodified) except IOError, er: if er.errno == errno.ENOENT: # just doesn't exist, use default return 'Describe %s here.' % self.page_name else: raise er - - def send_page(self, msg=None): - link = get_scriptname() + '?fullsearch=' + self.page_name - send_title(self.split_title(), link, msg) - PageFormatter(self.get_raw_body()).print_html() - print_footer(self.page_name, 1, self._last_modified()) + def handled_versioned_page(self, msg=None, unmodified=False): + if not is_versioned(self.page_name): + return + msg = None # Hmmmm... + full_path = os.path.join(text_dir, self.page_name) + removed = not filefuncs.exists(full_path, True) + resolved = filefuncs.has_overlay(full_path) + link = get_scriptname() + '?fullsearch=' + self.wiki_name() + send_title(self.split_title(), link, msg, bool(self.version())) + if unmodified: + PageFormatter(self.get_raw_body(unmodified)).print_html() + else: + if removed: + print "<b>Already resolved.</b>" + elif resolved: + print "<b>Locally marked resolved.</b>" + else: + PageFormatter(self.get_raw_body(unmodified)).print_html() + + self.send_footer(True, + self._last_modified(), + self._text_filename(), unmodified) + + return True + + def send_footer(self, versioned, mod_string=None, page_path=None, + unmodified=False): + + #base = get_scriptname() # Hmmm... forget what this was for. + print '<hr>' + if is_read_only(data_dir, self.page_name): + print "<em>The bot owner has marked this page read only.</em>" + return + + if unmodified: + print ("<em>Read only original version " + + "of a locally modified page.</em>") + print (('<br><a href="?viewunmodifiedsource=%s">' % + self.page_name) + '[View page source]</a><br>') + return + + if versioned: + if page_path is None: + # Hmmmm... 
+ return + + if filefuncs.has_overlay(page_path): + print (('<br><a href="?unmodified=%s">' % self.page_name) + + '[Show original version]</a><br>') + print (('<a href="?deletelocal=%s">' % self.page_name) + + '[Mark unresolved, without confirmation!]</a><br>') + + else: + if filefuncs.exists(page_path, True): + print "<em>This is an unmerged fork of another page!</em>" + print (('<br><a href="?viewsource=%s">' % + self.page_name) + + '[View page source]</a><br>') + print (('<br><a href="?removepage=%s">' % + self.page_name) + + '[Locally mark resolved, ' + + 'without confirmation!]</a><br>') + + print "<p><em>Wiki dir: %s </em>" % data_dir + return + + if not page_path is None and filefuncs.has_overlay(page_path): + print "<strong>This page has local edits!</strong><br>" + + if not page_path is None: + name = os.path.split(page_path)[1] + fork_table = get_unmerged_versions(filefuncs, text_dir, + (name,)) + if len(fork_table[name]) > 0: + print ("<strong>This page has forks: %s!</strong><br>" % + get_fork_html(filefuncs, text_dir, name, fork_table)) + + print link_tag('?edit='+name, 'EditText') + print "of this page" + if mod_string: + print "(last modified %s)" % mod_string + print '<br>' + print link_tag('FindPage?value='+name, 'FindPage') + print " by browsing, searching, or an index" + + if page_path is None: + print "<p><em>Wiki dir: %s </em>" % data_dir + return + + if filefuncs.has_overlay(page_path): + print (('<br><a href="?unmodified=%s">' % name) + + '[Show original version]</a><br>') + print (('<a href="?removepage=%s">' % name) + + '[Locally delete this page without confirmation!]</a><br>') + print (('<a href="?deletelocal=%s">' % name) + + '[Undo local edits without confirmation!]</a><br>') + + print "<p><em>Wiki dir: %s </em>" % data_dir + + + def send_page(self, msg=None, unmodified=False): + if self.handled_versioned_page(msg, unmodified): + return + + link = get_scriptname() + '?fullsearch=' + self.wiki_name() + send_title(self.split_title(), link, msg, bool(self.version())) + PageFormatter(self.get_raw_body(unmodified)).print_html() + self.send_footer(False, self._last_modified(), + self._text_filename(), unmodified) def _last_modified(self): if not self.exists(): return None - modtime = localtime(os.stat(self._text_filename())[stat.ST_MTIME]) + modtime = localtime(filefuncs.modtime(self._text_filename())) return strftime(datetime_fmt, modtime) + # hmmm... change function name? 
+ def send_editor(self, read_only=False, unmodified=False): + title = 'Edit ' + read_only_value='' + if read_only: + title = "View Page Source: " + read_only_value = 'readonly' - def send_editor(self): - send_title('Edit ' + self.split_title()) - print '<form method="post" action="%s">' % (get_scriptname()) - print '<input type=hidden name="savepage" value="%s">' % (self.page_name) - raw_body = string.replace(self.get_raw_body(), '\r\n', '\n') + send_title(title + self.split_title()) + # IMPORTANT: Ask browser to send us utf8 + print '<form method="post" action="%s" accept-charset="UTF-8">' % (get_scriptname()) + print '<input type=hidden name="savepage" value="%s">' % \ + (self.page_name) + # Encode outgoing raw wikitext into utf8 + raw_body = string.replace(self.get_raw_body(unmodified), + '\r\n', '\n') print """<textarea wrap="virtual" name="savetext" rows="17" - cols="120">%s</textarea>""" % raw_body - print """<br><input type=submit value="Save"> - <input type=reset value="Reset"> - """ + cols="120" %s >%s</textarea>""" % ( + read_only_value, raw_body) + if not read_only: + print """<br><input type=submit value="Save"> + <input type=reset value="Reset"> + """ print "<br>" - print Page("UploadFile").link_to() - print "<input type=file name=imagefile>" - print "(not enabled yet)" print "</form>" - print "<p>" + Page('EditingTips').link_to() - + if not read_only: + print "<p>" + Page('EditingTips').link_to() def _write_file(self, text): - tmp_filename = self._tmp_filename() - open(tmp_filename, 'wt').write(text) - text = self._text_filename() - if os.name == 'nt': - # Bad Bill! POSIX rename ought to replace. :-( - try: - os.remove(text) - except OSError, er: - if er.errno <> errno.ENOENT: raise er - os.rename(tmp_filename, text) - + filefuncs.write(self._text_filename(), text, 'wb') def save_text(self, newtext): self._write_file(newtext) remote_name = environ.get('REMOTE_ADDR', '') - editlog_add(self.page_name, remote_name) -# See set_data_dir_from_cfg(), reset_root_cfg +# See set_data_dir_from_cfg(), reset_root_dir data_dir = None text_dir = None -editlog_name = None # disabled by default: see reset_root_dir() cgi.logfile = None def get_logo_string(): # Returning '' is allowed return '<img src="%s" border=0 alt="pikipiki">' % scrub('/' + PIKI_PNG) -changed_time_fmt = ' . . . . [%I:%M %p]' -date_fmt = '%a %d %b %Y' +#changed_time_fmt = ' . . . . [%I:%M %p]' +changed_time_fmt = '[%I:%M %p]' +#date_fmt = '%a %d %b %Y' +date_fmt = '%a %d %b %Y UTC' datetime_fmt = '%a %d %b %Y %I:%M %p' show_hosts = 0 # show hostnames? 
css_url = '/' + PIKI_CSS # stylesheet link, or '' @@ -619,25 +891,35 @@ def serve_one_page(): handlers = { 'fullsearch': do_fullsearch, 'titlesearch': do_titlesearch, 'edit': do_edit, - 'savepage': do_savepage } + 'viewsource': do_viewsource, + 'viewunmodifiedsource': do_viewunmodifiedsource, + 'savepage': do_savepage, + 'unmodified': do_unmodified, + 'deletelocal': do_deletelocal, + 'removepage': do_removepage} for cmd in handlers.keys(): if form.has_key(cmd): - apply(handlers[cmd], (form[cmd].value,)) + apply(handlers[cmd], (form[cmd].value.decode('utf8'),)) break else: path_info = environ.get('PATH_INFO', '') if form.has_key('goto'): - query = form['goto'].value + query = form['goto'].value.decode('utf8') elif len(path_info) and path_info[0] == '/': query = path_info[1:] or 'FrontPage' else: query = environ.get('QUERY_STRING', '') or 'FrontPage' - word_match = re.match(word_re_str, query) + #word_match = re.match(word_re_str, query) + word_match = re.match(versioned_page_re_str, query) + #sys.stderr.write("query: %s [%s]\n" % (repr(query), + # repr(word_match))) if word_match: - word = word_match.group(0) + word = word_match.group('wikiword') + if not word_match.group('version') is None: + word = "%s_%s" % (word, word_match.group('version')) Page(word).send_page() else: print "<p>Can't work out query \"<pre>" + query + "</pre>\"" @@ -647,24 +929,119 @@ def serve_one_page(): sys.stdout.flush() + ############################################################ -# Gross, but at least it keeps the hacks in one place. +def is_versioned(page_name): + match = versioned_page_re.match(page_name) + if match is None: + return False + return bool(match.group('version')) + +############################################################ +# See wikibot.py, update_change_log + +# Hmmmm... too much code. +# Returns WikiName->(version, ...) table +def make_fork_list(versioned_names): + if len(versioned_names) == 0: + return () + + table = {} + for name in versioned_names: + result = versioned_page_re.match(name) + assert not result is None + wiki_name = result.group('wikiword') + assert not wiki_name is None + version = result.group('version') + assert not version is None + entry = table.get(wiki_name, []) + entry.append(version) + table[wiki_name] = entry + + for value in table.values(): + value.sort() + + return table + +def is_read_only(base_dir, page_name): + full_path = os.path.join(base_dir, 'readonly.txt') + if not os.path.exists(full_path): + return False + in_file = open(full_path, 'rb') + try: + return page_name in [value.strip() + for value in in_file.read().splitlines()] + finally: + in_file.close() + +def read_log_file_entries(base_dir, max_entries): + accepted = [] + full_path = os.path.join(base_dir, 'accepted.txt') + if os.path.exists(full_path): + in_file = open(full_path, 'rb') + try: + changes = {} + # LATER: find/write reverse line iterator? 
+ for line in reversed(in_file.readlines()): + if len(accepted) >= max_entries: + break + fields = line.split(':') + if fields[0] in ('C', 'M', 'R', 'F'): + for index in range(1, len(fields)): + fields[index] = fields[index].strip() + changes[fields[0]] = fields[1:] + else: + fields = fields[:4] + fields.append((changes.get('C', ()), + changes.get('M', ()), + changes.get('R', ()), + make_fork_list(changes.get('F', ())))) + accepted.append(tuple(fields)) + changes = {} + finally: + in_file.close() + + rejected = [] + full_path = os.path.join(base_dir, 'rejected.txt') + if os.path.exists(full_path): + in_file = open(full_path, 'rb') + try: + changes = {} + # LATER: find/write reverse line iterator? + for line in reversed(in_file.readlines()): + if len(rejected) >= max_entries: + break + rejected.append(tuple(line.split(':')[:5])) + finally: + in_file.close() + + return tuple(accepted), tuple(rejected) + + class FreenetPage(Page): def __init__(self, page_name): Page.__init__(self, page_name) - def send_page(self, msg=None): - link = get_scriptname() + '?fullsearch=' + self.page_name - send_title(self.split_title(), link, msg) - PageFormatter(self.get_raw_body()).print_html() - #print_footer(self.page_name, 1, self._last_modified()) + + def send_footer(self, versioned, dummy_mod_string=None, + page_path=None, + dummy_unmodified=False): print "<hr>" print "%s %s %s" % (link_tag('FrontPage', 'FrontPage'), link_tag('TitleIndex', 'TitleIndex'), link_tag('WordIndex', 'WordIndex')) + if not page_path is None and not versioned: + name = os.path.split(page_path)[1] + fork_table = get_unmerged_versions(filefuncs, text_dir, + (name,)) + if len(fork_table[name]) > 0: + print (("<hr><strong>This page has forks: %s! " % + get_fork_html(filefuncs, text_dir, name, fork_table)) + + + "Please consider merging them.</strong><br>") -def reset_root_dir(root_dir, disable_edit_log=True): - global data_dir, text_dir, editlog_name +def reset_root_dir(root_dir, overlayed=False): + global data_dir, text_dir, filefuncs if not os.path.exists(root_dir) or not os.path.isdir(root_dir): raise IOError("Base wiki dir doesn't exist: %s" % root_dir) @@ -673,11 +1050,13 @@ def reset_root_dir(root_dir, disable_edi if not os.path.exists(text_dir) or not os.path.isdir(text_dir): raise IOError("Wikitext dir doesn't exist: %s" % text_dir) - if disable_edit_log: - editlog_name = None - else: - editlog_name = path.join(data_dir, 'editlog') cgi.logfile = path.join(data_dir, 'cgi_log') + filefuncs = fileoverlay.get_file_funcs(root_dir, overlayed) + if overlayed: + # Only overlay 'wikitext', not 'www' + full_path = filefuncs.overlay_path(text_dir) + if not os.path.exists(full_path): + os.makedirs(full_path) CFG_FILE = 'fnwiki.cfg' WIKIROOT = 'wiki_root' @@ -687,7 +1066,6 @@ def set_data_dir_from_cfg(base_path=None if base_path is None: # REDFLAG: test on windoze. 
base_path = os.getcwd() - cfg_file = os.path.join(base_path, CFG_FILE) parser = ConfigParser() parser.read(cfg_file) @@ -699,7 +1077,11 @@ def set_data_dir_from_cfg(base_path=None else: root_dir = os.path.join(base_path, WIKIROOT) - reset_root_dir(root_dir) + overlayed = True + if parser.has_option('default','overlayedits'): + overlayed = parser.getboolean('default','overlayedits') + + reset_root_dir(root_dir, overlayed) import shutil def create_default_wiki(base_path): @@ -710,22 +1092,23 @@ def create_default_wiki(base_path): 'default_files'), base_path) -def dump(output_dir, wiki_root): +def dump(output_dir, wiki_root, overlayed=False): global form, scrub_links + form = {} scrub_links = True - - reset_root_dir(wiki_root) + reset_root_dir(wiki_root, overlayed) old_out = sys.stdout try: - pages = list(page_list()) + pages = list(page_list(True)) for name in pages: file_name = os.path.join(output_dir, name) - out = open(file_name, "wb") + out = codecs.open(file_name, "wb", 'utf8') # Write utf8 try: page = FreenetPage(name) sys.stdout = out + print '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">' page.send_page() sys.stdout.flush() out.close() @@ -741,6 +1124,11 @@ def dump(output_dir, wiki_root): out.write("That page doesn't exist in the wiki yet!\n") out.close() + if not os.path.exists(os.path.join(data_dir, 'AlreadyResolved')): + out = open(os.path.join(output_dir, 'AlreadyResolved'), 'wb') + out.write("That fork was already resolved.\n") + out.close() + # .css, .png www_dir = os.path.join(data_dir, 'www') for name in PIKI_REQUIRED_FILES: @@ -750,12 +1138,23 @@ def dump(output_dir, wiki_root): # "builtin" when execfile()'d by servepiki.py if __name__ == "__main__" or __name__ == "__builtin__": - # Uncomment for testing but DCI. - # sys.stderr = open('/tmp/piki_err', 'at') + if not STDERR_FILE is None: + sys.stderr = open(STDERR_FILE, 'ab') + else: + # Disable stderr. hmmm... + sys.stderr = StringIO() - # Suppress all output. Would choke on unicode but shouldn't - # see unicode, right? - sys.stderr = StringIO() # REDFLAG: revisit. - set_data_dir_from_cfg() - serve_one_page() + # Redirect "print" output into a StringIO so + # we can encode the html as UTF-8. + real_out = sys.stdout + buf = StringIO() + sys.stdout = buf + try: + set_data_dir_from_cfg() + serve_one_page() + finally: + sys.stdout = real_out + print buf.getvalue().encode('utf8') + + diff --git a/fniki/servepiki.py b/fniki/servepiki.py --- a/fniki/servepiki.py +++ b/fniki/servepiki.py @@ -29,6 +29,7 @@ import cStringIO import re import piki +from fileoverlay import remove_redundant_files # Absolute path to the cgi python script. SCRIPT_PATH = piki.__file__ @@ -259,7 +260,14 @@ def serve_wiki(port=8081, bind_to='local out_func(piki.text_dir + " (wiki text)") www_dir = os.path.join(piki.data_dir, 'www') out_func(www_dir + " (.css, .png)") - print + if not piki.filefuncs.is_overlayed(): + out_func("NOT OVERLAYED! Writing changes directly into wiki text dir.") + else: + out_func("Writing overlayed changes to:") + # Hmmm... crappy style, not polymorphic. + out_func(piki.filefuncs.overlay_path(piki.data_dir)) + remove_redundant_files(piki.filefuncs, piki.text_dir, out_func) + out_func("") bound_to = bind_to if bound_to == '': bound_to = 'all interfaces!' 
diff --git a/infocalypse/__init__.py b/infocalypse/__init__.py --- a/infocalypse/__init__.py +++ b/infocalypse/__init__.py @@ -364,7 +364,7 @@ from infcmds import get_config_info, exe from fmscmds import execute_fmsread, execute_fmsnotify, get_uri_from_hash from sitecmds import execute_putsite, execute_genkey -from wikicmds import execute_wiki +from wikicmds import execute_wiki, execute_wiki_apply from arccmds import execute_arc_create, execute_arc_pull, execute_arc_push, \ execute_arc_reinsert @@ -588,14 +588,23 @@ def infocalypse_fmsnotify(ui_, repo, **o """ params, stored_cfg = get_config_info(ui_, opts) insert_uri = stored_cfg.get_dir_insert_uri(repo.root) - if not insert_uri: + if not insert_uri and not (opts['submitbundle'] or + opts['submitwiki']): ui_.warn("You can't notify because there's no stored " + "insert URI for this repo.\n" + "Run from the directory you inserted from.\n") return params['ANNOUNCE'] = opts['announce'] - params['SUBMIT'] = opts['submit'] + params['SUBMIT_BUNDLE'] = opts['submitbundle'] + params['SUBMIT_WIKI'] = opts['submitwiki'] + if params['SUBMIT_WIKI'] or params['SUBMIT_BUNDLE']: + request_uri = stored_cfg.get_request_uri(repo.root) + if not request_uri: + ui_.warn("There is no stored request URI for this repo.\n") + raise util.Abort("No request URI.") + params['REQUEST_URI'] = request_uri + params['DRYRUN'] = opts['dryrun'] params['INSERT_URI'] = insert_uri execute_fmsnotify(ui_, repo, params, stored_cfg) @@ -654,13 +663,24 @@ def infocalypse_wiki(ui_, repo, **opts): if os.getcwd() != repo.root: raise util.Abort("You must be in the repository root directory.") - subcmds = ('run', 'createconfig') + subcmds = ('run', 'createconfig', 'apply') required = sum([bool(opts[cmd]) for cmd in subcmds]) if required == 0: - raise util.Abort("You must specify either --run or --createconfig.") + raise util.Abort("You must specify either --run, " + + "--createconfig, --apply") if required > 1: - raise util.Abort("Use either --run or --createconfig.") - # hmmmm.... useless copy? + raise util.Abort("Use either --run, --createconfig, or --apply") + + if opts['apply'] != '': + params, stored_cfg = get_config_info(ui_, opts) + params['REQUEST_URI'] = opts['apply'] + execute_wiki_apply(ui_, repo, params, stored_cfg) + return + + if opts['fcphost'] != '' or opts['fcpport'] != 0: + raise util.Abort("--fcphost, --fcpport only for --apply") + + # hmmmm.... useless copy? 
params = {'WIKI' : [cmd for cmd in subcmds if opts[cmd]][0], 'HTTP_PORT': opts['http_port'], 'HTTP_BIND': opts['http_bind']} @@ -841,8 +861,10 @@ cmdtable = { "fn-fmsnotify": (infocalypse_fmsnotify, [('', 'dryrun', None, "don't send fms message"), ('', 'announce', None, "include full URI update"), - ('', 'submit', None, "insert patch bundle and send an " + - "fms notification"),] + ('', 'submitbundle', None, "insert patch bundle and " + + "send an fms notification"), + ('', 'submitwiki', None, "insert overlayed wiki " + + "changes and send an fms notification"),] + FCP_OPTS, # Needs to invert the insert uri "[options]"), @@ -856,14 +878,17 @@ cmdtable = { "[options]"), "fn-wiki": (infocalypse_wiki, - [('', 'run', None, "start a local http server " + - "displaying a wiki"), - ('', 'createconfig', None, "create default fnwiki.cfg " + - "and skeleton wiki_root dir"), - ('', 'http_port', 8081, "port for http server"), - ('', 'http_bind', 'localhost', "interface http " + - "listens on, '' to listen on all"),], - "[options]"), + [('', 'run', None, "start a local http server " + + "displaying a wiki"), + ('', 'createconfig', None, "create default fnwiki.cfg " + + "and skeleton wiki_root dir"), + ('', 'http_port', 8081, "port for http server"), + ('', 'http_bind', 'localhost', "interface http " + + "listens on, '' to listen on all"), + ('', 'apply', '', "apply changes to the wiki from the " + + "supplied Request URI ")] + + FCP_OPTS, + "[options]"), "fn-genkey": (infocalypse_genkey, FCP_OPTS, diff --git a/infocalypse/config.py b/infocalypse/config.py --- a/infocalypse/config.py +++ b/infocalypse/config.py @@ -113,6 +113,7 @@ def detect_and_fix_default_bug(ui_, file fixed_file.close() ui_.warn("Applied fix.\n") +# Why didn't I subclass dict? # Eventually set state from fms feed. i.e. latest repo updates. class Config: """ Persisted state used by the Infocalypse mercurial extension. """ @@ -376,6 +377,22 @@ class Config: finally: out_file.close() +def set_wiki_params(parser, params): + """ Helper reads wiki specific parameters from site config files. """ + params['WIKI_ROOT'] = parser.get('default', 'wiki_root') + params['OVERLAYED'] = False + + if parser.has_option('default', 'overlayedits'): + params['OVERLAYED'] = parser.getboolean('default', 'overlayedits') + + if parser.has_option('default', 'wiki_group'): + params['CLIENT_WIKI_GROUP'] = parser.get('default', 'wiki_group') + if parser.has_option('default', 'wiki_server_id'): + params['CLIENT_WIKI_ID'] = parser.get('default', 'wiki_server_id') + if parser.has_option('default', 'wiki_repo_usk'): + params['CLIENT_WIKI_USK'] = parser.get('default', 'wiki_repo_usk') + + # HACK: This really belongs in sitecmds.py but I wanted # all ConfigParser dependencies in one file because of # the ConfigParser import hack. See top of file. @@ -404,7 +421,7 @@ def read_freesite_cfg(ui_, repo, params, # wiki specific if params['ISWIKI']: - params['WIKI_ROOT'] = parser.get('default', 'wiki_root') + set_wiki_params(parser, params) else: params['SITE_DIR'] = parser.get('default', 'site_dir') @@ -497,6 +514,17 @@ site_name = default # # File to display by default. default_file = FrontPage +# +# Local editing +# By default, write direcly into the wikitext directory. +# If you're sending changes via hg fn-fmsnotify --submitwiki +# this should be set True. +overlayedits = False + +# Remote server configuration. 
+#wiki_group = infocalypse.tst +#wiki_server_id = <fms_id of your wikibot > +#wiki_repo_usk = <request uri of your wikitext infocalypse repo> """ if os.path.exists(file_name): raise util.Abort("Already exists: %s" % file_name) diff --git a/infocalypse/fcpclient.py b/infocalypse/fcpclient.py --- a/infocalypse/fcpclient.py +++ b/infocalypse/fcpclient.py @@ -353,6 +353,7 @@ def prefetch_usk(client, usk_uri, allowe client.message_callback = message_callback client.in_params.default_fcp_params['ReturnType'] = 'none' try: + # BUG: HANGS version = get_version(client.get(usk_uri, allowed_redirects)[1]['URI']) except FCPError: @@ -681,43 +682,56 @@ class FCPClient(MinimalClient): self.in_params.definition = PUT_COMPLEX_DIR_DEF self.in_params.fcp_params = {'URI': uri} - for field in self.in_params.default_fcp_params: - if field.startswith("Files"): - raise ValueError("You can't set file entries via " - + " default_fcp_params.") - if 'DefaultName' in self.in_params.default_fcp_params: - raise ValueError("You can't set 'DefaultName' via " - + "default_fcp_params.") - - files = {} - index = 0 - for info in file_infos: - mime_type = info[2] - if not mime_type: - # First try to guess from the extension. - type_tuple = mimetypes.guess_type(info[0]) - if type_tuple: - mime_type = type_tuple[0] - if not mime_type: - # Fall back to the default. - mime_type = default_mime_type - - files['Files.%i.Name' % index] = info[0] - files['Files.%i.UploadFrom' % index] = 'direct' - files['Files.%i.DataLength' % index] = info[1] - files['Files.%i.Metadata.ContentType' % index] = mime_type - - index += 1 - - self.in_params.fcp_params['Files'] = files - self.in_params.fcp_params['DefaultName'] = file_infos[0][0] - - #REDFLAG: Fix - self.in_params.send_data = True - # IMPORTANT: Don't set the data length. return self.conn.start_request(self, - FileInfoDataSource(file_infos), False) + dir_data_source(file_infos, + self.in_params, + default_mime_type), + False) + + +# Break out implementation helper so I can use it elsewhere. +def dir_data_source(file_infos, in_params, default_mime_type): + """ Return an IDataSource for a list of file_infos. + + NOTE: Also sets up Files.* fields in in_params as a + side effect. """ + + for field in in_params.default_fcp_params: + if field.startswith("Files"): + raise ValueError("You can't set file entries via " + + " default_fcp_params.") + if 'DefaultName' in in_params.default_fcp_params: + raise ValueError("You can't set 'DefaultName' via " + + "default_fcp_params.") + + files = {} + index = 0 + for info in file_infos: + mime_type = info[2] + if not mime_type: + # First try to guess from the extension. + type_tuple = mimetypes.guess_type(info[0]) + if type_tuple: + mime_type = type_tuple[0] + if not mime_type: + # Fall back to the default. + mime_type = default_mime_type + + files['Files.%i.Name' % index] = info[0] + files['Files.%i.UploadFrom' % index] = 'direct' + files['Files.%i.DataLength' % index] = info[1] + files['Files.%i.Metadata.ContentType' % index] = mime_type + + index += 1 + + in_params.fcp_params['Files'] = files + in_params.fcp_params['DefaultName'] = file_infos[0][0] + + #REDFLAG: Fix + in_params.send_data = True + + return FileInfoDataSource(file_infos) ############################################################ # Helper function for hg changeset bundle handling. 
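For reference, a minimal usage sketch of the broken-out dir_data_source() helper from the fcpclient.py hunk above (this sketch is not part of the patch; the host, port, URI, file names and sizes are made-up examples). It shows the (name, length, mime type) tuples the helper consumes and the Files.* side effects it applies to in_params — presumably the reason it was broken out of put_complex_dir() is so the same code can also feed QueueableRequest.custom_data_source, added in the requestqueue.py hunk below.

from fcpclient import FCPClient, dir_data_source

# Illustrative only -- not part of the patch. Host, port and URI are
# placeholders.
client = FCPClient.connect('127.0.0.1', 9481)
client.in_params.fcp_params = {'URI': 'CHK@'}

# Each entry is (name, data_length, mime_type-or-None). A None mime type
# is guessed from the file extension, then falls back to the default
# passed as the third argument below.
file_infos = (('FrontPage', 1234, None),
              ('activelink.png', 4567, 'image/png'))

# Fills Files.<n>.Name / UploadFrom / DataLength / Metadata.ContentType
# and DefaultName into client.in_params.fcp_params as a side effect and
# returns a FileInfoDataSource the FCP connection can stream from.
data_source = dir_data_source(file_infos, client.in_params, 'text/html')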
diff --git a/infocalypse/fms.py b/infocalypse/fms.py --- a/infocalypse/fms.py +++ b/infocalypse/fms.py @@ -22,6 +22,7 @@ import os import sys import StringIO +import time from fcpclient import get_usk_hash, get_version, is_usk_file, \ get_usk_for_usk_version @@ -53,6 +54,10 @@ except ImportError, err: # Can't catch ImportError? Always aborts. ??? import nntplib +def get_connection(fms_host, fms_port, user_name): + """ Create an fms NNTP connection. """ + return nntplib.NNTP(fms_host, fms_port, user_name) + MSG_TEMPLATE = """From: %s Newsgroups: %s Subject: %s @@ -60,28 +65,102 @@ Subject: %s %s""" # Please use this function for good and not evil. -def send_msgs(fms_host, fms_port, msg_tuples): +def send_msgs(server, msg_tuples, send_quit=False): """ Send messages via fms. - msg_tuple format is: (sender, group, subject, text) + msg_tuple format is: (sender, group, subject, text, send_callback) + + send_callback is optional. + + If it is present and not None send_callback(message_tuple) + is invoked after each message is sent. + + It is legal to include additional client specific fields. """ - server = nntplib.NNTP(fms_host, fms_port) + for msg_tuple in msg_tuples: + raw_msg = MSG_TEMPLATE % (msg_tuple[0], + msg_tuple[1], + msg_tuple[2], + msg_tuple[3]) + in_file = StringIO.StringIO(raw_msg) + try: + server.post(in_file) + if len(msg_tuple) > 4 and not msg_tuple[4] is None: + # Sent notifier + msg_tuple[4](msg_tuple) + + if send_quit: + server.quit() + finally: + in_file.close() + +def get_nntp_trust(server, kind, fms_id): + """ INTERNAL: Helper to make a single XGETTRUST request. """ + assert not server is None + result = server.shortcmd("XGETTRUST %s %s" % + (kind, fms_id)).split(' ') try: - for msg_tuple in msg_tuples: - raw_msg = MSG_TEMPLATE % (msg_tuple[0], - msg_tuple[1], - msg_tuple[2], - msg_tuple[3]) - in_file = StringIO.StringIO(raw_msg) - #print raw_msg - try: - server.post(in_file) - finally: - in_file.close() - finally: - server.quit() + code = int(result[0]) + except ValueError: + raise nntplib.NNTPError("Couldn't parse return code from XGETTRUST.") + if code < 200 or code > 299: + raise nntplib.NNTPError("Unexpected return code[%i] from XGETTRUST." % + code) + if result[1] == 'null': + return None + + return int(result[1]) + +def get_trust(server, fms_id): + """ INTERNAL: Fetch trust values via multiple XGETTRUST calls. """ + return tuple([get_nntp_trust(server, kind, fms_id) for kind in + ('MESSAGE','TRUSTLIST', 'PEERMESSAGE', 'PEERTRUSTLIST')]) + +class TrustCache: + """ Cached interface to FMS trust values. """ + + # REQUIRES: server was connected with auth_fms_id we want trust info from. + def __init__(self, server, timeout_secs=1*60*60): + # fms_id -> (timeout_secs, trust_tuple) + self.table = {} + self.timeout_secs = timeout_secs + self.server = server + + def flush(self): + """ Flush the cache. """ + self.table = {} + + def prefetch_trust(self, fms_ids): + """ Fetch and cache trust values as nescessary. + + If you know the required fms_ids call this + once with the ids before get_trust() to + minimize load on the FMS server. """ + + for fms_id in fms_ids: + if (not self.table.get(fms_id, None) is None and + self.table[fms_id][0] > time.time()): + print "%s cached for %i more secs. (prefetch)" % ( + fms_id, (self.table[fms_id][0] - time.time())) + continue + self.table[fms_id] = (time.time() + self.timeout_secs, + get_trust(self.server, fms_id)) + def get_trust(self, fms_id): + """ Return (MESSAGE, TRUSTLIST, PEERMESSAGE, PEERTRUSTLIST) + trust values. 
+ + Can contain None entries if the trust was 'null'. """ + + cached = self.table.get(fms_id, None) + if cached is None or cached[0] < time.time(): + self.prefetch_trust((fms_id, )) + assert fms_id in self.table + print "%s cached for %i more secs. (get)" % ( + fms_id, (self.table[fms_id][0] - time.time())) + + return self.table[fms_id][1] class IFmsMessageSink: """ Abstract interface for an fms message handler. """ @@ -107,37 +186,115 @@ class IFmsMessageSink: # raise NotImplementedError() pass -def recv_msgs(fms_host, fms_port, msg_sink, groups): + +def article_range(first, last, old_last): + """ INTERNAL: Helper to determine which articles are required. """ + first = int(first) + last = int(last) + + if old_last is None: # first fetch + return (first, last) + + to_fetch = last - old_last + if to_fetch == 0: + return (last, last) + + # I doubt this is a problem in practice, but if it is, at + # least fail explicitly. + + # Couldn't find info on wrapping in RFC 977 + assert to_fetch > 0 + + return (last - to_fetch + 1, last) + +def recv_msgs(server, msg_sink, groups, max_articles=None, send_quit=False): """ Read messages from fms. """ - server = nntplib.NNTP(fms_host, fms_port) + + if max_articles is None: + max_articles = {} + + for group in groups: + if max_articles.get(group, 'dummy') == 'dummy': + #print "ADDING ", group + max_articles[group] = None + + for group in groups: + recv_group_msgs(server, group, msg_sink, max_articles) + + if send_quit: + server.quit() + +def recv_group_msgs(server, group, msg_sink, max_articles): + """ INTERNAL: Helper dispatches messages for a single group. """ + if not group or group.strip() == '': + raise ValueError("Empty group names are not allowed.") + try: - for group in groups: - if not group or group.strip() == '': - raise ValueError("Empty group names are not allowed.") - result = server.group(group) - if result[1] == '0': - continue - # Doesn't return msg lines as shown in python doc? - # http://docs.python.org/library/nntplib.html - # Is this an fms bug? - result, items = server.xover(result[2], result[3]) - if result.split(' ')[0] != '224': - # REDFLAG: untested code path - raise Exception(result) - for item in items: - if not msg_sink.wants_msg(group, item): - continue - result = server.article(item[0]) - if result[0].split(' ')[0] != '220': - # REDFLAG: untested code path - raise Exception(result[0]) - pos = result[3].index('') - lines = [] - if pos != -1: - lines = result[3][pos + 1:] - msg_sink.recv_fms_msg(group, item, lines) - finally: - server.quit() + result = server.group(group) + except nntplib.NNTPTemporaryError, err1: + # Ignore 411 errors which happen before the local FMS + # instance has learned about the group. + print "Skipped: %s because of error: %s" % (group, str(err1)) + return + + if result[1] == '0': + return + + first, last = article_range(result[2], result[3], + max_articles[group]) + + #print "READING %s: (%i, %i, %i)" % \ + # (group, first, last, max(max_articles[group], -1)) + if not max_articles[group] is None and last <= max_articles[group]: + #print "No articles to fetch." + #print "continue(0)" + return # Already fetched. + + # Doesn't return msg lines as shown in python doc? + # http://docs.python.org/library/nntplib.html + # Is this an fms bug? + result, items = server.xover(str(first), str(last)) + + if result.split(' ')[0] != '224': + # REDFLAG: untested code path + raise Exception(result) + + for item in items: + if not msg_sink.wants_msg(group, item): + #print "continue(1)" + continue # Hmmmm... 
were does this continue? + try: + result = server.article(item[0]) + except nntplib.NNTPProtocolError, nntp_err: + # REDFLAG: + # djk20091224 I haven't seen this trip in a month or so. + # Research: + # 0) Database corruption? + # 1) FMS bug? + # 2) nntplib bug? + # + # djk20091023 If I use execquery.htm to on the message ID + # that causes this I get nothing back. == db corruption? + print "SAW NNTPProtocolError: ", items[4] + if str(nntp_err) != '.': + print "CAN'T HACK AROUND IT. Sorry :-(" + raise + print "TRYING TO HACK AROUND IT..." + msg_sink.recv_fms_msg(group, item, []) + print "continue(2)" + continue + + if result[0].split(' ')[0] != '220': + # REDFLAG: untested code path + raise Exception(result[0]) + pos = result[3].index('') + lines = [] + if pos != -1: + lines = result[3][pos + 1:] + msg_sink.recv_fms_msg(group, item, lines) + + # Only save if all the code above ran without error. + max_articles[group] = last ############################################################ # Infocalypse specific stuff. diff --git a/infocalypse/fmsbot.py b/infocalypse/fmsbot.py new file mode 100644 --- /dev/null +++ b/infocalypse/fmsbot.py @@ -0,0 +1,336 @@ +""" Classes to run bots over FMS. + + Please use for good and not evil. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + +import os +import socket +import time + +import fms +from fms import IFmsMessageSink + +class FMSBotRunner(IFmsMessageSink): + """ Container class which owns and runs one or more FMSBots. """ + def __init__(self, params): + IFmsMessageSink.__init__(self) + self.bots = [] + self.msg_targets = [] + self.groups = set([]) + self.max_articles = {} + self.outgoing_msgs = [] + self.nntp = fms # So it can be swapped out for testing. + self.nntp_server = None + self.params = params.copy() + assert self.params.get('FMS_HOST') + assert self.params.get('FMS_PORT') + assert self.params.get('BOT_STORAGE_DIR') + + if not (os.path.exists(params['BOT_STORAGE_DIR']) and + os.path.isdir(params['BOT_STORAGE_DIR'])): + raise ValueError(("Storage dir doesn't exist: %s") % + self.params['BOT_STORAGE_DIR']) + + def log(self, msg): + """ Print a log message. """ + print msg + + def nntp_reconnect(self, suppress_events=False): + """ Connect to fms. """ + if not self.nntp_server is None: + return self.nntp_server + + try: + fms_id = self.params.get('FMS_ID', None) + self.nntp_server = self.nntp.get_connection(self.params['FMS_HOST'], + self.params['FMS_PORT'], + fms_id) + except Exception, err: # DCI: what else do I need to catch? + self.log("FMSBotRunner.nntp_reconnect -- failed: %s" % str(err)) + return None + + if not suppress_events: + for bot in self.bots: + bot.on_fms_change(True) + + return self.nntp_server + + def nntp_close(self): + """ Disconnect from fms. 
""" + if self.nntp_server is None: + return + try: + try: + self.nntp_server.quit() + except IOError, err: + self.log("FMSBotRunner.nntp_close -- failed: %s" % str(err)) + except EOFError, err: + self.log("FMSBotRunner.nntp_close -- failed: %s" % str(err)) + finally: + self.nntp_server = None + + for bot in self.bots: + bot.on_fms_change(False) + + def nntp_send(self): + """ Send pending fms messages. """ + if not self.outgoing_msgs: + return False + if self.nntp_server is None: + self.log("FMSBotRunner.nntp_send -- nntp_send not connected!") + return False + try: + raised = True + try: + self.nntp.send_msgs(self.nntp_server, + self.outgoing_msgs) + # i.e. Don't clear if there was an exception. + self.outgoing_msgs = [] + raised = False + finally: + if raised: + self.nntp_close() + except Exception, err: # DCI: what else do I need to catch? + # ??? fail silently??? + self.log("FMSBotRunner.nntp_send -- send_msgs failed: %s" % + str(err)) + return False + return True + + def wants_msg(self, group, items): + """ IFmsMessageSink implementation. """ + # REDFLAG: unwind recv_msgs instead of dorky hack? + self.msg_targets = [bot for bot in self.bots + if not bot.exit and bot.wants_msg(group, items)] + return len(self.msg_targets) > 0 + + def recv_fms_msg(self, group, items, lines): + """ IFmsMessageSink implementation. """ + for bot in self.msg_targets: + assert not bot.exit + bot.recv_fms_msg(group, items, lines) + + # REDFLAG: exceptions + def startup(self): + """ Run on_startup() handler on all bots. """ + self.nntp_reconnect(True) # Suppress events. Bots not started. + for bot in self.bots: + bot.on_startup() + + def shutdown(self, why): + """ Run on_startup() handler on all bots which haven't exited. """ + for bot in self.bots: + bot.on_shutdown(why) + + # Allow bots to send messages on shutdown. + self.nntp_send() + + def idle(self): + """ Run on_idle() handler on all bots which haven't exited """ + + self.nntp_reconnect() + self.nntp_send() + + for bot in self.bots[:]: + if bot.exit: + bot.on_shutdown('Set exit=True') + self.bots.remove(bot) + continue + bot.on_idle() + + def is_running(self): + """ Returns True if the runner has at least one bot that + hasn't exited, False otherwise. """ + for bot in self.bots: + if not bot.exit: + return True + return False + + def get_path(self, bot, fname): + """ Get a bot specific path. """ + assert fname.find(os.path.sep) == -1 + return os.path.join(self.params['BOT_STORAGE_DIR'], + "%s_%s" %(bot.name, fname)) + + def queue_msg(self, msg_tuple): + """ Queue an outgoing message. + + You can set a callback in the msg_tuple which is + called when the message is actually sent. + See fms.send_msgs(). + """ + self.outgoing_msgs.append(msg_tuple) + + def recv_msgs(self): + """ Poll for new fms messages and dispatch them to registed bots. """ + if not self.nntp_server: + self.log("FMSBotRunner.recv_msgs -- not connected") + return False + + try: + raised = True + try: + self.nntp.recv_msgs(self.nntp_server, + self, self.groups, self.max_articles) + raised = False + finally: + if raised: + self.nntp_close() + except Exception, err: # DCI: what else do I need to catch? + self.log("FMSBotRunner.recv_msgs -- failed: %s" % str(err)) + raise # DCI: NEED TO FIX THIS + return False + + return True + + def register_bot(self, bot, groups): + """ Add a bot to the FMSBotRunner. + + Adds groups to bot.groups as a side effect. + + REQUIRES: No other bot already registered with bot.name. 
+ """ + assert bot.name and len(bot.name.strip()) + assert not groups is None # Empty is ok, None is not + assert bot not in self.bots + + groups = set(groups) + + bot.parent = self + bot.groups.update(groups) + + self.groups.update(groups) + self.bots.append(bot) + +class FMSBot(IFmsMessageSink): + """ Abstract base class for bots which run over FMS. """ + + def __init__(self, name): + IFmsMessageSink.__init__(self) + self.parent = None + self.name = name # UNIQUE, PERSISTENT NAME + self.groups = set([]) + self.exit = False + + def log(self, text): + """ Display log messages. """ + print "%s:%s" % (self.name, text) + + def on_startup(self): + """ Event handler which is run once when the bot is started. """ + # setup shelves db + pass + + def on_shutdown(self, why): + """ Event handler which is run once when the bot is shutdown. """ + # tear down shelves db + pass + + # Hook to kick a state machine. + def on_idle(self): + """ Event handler called intermittenly when the bot is idle. """ + pass + + # Filter messages + def wants_msg(self, group, dummy_items): + """ Return True if the bot should handle the message, + False otherwise. + """ + return group in self.groups + + # Handle a single message + def recv_fms_msg(self, group, items, lines): + """ Handle a single message. """ + pass + + # DCI: Too hacky? + def on_fms_change(self, dummy_connected): + """ Called when the fms server drops or reconnects """ + pass + +def run_bots(bot_runner, poll_time, sleep_func=time.sleep): + """ Run the bot_runner until all it's bots exit. """ + bot_runner.startup() + reason = "Unknown exception" # REDFLAG: Do better! + try: + while bot_runner.is_running(): + bot_runner.recv_msgs() + if not bot_runner.is_running(): + break # Shutdown while recv'ing + bot_runner.idle() + if not bot_runner.is_running(): + break # Shutdown while idle() + sleep_func(poll_time) + reason = "Clean exit" + finally: + bot_runner.shutdown(reason) + +# Hmmm not wikibot specific +def run_event_loops(bot_runner, request_runner, + bot_poll_secs = 5 * 60, + fcp_poll_secs = 0.25, + out_func = lambda msg:None): + """ Graft the event loops for the FMSBotRunner and RequestQueue together.""" + assert bot_poll_secs > fcp_poll_secs + connection = request_runner.connection + assert not connection is None + shutdown_msg = "unknown error" + try: + bot_runner.recv_msgs() + timeout = time.time() + bot_poll_secs + while True: + # Run the FCP event loop (frequent) + try: + if not connection.socket.poll(): + out_func("Exiting because FCP poll exited.\n") + break + # Nudge the state machine. + request_runner.kick() + except socket.error: # Not an IOError until 2.6. + out_func("Exiting because of an error on the FCP socket.\n") + raise + except IOError: + out_func("Exiting because of an IO error.\n") + raise + + if time.time() < timeout: + # Rest a little. :-) + time.sleep(fcp_poll_secs) + continue + + # Run FMSBotRunner event loop (infrequent) + bot_runner.recv_msgs() + if not bot_runner.is_running(): + out_func("Exiting because the FMS bot runner exited.\n") + break # Shutdown while recv'ing + bot_runner.idle() + if not bot_runner.is_running(): + out_func("Exiting because the FMS bot runner " + + "exited while idle.\n") + break # Shutdown while idle() + + timeout = time.time() + bot_poll_secs # Wash. Rinse. Repeat. 
+ shutdown_msg = "orderly shutdown" + finally: + connection.close() + bot_runner.shutdown(shutdown_msg) + + diff --git a/infocalypse/fmscmds.py b/infocalypse/fmscmds.py --- a/infocalypse/fmscmds.py +++ b/infocalypse/fmscmds.py @@ -19,6 +19,8 @@ Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks """ + +# REDFLAG: Go back and fix all the places where you return instead of Abort() from mercurial import util from fcpclient import get_usk_hash @@ -26,10 +28,11 @@ from fcpclient import get_usk_hash from knownrepos import KNOWN_REPOS from fms import recv_msgs, to_msg_string, MSG_TEMPLATE, send_msgs, \ - USKNotificationParser, show_table + USKNotificationParser, show_table, get_connection from config import Config, trust_id_for_repo, untrust_id_for_repo, known_hashes from infcmds import do_key_setup, setup, cleanup, execute_insert_patch +from wikicmds import execute_wiki_submit def handled_list(ui_, params, stored_cfg): """ INTERNAL: HACKED""" @@ -46,8 +49,9 @@ def handled_list(ui_, params, stored_cfg parser = USKNotificationParser(trust_map) parser.add_default_repos(KNOWN_REPOS) - recv_msgs(stored_cfg.defaults['FMS_HOST'], - stored_cfg.defaults['FMS_PORT'], + recv_msgs(get_connection(stored_cfg.defaults['FMS_HOST'], + stored_cfg.defaults['FMS_PORT'], + None), parser, stored_cfg.fmsread_groups) show_table(parser, ui_.status) @@ -132,8 +136,9 @@ def execute_fmsread(ui_, params, stored_ ui_.status("Raking through fms messages. This may take a while...\n") parser = USKNotificationParser() - recv_msgs(stored_cfg.defaults['FMS_HOST'], - stored_cfg.defaults['FMS_PORT'], + recv_msgs(get_connection(stored_cfg.defaults['FMS_HOST'], + stored_cfg.defaults['FMS_PORT'], + None), parser, stored_cfg.fmsread_groups) @@ -184,6 +189,29 @@ def is_none(value): """ Return True if value is None or 'None', False otherwise. """ return value is None or value == 'None' +def check_fms_cfg(ui_, params, stored_cfg): + """ INTERNAL: Helper aborts on bad fms configuration. """ + if (is_none(stored_cfg.defaults['FMS_ID']) or + stored_cfg.defaults['FMS_ID'].strip() == ''): + ui_.warn("Can't notify because the fms ID isn't set in the " + + "config file.\n") + raise util.Abort("Fix the fms_id = line in the config file and " + + "and try again.\n") + + if stored_cfg.defaults['FMS_ID'].find('@') != -1: + ui_.warn("The fms_id line should only " + + "contain the part before the '@'.\n") + raise util.Abort("Fix the fms_id = line in the config file and " + + "and try again.\n") + + if (is_none(stored_cfg.defaults['FMSNOTIFY_GROUP']) or + (stored_cfg.defaults['FMSNOTIFY_GROUP'].strip() == '') and + not params.get('SUBMIT_WIKI', False)): + ui_.warn("Can't notify because fms group isn't set in the " + + "config file.\n") + raise util.Abort("Update the fmsnotify_group = line and try again.\n") + + def execute_fmsnotify(ui_, repo, params, stored_cfg): """ Run fmsnotify command. """ update_sm = None @@ -192,41 +220,38 @@ def execute_fmsnotify(ui_, repo, params, update_sm = setup(ui_, repo, params, stored_cfg) request_uri, dummy = do_key_setup(ui_, update_sm, params, stored_cfg) + if request_uri is None: # Just assert? 
ui_.warn("Only works for USK file URIs.\n") return + check_fms_cfg(ui_, params, stored_cfg) + usk_hash = get_usk_hash(request_uri) index = stored_cfg.get_index(usk_hash) - if index is None and not params['SUBMIT']: + if index is None and not (params.get('SUBMIT_BUNDLE', False) or + params.get('SUBMIT_WIKI', False)): ui_.warn("Can't notify because there's no stored index " + "for %s.\n" % usk_hash) return - if is_none(stored_cfg.defaults['FMS_ID']): - ui_.warn("Can't notify because the fms ID isn't set in the " - + "config file.\n") - ui_.status("Update the fms_id = line and try again.\n") - return - - if is_none(stored_cfg.defaults['FMSNOTIFY_GROUP']): - ui_.warn("Can't notify because fms group isn't set in the " - + "config file.\n") - ui_.status("Update the fmsnotify_group = line and try again.\n") - return - + group = stored_cfg.defaults.get('FMSNOTIFY_GROUP', None) subject = 'Update:' + '/'.join(request_uri.split('/')[1:]) if params['ANNOUNCE']: text = to_msg_string(None, (request_uri, )) - elif params['SUBMIT']: - params['REQUEST_URI'] = request_uri # REDFLAG: DCI. Think + elif params['SUBMIT_BUNDLE']: + params['REQUEST_URI'] = request_uri # REDFLAG: Think through. text = execute_insert_patch(ui_, repo, params, stored_cfg) subject = 'Patch:' + '/'.join(request_uri.split('/')[1:]) + elif params['SUBMIT_WIKI']: + params['REQUEST_URI'] = request_uri # REDFLAG: Think through. + text, group = execute_wiki_submit(ui_, repo, params, stored_cfg) + subject = 'Submit:' + '/'.join(request_uri.split('/')[1:]) else: text = to_msg_string(((usk_hash, index), )) msg_tuple = (stored_cfg.defaults['FMS_ID'], - stored_cfg.defaults['FMSNOTIFY_GROUP'], + group, subject, text) @@ -234,23 +259,38 @@ def execute_fmsnotify(ui_, repo, params, ui_.status('Sender : %s\nGroup : %s\nSubject: %s\n%s\n' % (stored_cfg.defaults['FMS_ID'], - stored_cfg.defaults['FMSNOTIFY_GROUP'], + group, subject, text)) if params['VERBOSITY'] >= 5: - raw_msg = MSG_TEMPLATE % (msg_tuple[0], - msg_tuple[1], - msg_tuple[2], - msg_tuple[3]) - ui_.status('--- Raw Message ---\n%s\n---\n' % raw_msg) + ui_.status('--- Raw Message ---\n%s\n---\n' % ( + MSG_TEMPLATE % (msg_tuple[0], msg_tuple[1], + msg_tuple[2], msg_tuple[3]))) if params['DRYRUN']: ui_.status('Exiting without sending because --dryrun was set.\n') return - send_msgs(stored_cfg.defaults['FMS_HOST'], - stored_cfg.defaults['FMS_PORT'], - (msg_tuple, )) + # REDFLAG: for testing! + if 'MSG_SPOOL_DIR' in params: + ui_.warn("DEBUG HACK!!! Writing fms msg to local spool:\n%s\n" % + params['MSG_SPOOL_DIR']) + import fmsstub + + # LATER: fix config file to store full fmsid? + # grrrr... hacks piled upon hacks. + lut = {'djk':'djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks'} + fmsstub.FMSStub(params['MSG_SPOOL_DIR'], group, + lut).send_msgs( + get_connection(stored_cfg.defaults['FMS_HOST'], + stored_cfg.defaults['FMS_PORT'], + None), + (msg_tuple, ), True) + else: + send_msgs(get_connection(stored_cfg.defaults['FMS_HOST'], + stored_cfg.defaults['FMS_PORT'], + None), + (msg_tuple, ), True) ui_.status('Notification message sent.\n' 'Be patient. It may take up to a day to show up.\n') @@ -319,10 +359,13 @@ def get_uri_from_hash(ui_, dummy, params parser.add_default_repos(KNOWN_REPOS) ui_.status("Raking through fms messages. 
This may take a while...\n") - recv_msgs(stored_cfg.defaults['FMS_HOST'], - stored_cfg.defaults['FMS_PORT'], + recv_msgs(get_connection(stored_cfg.defaults['FMS_HOST'], + stored_cfg.defaults['FMS_PORT'], + None), parser, - stored_cfg.fmsread_groups) + stored_cfg.fmsread_groups, + None, + True) target_usk = None fms_id_map, announce_map, update_map = parser.invert_table() diff --git a/infocalypse/fmsstub.py b/infocalypse/fmsstub.py new file mode 100644 --- /dev/null +++ b/infocalypse/fmsstub.py @@ -0,0 +1,142 @@ +#pylint: disable-msg=C0111 +import os +import stat +import time +import traceback + +from fms import MSG_TEMPLATE +from fcpconnection import make_id + +def read_msg(full_path, default_sender, default_subject, default_group): + article_num = os.stat(full_path)[stat.ST_MTIME] + msg_id = "<fake_%s>" % str(article_num) + reading_header = True + blank_count = 0 + headers = {} + lines = [] + for line in open(full_path, 'rb').readlines(): + line = line.strip() + #print "LINE:", line + if reading_header: + if line.strip() == '': + blank_count += 1 + if blank_count > 0: #DCI: get rid of useless code + reading_header = False + #print "SAW END OF HEADERS" + continue + else: + blank_count = 0 + + fields = line.split(':') + if len(fields) < 2: + continue + + headers[fields[0].lower().strip()] = ':'.join(fields[1:]).strip() + continue # on purpose. + + lines.append(line) + + # fake xover article tuple + group + #(article number, subject, poster, date, id, references, size, lines) + return (article_num, + headers.get('subject', default_subject), + headers.get('from', default_sender), + None, # unused + msg_id, + (), + None, # unused + lines, # fms doesn't return these + headers.get('newsgroups', default_group),) + +FAKE_TRUST = 65 # Trust value returned for all queries. +class NNTPStub: + def quit(self): + print "NNTPStub.quit -- called." + traceback.print_stack() + #raise Exception("DCI: forcing stack trace") + def shortcmd(self, cmd): + assert cmd.startswith("XGETTRUST") + return "200 %i" % FAKE_TRUST + +class FMSStub: + def __init__(self, base_dir, group, sender_lut=None): + self.base_dir = os.path.join(base_dir, '__msg_spool__') + self.group = group + if sender_lut is None: + sender_lut = {} + self.sender_lut = sender_lut + if not os.path.exists(self.base_dir): + os.makedirs(self.base_dir) + + def get_connection(self, fms_host, fms_port, user_name): + """ Create a fake fms NNTP connection. """ + return NNTPStub() + + def send_msgs(self, dummy_server, msg_tuples, send_quit=False): + for msg_tuple in msg_tuples: + # HACK: use lut to map partial -> full fms ids. + #print "msg_tuple[0]: ", msg_tuple[0] + #print "sender_lut: ", self.sender_lut + sender = self.sender_lut.get(msg_tuple[0].split('@')[0], + msg_tuple[0]) + print "sender: ", sender + if sender != msg_tuple[0]: + print "fmsstub: FIXED UP %s->%s" % (msg_tuple[0], sender) + assert sender.find('@') != -1 + + full_path = os.path.join(self.base_dir, + 'out_going_%s.txt' % make_id()) + out_file = open(full_path, 'wb') + try: + out_file.write(MSG_TEMPLATE % (sender, + msg_tuple[1], + msg_tuple[2], + msg_tuple[3])) + time.sleep(0.25) # Hack to make sure that modtimes are unique. + finally: + out_file.close() + + # OK to bend the rules a little for testing stubs. 
+ def recv_msgs(self, dummy_server, msg_sink, groups, + max_articles=None, dummy_send_quit=False): + #print "FMSStub.recv_msgs -- called" + assert not max_articles is None + assert tuple(groups) == (self.group, ) + if not self.group in max_articles: + max_articles[self.group] = 0 + + by_mtime = {} + for name in os.listdir(self.base_dir): + #print name + mod_time = os.stat(os.path.join(self.base_dir, + name))[stat.ST_MTIME] + assert not mod_time in by_mtime + by_mtime[mod_time] = name + + if len(by_mtime) < 1: + #print "BAILING OUT, no files." + return + + times = by_mtime.keys() + times.sort() + if times[-1] <= max_articles[self.group]: + #print "BAILING OUT, no new files." + return + + for mod_time in times: + if mod_time <= max_articles[self.group]: + #print "Skipping, ", mod_time + continue + max_articles[self.group] = max(max_articles[self.group], mod_time) + items = read_msg(os.path.join(self.base_dir, by_mtime[mod_time]), + 'djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks', + 'unknown_subject', + self.group) + if items[-1] != self.group: + continue + + if not msg_sink.wants_msg(self.group, items): + print "fmsstub: Rejected by sink: %s" % by_mtime[mod_time] + continue + + msg_sink.recv_fms_msg(self.group, items, items[-2]) diff --git a/infocalypse/hgoverlay.py b/infocalypse/hgoverlay.py new file mode 100644 --- /dev/null +++ b/infocalypse/hgoverlay.py @@ -0,0 +1,171 @@ +""" An IFileFunctions subclass which reads files from a particular version of + an hg repo. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + + +import os +from mercurial import cmdutil + +from pathhacks import add_parallel_sys_path +add_parallel_sys_path('fniki') +from fileoverlay import OverlayedFiles, DirectFiles, WIKITEXT_ENCODING + +# ATTRIBUTION: Pillaged from commands.cat() in the hg source. +def get_hg_file(repo, file_name, rev, tmp_file_name, dump_to_file = False): + """ INTERNAL: read a file from the hg repo. + If dump_to_file, the data is written into tmp_file_name. + Otherwise, the data is returned and tmp_file_name is deleted. + """ + #print "get_hg_file -- ", file_name, rev + + file_name = os.path.join(repo.root, file_name) + ctx = repo[rev] + bytes = None + err = True + matches = cmdutil.match(repo, (file_name,)) + for abs_ in ctx.walk(matches): + assert err # Wacky. Why are we looping again? + # REDFLAG: ripped out decode code. Will I need that on windows? 
+ file_ptr = None # Hmmmm starting to look like crappy Java code :-( + in_file = None + try: + file_ptr = cmdutil.make_file(repo, tmp_file_name, ctx.node(), + pathname=abs_) + file_ptr.write(ctx[abs_].data()) + file_ptr.close() + file_ptr = None + if not dump_to_file: + in_file = open(tmp_file_name) + bytes = in_file.read() + finally: + if file_ptr: + file_ptr.close() + if in_file: + in_file.close() + if not dump_to_file and os.path.exists(tmp_file_name): + os.remove(tmp_file_name) + + err = False + if err: + raise KeyError("File: %s doesn't exist in version: %s" \ + % (file_name, rev)) + if dump_to_file: + return "The data was written into: %s" % tmp_file_name + + return bytes + + +class HgFileOverlay(OverlayedFiles): + """ An IFileOverlay that reads files from a mercurial revision.""" + def __init__(self, ui_, repo, base_dir, tmp_file): + OverlayedFiles.__init__(self, os.path.join(repo.root, base_dir)) + self.base_dir = base_dir # i.e. root wrt repo + self.ui_ = ui_ + self.repo = repo + self.version = 'tip' + self.tmp_file = tmp_file + + def repo_path(self, path): + """ Return path w.r.t. the repository root. """ + path = os.path.abspath(path) + assert path.startswith(self.base_path) + assert path.startswith(self.repo.root) + + rest = path[len(self.repo.root):] + if rest.startswith(os.sep): + rest = rest[len(os.sep):] + + return rest + + def repo_pages(self, path): + """ INTERNAL: Enumerate files in a repo subdirectory. """ + if not path.endswith('wikitext'): + raise ValueError("Dunno how to enumerate wikitext pages from: %s" + % path) + wikitext_dir = self.repo_path(path) + # Hmmmm... won't work for files in root. use -1? + return tuple([os.path.split(name)[1] for name in + self.repo.changectx(self.version). + manifest().keys() if name.startswith(wikitext_dir)]) + + def exists_in_repo(self, path): + """ INTERNAL: Return True if the file exists in the repo, + False otherwise. """ + return (self.repo_path(path) in + self.repo.changectx(self.version).manifest()) + + def read(self, path, mode='rb', non_overlayed=False): + """ Read a file. """ + if non_overlayed: + return unicode( + get_hg_file(self.repo, self.repo_path(path), + self.version, self.tmp_file), + WIKITEXT_ENCODING) + overlayed = self.overlay_path(path) + if os.path.exists(overlayed): + return DirectFiles.read(self, overlayed, mode) + + return unicode(get_hg_file(self.repo, self.repo_path(path), + self.version, self.tmp_file), + WIKITEXT_ENCODING) + + def exists(self, path, non_overlayed=False): + """ Return True if the file exists, False otherwise. """ + if non_overlayed: + return self.exists_in_repo(path) + + overlay = self.overlay_path(path) + if os.path.exists(overlay): + if os.path.getsize(overlay) == 0: + return False + else: + return True + + return self.exists_in_repo(path) + + def modtime(self, path, non_overlayed=False): + """ Return the modtime for the file.""" + if non_overlayed: + # Hmmm commit time for changeset, not file. Good enough. + return int(self.repo.changectx(self.version).date()[0]) + + overlay = self.overlay_path(path) + if os.path.exists(overlay) and os.path.getsize(overlay) > 0: + return DirectFiles.modtime(self, overlay) + + return int(self.repo.changectx(self.version).date()[0]) + + def list_pages(self, path, non_overlayed=False): + """ IFileFunctions implementation. 
""" + if non_overlayed: + return self.repo_pages() + + overlay_pages = set([]) + overlay = self.overlay_path(path) + if os.path.exists(overlay): + overlay_pages = set(DirectFiles.list_pages(self, overlay)) + + deleted = set([]) + for name in overlay_pages: + if os.path.getsize(os.path.join(overlay, name)) == 0: + deleted.add(name) + + return list(overlay_pages.union(set(self.repo_pages(path)) - deleted)) diff --git a/infocalypse/infcmds.py b/infocalypse/infcmds.py --- a/infocalypse/infcmds.py +++ b/infocalypse/infcmds.py @@ -43,9 +43,9 @@ from bundlecache import BundleCache, is_ from updatesm import UpdateStateMachine, QUIESCENT, FINISHING, REQUESTING_URI, \ REQUESTING_GRAPH, REQUESTING_BUNDLES, INVERTING_URI, \ REQUESTING_URI_4_INSERT, INSERTING_BUNDLES, INSERTING_GRAPH, \ - INSERTING_URI, FAILING, REQUESTING_URI_4_COPY, CANCELING, \ + INSERTING_URI, FAILING, REQUESTING_URI_4_COPY, \ REQUIRES_GRAPH_4_HEADS, REQUESTING_GRAPH_4_HEADS, \ - RUNNING_SINGLE_REQUEST, CleaningUp, UpdateContext + RUNNING_SINGLE_REQUEST, UpdateContext from archivesm import ArchiveStateMachine, ArchiveUpdateContext @@ -61,14 +61,17 @@ DEFAULT_PARAMS = { 'PriorityClass':1, 'DontCompress':True, # hg bundles are already compressed. 'Verbosity':1023, # MUST set this to get progress messages. - #'GetCHKOnly':True, # REDFLAG: DCI! remove + #'GetCHKOnly':True, # REDFLAG: For testing only. Not sure this still works. # Non-FCP stuff 'N_CONCURRENT':4, # Maximum number of concurrent FCP requests. 'CANCEL_TIME_SECS': 10 * 60, # Bound request time. 'POLL_SECS':0.25, # Time to sleep in the polling loop. + + # Testing HACKs #'TEST_DISABLE_GRAPH': True, # Disable reading the graph. #'TEST_DISABLE_UPDATES': True, # Don't update info in the top key. + #'MSG_SPOOL_DIR':'/tmp/fake_msgs', # Stub out fms } MSG_TABLE = {(QUIESCENT, REQUESTING_URI_4_INSERT) @@ -128,14 +131,6 @@ class UICallbacks: def monitor_callback(self, update_sm, client, msg): """ FCP message status callback which writes to a ui. """ - # REDFLAG: remove when 1209 comes out. - if (msg[0] == 'PutFailed' and get_code(msg) == 9 and - update_sm.params['FREENET_BUILD'] == '1208' and - update_sm.ctx.get('REINSERT', 0) > 0): - self.ui_.warn('There is a KNOWN BUG in 1208 which ' - + 'causes code==9 failures for re-inserts.\n' - + 'The re-insert might actually have succeeded.\n' - + 'Who knows???\n') if self.verbosity < 2: return @@ -263,33 +258,20 @@ def check_uri(ui_, uri): + "\nUse --aggressive instead. \n") raise util.Abort("Negative USK %s\n" % uri) - -def disable_cancel(updatesm, disable=True): - """ INTERNAL: Hack to work around 1208 cancel kills FCP connection bug. """ - if disable: - if not hasattr(updatesm.runner, 'old_cancel_request'): - updatesm.runner.old_cancel_request = updatesm.runner.cancel_request - msg = ("RequestRunner.cancel_request() disabled to work around " - + "1208 bug\n") - updatesm.runner.cancel_request = ( - lambda dummy : updatesm.ctx.ui_.status(msg)) - else: - if hasattr(updatesm.runner, 'old_cancel_request'): - updatesm.runner.cancel_request = updatesm.runner.old_cancel_request - updatesm.ctx.ui_.status("Re-enabled canceling so that " - + "shutdown works.\n") -class PatchedCleaningUp(CleaningUp): - """ INTERNAL: 1208 bug work around to re-enable canceling. """ - def __init__(self, parent, name, finished_state): - CleaningUp.__init__(self, parent, name, finished_state) - - def enter(self, from_state): - """ Override to back out 1208 cancel hack. 
""" - disable_cancel(self.parent, False) - CleaningUp.enter(self, from_state) +def set_debug_vars(verbosity, params): + """ Set debug dumping switch variables based on verbosity. """ + if verbosity > 2 and params.get('DUMP_GRAPH', None) is None: + params['DUMP_GRAPH'] = True + if verbosity > 3 and params.get('DUMP_UPDATE_EDGES', None) is None: + params['DUMP_UPDATE_EDGES'] = True + if verbosity > 4 and params.get('DUMP_CANONICAL_PATHS', None) is None: + params['DUMP_CANONICAL_PATHS'] = True + if verbosity > 4 and params.get('DUMP_URIS', None) is None: + params['DUMP_URIS'] = True + if verbosity > 4 and params.get('DUMP_TOP_KEY', None) is None: + params['DUMP_TOP_KEY'] = True # REDFLAG: remove store_cfg -# DCI: retest! esp. infocalypse stuff def setup(ui_, repo, params, stored_cfg): """ INTERNAL: Setup to run an Infocalypse extension command. """ # REDFLAG: choose another name. Confusion w/ fcp param @@ -304,21 +286,13 @@ def setup(ui_, repo, params, stored_cfg) % stored_cfg.defaults['TMP_DIR']) verbosity = params.get('VERBOSITY', 1) - if verbosity > 2 and params.get('DUMP_GRAPH', None) is None: - params['DUMP_GRAPH'] = True - if verbosity > 3 and params.get('DUMP_UPDATE_EDGES', None) is None: - params['DUMP_UPDATE_EDGES'] = True - if verbosity > 4 and params.get('DUMP_CANONICAL_PATHS', None) is None: - params['DUMP_CANONICAL_PATHS'] = True - if verbosity > 4 and params.get('DUMP_URIS', None) is None: - params['DUMP_URIS'] = True - if verbosity > 4 and params.get('DUMP_TOP_KEY', None) is None: - params['DUMP_TOP_KEY'] = True + set_debug_vars(verbosity, params) callbacks = UICallbacks(ui_) callbacks.verbosity = verbosity if not repo is None: + # BUG:? shouldn't this be reading TMP_DIR from stored_cfg cache = BundleCache(repo, ui_, params['TMP_DIR']) try: @@ -356,23 +330,6 @@ def setup(ui_, repo, params, stored_cfg) # Modify only after copy. update_sm.params['FREENET_BUILD'] = runner.connection.node_hello[1]['Build'] - # REDFLAG: Hack to work around 1208 cancel bug. Remove. - if update_sm.params['FREENET_BUILD'] == '1208': - ui_.warn("DISABLING request canceling to work around 1208 FCP bug.\n" - "This may cause requests to hang. :-(\n\n") - disable_cancel(update_sm) - - # Patch state machine to re-enable canceling on shutdown. - #CANCELING:CleaningUp(self, CANCELING, QUIESCENT), - #FAILING:CleaningUp(self, FAILING, QUIESCENT), - #FINISHING:CleaningUp(self, FINISHING, QUIESCENT), - update_sm.states[CANCELING] = PatchedCleaningUp(update_sm, - CANCELING, QUIESCENT) - update_sm.states[FAILING] = PatchedCleaningUp(update_sm, - FAILING, QUIESCENT) - update_sm.states[FINISHING] = PatchedCleaningUp(update_sm, - FINISHING, QUIESCENT) - return update_sm def run_until_quiescent(update_sm, poll_secs, close_socket=True): @@ -416,7 +373,6 @@ def cleanup(update_sm): if not update_sm.runner is None: update_sm.runner.connection.close() - # DCI: what will force cleanup of archive temp files? if not update_sm.ctx.bundle_cache is None: update_sm.ctx.bundle_cache.remove_files() @@ -674,7 +630,10 @@ def execute_push(ui_, repo, params, stor '\n'.join(update_sm.get_state(INSERTING_URI). get_request_uris())) else: - ui_.status("Push failed.\n") + extra = '' + if update_sm.ctx.get('UP_TO_DATE', False): + extra = '. 
Local changes already in Freenet' + ui_.status("Push failed%s.\n" % extra) handle_updating_config(repo, update_sm, params, stored_cfg) finally: diff --git a/infocalypse/insertingbundles.py b/infocalypse/insertingbundles.py --- a/infocalypse/insertingbundles.py +++ b/infocalypse/insertingbundles.py @@ -97,6 +97,9 @@ class InsertingBundles(RequestQueueState self.set_new_edges(graph) except UpToDate, err: # REDFLAG: Later, add FORCE_INSERT parameter? + # REDFLAG: rework UpToDate exception to include versions, stuff + # versions in ctx? + self.parent.ctx['UP_TO_DATE'] = True self.parent.ctx.ui_.warn(str(err) + '\n') # Hmmm self.parent.transition(FAILING) # Hmmm... hard coded state name return @@ -261,6 +264,7 @@ class InsertingBundles(RequestQueueState def _check_new_edges(self, msg): """ INTERNAL: Helper function to raise if new_edges is empty. """ if len(self.new_edges) == 0: + self.parent.ctx['UP_TO_DATE'] = True raise UpToDate(msg) def set_new_edges(self, graph): diff --git a/infocalypse/requestqueue.py b/infocalypse/requestqueue.py --- a/infocalypse/requestqueue.py +++ b/infocalypse/requestqueue.py @@ -35,6 +35,7 @@ class QueueableRequest(MinimalClient): self.message_callback = None # set by RequestRunner # The time after which this request should be canceled. self.cancel_time_secs = None # RequestQueue.next_request() MUST set this + self.custom_data_source = None class RequestRunner: """ Class to run requests scheduled on one or more RequestQueues. """ @@ -110,8 +111,8 @@ class RequestRunner: assert client.queue == self.request_queues[self.index] client.in_params.async = True client.message_callback = self.msg_callback - self.running[self.connection.start_request(client)] \ - = client + self.running[self.connection.start_request( + client, client.custom_data_source)] = client else: idle_queues += 1 self.index = (self.index + 1) % len(self.request_queues) diff --git a/infocalypse/run_wikibot.py b/infocalypse/run_wikibot.py new file mode 100644 --- /dev/null +++ b/infocalypse/run_wikibot.py @@ -0,0 +1,205 @@ +""" Set up and run a single wikibot instance. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + +import os +from ConfigParser import ConfigParser + +from fcpclient import FCPClient, get_usk_hash +from fcpconnection import FCPConnection, PolledSocket +from requestqueue import RequestRunner +from bundlecache import is_writable + +from fmsstub import FMSStub + +from fmsbot import FMSBotRunner, run_event_loops +from wikibot import WikiBot + + +def read_fnwiki_cfg(cfg_file): + """ Quick and dirty helper w/o hg deps. 
to read cfg file.""" + parser = ConfigParser() + parser.read(cfg_file) + if not parser.has_section('default'): + raise IOError("Can't read default section of config file?") + + # Hmmm some param key strings are different than config.py. + return {'WIKI_ROOT':parser.get('default', 'wiki_root'), + 'SITE_NAME':parser.get('default', 'site_name'), + 'SITE_DEFAULT_FILE':parser.get('default', 'default_file'), + 'FMS_GROUP':parser.get('default', 'wiki_group'), + # Only the part before '@'. + 'FMS_ID':parser.get('default', 'wiki_server_id').split('@')[0], + 'WIKI_REPO_USK':parser.get('default', 'wiki_repo_usk')} + +# LATER: load from a config file +def get_params(): + """ Return the parameters to run a WikiBot. """ + + # Directory containing all bot related stuff. + base_dir = '/tmp/wikibots' + + # File containing the private SSK key. + key_file_fmt = key_file = '~/wikibot_key_%s.txt' + + # FCP info + fcp_host = '127.0.0.1' + fcp_port = 9481 + + # FMS info + fms_host = '127.0.0.1' + fms_port = 1119 + # NOTE: fms id for bot is read from fnwiki.cfg. + + # Latest known repo usk index + index_hint = 0 + + # vebosity of logging output (NOT FCP 'Verbosity') + verbosity = 5 + + # MUST exist + tmp_dir = os.path.join(base_dir, '__wikibot_tmp__') + # MUST exist and contain wikitext hg repo. + repo_dir = os.path.join(base_dir, 'hgrepo') + # MUST exist + bot_storage_dir = os.path.join(base_dir, 'bot_storage') + + #----------------------------------------------------------# + assert is_writable(tmp_dir) + assert os.path.exists(os.path.join(repo_dir, '.hg')) + + params = read_fnwiki_cfg(os.path.join(repo_dir, 'fnwiki.cfg')) + + # MUST contain SSK private key + key_file = key_file_fmt % get_usk_hash(params['WIKI_REPO_USK']) + print "Read insert key from: %s" % key_file + + # Load private key for the repo from a file.. + insert_ssk = open(os.path.expanduser(key_file), 'rb').read().strip() + assert insert_ssk.startswith('SSK@') + # Raw SSK insert key. + insert_ssk = insert_ssk.split('/')[0].strip() + + # Make insert URI from request URI in config file. + human = '/'.join(params['WIKI_REPO_USK'].split('/')[1:]) + insert_uri = 'U' + insert_ssk[1:] + '/' + human + + # Then invert the request_uri from it. + print "Inverting public key from private one..." + request_uri = FCPClient.connect(fcp_host, fcp_port). \ + get_request_uri(insert_uri) + print request_uri + if get_usk_hash(request_uri) != get_usk_hash(params['WIKI_REPO_USK']): + print "The insert SSK doesn't match WIKI_REPO_USK in fnwiki.cfg!" + assert False + + # LATER: Name convention. + # USK@/foo.wikitext.R1/0 -- wiki source + # USK@/foo/0 -- freesite + + #print "Reading latest index from Freenet... This can take minutes." + #index = prefetch_usk(FCPClient.connect(fcp_host, fcp_port), + # request_uri) + #insert_uri = get_usk_for_usk_version(insert_uri, index) + #request_uri = get_usk_for_usk_version(request_uri, index) # needed? + + + # Hmmmm... freesite index is read from 'I_<n>' tags in + # repo. There is no way to set it. + params.update({ + # FCP 2.0 + 'MaxRetries':3, + 'PriorityClass':1, + #'DontCompress':True, + 'Verbosity':1023, # MUST set this to get progress messages. + + # FCPConnection / RequestRunner + 'FCP_HOST':fcp_host, + 'FCP_PORT':fcp_port, + 'FCP_POLL_SECS':0.25, + 'N_CONCURRENT':4, + 'CANCEL_TIME_SECS': 7 * 60, + + # FMSBotRunner + 'FMS_HOST':fms_host, + 'FMS_PORT':fms_port, + 'FMS_POLL_SECS': 3 * 60, + 'BOT_STORAGE_DIR':bot_storage_dir, + + # WikiBot + 'FMS_NOTIFY_GROUP':'infocalypse.notify', # extra group to notify. 
+ 'LATEST_INDEX':index_hint, # Just a hint, it is also stored in shelve db + 'SITE_KEY':insert_ssk, + 'INSERT_URI':insert_uri, + 'REQUEST_URI':request_uri, + 'VERBOSITY':verbosity, + 'TMP_DIR':tmp_dir, + 'NO_SEARCH':False, # REQUIRED + 'USK_HASH':get_usk_hash(request_uri), + 'FNPUSH_COALESCE_SECS':60, # Time to wait before pushing + 'SITE_COALESCE_SECS':60, # Time to wait before inserting. + 'NOTIFY_COALESCE_SECS':60, # Time 2w8b4 sending fms repo update msg + 'COMMIT_COALESCE_SECS':-1, # Hack to force immediate commit + 'FMS_TRUST_CACHE_SECS': 1 * 60 * 60, + 'FMS_MIN_TRUST':55, # peer message trust + 'NONE_TRUST':49, # i.e. disable posting for 'None' peer msg trust + 'REPO_DIR':repo_dir, + + # Only uncomment for testing. + #'MSG_SPOOL_DIR':'/tmp/fake_msgs', + }) + + return params + +def run_wikibot(params): + """ Setup an FMSBotRunner and run a single WikiBot instance in it. """ + + # Setup RequestQueue for FCP requests. + async_socket = PolledSocket(params['FCP_HOST'], params['FCP_PORT']) + request_runner = RequestRunner(FCPConnection(async_socket, True), + params['N_CONCURRENT']) + + # Setup FMSBotRunner to house the WikiBot. + bot_runner = FMSBotRunner(params) + if 'MSG_SPOOL_DIR' in params: + print "READING MESSAGES FROM SPOOL DIR INSTEAD OF FMS!" + + bot_runner.nntp = FMSStub(params['MSG_SPOOL_DIR'], + params['FMS_GROUP']) + + # Install a single WikiBot instance. + wiki_bot = WikiBot('wikibot_' + params['USK_HASH'], + params, request_runner) + bot_runner.register_bot(wiki_bot, (params['FMS_GROUP'], )) + + # Initialize the FMSBotRunner + bot_runner.startup() + + # Run until there's an error on the FCP socket or + # the FMSBotRunner shuts down. + run_event_loops(bot_runner, request_runner, + params['FMS_POLL_SECS'], + params['FCP_POLL_SECS'], + wiki_bot.log) # Hmmm... (ab)use WikiBot log. + + + +if __name__ == "__main__": + run_wikibot(get_params()) diff --git a/infocalypse/sitecmds.py b/infocalypse/sitecmds.py --- a/infocalypse/sitecmds.py +++ b/infocalypse/sitecmds.py @@ -31,10 +31,8 @@ from fcpclient import FCPClient, get_fil # HACK from pathhacks import add_parallel_sys_path add_parallel_sys_path('fniki') - import piki -# REDFLAG: DCI deal with loading hacks for config from config import write_default_config def get_insert_uri(params): @@ -55,7 +53,7 @@ def show_request_uri(ui_, params, uri): request_uri = uri ui_.status('RequestURI:\n%s\n' % request_uri) -def dump_wiki_html(wiki_root, staging_dir): +def dump_wiki_html(wiki_root, staging_dir, overlayed): """ Dump the wiki as flat directory of html. wiki_root is the directory containing the wikitext and www dirs. @@ -72,26 +70,27 @@ def dump_wiki_html(wiki_root, staging_di os.makedirs(staging_dir) - # REDFLAG: DCI, should be piki. - piki.dump(staging_dir, wiki_root) + piki.dump(staging_dir, wiki_root, overlayed) TMP_DUMP_DIR = '_tmp_wiki_html_deletable' # Hmmmm... broken out to appease pylint def do_freenet_insert(ui_, repo, params, insert_uri, progress_func): - """ INTERNAL: Helper does the actual insert. """ + """ INTERNAL: Helper does the actual insert. + + Caller must delete TMP_DUMP_DIR! + """ default_mime_type = "text/plain" # put_complex_dir() default. Hmmmm. if not params['ISWIKI']: site_root = os.path.join(repo.root, params['SITE_DIR']) else: - # REDFLAG: DCI temp file cleanup on exception - # Because wiki html files have no extension to guess from. 
default_mime_type = 'text/html' ui_.status("Dumping wiki as HTML...\n") site_root = os.path.join(params['TMP_DIR'], TMP_DUMP_DIR) dump_wiki_html(os.path.join(repo.root, params['WIKI_ROOT']), - site_root) + site_root, + params['OVERLAYED']) ui_.status('Default file: %s\n' % params['SITE_DEFAULT_FILE']) ui_.status('Reading files from:\n%s\n' % site_root) diff --git a/infocalypse/submission.py b/infocalypse/submission.py new file mode 100644 --- /dev/null +++ b/infocalypse/submission.py @@ -0,0 +1,913 @@ +""" Functions to bundle and unbundle wiki submission zip files. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + + +import os +import time +import StringIO + +from mercurial import mdiff +from mercurial import commands +from zipfile import ZipFile +from binascii import hexlify + +from graph import hex_version, has_version +from validate import is_hex_string +from hgoverlay import HgFileOverlay + +from pathhacks import add_parallel_sys_path + +add_parallel_sys_path('wormarc') +from shafunc import new_sha +from binaryrep import NULL_SHA +from deltacoder import compress, decompress + +add_parallel_sys_path('fniki') +from fileoverlay import DirectFiles +from piki import versioned_page_re as WIKINAME_REGEX + +# Reasons submission were rejected. +REJECT_UNKNOWN = 0 # Dunno why submission failed. +REJECT_FCPFAIL = 1 # FCP request for CHK failed +REJECT_NOTRUST = 2 # Not enough trust +REJECT_APPLIED = 3 # Submission was already fully applied. +# Hmmm no longer needed? +REJECT_PARTIAL = 4 # Submission was already partially applied. +REJECT_CONFLICT = 5 # Submission couldn't be applied because of conflict. +REJECT_ILLEGAL = 6 # Submission bundle malformed or illegal. +SENTINEL_VER = '000000000000' + +MAX_INFO_LEN = 1024 # Arbitrary, reasonable bound. + +#----------------------------------------------------------# +CRLF = '\x0d\x0a' +EMPTY_FILE_SHA_HEX = new_sha('').hexdigest() +EMPTY_FILE_SHA = new_sha('').digest() +#----------------------------------------------------------# +# diff / patch helper funcs +# +# LATER: Use unified diffs? +# RESEARCH: No patch in python standard modules? +# 0) http://docs.python.org/library/difflib.html +# can write diffs (context, unified) but doesn't +# read them. +# restore() operates on the entire file rep. ie. not just deltas +# Double check. +# 1) http://code.google.com/p/python-patch/ +# Looks promising, but too new to rely on. +# 2) http://code.google.com/p/google-diff-match-patch/ +# Looks like it would do the trick but forces licensing +# to GPL3 since it is Apache. Double check. +# 3) Mercurial patch/diff. [WINNER] +# Non-standard format, but will get the job done fast. + +# REQUIRE 8-bit strings! + +# RAM! +def make_patch(old_text, new_text): + """ Return a raw patch bytes which transforms old_text into new_text. 
""" + values = compress(mdiff.textdiff(old_text, new_text)) + if values[0]: + return ''.join(values) + return values[1] + +# RAM! +def apply_patch(old_text, patch): + """ Return raw new file bytes by applying patch to old_text. """ + return mdiff.patches(old_text, + [decompress(patch)]) +#----------------------------------------------------------# +# Returns a unicode string. +def unicode_apply_patch(old_text, patch, updated_sha, name): + """ Helper wrapper around apply_patch() which takes a unicode string + for old_text. + + raises a SubmitError if the SHA1 of the patched text != updated_sha. """ + ret = apply_patch(old_text.encode('utf8'), patch) + if new_sha(ret).digest() != updated_sha: + raise SubmitError("Patch failed to validate: %s" % + name, True) + return ret.decode('utf8') + +# Returns an 8-bit string. +def unicode_make_patch(old_text, new_text): + """ Helper wrapper around make_patch() which takes unicode strings.""" + values = compress(mdiff.textdiff(old_text.encode('utf8'), + new_text.encode('utf8'))) + if values[0]: + return ''.join(values) + + return values[1] + +def utf8_sha(unicode_text): + """ Return a SHA1 hash instance for the utf8 8-bit string rep + of unicode_text.""" + return new_sha(unicode_text.encode('utf8')) + +class SubmitError(Exception): + """ Exception used to indicate failure by bundle_wikitext and + unbundle_wikitext. """ + def __init__(self, msg, illegal=False): + Exception.__init__(self, msg, illegal) + self.illegal = illegal + +class NoChangesError(SubmitError): + """ Exception to indicate that there are no local + changes to be submitted. """ + def __init__(self): + SubmitError. __init__(self, "No changes found." , False) + +def pack_info(version, submitter): + """ INTERNAL: Validate and pack __INFO__ contents into 7-bit ASCII. """ + try: + submitter.encode('ascii') + except UnicodeError: + raise SubmitError("Non-ASCII characters in submitter name: %s" % + repr(submitter), True) + if not is_hex_string(version, 40): + raise SubmitError("Version isn't a 40 digit hex string: %s" % + repr(version), True) + try: + ret = ("%s\n%s\n" % (version, submitter)).encode('ascii') + except UnicodeError: + # Impossible? + raise SubmitError("Unexpected error packing info???", True) + + if len(ret) > MAX_INFO_LEN: + raise SubmitError("Info file too big.", True) + + return ret + +def unpack_info(info_text): + """ INTERNAL: Validate and unpack __INFO__ contents from 7-bit ASCII. """ + try: + info_text = info_text.decode('ascii') + except UnicodeError: + raise SubmitError("Non-ASCII characters in info file.", True) + + if len(info_text) > MAX_INFO_LEN: + raise SubmitError("Info file too big.", True) + + fields = info_text.splitlines(False) + if len(fields) != 2: + raise SubmitError("Format error in info file.", True) + + if not is_hex_string(fields[0], 40): + raise SubmitError("Version isn't a 40 digit hex string: %s" % + repr(fields[0]), True) + # Hmmmm... empty submitter is ok + return fields[0], fields[1].strip() + +def get_read_only_list(overlay): + """ Helper reads the 'readonly.txt' list of locked page names. """ + full_path = os.path.join(overlay.base_path, 'readonly.txt') + if not os.path.exists(full_path): + return frozenset([]) + + in_file = open(full_path, 'rb') + try: + return frozenset([value.strip() + for value in in_file.read().splitlines()]) + finally: + in_file.close() + +# RAM +def bundle_wikitext(overlay, version, submitter): + """ Return raw zipfile bytes containing the overlayed wiki changes + in the overlay_base dir. 
""" + + assert overlay.is_overlayed() + + # Catch bad wikitext. + validate_wikitext(overlay) + + + wiki_text = os.path.join(overlay.base_path, 'wikitext') + + names = (set(overlay.list_pages(wiki_text)). + union(overlay.list_pages(wiki_text, True))) + + illegal_writes = names.intersection(get_read_only_list(overlay)) + if len(illegal_writes) > 0: + raise SubmitError("Can't modify read only page(s): %s" % + ','.join(illegal_writes), True) + + # Catch illegal names. + for name in names: + if not WIKINAME_REGEX.match(name): + raise SubmitError("File name is not a WikiWord: %s" % name, True) + page_ver = WIKINAME_REGEX.match(name).group('version') + if not page_ver: + continue + if not overlay.exists(os.path.join(wiki_text, name), True): + raise SubmitError("Forked page doesn't exist in base version: %s" \ + % name, True) + # Catch unresolved merges. + check_merges([name for name in names + if overlay.has_overlay(os.path.join(wiki_text, name))], + names, + OverlayHasher(overlay).hexdigest) + + buf = StringIO.StringIO() + arch = ZipFile(buf, 'w') + assert version + arch.writestr('__INFO__', pack_info(version, submitter)) + count = 0 + for name in names: + full_path = os.path.join(wiki_text, name) + if not overlay.has_overlay(full_path): + # has_overlay is True for locally deleted files. + continue + + if not overlay.exists(full_path, True): + original_sha = NULL_SHA + original_raw = '' + else: + # Compute SHA1 of original file. + original_raw = overlay.read(full_path, 'rb', True) + original_sha = utf8_sha(original_raw).digest() + + new_raw = overlay.read(full_path, 'rb') + if new_raw == original_raw: + # Don't bundle changes which are already in the repo + # even if we have a copy of them in the overlay + # directory. + continue + + # Make patch. + delta = unicode_make_patch(original_raw, new_raw) + + # REDFLAG: BLOAT. Worth 40 bytes / file ??? + # Prepend old and new SHA1 to patch so we will know if we + # are trying to patch against the wrong file or patch + # a file that has already been patched. + delta = original_sha + utf8_sha(new_raw).digest() + delta + arch.writestr(name, delta) + count += 1 + arch.close() + if count < 1: + raise NoChangesError() + return buf.getvalue() + +# (version, submitter) +def get_info(in_stream): + """ Return the version and submitter strings from zipfile byte stream. """ + arch = ZipFile(in_stream, 'r') + try: + return unpack_info(arch.read('__INFO__')) + finally: + arch.close() + + + +# 0 created +# 1 modified +# 2 removed +# 3 Already applied +def extract_wikitext(arch, overlay, name): + """ Helper to simplify unbundle_wikitext. """ + ret = -1 + raw_delta = checked_read_delta(arch, name) + #print "NAME: %s, raw len: %i" % (name, len(raw_delta)) + base_sha = raw_delta[:20] + updated_sha = raw_delta[20:40] + raw_delta = raw_delta[40:] + #print "base: %s, new: %s remaining: %i" % (hexlify(base_sha)[:12], + # hexlify(updated_sha)[:12], + # len(raw_delta)) + full_path = os.path.join(os.path.join(overlay.base_path, 'wikitext'), + name) + + if base_sha == NULL_SHA: + # New file. + if overlay.exists(full_path): + if utf8_sha(overlay.read(full_path, 'rb')).digest() == updated_sha: + return 3 # Already patched. + raise SubmitError("New file already exists: %s" % name) + raw_a = '' + ret = 0 + else: + #print "OVERLAYED: ", overlay.overlay_path(full_path) + if not overlay.exists(full_path): + if updated_sha == EMPTY_FILE_SHA: + return 3 # Already patched. 
+ raise SubmitError("Base file doesn't exist: %s" % name) + raw_a = overlay.read(full_path, 'rb') + tmp_sha = utf8_sha(raw_a).digest() + if tmp_sha == updated_sha: + return 3 # Already patched. + if not tmp_sha == base_sha: + # Hmmmm... windows vs. *nix line terminators? + raise SubmitError("Base file SHA1 hash failure: %s" % name) + ret = 1 + #print "Extracting: %s [%s] " % (name, hexlify(base_sha)[:12]) + #print "ORIGINAL:" + #print repr(raw_a) + #print "PATCH:" + #print repr(raw_delta) + + raw_file = unicode_apply_patch(raw_a, raw_delta, updated_sha, name) + + #print "PATCHED:" + #print repr(raw_file) + + if len(raw_file) == 0: + # HACK. len == 0 => delete + ret = 2 + if not overlay.is_overlayed(): + os.remove(full_path) + return ret + + overlay.write(full_path, raw_file, 'wb') + + return ret + + +def raise_if_not_merging(is_merging, msg): + """ INTERNAL: Helper to raise a SubmitError when not merging.""" + if not is_merging: + raise SubmitError(msg) + +def handle_conflict(head, full_path, name, bytes, updated_sha): + """ INTERNAL: Helper to deal with conflicting merges. """ + assert full_path.endswith(name) + versioned_name = "%s_%s" % (name, hexlify(updated_sha)) + # REDFLAG: LATER: explict hg copy to minimize repo size? + head.write(os.path.join(os.path.split(full_path)[0], + versioned_name), + bytes, 'wb') + return versioned_name + +def checked_read_delta(arch, name): + """ INTERNAL: Read a raw delta from an archive.""" + raw_delta = arch.read(name) + if len(raw_delta) < 40: + raise SubmitError("<40 bytes: %s" % name, True) + return raw_delta +# DCI: BUG: Don't fork if final version == current version. i.e. already applied +# bug from a different base version. +def forking_extract_wikitext(arch, overlay, head, name): + """ Helper function used by merge_wikitext() to merge a single + file. """ + assert not overlay is None + assert not head is None + assert not head == overlay + ret = -1 + raw_delta = checked_read_delta(arch, name) + + #print "NAME: %s, raw len: %i" % (name, len(raw_delta)) + base_sha = raw_delta[:20] + updated_sha = raw_delta[20:40] + raw_delta = raw_delta[40:] + #print "base: %s, new: %s remaining: %i" % (hexlify(base_sha)[:12], + # hexlify(updated_sha)[:12], + # len(raw_delta)) + full_path = os.path.join(os.path.join(overlay.base_path, 'wikitext'), + name) + + if base_sha == NULL_SHA: + # New file. + if overlay.exists(full_path): + # ILLEGAL. + raise SubmitError("New file already exists in base version: %s" + % name, True) + if head.exists(full_path): + # CONFLICT. + # Create a versioned conflict file because the file the + # submitter wants to create already exists in the repo. + raw_file = unicode_apply_patch('', raw_delta, updated_sha, name) + # Wrote conflicting version. + return 4, handle_conflict(head, full_path, + name, raw_file, updated_sha) + + raw_a = '' + ret = 0 + else: + #print "OVERLAYED: ", overlay.overlay_path(full_path) + if not overlay.exists(full_path): + # ILLEGAL + raise SubmitError("Base file doesn't exist in base version: %s" % + name, True) + + if not head.exists(full_path): + if updated_sha == EMPTY_FILE_SHA: + return 3, name # Already patched. + + # CONFLICT + # Create a versioned conflict file because the file the + # submitter wants to modify already was deleted from + # the repo. + # + # Patch against the SUBMITTER'S version! 
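+            # 'overlay' is pinned at the submission's base version, so the
+            # text read here is what the submitter diffed against.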
+ raw_file = unicode_apply_patch(overlay.read(full_path, 'rb'), + raw_delta, updated_sha, name) + return 4, handle_conflict(head, full_path, + name, raw_file, updated_sha) + + raw_a = overlay.read(full_path, 'rb') + tmp_sha = utf8_sha(raw_a).digest() + if not tmp_sha == base_sha: + # ILLEGAL + raise SubmitError(("Base file SHA1 hash failure against base " + + "version: %s") % name, True) + head_sha = utf8_sha(head.read(full_path)).digest() + if head_sha != tmp_sha: + # CONFLICT + # Create a versioned conflict file because the file the + # submitter wants to modify already was modified in the repo. + # Patch against the SUBMITTER'S version! + raw_file = unicode_apply_patch(raw_a, raw_delta, updated_sha, name) + return 4, handle_conflict(head, full_path, name, + raw_file, updated_sha) + + if tmp_sha == updated_sha: + return 3, name # Already patched. + + ret = 1 + #print "Extracting: %s [%s] " % (name, hexlify(base_sha)[:12]) + #print "ORIGINAL:" + #print repr(raw_a) + #print "PATCH:" + #print repr(raw_delta) + + raw_file = unicode_apply_patch(raw_a, raw_delta, updated_sha, name) + + #print "PATCHED:" + #print repr(raw_file) + + if len(raw_file) == 0: + # HACK. len == 0 => delete + ret = 2 + if not head.is_overlayed(): + os.remove(full_path) + return ret, name + + head.write(full_path, raw_file, 'wb') + + return ret, name + +# Hmmm ugly code duplication, but we want to fail +# WITHOUT writing if any update fails. +def check_base_shas(arch, overlay): + """ Helper to simplify unbundle_wikitext. """ + for name in arch.namelist(): + #print "CHECKING NAME: ", name + if name == '__INFO__': + continue + if not WIKINAME_REGEX.match(name): + raise SubmitError("File name is not a WikiWord: %s" % name, True) + + raw_delta = arch.read(name) + base_sha = raw_delta[:20] + updated_sha = raw_delta[20:40] + full_path = os.path.join(os.path.join(overlay.base_path, 'wikitext'), + name) + if base_sha == NULL_SHA: + # New file. + if overlay.exists(full_path): + if (utf8_sha(overlay.read(full_path, 'rb')).digest() + == updated_sha): + continue + raise SubmitError("New file already exists: %s" % name) + else: + if not overlay.exists(full_path): + if updated_sha == EMPTY_FILE_SHA: + continue + raise SubmitError("Base file doesn't exist(1): %s [%s]" % + (name, full_path)) + raw_a = overlay.read(full_path, 'rb') + tmp_sha = utf8_sha(raw_a).digest() + if tmp_sha == updated_sha: + continue + if not tmp_sha == base_sha: + # Hmmmm... windows vs. *nix line terminators? + raise SubmitError("Base file SHA1 hash failure(1): %s" % name) + +def check_writable(overlay, arch): + """ Helper raises SubmitError if any pages in the zip are read only. """ + names = set([]) + for name in arch.namelist(): + match = WIKINAME_REGEX.match(name) + if not match: + continue + names.add(match.group('wikiword')) + + illegal_writes = names.intersection(get_read_only_list(overlay)) + + if len(illegal_writes) > 0: + raise SubmitError("Attempt to modify read only page(s): %s" % + ','.join(illegal_writes), True) +# REDFLAG: get rid of required_* args? +# LATER: get_version_func(name, version) +# target_name is subdir i.e. wikitext +def unbundle_wikitext(overlay, in_stream, + required_version = None, + required_submitter = None): + """ Unbundle a wiki submission bundle from a zipfile byte stream. 
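+
+    Returns a tuple of four sets of page names:
+    (created, modified, removed, already applied).
+    Raises SubmitError if the bundle can't be applied cleanly.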
+ """ + + wiki_text = os.path.join(overlay.base_path, 'wikitext') + if not os.path.exists(overlay.overlay_path(wiki_text)): + os.makedirs(overlay.overlay_path(wiki_text)) + # created, modified, removed, skipped + op_lut = (set([]), set([]), set([]), set([])) + arch = ZipFile(in_stream, 'r') + try: + base_ver, submitter = unpack_info(arch.read('__INFO__')) + if not required_version is None and required_version != base_ver: + raise SubmitError("Expected version: %s, got: %s" % + (required_version[:12], base_ver[:12])) + if not required_submitter is None and submitter != required_submitter: + raise SubmitError("Expected submitter: %s, got: %s" % \ + (required_submitter, submitter)) + if required_version is None: + check_base_shas(arch, overlay) + + check_writable(overlay, arch) + + for name in arch.namelist(): + if name == "__INFO__": + continue + if not WIKINAME_REGEX.match(name): + raise SubmitError("File name is not a WikiWord: %s" % + name, True) + action = extract_wikitext(arch, overlay, name) + op_lut[action].add(name) + return op_lut + finally: + arch.close() + +def validate_wikitext_str(raw_text, full_path="unknown_file"): + """ Raises a SubmitError when illegal wikitext is encountered. + + For now, it only checks for DOS line terminators. """ + + if raw_text.find(CRLF) != -1: + raise SubmitError("Saw DOS line terminator: %s" % full_path, + True) + +def validate_wikitext(overlay, non_overlayed=False): + """ Runs the valididate_wikitext_str() function over every + page in the overlay. """ + + path = os.path.join(overlay.base_path, 'wikitext') + for name in overlay.list_pages(path, non_overlayed): + full_path = os.path.join(path, name) + validate_wikitext_str(overlay.read(full_path, 'rb', non_overlayed), + full_path) + +def conflict_table(names): + """ INTERNAL: Make a WikiName -> version map from a list of + 'WikiName_40digithexversion' names. """ + ret = {} + for name in names: + #print "conflict_table -- NAME: ", name + match = WIKINAME_REGEX.match(name) + if not match: + continue + + wiki_word = match.group('wikiword') + version = match.group('version') + + if not version or not wiki_word: # hmmm... not wiki_word??? + continue + + entry = ret.get(wiki_word, set([])) + assert not version in entry # Slow! but list should be short + entry.add(version) + ret[wiki_word] = entry + + return ret + + +class ArchiveHasher: + """ Helper class to get page hexdigests out of submission .zip archives + for check_merges(). """ + + def __init__(self, arch): + self.arch = arch + + def hexdigest(self, versioned_name): + """ Return the hexdigest for the updated page stored in the archive. + + THIS VALUE IS NOT VALIDATED. + + Illegal values will be caught later when apply_patch() fails. + """ + raw_delta = checked_read_delta(self.arch, versioned_name) + return hexlify(raw_delta[20:40]) + +class OverlayHasher: + """ Helper class to get hexdigests of wiki pages from an overlay + for check_merges(). """ + def __init__(self, overlay): + self.overlay = overlay + assert overlay.is_overlayed() + + def hexdigest(self, wiki_name): + """ Return the hexdigest of page with name wiki_name. """ + wikitext_dir = os.path.join(self.overlay.base_path, 'wikitext') + full_path = os.path.join(wikitext_dir, wiki_name) + return utf8_sha(self.overlay.read(full_path)).hexdigest() + +# WHY? Make users *look at* files before deleting them. +# evildoer can just autogenerate versioned delete files +# BUT at least they must know the version they are deleting against. 
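+# e.g. if the repo holds SomePage plus forks SomePage_<ver1> and
+# SomePage_<ver2>, a submission touching SomePage must also submit both
+# forks as empty (deleted) files, otherwise check_merges() rejects it.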
+# will check for full deletion on submit + +# Check: +# o All previous versioned files for any modified file +# deleted. +# o Not adding any versioned files +def check_merges(submitted_pages, all_pages, hexdigest_func): + """ INTERNAL: Raises a SubmitError if the merge constraints + aren't met. """ + #print "SUBMITTED_PAGES: ", submitted_pages + conflicts = conflict_table(all_pages) + resolved = conflict_table(submitted_pages) + for name in submitted_pages: + #print "check_merges -- NAME: ", name + assert WIKINAME_REGEX.match(name) + if name in conflicts: + if resolved.get(name, set([])) != conflicts[name]: + unresolved = set([ver for ver in conflicts[name] + if not ver in resolved.get(name, set([]))]) + + raise SubmitError("Unresolved fork(s): [%s]:%s" % + (WIKINAME_REGEX.match(name).group('wikiword'), + ','.join([ver[:12] for ver in unresolved])), + True) + + for name in resolved: + for version in resolved[name]: + versioned_name = '%s_%s' % (name, version) + if hexdigest_func(versioned_name) != EMPTY_FILE_SHA_HEX: + raise SubmitError("Not deleted!: %s" % versioned_name, + True) + +def merge_wikitext(ui_, repo, base_dir, tmp_file, in_stream): + """ Merge changes from a submission zip file into the + repository. """ + + # HgFileOverlay to read bundle files with. + prev_overlay = HgFileOverlay(ui_, repo, base_dir, tmp_file) + + # Direct overlay to write updates into the repo. + head_overlay = DirectFiles(os.path.join(repo.root, base_dir)) + + arch = ZipFile(in_stream, 'r') + try: + base_ver, dummy = unpack_info(arch.read('__INFO__')) + if not has_version(repo, base_ver): + # REDFLAG: Think. What about 000000000000? + # It is always legal. hmmmm... + raise SubmitError("Base version: %s not in repository." % + base_ver[:12], True) + + # Still need to check for illegal submissions. + prev_overlay.version = base_ver + + # REDFLAG: revisit. + # just assert in forking_extract_wikitext and + # get rid of extra checking / exception raising? + check_base_shas(arch, prev_overlay) + # Hmmmm... checking against a version of readonly.txt + # which may be later than the one that the submitter + # used. + check_writable(head_overlay, arch) + check_merges([name for name in arch.namelist() + if name != '__INFO__'], + # pylint gives spurious E1101 here ??? + #pylint: disable-msg=E1101 + prev_overlay.list_pages(os.path.join(prev_overlay. + base_path, + 'wikitext')), + ArchiveHasher(arch).hexdigest) + + # created, modified, removed, skipped, forked + op_lut = (set([]), set([]), set([]), set([]), set([])) + + for name in arch.namelist(): + # check_base_sha validates wikinames. + if name == "__INFO__": + continue + action, versioned_name = forking_extract_wikitext(arch, + prev_overlay, + head_overlay, + name) + op_lut[action].add(versioned_name) + return op_lut + finally: + arch.close() + + +class ForkingSubmissionHandler: + """ Class which applies submissions to wikitext in an hg repo, creating + version suffixed pages on merge conflicts. """ + def __init__(self): + self.ui_ = None + self.repo = None + self.logger = None + self.base_dir = None # relative wrt self.repo.root + self.notify_needs_commit = lambda :None + self.notify_committed = lambda succeeded:None + + def full_base_path(self): + """ INTERNAL: Returns the full path to the dir which contains the + wikitext dir. """ + return os.path.join(self.repo.root, self.base_dir) + + def apply_submission(self, msg_id, submission_tuple, raw_zip_bytes, + tmp_file): + """ Apply a submission zip bundle. 
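+
+        Returns True if the submission was committed.  Returns False and
+        records a rejection via update_change_log() when a SubmitError is
+        raised; other exceptions currently propagate.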
""" + code = REJECT_CONFLICT + try: + self.commit_results(msg_id, submission_tuple, + merge_wikitext(self.ui_, + self.repo, + self.base_dir, + tmp_file, + StringIO.StringIO( + raw_zip_bytes))) + return True + + except SubmitError, err: + self.logger.debug("apply_submission -- err: %s" % str(err)) + + if err.illegal: + self.logger.warn("apply_submission -- ILLEGAL .zip: %s" % + str(submission_tuple)) + code = REJECT_ILLEGAL + + except Exception, err: + self.logger.warn("apply_submission -- ILLEGAL .zip(1): %s" % + str(submission_tuple)) + raise # DCI + self.update_change_log(msg_id, submission_tuple, + code, False) + return False + + # Sets needs commit on failure, but not success. Hmmm... + # Update <wiki_root>/submitted.txt + # Update <wiki_root>/rejected.txt + def update_change_log(self, msg_id, submission_tuple, result=None, + succeeded=False): + """ Update the accepted.txt or rejected.txt change log + based on the results of a submission. """ + self.logger.trace("update_change_log:\n%s\n%s\n%s\n%s" % + (msg_id, submission_tuple, str(result), + str(succeeded))) + + full_path = self.full_base_path() + if succeeded: + full_path = os.path.join(full_path, 'accepted.txt') + out_file = open(full_path, 'ab') + try: + out_file.write("%s:%i:%s:%s\n" % ( SENTINEL_VER, + time.time(), + submission_tuple[0], + submission_tuple[3])) + # Created, modified, removed, skipped, forked + op_lut = ('C', 'M', 'R', '*', 'F') + for index, values in enumerate(result): + if index == 3 or index > 4: + continue # HACK # REDFLAG: change order? + if len(values): + values = list(values) + values.sort() + out_file.write("%s:%s\n" % (op_lut[index], + ':'.join(values))) + finally: + out_file.close() + # Caller is resposible for commiting or setting "needs commit". + return + + # Failed + full_path = os.path.join(full_path, 'rejected.txt') + if result is None: + result = REJECT_UNKNOWN # ??? just assert? + out_file = open(full_path, 'ab') + try: + out_file.write("%s:%i:%s:%s:%i\n" % (hex_version(self.repo)[:12], + time.time(), + submission_tuple[0], + submission_tuple[3], + int(result))) + finally: + out_file.close() + + self.notify_needs_commit() + + + # Internal helper function which is only called immediately after + # a successful commit. + # LATER: also truncate? + # Just commit twice so that you don't have to deal with this? + # i.e. because we have to write the log entry *before* we know the version. + def fixup_accepted_log(self): + """ INTERNAL: Hack to fix the hg version int the accepted.txt log. """ + version = hex_version(self.repo)[:12] # The new tip. + self.logger.debug("fixup_accept_log -- fixing up: %s" % version) + assert len(version) == len(SENTINEL_VER) + + full_path = os.path.join(self.full_base_path(), 'accepted.txt') + in_file = open(full_path, 'rb') + try: + lines = in_file.readlines() + in_file.close() + pos = len(lines) -1 + while pos >= 0: + if not lines[pos].startswith(SENTINEL_VER): + pos -= 1 + continue + lines[pos] = version + lines[pos][len(version):] + break + + assert pos >= 0 + # Replace existing file. + os.remove(full_path) + try: + out_file = open(full_path, 'wb') + out_file.write("".join(lines)) + finally: + out_file.close() + finally: + in_file.close() + + + # DCI: need code to scrub non vcd files? + # DCI: failure cases? + # REDFLAG: LATER: rework ordering of results entries? + # IMPLIES SUCCESS. + def commit_results(self, msg_id, submission_tuple, results): + """ INTERNAL: Commit the results of a submission to the local repo. 
""" + assert len(results[3]) == 0 + wikitext_dir = os.path.join(self.full_base_path(), 'wikitext') + raised = True + # grrr pylint gives spurious + #pylint: disable-msg=E1101 + self.ui_.pushbuffer() + try: + # hg add new files. + for name in results[0]: + full_path = os.path.join(wikitext_dir, name) + commands.add(self.ui_, self.repo, full_path) + + # hg add fork files + for name in results[4]: + full_path = os.path.join(wikitext_dir, name) + commands.add(self.ui_, self.repo, full_path) + + # hg remove removed files. + for name in results[2]: + full_path = os.path.join(wikitext_dir, name) + commands.remove(self.ui_, self.repo, full_path) + + # Writes to/prunes special file used to generate RemoteChanges. + self.update_change_log(msg_id, submission_tuple, results, True) + + # REDFLAG: LATER, STAKING? later allow third field for staker. + # fms_id|chk + commit_msg = "%s|%s" % (submission_tuple[0], + submission_tuple[3]) + # hg commit + commands.commit(self.ui_, self.repo, + logfile=None, addremove=None, user=None, + date=None, + message=commit_msg) + self.fixup_accepted_log() # Fix version in accepted.txt + self.notify_committed(True) + raised = False + finally: + text = self.ui_.popbuffer() + if raised: + self.logger.debug("commit_results -- popped log:\n%s" % text) + + + def force_commit(self): + """ Force a commit to the repository after failure. """ + self.logger.trace("force_commit -- Commit local changes " + + "after failure.") + commands.commit(self.ui_, self.repo, + logfile=None, addremove=None, user=None, + date=None, + message='F') # Must have failed. + self.notify_committed(False) diff --git a/infocalypse/test_block_redundancy.py b/infocalypse/test_block_redundancy.py --- a/infocalypse/test_block_redundancy.py +++ b/infocalypse/test_block_redundancy.py @@ -125,7 +125,7 @@ class FakeUI: print text class HoldingBlocks(State): - """ State to hold blockd for testing RequestingRedundantBlocks """ + """ State to hold blocks for testing RequestingRedundantBlocks """ def __init__(self, parent, name, next_state): State.__init__(self, parent, name) self.next_state = next_state diff --git a/infocalypse/test_merging.py b/infocalypse/test_merging.py new file mode 100644 --- /dev/null +++ b/infocalypse/test_merging.py @@ -0,0 +1,777 @@ +#pylint: disable-msg=C0111,C0103,R0904,W0201 +import os +import sys +import shutil +import unittest + +from mercurial import ui, hg, commands + +from pathhacks import add_parallel_sys_path +add_parallel_sys_path('wormarc') +from shafunc import new_sha + +add_parallel_sys_path('fniki') +from fileoverlay import get_file_funcs + +from graph import hex_version +from submission import bundle_wikitext, ForkingSubmissionHandler, SubmitError +from hgoverlay import HgFileOverlay + +TEST_BASE = '/tmp' +TEST_ROOT = '__merging_test_run__' +TMP_DIR = "__TMP__" + +# ONLY files from the last test to run! +LEAVE_TEST_DIR = True + +class RepoTests(unittest.TestCase): + def setup_test_dirs(self, base_dir, dir_name): + if not os.path.exists(base_dir): + raise IOError("Base test directory doesn't exist: %s" % base_dir) + + full_path = os.path.join(base_dir, dir_name) + self.test_root = full_path + self.tmp_dir = os.path.join(self.test_root, TMP_DIR) + + if LEAVE_TEST_DIR and os.path.exists(full_path): + print "Cleaning up directory from previous test run..." 
+ self.remove_test_dirs() + + if os.path.exists(full_path): + raise IOError("Test directory exists: %s" % full_path) + + os.makedirs(full_path) + os.makedirs(self.tmp_dir) + + def remove_test_dirs(self): + assert self.test_root.endswith(TEST_ROOT) + if os.path.exists(self.test_root): + shutil.rmtree(self.test_root) + + def make_repo(self, base_dir): + repo_root = os.path.join(self.test_root, base_dir) + if not os.path.exists(repo_root): + os.makedirs(repo_root) + return hg.repository(self.ui_, repo_root, True) + + def clone_repo(self, repo, out_dir, to_rev=None): + #if not to_rev is None: + # to_rev = repo[to_rev] + + return hg.clone(self.ui_, repo, + dest=os.path.join(self.test_root, out_dir), + pull=False, rev=to_rev, update=True, stream=False)[1] + + # DOESN'T REMOVE FILES + def commit_revision(self, repo, raw_files, msg='no comment'): + # DCI: Assert working dir is tip? + manifest = repo['tip'].manifest() + for fname, raw_bytes in raw_files: + full_path = os.path.join(repo.root, fname) + dname = os.path.dirname(full_path) + if dname and not os.path.exists(dname): + print "CREATED: ", dname + os.makedirs(dname) + + out_file = open(full_path, 'wb') + try: + out_file.write(raw_bytes) + finally: + out_file.close() + if not fname in manifest: + commands.add(self.ui_, repo, full_path) + + commands.commit(self.ui_, repo, + logfile=None, addremove=None, user=None, + date=None, + message=msg) + + def commit_deletions(self, repo, file_names, msg='no comment'): + for fname in file_names: + commands.remove(self.ui_, repo, + os.path.join(repo.root, fname)) + commands.commit(self.ui_, repo, + logfile=None, addremove=None, user=None, + date=None, + message=msg) + + @classmethod + def get_write_overlay(cls, repo): + base_dir = os.path.join(repo.root, DEFAULT_WIKI_ROOT) + base_dir = os.path.join(repo.root, base_dir) + filefuncs = get_file_funcs(base_dir, True) + text_dir = os.path.join(base_dir, 'wikitext') + full_path = filefuncs.overlay_path(text_dir) + if not os.path.exists(full_path): + os.makedirs(full_path) + + return filefuncs + + def get_hg_overlay(self, repo): + return HgFileOverlay(self.ui_, repo, + DEFAULT_WIKI_ROOT, + os.path.join(self.tmp_dir, + '_tmp_shared_hg_overlay_tmp')) + + def make_submission_zip(self, repo): + return bundle_wikitext(self.get_write_overlay(repo), + hex_version(repo), + DEFAULT_SUBMITTER) + + def get_applier(self, repo): + ret = ForkingSubmissionHandler() + ret.ui_ = self.ui_ + ret.repo = repo + ret.logger = Logging() + ret.base_dir = DEFAULT_WIKI_ROOT + ret.notify_needs_commit = needs_commit + ret.notify_committed = committed + return ret + + def setUp(self): + self.setup_test_dirs(TEST_BASE, TEST_ROOT) + self.ui_ = ui.ui() + + def tearDown(self): + if not LEAVE_TEST_DIR: + self.remove_test_dirs() + +class Logging: + def __init__(self): + pass + @classmethod + def out(cls, msg): + print msg + def trace(self, msg): + self.out("T:" + str(msg)) + def debug(self, msg): + self.out("D:" + str(msg)) + def warn(self, msg): + self.out("W:" + str(msg)) + +def needs_commit(): + print "NEEDS COMMIT" + +def committed(result): + print "COMMITTED: %s" % str(result) + +DEFAULT_WIKI_ROOT = 'wiki_root' +DEFAULT_SUBMITTER = 'freenetizen@this_is_not_a_real_fms_id' + +class NoConflictTests(RepoTests): + def testrepo(self): + repo = self.make_repo('foobar') + self.commit_revision(repo, (('wiki_root/wikitext/SomePage', + 'This is a page.\n'),), + 'automagically generated test repo.') + cloned = self.clone_repo(repo, 'snarfu') + print "REPO: ", repo.root + print "CLONED: ", cloned.root + + 
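+    # The tests below share one shape: write pages through a client-side
+    # overlay, bundle them with make_submission_zip(), hand the zip to
+    # ForkingSubmissionHandler.apply_submission() against the server repo,
+    # then check the result through an HgFileOverlay pinned at the new tip.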
+ ############################################################ + # Smoketest create, remove, modify w/o conflict + def test_create_file(self): + # setup the server repository + server_repo = self.make_repo('server') + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'This the default front page.\n'),), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/NewPage' + page_bytes = 'This is my new page.\n\n' + + # write a new file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo) # tip + server_page_path = os.path.join(server_repo.root, page_path) + + self.assertTrue(not server_overlay.exists(server_page_path)) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(not server_overlay.exists(server_page_path)) + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(server_overlay.exists(server_page_path)) + + # check that the versions are the same + self.assertTrue(server_overlay.read(server_page_path) == page_bytes) + + def test_remove_file(self): + # setup the server repository + server_repo = self.make_repo('server') + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'This the default front page.\n'),), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/FrontPage' + page_bytes = '' + + # write a new file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo) # tip + server_page_path = os.path.join(server_repo.root, page_path) + + # Check that the target page exists. 
+ self.assertTrue(server_overlay.exists(server_page_path)) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(server_overlay.exists(server_page_path)) + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(not server_overlay.exists(server_page_path)) + + + def test_modify_file(self): + # setup the server repository + server_repo = self.make_repo('server') + original_page_bytes = 'This the default front page.\n' + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + original_page_bytes),), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/FrontPage' + page_bytes = original_page_bytes + 'Client changes.\n' + + # write the updated file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo) # tip + server_page_path = os.path.join(server_repo.root, page_path) + + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(server_overlay.exists(server_page_path)) + + # check that the versions are the same + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes + 'Client changes.\n') + + + def test_modify_read_only_file(self): + # setup the server repository + server_repo = self.make_repo('server') + original_page_bytes = 'This the default front page.\n' + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + original_page_bytes),), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/FrontPage' + page_bytes = original_page_bytes + 'Client changes.\n' + + # write the updated file into it. 
+ overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo) # tip + server_page_path = os.path.join(server_repo.root, page_path) + + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + + + # Add FrontPage to the read only list on the server end. + self.commit_revision(server_repo, + (('wiki_root/readonly.txt', + 'FrontPage\n'),), + 'Make FrontPage read only.') + server_overlay.version = hex_version(server_repo) # tip + + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + + + # Should remain unchanged. + server_overlay.version = hex_version(server_repo) # tip + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + + +class ConflictTests(RepoTests): + ############################################################ + # Smoketest create, remove, modify with conflict + + def has_forked_version(self, overlay, page_path, raw_bytes): + if not overlay.exists(page_path): + return False + sha_value = new_sha(raw_bytes).hexdigest() + + versioned_path = "%s_%s" % (page_path, sha_value) + if not overlay.exists(versioned_path): + return False + + if new_sha(overlay.read(versioned_path)).hexdigest() != sha_value: + print "SHA FAILS: ", versioned_path + self.assertTrue(False) + return True + + def test_create_file_conflict(self): + # setup the server repository + server_repo = self.make_repo('server') + original_page_bytes = 'Server side addition of a new page.\n' + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'This the default front page.\n'),), + 'Initial checkin of server repo.') + + self.commit_revision(server_repo, + (('wiki_root/wikitext/NewPage', + original_page_bytes),), + 'Second checkin of server repo.') + + # pull the client repo but only up to the first version + client_repo = self.clone_repo(server_repo, 'client', '0') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/NewPage' + page_bytes = 'Conflicting client side changes.\n\n' + + # write a new file into it. 
+ overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo, '0')[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo, '0') # clients version + server_page_path = os.path.join(server_repo.root, page_path) + + self.assertTrue(not server_overlay.exists(server_page_path)) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(not server_overlay.exists(server_page_path)) + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(server_overlay.exists(server_page_path)) + + # Check that the head version is the servers. + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + # Check that the forked version was created. + self.has_forked_version(server_overlay, server_page_path, page_bytes) + + + def test_remove_file_conflict(self): + # setup the server repository + server_repo = self.make_repo('server') + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'This the default front page.\n'),), + 'Initial checkin of server repo.') + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'This the updated front page.\n'),), + 'Second checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client', '0') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/FrontPage' + page_bytes = '' + + # write a new file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo, '0') + server_page_path = os.path.join(server_repo.root, page_path) + + # Check that the target page exists. + self.assertTrue(server_overlay.exists(server_page_path)) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(server_overlay.exists(server_page_path)) + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(server_overlay.exists(server_page_path)) + + # Check that the head version is the servers. + self.assertTrue(server_overlay.read(server_page_path) == + 'This the updated front page.\n') + + # Check that the forked version was created. 
+ self.has_forked_version(server_overlay, server_page_path, '') + + def test_modify_file_conflict(self): + # setup the server repository + server_repo = self.make_repo('server') + original_page_bytes = 'This the default front page.\n' + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + original_page_bytes),), + 'Initial checkin of server repo.') + + self.commit_revision(server_repo, + (('wiki_root/wikitext/FrontPage', + 'Updated front page.\n'),), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client', '0') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_path = 'wiki_root/wikitext/FrontPage' + page_bytes = original_page_bytes + 'Client changes.\n' + + # write the updated file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + # make a submission bundle + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo, '0') + server_page_path = os.path.join(server_repo.root, page_path) + + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + # apply the submission bundle to the server repo + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) == + original_page_bytes) + + server_overlay.version = hex_version(server_repo) # new tip + self.assertTrue(server_overlay.exists(server_page_path)) + + # Check that the head versions i unchanged. + self.assertTrue(server_overlay.read(server_page_path) == + 'Updated front page.\n') + + # Check that the forked version was created. + self.has_forked_version(server_overlay, server_page_path, + original_page_bytes + 'Client changes.\n') + + def test_unresolved_merge(self): + # setup the server repository + server_repo = self.make_repo('server') + + page_path = 'wiki_root/wikitext/FrontPage' + + texts = ('This the default front page.\n', + 'This fork 1 of the front page.\n', + 'This fork 2 of the front page.\n',) + + print "---" + print "Main : FrontPage" + print "fork 1: ", ("%s_%s" % (page_path, new_sha(texts[1]). + hexdigest())) + print "fork 2: ", ("%s_%s" % (page_path, new_sha(texts[2]). + hexdigest())) + print "---" + self.commit_revision(server_repo, + ((page_path, + texts[0]), + ("%s_%s" % (page_path, new_sha(texts[1]). + hexdigest()), + texts[1]), + ("%s_%s" % (page_path, new_sha(texts[2]). + hexdigest()), + texts[2]), + ), + 'Initial checkin of server repo.') + + # pull the client repo + client_repo = self.clone_repo(server_repo, 'client', '0') + + # get a write overlay for the client repo + overlay = self.get_write_overlay(client_repo) + + page_bytes = 'Modify front page without deleting forks.\n' + + # write an updated file into it. + overlay.write(os.path.join(client_repo.root, + page_path), + page_bytes) + + # verify write. 
+ self.assertTrue(overlay.read(os.path.join(client_repo.root, + page_path)) == + page_bytes) + + # make a submission bundle w/ 2 unresolved forks + try: + raw_zip_bytes = self.make_submission_zip(client_repo) + self.assertTrue(False) + except SubmitError, err0: + print "Got expected error:" + print err0 + self.assertTrue(err0.illegal) + + # Resolve one fork in client overlay. + overlay.write(os.path.join(client_repo.root, + "%s_%s" % (page_path, new_sha(texts[1]). + hexdigest())), + '') + + # make a submission bundle w/ 1 unresolved fork + try: + raw_zip_bytes = self.make_submission_zip(client_repo) + self.assertTrue(False) + except SubmitError, err1: + print "Got second expected error:" + print err1 + self.assertTrue(err1.illegal) + + + # Resolve the final fork in client overlay. + overlay.write(os.path.join(client_repo.root, + "%s_%s" % (page_path, new_sha(texts[2]). + hexdigest())), + '') + + # make a submission bundle w/ all forks resolved. + raw_zip_bytes = self.make_submission_zip(client_repo) + + #(fms_id, usk_hash, base_version, chk, length) + msg_id = 'fake_msg_id_000' + submission_tuple = (DEFAULT_SUBMITTER, + '000000000000', + hex_version(server_repo)[:12], + 'CHK@fakechk', + len(raw_zip_bytes)) + + + server_overlay = self.get_hg_overlay(server_repo) + server_overlay.version = hex_version(server_repo, '0') + server_page_path = os.path.join(server_repo.root, page_path) + + # Check that the target page exists. + self.assertTrue(server_overlay.exists(server_page_path)) + self.assertTrue(server_overlay.read(server_page_path) != page_bytes) + + + self.assertTrue(overlay.read(os.path.join(client_repo.root, + page_path)) == + page_bytes) + + # Apply the bundle + self.get_applier(server_repo).apply_submission(msg_id, + submission_tuple, + raw_zip_bytes, + os.path.join( + self.tmp_dir, + '_tmp__applying')) + self.assertTrue(overlay.read(os.path.join(client_repo.root, + page_path)) == + page_bytes) + + # Check that the head version is the clients + server_overlay.version = hex_version(server_repo) # tip + self.assertTrue(server_overlay.read(server_page_path) == page_bytes) + +# Test fixures +# hg repo +# create_repo(root path) +# commit_repo(repo) +# rest done w/ IFileFunctions? hmmmm... commit currently done above the line... + + +# Whitebox +# version sha's verify against file contents + +# Cases + +# Error/Illegal +# Create, Remove, Modify w/o deleting all previous versions +# base sha verification failure wrt stated base version +# final patch sha failure + + +# Non-Error +# Create +# Conflict +# No conflict +# Remove +# Conflict +# No conflict +# Modify +# Conflict +# No conflict + +if __name__ == '__main__': + # use -v on command line to get verbose output. + # verbosity keyword arg not supported in 2.6? + if len(sys.argv) >= 2 and sys.argv[1] != '-v': + # Run a single test case + suite = unittest.TestSuite() + #suite.addTest(ConflictTests(sys.argv[1])) + suite.addTest(NoConflictTests(sys.argv[1])) + unittest.TextTestRunner().run(suite) + else: + # Run everything. + unittest.main() diff --git a/infocalypse/wikibot.py b/infocalypse/wikibot.py new file mode 100644 --- /dev/null +++ b/infocalypse/wikibot.py @@ -0,0 +1,681 @@ +""" An FMSBot to run a wiki over freenet. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + +# DCI: fix debugging param values. e.g.: short timeouts that cause 10, 25 errors +import os +import shutil +import time + +from mercurial import ui, hg, commands + +from fcpmessage import GET_DEF, PUT_COMPLEX_DIR_DEF +from fcpclient import parse_progress, get_file_infos, \ + set_index_file, dir_data_source + +from requestqueue import QueueableRequest, RequestQueue + +from validate import is_hex_string +from chk import ENCODED_CHK_SIZE +from fms import TrustCache, to_msg_string +from fmsbot import FMSBot +from submission import ForkingSubmissionHandler, REJECT_NOTRUST, REJECT_FCPFAIL + +from bundlecache import BundleCache, is_writable, make_temp_file +from updatesm import UpdateContext, UpdateStateMachine, QUIESCENT, FINISHING +from infcmds import UICallbacks, set_debug_vars + +# freesite insert stuff +from statemachine import StatefulRequest +from sitecmds import dump_wiki_html + +from wikibotctx import WikiBotContext, context_to_str + +from pathhacks import add_parallel_sys_path +add_parallel_sys_path('fniki') + +HTML_DUMP_DIR = '__html_dump_deletable__' + +# Parameters used by WikiBot. +REQUIRED_PARAMS = frozenset([ + 'FCP_HOST', 'FCP_PORT', 'FCP_POLL_SECS', 'N_CONCURRENT', + 'CANCEL_TIME_SECS', + 'FMS_HOST', 'FMS_PORT', 'FMS_POLL_SECS', + 'BOT_STORAGE_DIR', 'LATEST_INDEX', 'SITE_KEY', 'SITE_NAME', + 'SITE_DEFAULT_FILE', 'INSERT_URI', 'REQUEST_URI','VERBOSITY', + 'TMP_DIR', 'NO_SEARCH', 'USK_HASH', 'FNPUSH_COALESCE_SECS', + 'SITE_COALESCE_SECS', 'NOTIFY_COALESCE_SECS', 'COMMIT_COALESCE_SECS', + 'FMS_GROUP', 'FMS_ID', 'FMS_TRUST_CACHE_SECS', 'FMS_MIN_TRUST', + 'NONE_TRUST', + 'REPO_DIR', 'WIKI_ROOT',]) + +# LATER: Aggregated report message to the list? +# DCI: Think through dos attacks +# DCI: Keep track of patches that have already been applied? +# implicit in hg log, explicit in success.txt ? +# wiki submission tuple is: +# (usk_hash, base_version, chk, length) +class SubmissionRequest(QueueableRequest): + """ A QueueableRequest subclass to read submission CHK zips for + wiki submissions. """ + def __init__(self, queue, msg_id): + QueueableRequest.__init__(self, queue) + self.msg_id = msg_id + +def parse_submission(fms_id, lines, usk_hash): + """ Parse a single submission from raw fms message lines + and returns a submission tuple of the form: + + (fms_id, usk_hash, base_version, chk, length) + + Returns None if no submission could be parsed. 
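+
+    Expected line format (fields are colon separated):
+
+        W:<usk_hash>:<base_version>:<CHK of submission zip>:<zip length>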
+ """ + print "LINES:" + print lines + for line in lines: + if not line.startswith('W:'): + continue + # 0 1 2 3 4 + # W:<repo_hash>:<base_version>:<chk><length> + fields = line.strip().split(':') + if not is_hex_string(fields[1]) or not is_hex_string(fields[2]): + + continue + if fields[1] != usk_hash: + continue + if (not fields[3].startswith('CHK@') or + len(fields[3]) != ENCODED_CHK_SIZE): + continue + try: + length = int(fields[4]) + except ValueError: + continue + # (fms_id, usk_hash, base_version, chk, length) + return (fms_id, fields[1], fields[2], fields[3], length) + return None + +class WikiBot(FMSBot, RequestQueue): + """ An FMSBot implementation to run a wiki over freenet. """ + def __init__(self, name, params, request_runner): + FMSBot.__init__(self, name) + RequestQueue.__init__(self, request_runner) + + self.ctx = None + self.applier = None + self.params = params.copy() + self.ui_ = None + self.repo = None + + self.trust = None + self.update_sm = None + # Why doesn't the base class ctr do this? + request_runner.add_queue(self) + + def trace(self, msg): + """ Write a log message at trace level. """ + self.log("T:" + msg) + + def debug(self, msg): + """ Write a log message at debug level. """ + self.log("D:" + msg) + + def warn(self, msg): + """ Write a log message at warn level. """ + self.log("W:" + msg) + + #----------------------------------------------------------# + # FMSBot implementation. + def on_startup(self): + """ Set up the bot instance. """ + self.trace("on_startup") + # Fail early and predictably. + for required in REQUIRED_PARAMS: + if not required in self.params: + raise KeyError(required) + + # NOT ATOMIC + # REDFLAG: LATER: RTFM python ATOMIC file locking. + if os.path.exists(self.parent.get_path(self, 'pid')): + self.warn("on_startup -- lock file exists!: %s" % + self.parent.get_path(self, 'pid')) + raise IOError("Already running or previous instance crashed.") + pid_file = open(self.parent.get_path(self, 'pid'), 'wb') + try: + pid_file.write("%i\n" % os.getpid()) + self.trace("on_startup -- pid[%i], created lock file: %s" + % (os.getpid(), self.parent.get_path(self, 'pid'))) + finally: + pid_file.close() + + self.ctx = WikiBotContext(self) + self.ctx.setup_dbs(self.params) + + # Can't push this up into the FMSBotRunner because it + # requires a particular fms_id. + # DCI: how does this get reconnected? when server drops + self.trust = TrustCache(self.parent.nntp_server, + self.params['FMS_TRUST_CACHE_SECS']) + # Mercurial stuff. + self.ui_ = WikiBotUI(None, self) + self.repo = hg.repository(self.ui_, self.params['REPO_DIR']) + self.trace("Loaded hg repo from: %s" % self.params['REPO_DIR']) + + self.applier = ForkingSubmissionHandler() + self.applier.ui_ = self.ui_ + self.applier.repo = self.repo + self.applier.logger = self + self.applier.base_dir = os.path.join(self.repo.root, + self.params['WIKI_ROOT']) + + print "BASE_DIR:", self.applier.base_dir + + # 2qt? + self.applier.notify_needs_commit = ( + lambda: self.ctx.set_timeout('COMMIT_COALESCE_SECS')) + self.applier.notify_committed = self.ctx.committed + self._send_status_notification('STARTED') + + def on_shutdown(self, why): + """ Shut down the bot instance. 
""" + self.trace("on_shutdown -- %s" % why) + self.ctx.close_dbs() + self._cleanup_temp_files() + if os.path.exists(self.parent.get_path(self, 'pid')): + try: + os.remove(self.parent.get_path(self, 'pid')) + self.trace("on_shutdown -- removed lock file: %s" + % self.parent.get_path(self, 'pid')) + + except IOError, err: + self.warn("on_shutdown -- err: %s" % str(err)) + + self._send_status_notification('STOPPED') + + def on_fms_change(self, connected): + """ FMSBot implementation. """ + self.trust.server = self.parent.nntp_server + if not connected: + self.debug("The fms server disconnected.") + self.warn("REQUESTING BOT SHUTDOWN!") + self.exit = True + + # Thought about putting the repo hash in the subject but I want to + # keep the subject human readable. + def wants_msg(self, group, items): + """ Return True for messages to the target groups that haven't + been handled.""" + + # Ignore our own repo update notifications. + if not items[1].strip().startswith('Submit:'): + self.trace("wants_msg -- ignored: %s" % items[1]) + return False + + msg_id = items[4] + # Hmmm...better to provide ctx.wants(msg_id) accessor? + if ((not group in self.groups) or + (msg_id in self.ctx.store_handled_ids)) : + #self.trace("wants_msg -- skipped: %s" % msg_id) + return False + self.trace("wants_msg -- accepted: %s" % msg_id) + return True + + def on_idle(self): + """ FMSBot implementation. + + This handles pushing updates of the wikitext repo into Freenet and + re-inserting the wiki freesite as necessary. + """ + self.trace(context_to_str(self.ctx)) + self.ctx.synch_dbs() + + if self.ctx.should_notify(): + self._send_update_notification() + + if not self.update_sm is None: + return + + # DCI: Is this working as expected? + if self.ctx.has_submissions(): + return + + if self.ctx.timed_out('COMMIT_COALESCE_SECS'): + self.trace("Commit local changes after failure.") + self.applier.force_commit() + self.ctx.committed() # DCI: Required? + # Only update freesite on success. + + if self.ctx.timed_out('FNPUSH_COALESCE_SECS'): + self.trace("Starting push into freenet.") + self._start_fnpush() + return + + if self.ctx.should_insert_site(): + self.trace("Starting freesite insertion.") + self._start_freesite_insert() + + # Handle a single message + def recv_fms_msg(self, dummy_group, items, lines): + """ FMSBot implementation to handle incoming FMS messages. """ + msg_id = items[4] + self.trace("recv_fms_msg -- called: %s" % msg_id) + # Hmmm... accessor? ctx.mark_recvd() or put in ctx.wants() ??? + self.ctx.store_handled_ids[msg_id] = "" # (ab)use as hashset + + sender_fms_id = items[2] + submission = parse_submission(sender_fms_id, lines, + self.params['USK_HASH']) + if submission is None: + self.trace("recv_fms_msg -- couldn't parse submission: %s" % msg_id) + return + + if not self._has_enough_trust(msg_id, submission, + self.params['NONE_TRUST']): + self.trace("recv_fms_msg -- not enough trust: %s" % msg_id) + return + + self.trace("recv_fms_msg -- parsed: %s" % str(submission)) + + self.ctx.queue_submission(msg_id, submission) + # Will get picked up by next_runnable. + + #----------------------------------------------------------# + def _cleanup_temp_files(self): + """ Helper to clean up temp files. """ + site_root = os.path.join(self.params['TMP_DIR'], HTML_DUMP_DIR) + if os.path.exists(site_root): + if not site_root.find("deletable"): + raise ValueError("staging dir name must contain 'deletable'") + shutil.rmtree(site_root) + assert not os.path.exists(site_root) + + # Order is import. 
remove_files() errors if there are dirs. + if (not self.update_sm is None and + not self.update_sm.ctx.bundle_cache is None): + self.update_sm.cancel() + self.update_sm.ctx.bundle_cache.remove_files() + + def _has_enough_trust(self, msg_id, submission, none_trust=0): + """ INTERNAL: Returns True if the sender is trusted enough + to commit to the wiki. + + Writes a REJECT_NOTRUST record into rejected.txt when + it returns False. + """ + assert self.trust.server # DCI: fix! + trust = self.trust.get_trust(submission[0]) + self.trace("has_enough_trust -- %s" % str(trust)) + + trust_value = trust[2] + if trust_value is None: + self.trace("has_enough_trust -- used %i for 'None'" % + none_trust) + if trust_value < self.params['FMS_MIN_TRUST']: + # Use %s for trust because it can be 'None' + self.debug("has_enough_trust -- Failed: %s < %s" % ( + str(trust_value), str(self.params['FMS_MIN_TRUST']))) + self.applier.update_change_log(msg_id, submission, + REJECT_NOTRUST, False) + return False + return True + + def _start_fnpush(self): + """ INTERNAL: Starts asynchronous push of local repository into + Freenet. """ + # Intialize update_sm + # start it + assert self.update_sm is None + self.update_sm = setup_sm(self.ui_, self.repo, self.runner, self.params) + # LATER: Replace UICallbacks and back out dorky chaining? + self.update_sm.transition_callback = ( + ChainedCallback.chain((self.update_sm.transition_callback, + self._fnpush_transition))) + self.update_sm.start_pushing(self.ctx.insert_uri()) + + def _start_freesite_insert(self): + """ INTERNAL: Start asynchronous insert of Wiki freesite. """ + assert self.update_sm is None + self.debug("start_freesite_insert -- starting insert of edition: %i" % + (latest_site_index(self.repo) + 1)) + + self.update_sm = setup_sm(self.ui_, self.repo, self.runner, self.params) + # LATER: Replace UICallbacks and back out dorky chaining? + self.update_sm.transition_callback = ( + ChainedCallback.chain((self.update_sm.transition_callback, + self._freesite_transition))) + + # DCI: try block, with file cleanup + # DCI: need to check that there are no uncommited files! + site_root = os.path.join(self.params['TMP_DIR'], HTML_DUMP_DIR) + dump_wiki_html(os.path.join(self.repo.root, self.params['WIKI_ROOT']), + site_root, False) + + infos = get_file_infos(site_root) + set_index_file(infos, self.params['SITE_DEFAULT_FILE']) + self.debug('start_freesite_insert -- dumped %i files' % len(infos)) + self.trace('--- files ---') + for info in infos: + self.trace('%s %s' % (info[0], info[1])) + self.trace('---') + + request = StatefulRequest(self.update_sm) + request.tag = 'freesite_insert' + request.in_params.definition = PUT_COMPLEX_DIR_DEF + request.in_params.fcp_params = self.params.copy() + request.in_params.fcp_params['DontCompress'] = False + request.in_params.fcp_params['URI'] = self._freesite_insert_uri() + + # dir_data_source() creates an IDataSource which allows + # the FCPConnection to slurp the files up over the + # FCP socket as one contiguous blob. + + # Sets up in_params for ClientPutComplexDir as a side effect. + request.custom_data_source = ( + dir_data_source(infos, request.in_params, 'text/html')) + + request.cancel_time_secs = (time.time() + + self.params['CANCEL_TIME_SECS']) + self.update_sm.start_single_request(request) + + def _freesite_insert_uri(self): + """ Return the insert URI for the freesite. 
""" + return '%s/%s-%i/' % (self.params['SITE_KEY'], + self.params['SITE_NAME'], + latest_site_index(self.repo) + 1) + + def _fnpush_transition(self, old_state, new_state): + """ INTERNAL: Handle UpdateStateMachine state changes while pushing + the local repo into Freenet. """ + self.trace("fnpush_transition -- [%s]->[%s]" % + (old_state.name, new_state.name)) + if new_state.name != QUIESCENT: + return + + if old_state.name == FINISHING: + # Success + self.ctx.pushed() + self.debug("fnpush_transition -- fn-push finished.") + prev_value = self.ctx.store_info['LATEST_INDEX'] + self.ctx.update_latest_index(self.update_sm.ctx['INSERT_URI']) + if self.ctx.store_info['LATEST_INDEX'] > prev_value: + self.trace("fnpush_transition -- incremented index to: %i " % + self.ctx.store_info['LATEST_INDEX']) + else: + # Failure + self.debug("fnpush_transition -- fn-push failed.") + # djk20091219 weird ClientPut collision failure + # fails with 9, BUT subsequent get doesn't + # get the updated version. + #DCI: recoverable vs. non-recoverable errors. THINK! + # For now, all infocalypse errors are fatal. + # We can get robustness by other means. e.g. cron. + self.debug("REQUESTING BOT SHUTDOWN!") + self.exit = True + + # Cleanup + self._cleanup_temp_files() + self.update_sm = None + + def _freesite_transition(self, old_state, new_state): + """ INTERNAL: Handle UpdateStateMachine state changes while inserting + the freesite. """ + + self.trace("freesite_transition -- [%s]->[%s]" % + (old_state.name, new_state.name)) + if new_state.name != QUIESCENT: + return + + if old_state.name == FINISHING: + # Success + self.ctx.clear_timeout('SITE_COALESCE_SECS') + self.debug("freesite_transition -- freesite insertion finished.") + tag_site_index(self.ui_, self.repo) + else: + # Failure + self.debug("freesite_transition -- freesite insertion FAILED.") + self.debug("REQUESTING BOT SHUTDOWN!") + self.exit = True + + # Cleanup + self._cleanup_temp_files() + self.update_sm = None + + #----------------------------------------------------------# + # RequestQueue implementation. + def next_runnable(self): + """ RequestQueue implementation. """ + if not self.update_sm is None: + return None # Don't run CHK request while fn-pushing repo. + + msg_id = self.ctx.pop_msg_id() + if msg_id is None: + return None + + self.trace("next_runnable -- popped: %s" % msg_id) + + chk = self.ctx.store_running_requests[msg_id][3] # hmmm why not 0 or 1? + + self.trace("next_runnable -- chk: %s" % chk) + request = SubmissionRequest(self, msg_id) + request.in_params.definition = GET_DEF + request.in_params.fcp_params = self.params.copy() + request.in_params.fcp_params['URI'] = chk + request.in_params.fcp_params['MaxSize'] = 32 * 1024 + + request.cancel_time_secs = (time.time() + + self.params['CANCEL_TIME_SECS']) + # DCI: Retrying ? + self.ctx.mark_running(msg_id) + + return request + + def request_progress(self, dummy_client, msg): + """ RequestQueue implementation dumps progress to log.""" + if msg[0] != 'SimpleProgress': + self.debug(msg[0]) + return + + self.debug(str(parse_progress(msg))) + + def request_done(self, client, msg): + """ RequestQueue implementation. """ + msg_id = client.msg_id + self.debug("request_done -- : %s" % msg_id) + self.ctx.mark_finished(msg_id) + + # DCI: Retrying ??? 
+ submission_tuple = self.ctx.remove_submission(msg_id) + + if msg[0] == 'AllData': # Success + self._handle_submission(msg_id, submission_tuple, msg) + else: + self._handle_fcp_failure(msg_id, submission_tuple, msg) + + + def _handle_submission(self, msg_id, submission_tuple, msg): + """ INTERNAL: Handle incoming submission bundles.""" + self.debug("handle_submission -- %s" % msg_id) + self.trace("handle_submission -- %s" % str(submission_tuple)) + tmp_file = make_temp_file(self.params['TMP_DIR']) + try: + self.applier.apply_submission(msg_id, submission_tuple, + msg[2], tmp_file) + finally: + if os.path.exists(tmp_file): + os.remove(tmp_file) + + def _handle_fcp_failure(self, msg_id, submission_tuple, msg): + """ INTERNAL: Handle FCP request failure when requesting CHK + for submission .zip.""" + code = -1 + if 'Code' in msg[1]: + try: + code = int(msg[1]['Code']) + except ValueError: + code = -1 # Silence W0704 + self.debug("handle_fcp_failure -- %s[%i]" % (msg[0], code)) + self.trace("handle_fcp_failure -- msg:\n%s" % str(msg)) + # DCI: Handle PutFailed, code 9 + self.applier.update_change_log(msg_id, submission_tuple, + REJECT_FCPFAIL, False) + + def _send_status_notification(self, short_msg, long_msg=None): + """ Post a status message to FMS. """ + if long_msg is None: + long_msg = "EOM" + + if not long_msg.endswith('\n'): + long_msg += '\n' + + self.parent.queue_msg((self.params['FMS_ID'], + self.params['FMS_GROUP'], + 'wikibot[%s]:%s' % (self.params['USK_HASH'], + short_msg), + long_msg)) + + def _send_update_notification(self): + """ INTERNAL: Send an FMS notification for the latest repo index. """ + self.trace("send_update_notification -- repo index: %i" % + self.ctx.store_info['LATEST_INDEX']) + + subject = ('Wikitext Update:' + + '/'.join(self.ctx.request_uri().split('/')[1:])) + text = to_msg_string(((self.params['USK_HASH'], + self.ctx.store_info['LATEST_INDEX']), )) + '\n' + groups = self.params['FMS_GROUP'] + if self.params.get('FMS_NOTIFY_GROUP', ''): + groups = "%s, %s" % (groups, self.params['FMS_NOTIFY_GROUP']) + self.trace("send_update_notification -- groups: %s" % groups) + + self.parent.queue_msg((self.params['FMS_ID'], + groups, + subject, + text)) + # DCI: better to use send confirm callback? + self.ctx.clear_timeout('NOTIFY_COALESCE_SECS') + +def latest_site_index(repo): + """ Read the latest known freesite index out of the hg changelog. """ + for tag, dummy in reversed(repo.tagslist()): + if tag.startswith('I_'): + return int(tag.split('_')[1]) + return -1 + +def tag_site_index(ui_, repo, index=None): + """ Tag the local repository with a freesite index. """ + if index is None: + index = latest_site_index(repo) + 1 # hmmmm... lazy vs. explicit. + commands.tag(ui_, repo, 'I_%i' % index) + +def scrub_eol(text): + """ Return text w/o last trailing '\\n'. """ + if text.endswith('\n'): + text = text[:-1] + return text + +# Tested w/ Mercurial 1.3.1 on x86 Linux. Works. +class WikiBotUI(ui.ui): + """ A Mercurial ui subclass which routes all output through + the WikiBot logging functions. """ + + def __init__(self, src=None, wikibot=None): + ui.ui.__init__(self, src) + # Hmmm... I just copied pattern of base class __init__. + # Why doesn't copy() copy wikibot member? + if src: + self.wikibot = src.wikibot + elif not wikibot is None: + self.wikibot = wikibot + assert not self.wikibot is None + + def write(self, *args): + """ ui override which writes into the WikiBot log. 
""" + for arg in args: + self.wikibot.trace(scrub_eol(str(arg))) + + def write_err(self, *args): + """ ui override which writes into the WikiBot log. """ + for arg in args: + self.wikibot.warn(scrub_eol(str(arg))) + + def flush(self): + """ ui override which is a NOP.""" + pass + + # Have no choice, must implement hg's ui interface. + #pylint: disable-msg=R0201 + def interactive(self): + """ ui override which returns False """ + return False + + # DCI: remove? + # This does get called. + def copy(self): + """ ui override.""" + assert hasattr(self, 'wikibot') + ret = self.__class__(self) + assert hasattr(ret, 'wikibot') + return ret +# 2qt? +class ChainedCallback: + """ Helper class to chain UpdateStateMachine transition callbacks. """ + def __init__(self, callbacks): + self.callbacks = callbacks + + def chained_dispatch(self, old_state, new_state): + """ A transition callback implementation which runs the chained + callbacks sequentially. """ + for callback in self.callbacks: + callback(old_state, new_state) + + @classmethod + def chain(cls, callbacks): + """ Returns a transition callback implementation which chains + the callbacks in the callbacks sequence. """ + return ChainedCallback(callbacks).chained_dispatch + +def setup_sm(ui_, repo, runner, params): + """ INTERNAL: Helper function which sets up an UpdateStateMachine + instance. """ + assert is_writable(os.path.expanduser(params['TMP_DIR'])) + + verbosity = params.get('VERBOSITY', 1) + set_debug_vars(verbosity, params) + + callbacks = UICallbacks(ui_) + callbacks.verbosity = verbosity + # DCI: bundle cache needed for inserting? + cache = BundleCache(repo, ui_, params['TMP_DIR']) + + # For Infocalypse repositories + ctx = UpdateContext(None) + ctx.repo = repo + ctx.ui_ = ui_ + ctx.bundle_cache = cache + update_sm = UpdateStateMachine(runner, ctx) + + update_sm.params = params.copy() + update_sm.transition_callback = callbacks.transition_callback + update_sm.monitor_callback = callbacks.monitor_callback + + # Modify only after copy. + update_sm.params['FREENET_BUILD'] = runner.connection.node_hello[1]['Build'] + + return update_sm + diff --git a/infocalypse/wikibotctx.py b/infocalypse/wikibotctx.py new file mode 100644 --- /dev/null +++ b/infocalypse/wikibotctx.py @@ -0,0 +1,247 @@ +""" Class to hold the runtime state of a WikiBot instance. + + Copyright (C) 2009 Darrell Karbott + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2.0 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks +""" + +import shelve +import time + +from fcpclient import get_version, get_usk_for_usk_version + +def pretty_timeout(future_time): + """ Return a human readable string for a timeout. """ + if future_time is None: + return 'False' + diff = future_time - time.time() + if diff < 0: + return 'True' + if diff < 1.0: + diff = 1 # Don't display 0 for <1 sec remaining. 
+ return str(int(diff)) + +def context_to_str(ctx): + """ Return human readable info about a WikiBotContext in a string.""" + return (("running: %i, queued: %i, " + + "commit: %s, fnpush: %s, freesite: %s") % + (len(ctx.store_running_requests['running']), + len(ctx.store_running_requests['queued']), + # DCI: clean list comprehension? + pretty_timeout(ctx.timeouts.get('COMMIT_COALESCE_SECS', + None)), + pretty_timeout(ctx.timeouts.get('FNPUSH_COALESCE_SECS', + None)), + pretty_timeout(ctx.timeouts.get('SITE_COALESCE_SECS', + None)) + )) + + +class WikiBotContext: + # DCI: not exactly, better doc + """ Class to hold the runtime state of a WikiBot instance. """ + def __init__(self, parent): + # shelve storage + self.parent = parent + self.store_handled_ids = None + self.store_running_requests = None + self.store_applied_requests = None + self.store_info = None + + self.timeouts = {} + + def set_timeout(self, key): + """ Set a timeout for key value. """ + self.timeouts[key] = time.time() + self.parent.params[key] + + def clear_timeout(self, key): + """ Reset the timeout for key. """ + if key in self.timeouts: + del self.timeouts[key] + + def timed_out(self, key): + """ Return True if a timeout was set for key and it timed out, + False otherwise. """ + if not key in self.timeouts: + return False + return time.time() >= self.timeouts[key] + + def is_set(self, key): + """ Return True if there's a timeout set for key, False otherwise. """ + return key in self.timeouts + + def synch_dbs(self): + """ Force write of databases to disk. """ + if not self.store_handled_ids is None: + self.store_handled_ids.sync() + if not self.store_running_requests is None: + self.store_running_requests.sync() + + def setup_dbs(self, params): + """ Initialize the databases used for persistent storage. """ + # Load shelves. + # Set of handled msg_ids + assert not self.parent is None + assert not self.parent.parent is None + + self.store_handled_ids = shelve.open( + self.parent.parent.get_path(self.parent, 'store_handled_ids')) + # msg_id -> submission_tuple map + # 'running' -> list of currently running request msg_ids + # 'queued' -> FIFO of msg_ids for enqueued requests + self.store_running_requests = shelve.open( + self.parent.parent.get_path(self.parent, + 'store_running_requests')) + + self.store_info = shelve.open( + self.parent.parent.get_path(self.parent, + 'store_info')) + + self.parent.trace("Opened shelve dbs.") + if not 'running' in self.store_running_requests: + self.store_running_requests['running'] = [] + if not 'queued' in self.store_running_requests: + self.store_running_requests['queued'] = [] + + if self.store_info.get('USK_HASH', '') != params['USK_HASH']: + # Reset if the repos usk changed. hmmmm possible? + self.store_info['USK_HASH'] = params['USK_HASH'] + self.store_info['LATEST_INDEX'] = params['LATEST_INDEX'] + + # Make sure we have the latest index. + if params['LATEST_INDEX'] > self.store_info.get('LATEST_INDEX', 0): + self.store_info['LATEST_INDEX'] = params['LATEST_INDEX'] + self.update_latest_index(params['INSERT_URI']) + self.update_latest_index(params['REQUEST_URI']) + + del params['LATEST_INDEX'] # DCI: debugging hack! + + running = self.store_running_requests['running'] + queued = self.store_running_requests['queued'] + if len(running) > 0: + # DCI: Test + self.parent.debug("Cleaning up crashed requests:\n%s" % + '\n'.join(running)) + # Hmmmm... what if a running request caused the crash? + # Reset after crash. 
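+            # Editor's note: msg_ids left in 'running' by a crashed
+            # instance are prepended to the 'queued' FIFO below, so their
+            # CHK requests get retried on this run instead of being lost.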
+ self.store_running_requests['queued'] = running + queued + self.store_running_requests['running'] = [] + + def close_dbs(self): + """ Close the databases used for persistent storage. """ + if not self.store_handled_ids is None: + self.store_handled_ids.close() + if not self.store_running_requests is None: + self.store_running_requests.close() + if not self.store_info is None: + self.store_info.close() + + def queue_submission(self, msg_id, submission): + """ Add a submission to the submission FIFO. """ + assert not msg_id in self.store_running_requests + queued = self.store_running_requests['queued'] + assert not msg_id in queued + assert not msg_id in self.store_running_requests['running'] + + self.store_running_requests[msg_id] = submission + queued.append(msg_id) + self.store_running_requests['queued'] = queued + + # can return None + def pop_msg_id(self): + """ Remove the oldest submission from the FIFO and return it. """ + queued = self.store_running_requests['queued'] + if len(queued) == 0: + return None + + msg_id = queued.pop(0) + self.store_running_requests['queued'] = queued # force sync. + + #self.trace("next_runnable -- popped: %s" % msg_id) + running = self.store_running_requests['running'] + assert not msg_id in running + return msg_id + + def mark_running(self, msg_id): + """ Persistently mark the submission as running. """ + running = self.store_running_requests['running'] + assert not msg_id in running + running.append(msg_id) + self.store_running_requests['running'] = running # force sync. + + def mark_finished(self, msg_id): + """ Persistently mark the submission as not running. """ + running = self.store_running_requests['running'] + assert msg_id in running + running.remove(msg_id) + self.store_running_requests['running'] = running # force sync. + + def update_latest_index(self, uri): + """ Update the latest known version of the stored repo usk. """ + if uri is None: + return + version = get_version(uri) + if version > self.store_info['LATEST_INDEX']: + self.store_info['LATEST_INDEX'] = version + + def remove_submission(self, msg_id): + """ Remove stored the stored information for the submission. """ + ret = self.store_running_requests[msg_id] + del self.store_running_requests[msg_id] + return ret + + def request_uri(self): + """ Return the repository request URI. """ + return get_usk_for_usk_version(self.parent.params['REQUEST_URI'], + self.store_info['LATEST_INDEX']) + + def insert_uri(self): + """ Return the repository insert URI. """ + return get_usk_for_usk_version(self.parent.params['INSERT_URI'], + self.store_info['LATEST_INDEX']) + + def should_notify(self): + """ Return True if an FMS repo update message should be posted, + False otherwise. """ + return (self.timed_out('NOTIFY_COALESCE_SECS') and + not self.is_set('FNPUSH_COALESCE_SECS')) + + def committed(self, success=False): + """ Handle commit to the local repository. """ + self.clear_timeout('COMMIT_COALESCE_SECS') + self.set_timeout('FNPUSH_COALESCE_SECS') + if success: + # Update freesite on success. + self.set_timeout('SITE_COALESCE_SECS') + + def pushed(self): + """ Handle push of local repo into Freenet. """ + self.clear_timeout('FNPUSH_COALESCE_SECS') + self.set_timeout('NOTIFY_COALESCE_SECS') + + def should_insert_site(self): + """ Return True if the freesite needs to be inserted. """ + return (self.timed_out('SITE_COALESCE_SECS') and + not self.is_set('FNPUSH_COALESCE_SECS')) + + # DCI: correct? 
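+    # Editor's note: WikiBot.on_idle() returns early while this is True,
+    # so queued or running submissions hold off commits, fn-pushes and
+    # freesite inserts until the submission queue drains.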
+ def has_submissions(self): + """ Return True if there are subissions which are running or need + to be run, False otherwise. """ + return (len(self.store_running_requests['running']) > 0 or + len(self.store_running_requests['queued']) > 0) + diff --git a/infocalypse/wikicmds.py b/infocalypse/wikicmds.py --- a/infocalypse/wikicmds.py +++ b/infocalypse/wikicmds.py @@ -1,4 +1,4 @@ -""" Implementation of experiment commands for wikis over freenet. +""" Implementation of experimental commands for wikis over freenet. Copyright (C) 2009 Darrell Karbott @@ -20,13 +20,22 @@ """ import os -import sys +import StringIO +from binascii import hexlify from mercurial import util -from config import write_default_config - +from config import write_default_config, read_freesite_cfg, normalize +from submission import bundle_wikitext, unbundle_wikitext, get_info, \ + NoChangesError, validate_wikitext +from hgoverlay import HgFileOverlay +from infcmds import setup, run_until_quiescent, cleanup +from statemachine import StatefulRequest +from fcpmessage import PUT_FILE_DEF, GET_DEF +from graph import FREENET_BLOCK_LEN, has_version +from updatesm import QUIESCENT, FINISHING, RUNNING_SINGLE_REQUEST +from bundlecache import make_temp_file # HACK from pathhacks import add_parallel_sys_path add_parallel_sys_path('fniki') @@ -34,15 +43,19 @@ add_parallel_sys_path('fniki') import servepiki from piki import create_default_wiki +from fileoverlay import get_file_funcs + def execute_wiki(ui_, repo, params): """ Run the wiki command. """ def out_func(text): """ Helper displays output from serve_wiki via ui.status. """ ui_.status(text + '\n') if params['WIKI'] == 'run': + ui_.status("wikitext version: %s\n" % get_hg_version(repo)[:12]) if not os.path.exists(os.path.join(repo.root, 'fnwiki.cfg')): raise util.Abort("Can't read fnwiki.cfg. Did you forget hg " + "fn-wiki --createconfig?") + servepiki.serve_wiki(params['HTTP_PORT'], params['HTTP_BIND'], out_func) return @@ -63,3 +76,168 @@ def execute_wiki(ui_, repo, params): raise util.Abort("Unsupported subcommand: " + params.get('WIKI', 'unknown')) +def get_hg_version(repo): + """ Return the 40 digit hex string for the current hg version. """ + heads = repo.heads() + if len(heads) > 1: + raise util.Abort("Multiple heads!") + + parents = [cp.node() for cp in repo[None].parents()] + if len(parents) > 1: + raise util.Abort("Multiple parents!") + if heads[0] != parents[0]: + raise util.Abort("Working directory is not the head! Run hg update.") + + return hexlify(heads[0]) +# LATER: better error message when there's no OVERLAY dir? +def execute_wiki_submit(ui_, repo, params, stored_cfg): + """ Insert and overlayed wiki change submission CHK into freenet and + return a notification message string. """ + update_sm = None + try: + # Read submitter out of stored_cfg + submitter = stored_cfg.defaults.get('FMS_ID', None) + assert not submitter is None + assert submitter.find('@') == -1 + + # Get version, i.e. just the hg parent == hg head + version = get_hg_version(repo) + + params['ISWIKI'] = True + read_freesite_cfg(ui_, repo, params, stored_cfg) + if not params.get('OVERLAYED', False): + raise util.Abort("Can't submit from non-overlayed wiki edits!") + if not params.get('CLIENT_WIKI_GROUP', None): + # DCI: test code path + raise util.Abort("No wiki_group in fnwiki.cfg. Don't " + + "know where to post to!") + + ui_.status("\nPreparing to submit to %s FMS group as %s.\n" % + (params['CLIENT_WIKI_GROUP'], submitter)) + + # Create submission zip file in RAM. 
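+        # Editor's note: bundle_wikitext() returns the .zip as raw bytes;
+        # anything >= FREENET_BLOCK_LEN is rejected below ("must be <32K"),
+        # presumably so the submission always fits in a single Freenet block.
+        # The notification built further down is a plain colon-separated
+        # record, 'W:<repo_hash>:<base_version>:<chk>:<length>', which is
+        # what parse_submission() in wikibot.py splits apart.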
+ overlay = get_file_funcs(os.path.join(repo.root, params['WIKI_ROOT']), + True) + try: + raw_bytes = bundle_wikitext(overlay, version, submitter) + except NoChangesError: + raise util.Abort("There are no overlayed changes to submit.") + # Punt if it's too big. + if len(raw_bytes) >= FREENET_BLOCK_LEN: + raise util.Abort("Too many changes. Change .zip must be <32K") + + update_sm = setup(ui_, repo, params, stored_cfg) + + # Make an FCP file insert request which will run on the + # on the state machine. + request = StatefulRequest(update_sm) + request.tag = 'submission_zip_insert' + request.in_params.definition = PUT_FILE_DEF + request.in_params.fcp_params = update_sm.params.copy() + request.in_params.fcp_params['URI'] = 'CHK@' + request.in_params.send_data = raw_bytes + + ui_.status("Inserting %i byte submission CHK...\n" % len(raw_bytes)) + update_sm.start_single_request(request) + run_until_quiescent(update_sm, params['POLL_SECS']) + + heads = [hexlify(head) for head in repo.heads()] + heads.sort() + if update_sm.get_state(QUIESCENT).arrived_from(((FINISHING,))): + chk = update_sm.get_state(RUNNING_SINGLE_REQUEST).\ + final_msg[1]['URI'] + ui_.status("Patch CHK:\n%s\n" % + chk) + # ':', '|' not in freenet base64 + # DCI: why normalize??? + # (usk_hash, base_version, chk, length) + ret = ':'.join(('W', + normalize(params['REQUEST_URI']), + version[:12], + chk, + str(len(raw_bytes)))) + + ui_.status("\nNotification:\n%s\n" % ret + + '\n') + + return ret, params['CLIENT_WIKI_GROUP'] + + raise util.Abort("Submission CHK insert failed.") + + finally: + # Cleans up out file. + cleanup(update_sm) + +def execute_wiki_apply(ui_, repo, params, stored_cfg): + """ Fetch a wiki change submission CHK and apply it to a local + directory. """ + update_sm = None + try: + assert 'REQUEST_URI' in params + # Get version, i.e. just the hg parent == hg head + version = get_hg_version(repo) + + # Get target directory. + params['ISWIKI'] = True + read_freesite_cfg(ui_, repo, params, stored_cfg) + + update_sm = setup(ui_, repo, params, stored_cfg) + + # Make an FCP download request which will run on the + # on the state machine. + request = StatefulRequest(update_sm) + request.tag = 'submission_zip_request' + request.in_params.definition = GET_DEF # To RAM. + request.in_params.fcp_params = update_sm.params.copy() + request.in_params.fcp_params['URI'] = params['REQUEST_URI'] + # Knee high barrier against abuse. + request.in_params.fcp_params['MaxSize'] = FREENET_BLOCK_LEN + + ui_.status("Requesting wiki submission from...\n%s\n" % + params['REQUEST_URI']) + update_sm.start_single_request(request) + run_until_quiescent(update_sm, params['POLL_SECS']) + + if update_sm.get_state(QUIESCENT).arrived_from(((FINISHING,))): + raw_bytes = update_sm.get_state(RUNNING_SINGLE_REQUEST).\ + final_msg[2] + assert request.response[0] == 'AllData' + ui_.status("Fetched %i byte submission.\n" % len(raw_bytes)) + base_ver, submitter = get_info(StringIO.StringIO(raw_bytes)) + ui_.status("Base version: %s, Submitter: %s (unverifiable!)\n" + % (base_ver[:12], submitter)) + + #print "H_ACKING base_ver to test exception!" + #base_ver = 'da2f653c5c47b7ee7a814e668aa1d63c50c3a4f3' + if not has_version(repo, base_ver): + ui_.warn("That version isn't in the local repo.\n" + + "Try running hg fn-pull --aggressive.\n") + raise util.Abort("%s not in local repo" % base_ver[:12]) + + if base_ver != version: + ui_.warn("Version mismatch! 
You might have to " + + "manually merge.\n") + + # Set up an IFileFunctions that reads the correct versions of + # the unpatched files out of Mercurial. + overlay = HgFileOverlay(ui_, repo, + # i.e. "<>/wiki_root" NOT " + # <>/wiki_root/wikitext" + os.path.join(repo.root, + params['WIKI_ROOT']), + # cleanup() in finally deletes this. + make_temp_file(update_sm.ctx. + bundle_cache.base_dir)) + overlay.version = base_ver + validate_wikitext(overlay) + updates = unbundle_wikitext(overlay, + StringIO.StringIO(raw_bytes)) + for index, label in enumerate(('CREATED', 'MODIFIED', 'REMOVED', + 'ALREADY PATCHED')): + if len(updates[index]) > 0: + values = list(updates[index]) + values.sort() + ui_.status('%s:\n%s\n' % (label, '\n'.join(values))) + + finally: + cleanup(update_sm) diff --git a/wormarc/binaryrep.py b/wormarc/binaryrep.py --- a/wormarc/binaryrep.py +++ b/wormarc/binaryrep.py @@ -216,6 +216,7 @@ def get_file_sha(full_path): """ Return the 20 byte sha1 hash digest of a file. """ in_file = open(full_path, 'rb') try: + # Bug: why doesn't this use sha_func? sha_value = sha1() while True: bytes = in_file.read(READ_CHUNK_LEN)
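
Editor's note on the wormarc/binaryrep.py hunk above: the added comment asks why
get_file_sha() hard-codes sha1() instead of the module's sha_func hook. A
minimal sketch of a chunked file hash that takes the hash factory as a
parameter is shown below; the READ_CHUNK_LEN value is a placeholder and the
helper itself is illustrative, not the project's API.

    import hashlib

    # Placeholder value; the real constant lives in wormarc/binaryrep.py.
    READ_CHUNK_LEN = 128 * 1024

    def file_digest(full_path, hash_factory=hashlib.sha1):
        """ Return the binary digest of a file, hashing it in chunks. """
        hasher = hash_factory()
        in_file = open(full_path, 'rb')
        try:
            while True:
                chunk = in_file.read(READ_CHUNK_LEN)
                if not chunk:
                    break
                hasher.update(chunk)
        finally:
            in_file.close()
        return hasher.digest()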