""" Simplified client interface for common FCP request. Copyright (C) 2008 Darrell Karbott This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: djk@isFiaD04zgAgnrEC5XJt1i4IE7AkNPqhBG5bONi6Yks """ import mimetypes, os, re from fcpconnection import FCPConnection, IDataSource, READ_BLOCK, \ MinimalClient, PolledSocket, FCPError, sha1_hexdigest from fcpmessage import GETNODE_DEF, GENERATE_SSK_DEF, \ GET_REQUEST_URI_DEF, GET_DEF, \ PUT_FILE_DEF, PUT_REDIRECT_DEF, PUT_COMPLEX_DIR_DEF # Defaults for commonly used FCP parameters. FCP_PARAM_DEFAULTS = { 'ReturnType':'direct', 'IgnoreDS':False, 'MaxRetries':3, 'DontCompress':True, # Hmmmm... 'Verbosity':1023 # MUST set this to get progress messages. } #-----------------------------------------------------------# # file_info helper functions #-----------------------------------------------------------# def get_file_infos(directory, forced_mime_type=None, accept_regex = None): """ Traverse a directory and return a list of file information tuples which is suitable for use by FCPClient.put_complex_dir(). forced_mime_type determines the value of the mime_type field in the returned tuples. If acceptRegex is not None only files which match it are added. TUPLE FORMAT: (name, length, mime_type, full_path) """ def walk_visitor(file_info, dirname, names): """ Function scope visitor implementation passed to os.path.walk. """ for name in names: full_name = os.path.join(dirname, name) if os.path.isfile(full_name): base = file_info[0] local_path = full_name.replace(base, '') if file_info[2] and not file_info[2].match(local_path): # Skip files rejected by the regex continue file_info[1].append((local_path, os.path.getsize(full_name), forced_mime_type, full_name)) if directory[-1] != os.path.sep: # Force trailing path separator. directory += os.path.sep file_info = (directory, [], accept_regex) os.path.walk(directory, walk_visitor, file_info) return file_info[1] def total_length(file_infos): """ Returns the sum of the file lengths in file_info list. """ total = 0 for info in file_infos: total += info[1] return total def set_index_file(file_infos, file_name): """ Move the tuple with the name file_name to the front of file_infos so that it will be used as the index. """ index = None for info in file_infos: # hmmm... faster search? if info[0] == file_name: index = info break if index is None: raise ValueError("No file named: %s" % file_name) file_infos.remove(index) file_infos.insert(0, index) class FileInfoDataSource(IDataSource): """ IDataSource which concatenates files in a list of file infos into a contiguous data stream. Useful for direct ClientPutComplexDir requests. """ MSG_LENGTH_MISMATCH = "Upload bytes doesn't match sum of " \ + "lengths in file_infos. Did the files " \ + "change during uploading?" 

    def __init__(self, file_infos):
        IDataSource.__init__(self)
        assert file_infos
        self.infos = file_infos
        self.total_length = total_length(file_infos)
        self.running_total = 0
        self.chunks = None
        self.input_file = None

    def data_generator(self, infos):
        """ INTERNAL: Returns a generator which yields the concatenated
            data from all the file infos.
        """
        for info in infos:
            #print "FileInfoDataSource.GEN -- opening", info[3]
            self.input_file = open(info[3], 'rb')
            while True:
                raised = True
                try:
                    data = self.input_file.read(READ_BLOCK)
                    #print "FileInfoDataSource.GEN -- read:", len(data)
                    raised = False
                finally:
                    # Note: Wacky control flow because you can't yield
                    #       from a finally block.  read() returns '' at
                    #       EOF, so the file is closed here before the
                    #       break below.
                    if raised or not data:
                        #print "FileInfoDataSource.GEN -- closing", info[3]
                        self.input_file.close()
                        self.input_file = None
                if not data:
                    break
                self.running_total += len(data)
                if self.running_total > self.total_length:
                    raise IOError(self.MSG_LENGTH_MISMATCH)
                #print "FileInfoDataSource.GEN -- yielding", len(data)
                yield data

        if self.running_total != self.total_length:
            raise IOError(self.MSG_LENGTH_MISMATCH)

        yield None
        return

    def initialize(self):
        """ IDataSource implementation. """
        #print "FileInfoDataSource.initialize -- called"
        assert self.chunks is None
        self.chunks = self.data_generator(self.infos)

    def data_length(self):
        """ IDataSource implementation. """
        #print "FileInfoDataSource.data_length -- ", self.total_length
        return self.total_length

    def release(self):
        """ IDataSource implementation. """
        #print "FileInfoDataSource.release -- called"
        if self.chunks is not None:
            self.chunks = None
        if self.input_file is not None:
            self.input_file.close()
            self.input_file = None

    def read(self):
        """ IDataSource implementation. """
        #print "FileInfoDataSource.read -- called"
        assert self.chunks is not None
        if self.chunks:
            ret = self.chunks.next()
            if ret is None:
                self.chunks = None
                #print "FileInfoDataSource.read -- returned None"
                return None
            #print "FileInfoDataSource.read -- returned:", len(ret)
            return ret
        #print "FileInfoDataSource.read(1) -- returned None, \
        #       SHOULD NEVER HAPPEN"
        return None

#-----------------------------------------------------------#
# Key classification and manipulation helper functions
#-----------------------------------------------------------#

# REDFLAG: Use a common regex? Not sure that would cut loc...
USK_FILE_REGEX = re.compile('(freenet:)?(USK).*/((\\-)?[0-9]+[0-9]*)$')
def is_usk_file(uri):
    """ Returns True if uri points to a single file, False otherwise. """
    return bool(USK_FILE_REGEX.match(uri))

USK_CONTAINER_REGEX = re.compile('(freenet:)?(USK).*/((\\-)?[0-9]+[0-9]*)/$')
def is_usk_container(uri):
    """ Returns True if uri is a USK uri which points to a Freenet
        Container, False otherwise.
    """
    return bool(USK_CONTAINER_REGEX.match(uri))

KEY_TYPE_REGEX = re.compile('(freenet:)?(?P<key_type>CHK|KSK|SSK|USK)@')
def key_type(uri):
    """ Returns the key type. """
    match = KEY_TYPE_REGEX.match(uri)
    if not match:
        raise Exception("Doesn't look like a Freenet URI: %s" % uri)
    return match.groupdict()['key_type']

def is_chk(uri):
    """ Returns True if the URI is a CHK key, False otherwise. """
    return key_type(uri) == 'CHK'

def is_ksk(uri):
    """ Returns True if the URI is a KSK key, False otherwise. """
    return key_type(uri) == 'KSK'

def is_ssk(uri):
    """ Returns True if the URI is an SSK key, False otherwise. """
    return key_type(uri) == 'SSK'

def is_usk(uri):
    """ Returns True if the URI is a USK key, False otherwise. """
    return key_type(uri) == 'USK'

# LATER: fix regex to work for SSKs too.
VERSION_REGEX = re.compile('(?P<usk>USK)@(.*)/(?P<version>'
                           + '(\\-)?[0-9]+[0-9]*)(/.*)?')

def get_version(uri):
    """ Return the version index of a USK.

        Raises ValueError if no version could be extracted.
    """
    try:
        version = int(VERSION_REGEX.match(uri).groupdict()['version'])
    except (AttributeError, ValueError):
        # AttributeError means the regex didn't match at all.
        raise ValueError("Couldn't parse a USK or SSK version from: %s" % uri)
    return version

def get_ssk_for_usk_version(usk_uri, version):
    """ Return an SSK for a specific version of a USK.

        NOTE: The version in usk_uri is ignored.
    """
    match = VERSION_REGEX.match(usk_uri)
    if not match:
        raise Exception("Couldn't parse version from USK: %s" % usk_uri)

    return 'SSK' + usk_uri[match.end('usk') : match.start('version') - 1] \
           + '-' + str(version) + usk_uri[match.end('version'):]

def get_usk_for_usk_version(usk_uri, version, negative = False):
    """ Return a USK for a specific version of a USK.

        NOTE: The version in usk_uri is ignored.
              Works for both containers and files.
    """
    match = VERSION_REGEX.match(usk_uri)
    if not match:
        raise Exception("Couldn't parse version from USK: %s" % usk_uri)

    if negative and version > 0:
        version = -1 * version
    version_str = str(version)
    if version == 0 and negative:
        version_str = '-0'
        # NOTE: A negative zero index can't be represented as an integer
        #       because -0 == 0, so it has to be special cased as a string.
        #       A different marker symbol ('*'?) would have kept
        #       implementers from jamming the version into an integer.

    assert not negative or version_str.find('-') > -1

    return usk_uri[0 : match.start('version')] \
           + version_str + usk_uri[match.end('version'):]

def is_negative_usk(usk_uri):
    """ Returns True if usk_uri has a negative version index,
        False otherwise.

        REQUIRES: usk_uri is a USK key.
    """
    match = VERSION_REGEX.match(usk_uri)
    if not match:
        raise Exception("Couldn't parse version from USK: %s" % usk_uri)
    return match.groupdict()['version'].find('-') > -1

def get_negative_usk(usk_uri):
    """ Return a USK with a negative version index.

        NOTE: Using a negative index causes the FCP server to search
              harder for later versions in ClientGet requests.

        NOTE: This is a NOP if usk_uri is already negative.
    """
    version = get_version(usk_uri)
    if is_negative_usk(usk_uri):
        return usk_uri

    return get_usk_for_usk_version(usk_uri, version, True)

def prefetch_usk(client, usk_uri, allowed_redirects = 3,
                 message_callback = None):
    """ Force the FCP server to explicitly search for updates to the USK.

        Returns the latest version as an integer, or None if no version
        could be determined.

        This works by requesting the USK with a negative index value.

        Note that this can return a version LESS THAN the version in
        usk_uri.
    """
    if client.in_params.async:
        raise ValueError("This function only works synchronously.")

    usk_uri = get_negative_usk(usk_uri)
    client.reset()
    callback = client.message_callback
    return_type = client.in_params.default_fcp_params.get('ReturnType')
    version = None
    try:
        if message_callback:
            # Install a custom message callback.
            client.message_callback = message_callback
        client.in_params.default_fcp_params['ReturnType'] = 'none'
        try:
            version = get_version(client.get(usk_uri,
                                             allowed_redirects)[1]['URI'])
        except FCPError:
            version = None
    finally:
        client.message_callback = callback
        if return_type:
            client.in_params.default_fcp_params['ReturnType'] = return_type

    return version

def latest_usk_index(client, usk_uri, allowed_redirects = 1,
                     message_callback = None):
    """ Determines the version index of a USK key.

        Returns a (version, data_found) tuple where version is the
        integer version and data_found is the data_found message for
        the latest index.
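
        Example (sketch; the USK is a placeholder and client must be a
        blocking, i.e. non-async, FCPClient):

            version, msg = latest_usk_index(client, 'USK@.../mysite/5/')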

        NOTE: This fetches the key and discards the data.  It may take
              a very long time if you call it for a key which points
              to a large block of data.
    """
    if client.in_params.async:
        raise ValueError("This function only works synchronously.")

    client.reset()
    callback = client.message_callback
    #print "PARAMS:", client.in_params.default_fcp_params
    return_type = client.in_params.default_fcp_params.get('ReturnType')
    try:
        if message_callback:
            # Install a custom message callback.
            client.message_callback = message_callback
        client.in_params.default_fcp_params['ReturnType'] = 'none'
        prev = None
        while True:
            # Hmmmm... Make sure that the USK has 'settled'.
            next_msg = client.get(usk_uri, allowed_redirects)
            if prev and next_msg[1]['URI'] == prev[1]['URI']:
                break
            prev = next_msg
    finally:
        client.message_callback = callback
        if return_type:
            client.in_params.default_fcp_params['ReturnType'] = return_type

    return (get_version(prev[1]['URI']), prev)

def get_insert_chk_filename(uri):
    """ Returns the file name part of CHK@/file_part.ext style
        CHK insert URIs.
    """
    assert uri.startswith('CHK@')
    if not uri.startswith('CHK@/'):
        if uri != 'CHK@':
            raise ValueError("Unexpected data after '@'. Maybe you forgot "
                             + "the '/' before the filename part?")
        return None

    return uri[5:]

def set_insert_uri(params, uri):
    """ INTERNAL: Set the 'URI' and 'TargetFilename' fields in params,
        correctly handling CHK@/filename.ext style insert URIs.
    """
    if is_chk(uri):
        params['URI'] = 'CHK@'
        filename = get_insert_chk_filename(uri)
        if filename is not None:
            params['TargetFilename'] = filename
    else:
        params['URI'] = uri

def get_usk_hash(usk):
    """ Returns a 12 hex digit hash for a USK which is independent
        of version.
    """
    return sha1_hexdigest(get_usk_for_usk_version(usk, 0))[:12]

def check_usk_hash(usk, hash_value):
    """ Returns True if the hash matches, False otherwise. """
    return (sha1_hexdigest(get_usk_for_usk_version(usk, 0))[:12]
            == hash_value)

def show_progress(dummy, msg):
    """ Default message callback implementation. """
    if msg[0] == 'SimpleProgress':
        print "Progress: (%s/%s/%s)" % (msg[1]['Succeeded'],
                                        msg[1]['Required'],
                                        msg[1]['Total'])
    else:
        print "Progress: %s" % msg[0]

def parse_progress(msg):
    """ Parse a SimpleProgress message into a tuple. """
    assert msg[0] == 'SimpleProgress'

    return (int(msg[1]['Succeeded']),
            int(msg[1]['Required']),
            int(msg[1]['Total']),
            int(msg[1]['Failed']),
            int(msg[1]['FatallyFailed']),
            bool(msg[1]['FinalizedTotal'].lower() == 'true'))

class FCPClient(MinimalClient):
    """ A class to execute common FCP requests.

        This class provides a simplified interface for common FCP
        commands.  Calls are blocking by default.  Set
        FCPClient.in_params.async = True to run asynchronously.

        You can set FCP parameters using the
        FCPClient.in_params.default_fcp_params dictionary.

        GOTCHA:
        Don't set FCPClient.in_params.fcp_params directly.  It is reset
        before most calls, so changes to it probably won't have any effect.
    """

    def __init__(self, conn):
        MinimalClient.__init__(self)
        self.conn = conn
        self.message_callback = show_progress
        self.in_params.default_fcp_params = FCP_PARAM_DEFAULTS.copy()

    @classmethod
    def connect(cls, host, port, socket_class = PolledSocket,
                state_callback = None):
        """ Create an FCPClient which owns a new FCPConnection.

            NOTE: If you need multiple FCPClient instances it is better
                  to explicitly create an FCPConnection and use the
                  FCPClient.__init__() method so that all instances are
                  multiplexed over the same connection.
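
            Example (sketch; 9481 is the customary FCP port, adjust for
            your node):

                client = FCPClient.connect('127.0.0.1', 9481)
                try:
                    print client.generate_ssk()
                finally:
                    client.close()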
""" sock = None conn = None raised = True try: sock = socket_class(host, port) conn = FCPConnection(sock, True, state_callback) raised = False finally: if raised: if conn: conn.close() if sock: sock.close() return FCPClient(conn) def wait_until_finished(self): """ Wait for the current request to finish. """ assert self.conn self.conn.wait_for_terminal(self) def close(self): """ Close the underlying FCPConnection. """ if self.conn: self.conn.close() def get_node(self, opennet = False, private = False, volatile = True): """ Query node information by sending an FCP GetNode message. """ # Hmmmm... I added an 'Identifier' value to request message # even though there's None in the doc. See GETNODE_DEF. # It seems to work. self.reset() self.in_params.definition = GETNODE_DEF self.in_params.fcp_params = {'GiveOpennetRef': opennet, 'WithPrivate': private, 'WithVolatile': volatile } return self.conn.start_request(self) def generate_ssk(self): """ Generate an SSK key pair. Returns the SSKKeyPair message. """ self.reset() self.in_params.definition = GENERATE_SSK_DEF return self.conn.start_request(self) def get_request_uri(self, insert_uri): """ Return the request URI corresponding to the insert URI. REQUIRES: insert_uri is a private SSK or USK. """ if self.in_params.async: raise ValueError("This function only works synchronously.") assert is_usk(insert_uri) or is_ssk(insert_uri) if is_usk(insert_uri): target = get_ssk_for_usk_version(insert_uri, 0) else: target = insert_uri self.reset() self.in_params.definition = GET_REQUEST_URI_DEF self.in_params.fcp_params = {'URI': target, 'MaxRetries': 1, 'PriorityClass':1, 'UploadFrom':'direct', 'DataLength':9, 'GetCHKOnly':True} self.in_params.send_data = '012345678' # 9 bytes of data inverted = self.conn.start_request(self)[1]['URI'] public = inverted[inverted.find('@') + 1: inverted.find('/')] return insert_uri[:insert_uri.find('@') + 1] + public \ + insert_uri[insert_uri.find('/'):] def get(self, uri, allowed_redirects = 0, output_file = None): """ Requests the data corresponding to the URI from the FCP server. Returns an AllData or DataFound (when self.default_fcp_params['ReturnType'] == 'none') message on success. If output_file or self.output_file is not None, write the raw data to file instead of returning it as a string. Raises an FCPError on failure. An extra 'URI' entry is added to the returned message containing the final URI the data was requested from after redirecting. An extra 'Metadata.ContentType' entry is added to the returned AllData message containing the mime type information extracted from the last DataFound. """ self.reset() self.in_params.definition = GET_DEF self.in_params.fcp_params = {'URI':uri } self.in_params.allowed_redirects = allowed_redirects self.in_params.file_name = output_file # REDFLAG: fix self.in_params.send_data = False return self.conn.start_request(self) def put(self, uri, bytes, mime_type=None): """ Insert a string into Freenet. Returns a PutSuccessful message on success. Raises an FCPError on failure. """ self.reset() self.in_params.definition = PUT_FILE_DEF set_insert_uri(self.in_params.fcp_params, uri) if mime_type: self.in_params.fcp_params['Metadata.ContentType'] = mime_type self.in_params.send_data = bytes return self.conn.start_request(self) def put_file(self, uri, path, mime_type=None): """ Insert a single file into Freenet. Returns a PutSuccessful message on success. Raises an FCPError on failure. REQUIRES: The size of the file can't change during this call. 
""" self.reset() self.in_params.definition = PUT_FILE_DEF set_insert_uri(self.in_params.fcp_params, uri) if mime_type: self.in_params.fcp_params['Metadata.ContentType'] = mime_type # REDFLAG: test. not sure this ever worked in previous version #if 'UploadFrom' in params and params['UploadFrom'] == 'disk': # # REDFLAG: test this code path! # params['FileName'] = path # path = None self.in_params.file_name = path # REDFLAG: fix self.in_params.send_data = True return self.conn.start_request(self) def put_redirect(self, uri, target_uri, mime_type=None): """ Insert a redirect into freenet. Returns a PutSuccessful message on success. Raises an FCPError on failure. """ self.reset() self.in_params.definition = PUT_REDIRECT_DEF self.in_params.fcp_params = {'URI':uri, 'TargetURI':target_uri, 'UploadFrom':'redirect'} if mime_type: self.in_params.fcp_params['Metadata.ContentType'] = mime_type return self.conn.start_request(self) def put_complex_dir(self, uri, file_infos, default_mime_type = 'text/plain'): """ Insert a collection of files into a Freenet Container. file_infos must be a list of (name, length, mime_type, full_path) tuples. file_infos[0] is inserted as the default document. mime types: If the mime_type value in the file_infos tuple for the file is not None, it is used. Otherwise the mime type is guessed from the file extension. Finally, if guessing fails, default_mime_type is used. """ assert default_mime_type assert file_infos self.reset() self.in_params.definition = PUT_COMPLEX_DIR_DEF self.in_params.fcp_params = {'URI': uri} for field in self.in_params.default_fcp_params: if field.startswith("Files"): raise ValueError("You can't set file entries via " + " default_fcp_params.") if 'DefaultName' in self.in_params.default_fcp_params: raise ValueError("You can't set 'DefaultName' via " + "default_fcp_params.") files = {} index = 0 for info in file_infos: mime_type = info[2] if not mime_type: # First try to guess from the extension. type_tuple = mimetypes.guess_type(info[0]) if type_tuple: mime_type = type_tuple[0] if not mime_type: # Fall back to the default. mime_type = default_mime_type files['Files.%i.Name' % index] = info[0] files['Files.%i.UploadFrom' % index] = 'direct' files['Files.%i.DataLength' % index] = info[1] files['Files.%i.Metadata.ContentType' % index] = mime_type index += 1 self.in_params.fcp_params['Files'] = files self.in_params.fcp_params['DefaultName'] = file_infos[0][0] #REDFLAG: Fix self.in_params.send_data = True # IMPORTANT: Don't set the data length. return self.conn.start_request(self, FileInfoDataSource(file_infos), False) ############################################################ # Helper function for hg changeset bundle handling. ############################################################ # Saw here: # http://sage.math.washington.edu/home/robertwb/trac-bundle/test \ # /sage_trac/log/trac.log HG_MIME_TYPE = 'application/mercurial-bundle' def package_metadata(metadata): """ Package the bundle contents metadata into a string which can be inserted into to the Metadata.ContentType field of the Freenet key. All args must be full 40 digit hex keys. """ return "%s;%s,%s,%s" % (HG_MIME_TYPE, metadata[0], metadata[1], metadata[2]) CHANGESET_REGEX = re.compile('.*;\s*([0-9a-fA-F]{40,40})\s*,' + '\s*([0-9a-fA-F]{40,40})\s*,' + '\s*([0-9a-fA-F]{40,40})') def parse_metadata(msg): """ INTERNAL: Parse the (base_rev, first_rev, tip) info out of the Metadata.ContentType field of msg. 

        FCP 2.0 doesn't have support for user defined metadata, so we
        jam the metadata we need into the mime type field.
    """
    match = CHANGESET_REGEX.match(msg[1]['Metadata.ContentType'])
    if not match or len(match.groups()) != 3:
        # This happens for bundles inserted with older versions
        # of hg2fn.py.
        raise ValueError("Couldn't parse changeset info from [%s]."
                         % msg[1]['Metadata.ContentType'])
    return match.groups()

def make_rollup_filename(rollup_info, request_uri):
    """ Return a filename containing info for a rollup bundle. """
    if not is_usk_file(request_uri):
        raise ValueError("request_uri is not a USK file uri.")

    # Hmmmm.... get rid of symbolic names?
    tip = rollup_info[0][0]
    parent = rollup_info[0][1]
    start_index = rollup_info[0][2]
    end_index = rollup_info[0][3]
    assert len(tip) == 40 # LATER: is_changeset_id_str() func?
    assert len(parent) == 40
    assert start_index >= 0
    assert end_index >= 0
    assert end_index >= start_index

    human_readable = request_uri.split('/')[1]
    # Hmmmm... always suppress the '.hg' suffix.
    if human_readable.lower().endswith('.hg'):
        human_readable = human_readable[:-3]

    # <human_name>_<end_index>_<start_index>_<tip>_<parent>_ID<repoid>
    return "%s_%i_%i_%s_%s_ID%s" % (human_readable, end_index, start_index,
                                    tip[:12], parent[:12],
                                    get_usk_hash(request_uri))

def parse_rollup_filename(filename):
    """ Parse a filename created with make_rollup_filename
        into a tuple.
    """
    fields = filename.split('_')
    repo_id = fields[-1]
    if not repo_id.startswith("ID") or len(repo_id) != 14:
        raise ValueError("Couldn't parse repo usk hash.")
    repo_id = repo_id[2:]
    parent = fields[-2]
    if len(parent) != 12:
        raise ValueError("Couldn't parse parent.")
    tip = fields[-3]
    if len(tip) != 12:
        raise ValueError("Couldn't parse tip.")
    start_index = int(fields[-4])
    end_index = int(fields[-5])
    # Join everything before the five trailing fields so that
    # underscores in the human readable name are preserved.
    human_readable = '_'.join(fields[:-5])

    return (human_readable, start_index, end_index, tip, parent, repo_id)
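

############################################################
# Example usage (sketch)
############################################################

# The host, port, index file name and argument values below are
# illustrative assumptions; nothing else in this module depends on them.
def example_insert_site(directory, insert_uri, index_name='index.html',
                        host='127.0.0.1', port=9481):
    """ EXAMPLE (sketch): insert a local directory as a Freenet
        container using the helpers above.

        Assumes a blocking FCPClient and that index_name names a file
        directly under directory.
    """
    client = FCPClient.connect(host, port)
    try:
        infos = get_file_infos(directory)
        set_index_file(infos, index_name)
        msg = client.put_complex_dir(insert_uri, infos)
        return msg[1]['URI']
    finally:
        client.close()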