# # patch "dumb.py" # from [e13ab5bf2026025a1b559f02fc88f164e23cf686] # to [587c80aaf9ab28b6e17049f75dc8ddfb706eb064] # # patch "fs.py" # from [05141f5122b18b6601bf0a33a3834ae48f6c3309] # to [019161a560cc3092f62b01bbf7146d88339e1bb5] # # patch "fs_sftp.py" # from [67ebc9831df0ee859488fd2118383358061e7ce3] # to [ae1409f264dc600ffe8109fca9f716c10e557e10] # # patch "merkle_dir.py" # from [fa38d11914a8107a2fb7c9de37b002cb0053579f] # to [736fb122be92473b73f6de96a0e9584da9700611] # # patch "monotone.py" # from [a60a03bd596164fa7a6bd99d7007b4898f033da2] # to [7bfd2db49f9fd5aeee6a7c0f16f78e1d82ea9308] # ======================================================================== --- dumb.py e13ab5bf2026025a1b559f02fc88f164e23cf686 +++ dumb.py 587c80aaf9ab28b6e17049f75dc8ddfb706eb064 @@ -111,8 +111,8 @@ print "Pushed %s packets to %s" % (pushed, other_url) def main(name, args): - + pass -if __name__ = __main__: +if __name__ == "__main__": import sys main(sys.argv[0], sys.argv[1:]) ======================================================================== --- fs.py 05141f5122b18b6601bf0a33a3834ae48f6c3309 +++ fs.py 019161a560cc3092f62b01bbf7146d88339e1bb5 @@ -1,6 +1,6 @@ # interface to FS-like things -import urlparse +from urlparse import urlsplit import os import os.path @@ -8,7 +8,7 @@ pass def readable_fs_for_url(url): - (scheme, host, path, param, query, frag) = urlparse(url, "file") + (scheme, host, path, query, frag) = urlsplit(url, "file") if scheme == "file": return LocalReadableFS(path) elif scheme in ("http", "https", "ftp"): @@ -21,7 +21,7 @@ raise BadURL, url def writeable_fs_for_url(url): - (scheme, host, path, param, query, frag) = urlparse(url, "file") + (scheme, host, path, query, frag) = urlsplit(url, "file") if scheme == "file": return LocalWriteableFs(path) elif scheme == "sftp": @@ -44,15 +44,41 @@ def fetch(self, filenames): raise NotImplementedError - # bytes is an iterable of pairs (offset, length) + # bytes_iter is an iterable of pairs (offset, length) # this is a generator # it yields nested tuples ((offset, length), data) # subclasses should implement _real_fetch_bytes which has the same API; # but will receive massaged (seek-optimized) arguments - def fetch_bytes(self, filename, bytes): - # FIXME: implement block coalescing/decoalescing, and sort to optimize - # seeks. 
========================================================================
--- fs_sftp.py	67ebc9831df0ee859488fd2118383358061e7ce3
+++ fs_sftp.py	ae1409f264dc600ffe8109fca9f716c10e557e10
@@ -1,8 +1,9 @@
 # we need paramiko for sftp protocol support
 import paramiko
 import getpass
 import fs
 import os.path
+import base64
 
 # All of this heavily cribbed from demo{,_simple}.py in the paramiko
 # distribution, which is LGPL.
@@ -33,7 +34,7 @@
     return keys
 
 def get_user_password_host_port(hostspec):
-    username, password, hostname, port = None, None, None
+    username, password, hostname, port = None, None, None, None
     if hostspec.find("@") >= 0:
         userspec, hostspec = hostspec.split("@")
         if userspec.find(":") >= 0:
@@ -51,7 +52,7 @@
     # FIXME: support agents etc. (see demo.py in paramiko dist)
     if password is None:
         password = getpass.getpass("Password for %s@%s: " % (username, hostname))
-    return username, password, hostname
+    return username, password, hostname, port
 
 def get_host_key(hostname):
     hkeys = load_host_keys()
@@ -68,7 +69,7 @@
         self.transport = paramiko.Transport((hostname, port))
         self.transport.connect(username=username, password=password,
                                hostkey=hostkey)
-        self.client = t.open_sftp_client()
+        self.client = self.transport.open_sftp_client()
 
     def _fname(self, filename):
         return os.path.join(self.dir, filename)
@@ -115,7 +116,7 @@
     def mkdir(self, filename):
         try:
             self.client.mkdir(self._fname(filename))
-        except OSError:
+        except IOError:
             return 0
         return 1
 
@@ -126,7 +127,7 @@
         try:
            self.client.stat(self._fname(""))
            return
-        except OSError:
+        except IOError:
            pass # fall through to actually create dir
        pieces = []
        rest = self.dir
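The OSError-to-IOError switches in mkdir and in the directory-creation path match how paramiko actually reports SFTP failures: its SFTP operations raise IOError, so the old except OSError clauses could never fire and server-side errors escaped. A minimal sketch of the corrected pattern; mkdir_ok is a made-up name and client is assumed to be an already-connected paramiko.SFTPClient:

# Sketch only: client is assumed to be a connected paramiko.SFTPClient.
def mkdir_ok(client, path):
    try:
        client.mkdir(path)
    except IOError:
        # paramiko reports SFTP failures (e.g. the directory already
        # existing) as IOError, not OSError
        return 0
    return 1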
========================================================================
--- merkle_dir.py	fa38d11914a8107a2fb7c9de37b002cb0053579f
+++ merkle_dir.py	736fb122be92473b73f6de96a0e9584da9700611
@@ -348,14 +348,18 @@
             source_children = self._get_child_hashes(new_stuff)
             target_children = target._get_child_hashes(new_stuff)
             locations = {}
+            new_chunks = []
             for prefix in new_stuff:
                 source_hash = source_children[prefix]
                 target_hash = target_children[prefix]
-                new_in_source = list(source_hash.new_in_me(target_hash))
-                for id, data in self.get_chunks(new_in_source):
-                    target.add(id, data)
-                    if new_chunk_callback is not None:
-                        new_chunk_callback(id, data)
+                new_chunks += list(source_hash.new_in_me(target_hash))
+            # we build up a list of all chunks and then fetch them in a
+            # single call, to give the chunk coalescing and pipelining
+            # machinery maximum opportunity to work
+            for id, data in self.get_chunks(new_chunks):
+                target.add(id, data)
+                if new_chunk_callback is not None:
+                    new_chunk_callback(id, data)
             target.flush()
             target.commit()
         except:
========================================================================
--- monotone.py	a60a03bd596164fa7a6bd99d7007b4898f033da2
+++ monotone.py	7bfd2db49f9fd5aeee6a7c0f16f78e1d82ea9308
@@ -41,7 +41,7 @@
         output = self.run_monotone(["automate", "toposort", "-@-"],
                                    "\n".join(revisions) + "\n")
         sorted = output.split()
-        assert len(sorted) = len(revisions)
+        assert len(sorted) == len(revisions)
         return sorted
 
     def get_revision(self, rid):
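The merkle_dir.py change is the consumer side of the fs.py coalescing work: collecting every missing chunk across all prefixes and fetching them with one get_chunks call hands the transport a single large request list to sort, merge, and pipeline, instead of one tiny list per prefix. A toy model of the difference in round trips; ToyStore and its chunk dict are invented for illustration and are not the merkle_dir API:

# Toy model: a store that counts how many times it is asked for chunks,
# i.e. how many network round trips the access pattern would cost.
class ToyStore:
    def __init__(self, chunks):
        self.chunks = chunks
        self.round_trips = 0
    def get_chunks(self, ids):
        self.round_trips += 1          # one round trip per call
        for id in ids:
            yield id, self.chunks[id]

store = ToyStore({"a": "1", "b": "2", "c": "3"})
for id in ["a", "b", "c"]:             # old pattern: one call per prefix
    list(store.get_chunks([id]))
assert store.round_trips == 3

store.round_trips = 0
list(store.get_chunks(["a", "b", "c"]))  # new pattern: one call total
assert store.round_trips == 1

With real network latency every get_chunks call costs at least one round trip, so the batched form scales with the amount of new data rather than with the number of hash-tree prefixes.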