# # patch "merkle_dir.py" # from [f4ddf7aac47ab8713431d5b30db4defdb861ef9d] # to [09d854914ad5a16e07d04184f78880edb99918fc] # # patch "test_merkle_dir.py" # from [17e1616704e2089a055c4e16aa48b8ab3586449e] # to [27014de99955ad80cbc870a054f627800d8210bb] # ======================================================================== --- merkle_dir.py f4ddf7aac47ab8713431d5b30db4defdb861ef9d +++ merkle_dir.py 09d854914ad5a16e07d04184f78880edb99918fc @@ -16,8 +16,8 @@ def get(self, item): return self.items[item] - def __in__(self, item): - return item in self.items + def __contains__(self, item): + return self.items.has_key(item) def assign(self, item, values): assert len(values) == len(self.values) @@ -45,7 +45,7 @@ for key, values in self: value_txt = " ".join([str(v) for v in values]) lines.append("%s %s %s" % (self.prefix, key, value_txt)) - return zlib.compress("".join(lines)) + return zlib.compress("\n".join(lines)) # yields (key, values) def new_in_me(self, versus): @@ -198,6 +198,7 @@ child_hashes = self._get_child_hashes(bins.iterkeys()) for k in bins.iterkeys(): for id, location in bins[k]: + assert id not in child_hashes[k] child_hashes[k].assign(id, location) print ("writing hashes for %s new ids to %s hash files" % (len(self._ids_to_flush), len(bins))) @@ -228,11 +229,26 @@ # (FIXME: perhaps should split this up more; for large chunks (e.g., # initial imports) this will load the entire chunk into memory) def all_chunks(self): - id_to_locations = dict(self._all_chunk_locations()) - if id_to_locations: + return self.get_chunks(self._all_chunk_locations()) + + # id_locations is an iterable over (id, location) tuples + # yields (id, data) tuples + def get_chunks(self, id_locations): + locations_to_ids = {} + for id, location in id_locations: + if location[1] == 0: + # just go ahead and process null-length chunks directly, + # rather than calling fetch_bytes on them -- these are the + # only chunks for which the location->chunk map may not be + # one-to-one. 
+ yield (id, "") + else: + assert not locations_to_ids.has_key(location) + locations_to_ids[location] = id + if locations_to_ids: for loc, data in self._fs.fetch_bytes(self._data_file, - id_to_locations.values()): - yield id_to_locations[loc], data + locations_to_ids.keys()): + yield locations_to_ids[loc], data def flush(self): if self._locked: @@ -247,18 +263,16 @@ target_root = target._get_root_hash() new_stuff = list(source_root.new_or_different_in_me(target_root)) source_children = self._get_child_hashes(new_stuff) - target_children = self._get_child_hashes(new_stuff) + target_children = target._get_child_hashes(new_stuff) locations = {} for prefix in new_stuff: - new_in_source = source_children[prefix].new_in_me(target_children[prefix]) - for id, location in new_in_source: - locations[location] = id - for source_location, data in self._fs.fetch_bytes(self._data_file, - locations.keys()): - id = locations[source_location] - target.add(id, data) - if new_chunk_callback is not None: - new_chunk_callback(id, data) + source_hash = source_children[prefix] + target_hash = target_children[prefix] + new_in_source = list(source_hash.new_in_me(target_hash)) + for id, data in self.get_chunks(new_in_source): + target.add(id, data) + if new_chunk_callback is not None: + new_chunk_callback(id, data) target.flush() target.commit() except: ======================================================================== --- test_merkle_dir.py 17e1616704e2089a055c4e16aa48b8ab3586449e +++ test_merkle_dir.py 27014de99955ad80cbc870a054f627800d8210bb @@ -8,7 +8,7 @@ return random.randrange(2) def randid(): - return "".join([random.choice("0123456789zbcdef") for i in xrange(40)]) + return "".join([random.choice("0123456789abcdef") for i in xrange(40)]) def randdata(): length = random.choice([0, 1, None, None, None, None]) @@ -34,58 +34,73 @@ checked += 1 assert checked == len(expected) +def new_in(thing, versus): + new = {} + for key, val in thing.iteritems(): + if not versus.has_key(key): + new[key] = val + return new + def run_tests(): random.seed(0) try: - a_dir = tempfile.mkdtemp() - b_dir = tempfile.mkdtemp() - a_fs = fs.LocalWriteableFs(a_dir) - b_fs = fs.LocalWriteableFs(b_dir) + try: + a_dir = tempfile.mkdtemp() + b_dir = tempfile.mkdtemp() + a_fs = fs.LocalWriteableFs(a_dir) + b_fs = fs.LocalWriteableFs(b_dir) - in_a = {} - in_b = {} + in_a = {} + in_b = {} - for i in xrange(1000): - print i - a = merkle_dir.MerkleDir(a_fs) - b = merkle_dir.MerkleDir(b_fs) - if flip(): - add_to(a, in_a) - if flip(): - add_to(b, in_b) - - if flip(): - subject = a - in_subject = in_a - object = b - in_object = in_b - else: - subject = b - in_subject = in_b - object = a - in_object = in_a - - verb = random.choice(["push", "pull", "sync"]) - print verb - if verb == "push": - subject.push(object) - in_object.update(in_subject) - elif verb == "pull": - subject.pull(object) - in_subject.update(in_object) - elif verb == "sync": - subject.sync(object) - in_subject.update(in_object) - in_object.update(in_subject) + for i in xrange(1000): + print i + a = merkle_dir.MerkleDir(a_fs) + b = merkle_dir.MerkleDir(b_fs) + if flip(): + add_to(a, in_a) + if flip(): + add_to(b, in_b) - check_matches(a, in_a) - check_matches(b, in_b) + if flip(): + subject_name = "a" + subject = a + in_subject = in_a + object_name = "b" + object = b + in_object = in_b + else: + subject_name = "b" + subject = b + in_subject = in_b + object_name = "a" + object = a + in_object = in_a + verb = random.choice(["push", "pull", "sync", "nothing"]) + print "%s(%s, %s)" % 
(verb, subject_name, object_name) + if verb == "push": + subject.push(object) + in_object.update(in_subject) + elif verb == "pull": + subject.pull(object) + in_subject.update(in_object) + elif verb == "sync": + subject.sync(object) + in_subject.update(in_object) + in_object.update(in_subject) + elif verb == "nothing": + pass + + check_matches(a, in_a) + check_matches(b, in_b) + except: + import sys, pdb + pdb.post_mortem(sys.exc_traceback) finally: - #shutil.rmtree(a_dir, ignore_errors=1) - #shutil.rmtree(b_dir, ignore_errors=1) - pass + shutil.rmtree(a_dir, ignore_errors=1) + shutil.rmtree(b_dir, ignore_errors=1) if __name__ == "__main__":
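
Note (not part of the patch): the core of the merkle_dir.py change is the new get_chunks() helper, which inverts the (id -> location) pairs into a (location -> id) map so that one batched fetch_bytes() call can retrieve many non-empty chunks, while null-length chunks are yielded directly because several ids can legitimately share the same empty location. The sketch below is a toy, self-contained model of that batching idea; fake_fetch_bytes(), get_chunks_sketch() and the sample data are made up for illustration and are not the real fs/MerkleDir API.

# Standalone sketch of the get_chunks() batching idea (illustrative only).

def fake_fetch_bytes(data, locations):
    # data: the whole data file as a string; locations: (offset, length) pairs
    for offset, length in locations:
        yield (offset, length), data[offset:offset + length]

def get_chunks_sketch(data, id_locations):
    locations_to_ids = {}
    for id, location in id_locations:
        if location[1] == 0:
            # null-length chunks are handled directly; several ids may share
            # the same empty location, so they cannot go into the map
            yield id, ""
        else:
            assert location not in locations_to_ids
            locations_to_ids[location] = id
    if locations_to_ids:
        # one batched fetch covers every non-empty chunk
        for loc, chunk in fake_fetch_bytes(data, locations_to_ids.keys()):
            yield locations_to_ids[loc], chunk

if __name__ == "__main__":
    data_file = "aaabbbbcc"
    wanted = [("id-a", (0, 3)), ("id-b", (3, 4)), ("id-empty", (7, 0))]
    for id, chunk in get_chunks_sketch(data_file, wanted):
        print("%s -> %r" % (id, chunk))

The same inversion is what lets push()/pull() in the patch call self.get_chunks(new_in_source) instead of duplicating the fetch logic inline.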