# # # add_file "ancestry.cc" # content [a8362c21538ff7576166507a9b87fd4ebde69a00] # # patch "Makefile.am" # from [61aa922983ae89a8bb50cf5ad203cbadf1e943fb] # to [607163f9d88dec6858dfada0e5b0169fa5ec7bb4] # # patch "database.hh" # from [31ba75343ca21813eb814b814d6576440f4f0cfc] # to [a4a07097a63f5877e7d4b5ca36f39ad263ff685c] # # patch "rev_types.hh" # from [99327382c7c617bc5965569a55d7789d95b0fa99] # to [af3c5cf58a78153d7b6a515a25dd4d6d584b851f] # # patch "revision.cc" # from [c265c4dfdc6ad23d44811a145d47cc43c263fa3b] # to [5b5f6c11cf218a22285d6939f14a1d5d64d3e482] # # patch "roster.cc" # from [98ea42f99e59a884a7f00dfab6a76334c5b89772] # to [2ffc18c20c2e53c540b7312a125681d60a6778f9] # # patch "roster.hh" # from [61c885cf725e7b20bbbe5a8e595904a4edc38c14] # to [ef1b40df5043af074ee007ea5efdb726e1b4112f] # ============================================================ --- ancestry.cc a8362c21538ff7576166507a9b87fd4ebde69a00 +++ ancestry.cc a8362c21538ff7576166507a9b87fd4ebde69a00 @@ -0,0 +1,575 @@ +// Copyright (C) 2004 Graydon Hoare +// +// This program is made available under the GNU GPL version 2.0 or +// greater. See the accompanying file COPYING for details. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the +// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. + +#include "base.hh" +#include "sanity.hh" +#include "revision.hh" +#include "rev_height.hh" +#include "roster.hh" + +#include "database.hh" +#include "interner.hh" + +#include "safe_map.hh" +#include +#include +#include + +using std::make_pair; +using std::map; +using std::max; +using std::multimap; +using std::pair; +using std::set; +using std::stack; +using std::vector; + +using boost::shared_ptr; +using boost::dynamic_bitset; + +// For a surprisingly long time, we have been using an algorithm which +// is nonsense, based on a misunderstanding of what "LCA" means. The +// LCA of two nodes is *not* the first common ancestor which you find +// when iteratively expanding their ancestor sets. Instead, the LCA is +// the common ancestor which is a descendent of all other common +// ancestors. +// +// In general, a set of nodes in a DAG doesn't always have an +// LCA. There might be multiple common ancestors which are not parents +// of one another. So we implement something which is "functionally +// useful" for finding a merge point (and moreover, which always +// terminates): we find an LCA of the input set if it exists, +// otherwise we replace the input set with the nodes we did find and +// repeat. +// +// All previous discussions in monotone-land, before say August 2005, +// of LCA (and LCAD) are essentially wrong due to our silly +// misunderstanding. It's unfortunate, but our half-baked +// approximations worked almost well enough to take us through 3 years +// of deployed use. Hopefully this more accurate new use will serve us +// even longer. + +typedef unsigned long ctx; +typedef dynamic_bitset<> bitmap; +typedef shared_ptr shared_bitmap; + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + multimap const & graph, + map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union); + +void +find_common_ancestor_for_merge(database & db, + revision_id const & left, + revision_id const & right, + revision_id & anc) +{ + interner intern; + set leaves; + map ancestors; + + shared_bitmap isect = shared_bitmap(new bitmap()); + shared_bitmap isect_ancs = shared_bitmap(new bitmap()); + + leaves.insert(intern.intern(left.inner()())); + leaves.insert(intern.intern(right.inner()())); + + + multimap inverse_graph; + { + multimap graph; + db.get_revision_ancestry(graph); + typedef multimap::const_iterator gi; + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(make_pair(i->second, i->first)); + } + + + while (leaves.size() != 1) + { + isect->clear(); + isect_ancs->clear(); + + // First intersect all ancestors of current leaf set + for (set::const_iterator i = leaves.begin(); i != leaves.end(); ++i) + { + ctx curr_leaf = *i; + shared_bitmap curr_leaf_ancestors; + map::const_iterator j = ancestors.find(*i); + if (j != ancestors.end()) + curr_leaf_ancestors = j->second; + else + { + curr_leaf_ancestors = shared_bitmap(new bitmap()); + calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf), + origin::internal), + inverse_graph, ancestors, + curr_leaf_ancestors); + } + if (isect->size() > curr_leaf_ancestors->size()) + curr_leaf_ancestors->resize(isect->size()); + + if (curr_leaf_ancestors->size() > isect->size()) + isect->resize(curr_leaf_ancestors->size()); + + if (i == leaves.begin()) + *isect = *curr_leaf_ancestors; + else + (*isect) &= (*curr_leaf_ancestors); + } + + // isect is now the set of common ancestors of leaves, but that is not enough. + // We need the set of leaves of isect; to do that we calculate the set of + // ancestors of isect, in order to subtract it from isect (below). + set new_leaves; + for (ctx i = 0; i < isect->size(); ++i) + { + if (isect->test(i)) + { + calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i), + origin::internal), + inverse_graph, ancestors, isect_ancs); + } + } + + // Finally, the subtraction step: for any element i of isect, if + // it's *not* in isect_ancs, it survives as a new leaf. + leaves.clear(); + for (ctx i = 0; i < isect->size(); ++i) + { + if (!isect->test(i)) + continue; + if (i < isect_ancs->size() && isect_ancs->test(i)) + continue; + safe_insert(leaves, i); + } + } + + I(leaves.size() == 1); + anc = revision_id(intern.lookup(*leaves.begin()), origin::internal); +} + +static void +add_bitset_to_union(shared_bitmap src, + shared_bitmap dst) +{ + if (dst->size() > src->size()) + src->resize(dst->size()); + if (src->size() > dst->size()) + dst->resize(src->size()); + *dst |= *src; +} + + +static void +calculate_ancestors_from_graph(interner & intern, + revision_id const & init, + multimap const & graph, + map< ctx, shared_bitmap > & ancestors, + shared_bitmap & total_union) +{ + typedef multimap::const_iterator gi; + stack stk; + + stk.push(intern.intern(init.inner()())); + + while (! stk.empty()) + { + ctx us = stk.top(); + revision_id rev(intern.lookup(us), origin::internal); + + pair parents = graph.equal_range(rev); + bool pushed = false; + + // first make sure all parents are done + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + if (ancestors.find(parent) == ancestors.end()) + { + stk.push(parent); + pushed = true; + break; + } + } + + // if we pushed anything we stop now. we'll come back later when all + // the parents are done. + if (pushed) + continue; + + shared_bitmap b = shared_bitmap(new bitmap()); + + for (gi i = parents.first; i != parents.second; ++i) + { + ctx parent = intern.intern(i->second.inner()()); + + // set all parents + if (b->size() <= parent) + b->resize(parent + 1); + b->set(parent); + + // ensure all parents are loaded into the ancestor map + I(ancestors.find(parent) != ancestors.end()); + + // union them into our map + map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); + I(j != ancestors.end()); + add_bitset_to_union(j->second, b); + } + + add_bitset_to_union(b, total_union); + ancestors.insert(make_pair(us, b)); + stk.pop(); + } +} + +void +toposort(database & db, + set const & revisions, + vector & sorted) +{ + map work; + + for (set::const_iterator i = revisions.begin(); + i != revisions.end(); ++i) + { + rev_height height; + db.get_rev_height(*i, height); + work.insert(make_pair(height, *i)); + } + + sorted.clear(); + + for (map::const_iterator i = work.begin(); + i != work.end(); ++i) + { + sorted.push_back(i->second); + } +} + +static void +accumulate_strict_ancestors(database & db, + revision_id const & start, + set & all_ancestors, + multimap const & inverse_graph, + rev_height const & min_height) +{ + typedef multimap::const_iterator gi; + + vector frontier; + frontier.push_back(start); + + while (!frontier.empty()) + { + revision_id rid = frontier.back(); + frontier.pop_back(); + pair parents = inverse_graph.equal_range(rid); + for (gi i = parents.first; i != parents.second; ++i) + { + revision_id const & parent = i->second; + if (all_ancestors.find(parent) == all_ancestors.end()) + { + // prune if we're below min_height + rev_height h; + db.get_rev_height(parent, h); + if (h >= min_height) + { + all_ancestors.insert(parent); + frontier.push_back(parent); + } + } + } + } +} + +// this call is equivalent to running: +// erase(remove_if(candidates.begin(), candidates.end(), p)); +// erase_ancestors(candidates, db); +// however, by interleaving the two operations, it can in common cases make +// many fewer calls to the predicate, which can be a significant speed win. + +void +erase_ancestors_and_failures(database & db, + std::set & candidates, + is_failure & p, + multimap *inverse_graph_cache_ptr) +{ + // Load up the ancestry graph + multimap inverse_graph; + + if (candidates.empty()) + return; + + if (inverse_graph_cache_ptr == NULL) + inverse_graph_cache_ptr = &inverse_graph; + if (inverse_graph_cache_ptr->empty()) + { + multimap graph; + db.get_revision_ancestry(graph); + for (multimap::const_iterator i = graph.begin(); + i != graph.end(); ++i) + inverse_graph_cache_ptr->insert(make_pair(i->second, i->first)); + } + + // Keep a set of all ancestors that we've traversed -- to avoid + // combinatorial explosion. + set all_ancestors; + + rev_height min_height; + db.get_rev_height(*candidates.begin(), min_height); + for (std::set::const_iterator it = candidates.begin(); it != candidates.end(); it++) + { + rev_height h; + db.get_rev_height(*it, h); + if (h < min_height) + min_height = h; + } + + vector todo(candidates.begin(), candidates.end()); + std::random_shuffle(todo.begin(), todo.end()); + + size_t predicates = 0; + while (!todo.empty()) + { + revision_id rid = todo.back(); + todo.pop_back(); + // check if this one has already been eliminated + if (all_ancestors.find(rid) != all_ancestors.end()) + continue; + // and then whether it actually should stay in the running: + ++predicates; + if (p(rid)) + { + candidates.erase(rid); + continue; + } + // okay, it is good enough that all its ancestors should be + // eliminated + accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height); + } + + // now go and eliminate the ancestors + for (set::const_iterator i = all_ancestors.begin(); + i != all_ancestors.end(); ++i) + candidates.erase(*i); + + L(FL("called predicate %s times") % predicates); +} + +// This function looks at a set of revisions, and for every pair A, B in that +// set such that A is an ancestor of B, it erases A. + +namespace +{ + struct no_failures : public is_failure + { + virtual bool operator()(revision_id const & rid) + { + return false; + } + }; +} +void +erase_ancestors(database & db, set & revisions) +{ + no_failures p; + erase_ancestors_and_failures(db, revisions, p); +} + +// This function takes a revision A and a set of revision Bs, calculates the +// ancestry of each, and returns the set of revisions that are in A's ancestry +// but not in the ancestry of any of the Bs. It tells you 'what's new' in A +// that's not in the Bs. If the output set if non-empty, then A will +// certainly be in it; but the output set might be empty. +void +ancestry_difference(database & db, revision_id const & a, + set const & bs, + set & new_stuff) +{ + new_stuff.clear(); + typedef multimap::const_iterator gi; + multimap graph; + multimap inverse_graph; + + db.get_revision_ancestry(graph); + for (gi i = graph.begin(); i != graph.end(); ++i) + inverse_graph.insert(make_pair(i->second, i->first)); + + interner intern; + map< ctx, shared_bitmap > ancestors; + + shared_bitmap u = shared_bitmap(new bitmap()); + + for (set::const_iterator i = bs.begin(); + i != bs.end(); ++i) + { + calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u); + ctx c = intern.intern(i->inner()()); + if (u->size() <= c) + u->resize(c + 1); + u->set(c); + } + + shared_bitmap au = shared_bitmap(new bitmap()); + calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au); + { + ctx c = intern.intern(a.inner()()); + if (au->size() <= c) + au->resize(c + 1); + au->set(c); + } + + au->resize(max(au->size(), u->size())); + u->resize(max(au->size(), u->size())); + + *au -= *u; + + for (unsigned int i = 0; i != au->size(); ++i) + { + if (au->test(i)) + { + revision_id rid(intern.lookup(i), origin::internal); + if (!null_id(rid)) + new_stuff.insert(rid); + } + } +} + +void +select_nodes_modified_by_rev(database & db, + revision_t const & rev, + roster_t const new_roster, + set & nodes_modified) +{ + nodes_modified.clear(); + + for (edge_map::const_iterator i = rev.edges.begin(); + i != rev.edges.end(); ++i) + { + set edge_nodes_modified; + roster_t old_roster; + db.get_roster(edge_old_revision(i), old_roster); + select_nodes_modified_by_cset(edge_changes(i), + old_roster, + new_roster, + edge_nodes_modified); + + copy(edge_nodes_modified.begin(), edge_nodes_modified.end(), + inserter(nodes_modified, nodes_modified.begin())); + } +} + +// These functions create new ancestry! + +namespace { + struct true_node_id_source + : public node_id_source + { + true_node_id_source(database & db) : db(db) {} + virtual node_id next() + { + node_id n = db.next_node_id(); + I(!temp_node(n)); + return n; + } + database & db; + }; +} + +// WARNING: these functions have no unit tests. All the real work +// should be done in the alternative overloads in roster.cc, where it +// can be unit tested. (See comments in that file for further explanation.) +static void +make_roster_for_merge(revision_t const & rev, revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings, + database & db, node_id_source & nis) +{ + edge_map::const_iterator i = rev.edges.begin(); + revision_id const & left_rid = edge_old_revision(i); + cset const & left_cs = edge_changes(i); + ++i; + revision_id const & right_rid = edge_old_revision(i); + cset const & right_cs = edge_changes(i); + + I(!null_id(left_rid) && !null_id(right_rid)); + cached_roster left_cached, right_cached; + db.get_roster(left_rid, left_cached); + db.get_roster(right_rid, right_cached); + + set left_uncommon_ancestors, right_uncommon_ancestors; + db.get_uncommon_ancestors(left_rid, right_rid, + left_uncommon_ancestors, + right_uncommon_ancestors); + + make_roster_for_merge(left_rid, *left_cached.first, *left_cached.second, + left_cs, left_uncommon_ancestors, + right_rid, *right_cached.first, *right_cached.second, + right_cs, right_uncommon_ancestors, + new_rid, + new_roster, new_markings, + nis); +} + +static void +make_roster_for_nonmerge(revision_t const & rev, + revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings, + database & db, node_id_source & nis) +{ + revision_id const & parent_rid = edge_old_revision(rev.edges.begin()); + cset const & parent_cs = edge_changes(rev.edges.begin()); + db.get_roster(parent_rid, new_roster, new_markings); + make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis); +} + +void +make_roster_for_revision(database & db, node_id_source & nis, + revision_t const & rev, revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings) +{ + MM(rev); + MM(new_rid); + MM(new_roster); + MM(new_markings); + if (rev.edges.size() == 1) + make_roster_for_nonmerge(rev, new_rid, new_roster, new_markings, db, nis); + else if (rev.edges.size() == 2) + make_roster_for_merge(rev, new_rid, new_roster, new_markings, db, nis); + else + I(false); + + // If nis is not a true_node_id_source, we have to assume we can get temp + // node ids out of it. ??? Provide a predicate method on node_id_sources + // instead of doing a typeinfo comparison. + new_roster.check_sane_against(new_markings, + typeid(nis) != typeid(true_node_id_source)); +} + +void +make_roster_for_revision(database & db, + revision_t const & rev, revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings) +{ + true_node_id_source nis(db); + make_roster_for_revision(db, nis, rev, new_rid, new_roster, new_markings); +} + + + + +// Local Variables: +// mode: C++ +// fill-column: 76 +// c-file-style: "gnu" +// indent-tabs-mode: nil +// End: +// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: ============================================================ --- Makefile.am 61aa922983ae89a8bb50cf5ad203cbadf1e943fb +++ Makefile.am 607163f9d88dec6858dfada0e5b0169fa5ec7bb4 @@ -53,7 +53,7 @@ MOST_SOURCES = \ merkle_tree.cc merkle_tree.hh \ lcs.cc lcs.hh \ rcs_import.cc rcs_import.hh \ - revision.cc revision.hh \ + revision.cc ancestry.cc revision.hh \ cset.cc cset.hh \ roster.cc roster.hh \ mt_version.cc mt_version.hh \ @@ -313,11 +313,11 @@ UNIT_TEST_OBJ_SUPPORT = \ # testing, but can be used "as is" from the main build. (many of # these _should_ have unit tests, but they haven't been written yet.) UNIT_TEST_OBJ_SUPPORT = \ - mtn-cert.$(OBJEXT) mtn-constants.$(OBJEXT) \ - mtn-database.$(OBJEXT) mtn-epoch.$(OBJEXT) \ - mtn-file_io.$(OBJEXT) mtn-gzip.$(OBJEXT) mtn-hmac.$(OBJEXT) \ - mtn-inodeprint.$(OBJEXT) mtn-key_store.$(OBJEXT) \ - mtn-keys.$(OBJEXT) mtn-lcs.$(OBJEXT) mtn-legacy.$(OBJEXT) \ + mtn-ancestry.$(OBJEXT) mtn-cert.$(OBJEXT) \ + mtn-constants.$(OBJEXT) mtn-database.$(OBJEXT) \ + mtn-epoch.$(OBJEXT) mtn-file_io.$(OBJEXT) mtn-gzip.$(OBJEXT) \ + mtn-hmac.$(OBJEXT) mtn-inodeprint.$(OBJEXT) \ + mtn-key_store.$(OBJEXT) mtn-keys.$(OBJEXT) mtn-lcs.$(OBJEXT) \ mtn-lua.$(OBJEXT) mtn-lua_hooks.$(OBJEXT) \ mtn-merkle_tree.$(OBJEXT) mtn-pcrewrap.$(OBJEXT) \ mtn-project.$(OBJEXT) mtn-sanity.$(OBJEXT) \ ============================================================ --- database.hh 31ba75343ca21813eb814b814d6576440f4f0cfc +++ database.hh a4a07097a63f5877e7d4b5ca36f39ad263ff685c @@ -443,51 +443,6 @@ void check_db(database & db); // not a member function, defined in database_check.cc void check_db(database & db); -// Parent maps are used in a number of places to keep track of all the -// parent rosters of a given revision. - -inline revision_id const & parent_id(parent_entry const & p) -{ - return p.first; -} - -inline revision_id const & parent_id(parent_map::const_iterator i) -{ - return i->first; -} - -inline cached_roster const & -parent_cached_roster(parent_entry const & p) -{ - return p.second; -} - -inline cached_roster const & -parent_cached_roster(parent_map::const_iterator i) -{ - return i->second; -} - -inline roster_t const & parent_roster(parent_entry const & p) -{ - return *(p.second.first); -} - -inline roster_t const & parent_roster(parent_map::const_iterator i) -{ - return *(i->second.first); -} - -inline marking_map const & parent_marking(parent_entry const & p) -{ - return *(p.second.second); -} - -inline marking_map const & parent_marking(parent_map::const_iterator i) -{ - return *(i->second.second); -} - // Transaction guards nest. Acquire one in any scope you'd like // transaction-protected, and it'll make sure the db aborts a transaction // if there's any exception before you call commit(). ============================================================ --- rev_types.hh 99327382c7c617bc5965569a55d7789d95b0fa99 +++ rev_types.hh af3c5cf58a78153d7b6a515a25dd4d6d584b851f @@ -35,6 +35,12 @@ struct editable_tree; struct cset; struct editable_tree; +const node_id first_temp_node = 1U << (sizeof(node_id) * 8 - 1); +inline bool temp_node(node_id n) +{ + return n & first_temp_node; +} + // full definitions in graph.hh struct rev_graph; struct reconstruction_graph; ============================================================ --- revision.cc c265c4dfdc6ad23d44811a145d47cc43c263fa3b +++ revision.cc 5b5f6c11cf218a22285d6939f14a1d5d64d3e482 @@ -8,35 +8,19 @@ // PURPOSE. #include "base.hh" -#include "sanity.hh" #include "revision.hh" -#include "cset.hh" -#include "rev_height.hh" #include "roster.hh" -#include "database.hh" -#include "interner.hh" +#include "sanity.hh" #include "basic_io.hh" #include "transforms.hh" #include "safe_map.hh" -#include "vector.hh" -#include -#include -#include #include using std::make_pair; using std::map; -using std::max; -using std::multimap; -using std::pair; -using std::set; -using std::stack; using std::string; -using std::vector; - -using boost::dynamic_bitset; using boost::shared_ptr; void revision_t::check_sane() const @@ -106,444 +90,7 @@ revision_t::operator=(revision_t const & return *this; } - -// For a surprisingly long time, we have been using an algorithm which -// is nonsense, based on a misunderstanding of what "LCA" means. The -// LCA of two nodes is *not* the first common ancestor which you find -// when iteratively expanding their ancestor sets. Instead, the LCA is -// the common ancestor which is a descendent of all other common -// ancestors. -// -// In general, a set of nodes in a DAG doesn't always have an -// LCA. There might be multiple common ancestors which are not parents -// of one another. So we implement something which is "functionally -// useful" for finding a merge point (and moreover, which always -// terminates): we find an LCA of the input set if it exists, -// otherwise we replace the input set with the nodes we did find and -// repeat. -// -// All previous discussions in monotone-land, before say August 2005, -// of LCA (and LCAD) are essentially wrong due to our silly -// misunderstanding. It's unfortunate, but our half-baked -// approximations worked almost well enough to take us through 3 years -// of deployed use. Hopefully this more accurate new use will serve us -// even longer. - -typedef unsigned long ctx; -typedef dynamic_bitset<> bitmap; -typedef shared_ptr shared_bitmap; - -static void -calculate_ancestors_from_graph(interner & intern, - revision_id const & init, - multimap const & graph, - map< ctx, shared_bitmap > & ancestors, - shared_bitmap & total_union); - void -find_common_ancestor_for_merge(database & db, - revision_id const & left, - revision_id const & right, - revision_id & anc) -{ - interner intern; - set leaves; - map ancestors; - - shared_bitmap isect = shared_bitmap(new bitmap()); - shared_bitmap isect_ancs = shared_bitmap(new bitmap()); - - leaves.insert(intern.intern(left.inner()())); - leaves.insert(intern.intern(right.inner()())); - - - multimap inverse_graph; - { - multimap graph; - db.get_revision_ancestry(graph); - typedef multimap::const_iterator gi; - for (gi i = graph.begin(); i != graph.end(); ++i) - inverse_graph.insert(make_pair(i->second, i->first)); - } - - - while (leaves.size() != 1) - { - isect->clear(); - isect_ancs->clear(); - - // First intersect all ancestors of current leaf set - for (set::const_iterator i = leaves.begin(); i != leaves.end(); ++i) - { - ctx curr_leaf = *i; - shared_bitmap curr_leaf_ancestors; - map::const_iterator j = ancestors.find(*i); - if (j != ancestors.end()) - curr_leaf_ancestors = j->second; - else - { - curr_leaf_ancestors = shared_bitmap(new bitmap()); - calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf), - origin::internal), - inverse_graph, ancestors, - curr_leaf_ancestors); - } - if (isect->size() > curr_leaf_ancestors->size()) - curr_leaf_ancestors->resize(isect->size()); - - if (curr_leaf_ancestors->size() > isect->size()) - isect->resize(curr_leaf_ancestors->size()); - - if (i == leaves.begin()) - *isect = *curr_leaf_ancestors; - else - (*isect) &= (*curr_leaf_ancestors); - } - - // isect is now the set of common ancestors of leaves, but that is not enough. - // We need the set of leaves of isect; to do that we calculate the set of - // ancestors of isect, in order to subtract it from isect (below). - set new_leaves; - for (ctx i = 0; i < isect->size(); ++i) - { - if (isect->test(i)) - { - calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i), - origin::internal), - inverse_graph, ancestors, isect_ancs); - } - } - - // Finally, the subtraction step: for any element i of isect, if - // it's *not* in isect_ancs, it survives as a new leaf. - leaves.clear(); - for (ctx i = 0; i < isect->size(); ++i) - { - if (!isect->test(i)) - continue; - if (i < isect_ancs->size() && isect_ancs->test(i)) - continue; - safe_insert(leaves, i); - } - } - - I(leaves.size() == 1); - anc = revision_id(intern.lookup(*leaves.begin()), origin::internal); -} - -static void -add_bitset_to_union(shared_bitmap src, - shared_bitmap dst) -{ - if (dst->size() > src->size()) - src->resize(dst->size()); - if (src->size() > dst->size()) - dst->resize(src->size()); - *dst |= *src; -} - - -static void -calculate_ancestors_from_graph(interner & intern, - revision_id const & init, - multimap const & graph, - map< ctx, shared_bitmap > & ancestors, - shared_bitmap & total_union) -{ - typedef multimap::const_iterator gi; - stack stk; - - stk.push(intern.intern(init.inner()())); - - while (! stk.empty()) - { - ctx us = stk.top(); - revision_id rev(intern.lookup(us), origin::internal); - - pair parents = graph.equal_range(rev); - bool pushed = false; - - // first make sure all parents are done - for (gi i = parents.first; i != parents.second; ++i) - { - ctx parent = intern.intern(i->second.inner()()); - if (ancestors.find(parent) == ancestors.end()) - { - stk.push(parent); - pushed = true; - break; - } - } - - // if we pushed anything we stop now. we'll come back later when all - // the parents are done. - if (pushed) - continue; - - shared_bitmap b = shared_bitmap(new bitmap()); - - for (gi i = parents.first; i != parents.second; ++i) - { - ctx parent = intern.intern(i->second.inner()()); - - // set all parents - if (b->size() <= parent) - b->resize(parent + 1); - b->set(parent); - - // ensure all parents are loaded into the ancestor map - I(ancestors.find(parent) != ancestors.end()); - - // union them into our map - map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent); - I(j != ancestors.end()); - add_bitset_to_union(j->second, b); - } - - add_bitset_to_union(b, total_union); - ancestors.insert(make_pair(us, b)); - stk.pop(); - } -} - -void -toposort(database & db, - set const & revisions, - vector & sorted) -{ - map work; - - for (set::const_iterator i = revisions.begin(); - i != revisions.end(); ++i) - { - rev_height height; - db.get_rev_height(*i, height); - work.insert(make_pair(height, *i)); - } - - sorted.clear(); - - for (map::const_iterator i = work.begin(); - i != work.end(); ++i) - { - sorted.push_back(i->second); - } -} - -static void -accumulate_strict_ancestors(database & db, - revision_id const & start, - set & all_ancestors, - multimap const & inverse_graph, - rev_height const & min_height) -{ - typedef multimap::const_iterator gi; - - vector frontier; - frontier.push_back(start); - - while (!frontier.empty()) - { - revision_id rid = frontier.back(); - frontier.pop_back(); - pair parents = inverse_graph.equal_range(rid); - for (gi i = parents.first; i != parents.second; ++i) - { - revision_id const & parent = i->second; - if (all_ancestors.find(parent) == all_ancestors.end()) - { - // prune if we're below min_height - rev_height h; - db.get_rev_height(parent, h); - if (h >= min_height) - { - all_ancestors.insert(parent); - frontier.push_back(parent); - } - } - } - } -} - -// this call is equivalent to running: -// erase(remove_if(candidates.begin(), candidates.end(), p)); -// erase_ancestors(candidates, db); -// however, by interleaving the two operations, it can in common cases make -// many fewer calls to the predicate, which can be a significant speed win. - -void -erase_ancestors_and_failures(database & db, - std::set & candidates, - is_failure & p, - multimap *inverse_graph_cache_ptr) -{ - // Load up the ancestry graph - multimap inverse_graph; - - if (candidates.empty()) - return; - - if (inverse_graph_cache_ptr == NULL) - inverse_graph_cache_ptr = &inverse_graph; - if (inverse_graph_cache_ptr->empty()) - { - multimap graph; - db.get_revision_ancestry(graph); - for (multimap::const_iterator i = graph.begin(); - i != graph.end(); ++i) - inverse_graph_cache_ptr->insert(make_pair(i->second, i->first)); - } - - // Keep a set of all ancestors that we've traversed -- to avoid - // combinatorial explosion. - set all_ancestors; - - rev_height min_height; - db.get_rev_height(*candidates.begin(), min_height); - for (std::set::const_iterator it = candidates.begin(); it != candidates.end(); it++) - { - rev_height h; - db.get_rev_height(*it, h); - if (h < min_height) - min_height = h; - } - - vector todo(candidates.begin(), candidates.end()); - std::random_shuffle(todo.begin(), todo.end()); - - size_t predicates = 0; - while (!todo.empty()) - { - revision_id rid = todo.back(); - todo.pop_back(); - // check if this one has already been eliminated - if (all_ancestors.find(rid) != all_ancestors.end()) - continue; - // and then whether it actually should stay in the running: - ++predicates; - if (p(rid)) - { - candidates.erase(rid); - continue; - } - // okay, it is good enough that all its ancestors should be - // eliminated - accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height); - } - - // now go and eliminate the ancestors - for (set::const_iterator i = all_ancestors.begin(); - i != all_ancestors.end(); ++i) - candidates.erase(*i); - - L(FL("called predicate %s times") % predicates); -} - -// This function looks at a set of revisions, and for every pair A, B in that -// set such that A is an ancestor of B, it erases A. - -namespace -{ - struct no_failures : public is_failure - { - virtual bool operator()(revision_id const & rid) - { - return false; - } - }; -} -void -erase_ancestors(database & db, set & revisions) -{ - no_failures p; - erase_ancestors_and_failures(db, revisions, p); -} - -// This function takes a revision A and a set of revision Bs, calculates the -// ancestry of each, and returns the set of revisions that are in A's ancestry -// but not in the ancestry of any of the Bs. It tells you 'what's new' in A -// that's not in the Bs. If the output set if non-empty, then A will -// certainly be in it; but the output set might be empty. -void -ancestry_difference(database & db, revision_id const & a, - set const & bs, - set & new_stuff) -{ - new_stuff.clear(); - typedef multimap::const_iterator gi; - multimap graph; - multimap inverse_graph; - - db.get_revision_ancestry(graph); - for (gi i = graph.begin(); i != graph.end(); ++i) - inverse_graph.insert(make_pair(i->second, i->first)); - - interner intern; - map< ctx, shared_bitmap > ancestors; - - shared_bitmap u = shared_bitmap(new bitmap()); - - for (set::const_iterator i = bs.begin(); - i != bs.end(); ++i) - { - calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u); - ctx c = intern.intern(i->inner()()); - if (u->size() <= c) - u->resize(c + 1); - u->set(c); - } - - shared_bitmap au = shared_bitmap(new bitmap()); - calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au); - { - ctx c = intern.intern(a.inner()()); - if (au->size() <= c) - au->resize(c + 1); - au->set(c); - } - - au->resize(max(au->size(), u->size())); - u->resize(max(au->size(), u->size())); - - *au -= *u; - - for (unsigned int i = 0; i != au->size(); ++i) - { - if (au->test(i)) - { - revision_id rid(intern.lookup(i), origin::internal); - if (!null_id(rid)) - new_stuff.insert(rid); - } - } -} - -void -select_nodes_modified_by_rev(database & db, - revision_t const & rev, - roster_t const new_roster, - set & nodes_modified) -{ - nodes_modified.clear(); - - for (edge_map::const_iterator i = rev.edges.begin(); - i != rev.edges.end(); ++i) - { - set edge_nodes_modified; - roster_t old_roster; - db.get_roster(edge_old_revision(i), old_roster); - select_nodes_modified_by_cset(edge_changes(i), - old_roster, - new_roster, - edge_nodes_modified); - - copy(edge_nodes_modified.begin(), edge_nodes_modified.end(), - inserter(nodes_modified, nodes_modified.begin())); - } -} - - -void make_revision(revision_id const & old_rev_id, roster_t const & old_roster, roster_t const & new_roster, ============================================================ --- roster.cc 98ea42f99e59a884a7f00dfab6a76334c5b89772 +++ roster.cc 2ffc18c20c2e53c540b7312a125681d60a6778f9 @@ -130,43 +130,32 @@ dump(marking_map const & markings, strin out = oss.str(); } -namespace -{ - // - // We have a few concepts of "nullness" here: - // - // - the_null_node is a node_id. It does not correspond to a real node; - // it's an id you use for the parent of the root, or of any node which - // is detached. - // - // - the root node has a real node id, just like any other directory. - // - // - the path_component whose string representation is "", the empty - // string, is the *name* of the root node. write it as - // path_component() and test for it with component.empty(). - // - // - similarly, the file_path whose string representation is "" also - // names the root node. write it as file_path() and test for it - // with path.empty(). - // - // - there is no file_path or path_component corresponding to the_null_node. - // - // We do this in order to support the notion of moving the root directory - // around, or applying attributes to the root directory. Note that the - // only supported way to move the root is with the 'pivot_root' operation, - // which atomically turns the root directory into a subdirectory and some - // existing subdirectory into the root directory. This is an UI constraint, - // not a constraint at this level. +// +// We have a few concepts of "nullness" here: +// +// - the_null_node is a node_id. It does not correspond to a real node; +// it's an id you use for the parent of the root, or of any node which +// is detached. +// +// - the root node has a real node id, just like any other directory. +// +// - the path_component whose string representation is "", the empty +// string, is the *name* of the root node. write it as +// path_component() and test for it with component.empty(). +// +// - similarly, the file_path whose string representation is "" also +// names the root node. write it as file_path() and test for it +// with path.empty(). +// +// - there is no file_path or path_component corresponding to the_null_node. +// +// We do this in order to support the notion of moving the root directory +// around, or applying attributes to the root directory. Note that the +// only supported way to move the root is with the 'pivot_root' operation, +// which atomically turns the root directory into a subdirectory and some +// existing subdirectory into the root directory. This is an UI constraint, +// not a constraint at this level. - const node_id first_node = 1; - const node_id first_temp_node = widen(1) << (sizeof(node_id) * 8 - 1); - inline bool temp_node(node_id n) - { - return n & first_temp_node; - } -} - - node::node(node_id i) : self(i), parent(the_null_node), @@ -1213,20 +1202,6 @@ namespace namespace { - struct true_node_id_source - : public node_id_source - { - true_node_id_source(database & db) : db(db) {} - virtual node_id next() - { - node_id n = db.next_node_id(); - I(!temp_node(n)); - return n; - } - database & db; - }; - - class editable_roster_for_merge : public editable_roster_base { @@ -1811,158 +1786,116 @@ namespace { marking_map & markings; }; - // Interface note: make_roster_for_merge and make_roster_for_nonmerge - // each exist in two variants: - // - // 1. A variant that does all of the actual work, taking every single - // relevant base-level data object as a separate argument. This - // variant is called directly by the unit tests, and also by variant 2. - // - // 2. A variant that takes a revision object, a revision ID, a database, - // and a node_id_source. This variant uses those four things to look - // up all of the low-level data required by variant 1, then calls - // variant 1 to get the real work done. This is the one called by - // (one variant of) make_roster_for_revision. +} // anonymous namespace - // yes, this function takes 14 arguments. I'm very sorry. - void - make_roster_for_merge(revision_id const & left_rid, - roster_t const & left_roster, - marking_map const & left_markings, - cset const & left_cs, - set const & left_uncommon_ancestors, +// Interface note: make_roster_for_merge and make_roster_for_nonmerge +// each exist in two variants: +// +// 1. A variant that does all of the actual work, taking every single +// relevant base-level data object as a separate argument. This +// variant is called directly by the unit tests, and also by variant 2. +// It is defined in this file. +// +// 2. A variant that takes a revision object, a revision ID, a database, +// and a node_id_source. This variant uses those four things to look +// up all of the low-level data required by variant 1, then calls +// variant 1 to get the real work done. This is the one called by +// (one variant of) make_roster_for_revision. +// It, and make_roster_for_revision, is defined in ancestry.cc. - revision_id const & right_rid, - roster_t const & right_roster, - marking_map const & right_markings, - cset const & right_cs, - set const & right_uncommon_ancestors, +// yes, this function takes 14 arguments. I'm very sorry. +void +make_roster_for_merge(revision_id const & left_rid, + roster_t const & left_roster, + marking_map const & left_markings, + cset const & left_cs, + set const & left_uncommon_ancestors, - revision_id const & new_rid, - roster_t & new_roster, - marking_map & new_markings, - node_id_source & nis) + revision_id const & right_rid, + roster_t const & right_roster, + marking_map const & right_markings, + cset const & right_cs, + set const & right_uncommon_ancestors, + + revision_id const & new_rid, + roster_t & new_roster, + marking_map & new_markings, + node_id_source & nis) +{ + I(!null_id(left_rid) && !null_id(right_rid)); + I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end()); + I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end()); + I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end()); + I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end()); + MM(left_rid); + MM(left_roster); + MM(left_markings); + MM(left_cs); + MM(left_uncommon_ancestors); + MM(right_rid); + MM(right_roster); + MM(right_markings); + MM(right_cs); + MM(right_uncommon_ancestors); + MM(new_rid); + MM(new_roster); + MM(new_markings); { - I(!null_id(left_rid) && !null_id(right_rid)); - I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end()); - I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end()); - I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end()); - I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end()); - MM(left_rid); - MM(left_roster); - MM(left_markings); - MM(left_cs); - MM(left_uncommon_ancestors); - MM(right_rid); - MM(right_roster); - MM(right_markings); - MM(right_cs); - MM(right_uncommon_ancestors); - MM(new_rid); - MM(new_roster); - MM(new_markings); - { - temp_node_id_source temp_nis; - new_roster = left_roster; - roster_t from_right_r(right_roster); - MM(from_right_r); + temp_node_id_source temp_nis; + new_roster = left_roster; + roster_t from_right_r(right_roster); + MM(from_right_r); - editable_roster_for_merge from_left_er(new_roster, temp_nis); - editable_roster_for_merge from_right_er(from_right_r, temp_nis); + editable_roster_for_merge from_left_er(new_roster, temp_nis); + editable_roster_for_merge from_right_er(from_right_r, temp_nis); - left_cs.apply_to(from_left_er); - right_cs.apply_to(from_right_er); + left_cs.apply_to(from_left_er); + right_cs.apply_to(from_right_er); - unify_rosters(new_roster, from_left_er.new_nodes, - from_right_r, from_right_er.new_nodes, - nis); + unify_rosters(new_roster, from_left_er.new_nodes, + from_right_r, from_right_er.new_nodes, + nis); - // Kluge: If both csets have no content changes, and the node_id_source - // passed to this function is a temp_node_id_source, then we are being - // called from get_current_roster_shape, and we should not attempt to - // verify that these rosters match as far as content IDs. - if (left_cs.deltas_applied.empty() - && right_cs.deltas_applied.empty() - && typeid(nis) == typeid(temp_node_id_source)) - I(equal_shapes(new_roster, from_right_r)); - else - I(new_roster == from_right_r); - } - - // SPEEDUP?: instead of constructing new marking from scratch, track which - // nodes were modified, and scan only them - // load one of the parent markings directly into the new marking map - new_markings.clear(); - mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors, - right_roster, right_markings, right_uncommon_ancestors, - new_rid, new_roster, new_markings); + // Kluge: If both csets have no content changes, and the node_id_source + // passed to this function is a temp_node_id_source, then we are being + // called from get_current_roster_shape, and we should not attempt to + // verify that these rosters match as far as content IDs. + if (left_cs.deltas_applied.empty() + && right_cs.deltas_applied.empty() + && typeid(nis) == typeid(temp_node_id_source)) + I(equal_shapes(new_roster, from_right_r)); + else + I(new_roster == from_right_r); } + // SPEEDUP?: instead of constructing new marking from scratch, track which + // nodes were modified, and scan only them + // load one of the parent markings directly into the new marking map + new_markings.clear(); + mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors, + right_roster, right_markings, right_uncommon_ancestors, + new_rid, new_roster, new_markings); +} - // WARNING: this function is not tested directly (no unit tests). Do not - // put real logic in it. - void - make_roster_for_merge(revision_t const & rev, revision_id const & new_rid, - roster_t & new_roster, marking_map & new_markings, - database & db, node_id_source & nis) - { - edge_map::const_iterator i = rev.edges.begin(); - revision_id const & left_rid = edge_old_revision(i); - cset const & left_cs = edge_changes(i); - ++i; - revision_id const & right_rid = edge_old_revision(i); - cset const & right_cs = edge_changes(i); - I(!null_id(left_rid) && !null_id(right_rid)); - cached_roster left_cached, right_cached; - db.get_roster(left_rid, left_cached); - db.get_roster(right_rid, right_cached); - set left_uncommon_ancestors, right_uncommon_ancestors; - db.get_uncommon_ancestors(left_rid, right_rid, - left_uncommon_ancestors, - right_uncommon_ancestors); - - make_roster_for_merge(left_rid, *left_cached.first, *left_cached.second, - left_cs, left_uncommon_ancestors, - right_rid, *right_cached.first, *right_cached.second, - right_cs, right_uncommon_ancestors, - new_rid, - new_roster, new_markings, - nis); - } - - // Warning: this function expects the parent's roster and markings in the - // 'new_roster' and 'new_markings' parameters, and they are modified - // destructively! - // This function performs an almost identical task to - // mark_roster_with_one_parent; however, for efficiency, it is implemented - // in a different, destructive way. - void - make_roster_for_nonmerge(cset const & cs, - revision_id const & new_rid, - roster_t & new_roster, marking_map & new_markings, - node_id_source & nis) - { - editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings); - cs.apply_to(er); - } - - // WARNING: this function is not tested directly (no unit tests). Do not - // put real logic in it. - void - make_roster_for_nonmerge(revision_t const & rev, - revision_id const & new_rid, - roster_t & new_roster, marking_map & new_markings, - database & db, node_id_source & nis) - { - revision_id const & parent_rid = edge_old_revision(rev.edges.begin()); - cset const & parent_cs = edge_changes(rev.edges.begin()); - db.get_roster(parent_rid, new_roster, new_markings); - make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis); - } +// Warning: this function expects the parent's roster and markings in the +// 'new_roster' and 'new_markings' parameters, and they are modified +// destructively! +// This function performs an almost identical task to +// mark_roster_with_one_parent; however, for efficiency, it is implemented +// in a different, destructive way. +void +make_roster_for_nonmerge(cset const & cs, + revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings, + node_id_source & nis) +{ + editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings); + cs.apply_to(er); } + void mark_roster_with_no_parents(revision_id const & rid, roster_t const & roster, @@ -2006,41 +1939,7 @@ mark_roster_with_one_parent(roster_t con child.check_sane_against(child_markings, true); } -// WARNING: this function is not tested directly (no unit tests). Do not put -// real logic in it. -void -make_roster_for_revision(database & db, node_id_source & nis, - revision_t const & rev, revision_id const & new_rid, - roster_t & new_roster, marking_map & new_markings) -{ - MM(rev); - MM(new_rid); - MM(new_roster); - MM(new_markings); - if (rev.edges.size() == 1) - make_roster_for_nonmerge(rev, new_rid, new_roster, new_markings, db, nis); - else if (rev.edges.size() == 2) - make_roster_for_merge(rev, new_rid, new_roster, new_markings, db, nis); - else - I(false); - // If nis is not a true_node_id_source, we have to assume we can get temp - // node ids out of it. ??? Provide a predicate method on node_id_sources - // instead of doing a typeinfo comparison. - new_roster.check_sane_against(new_markings, - typeid(nis) != typeid(true_node_id_source)); -} - -void -make_roster_for_revision(database & db, - revision_t const & rev, revision_id const & new_rid, - roster_t & new_roster, marking_map & new_markings) -{ - true_node_id_source nis(db); - make_roster_for_revision(db, nis, rev, new_rid, new_roster, new_markings); -} - - //////////////////////////////////////////////////////////////////// // Calculation of a cset //////////////////////////////////////////////////////////////////// @@ -3233,6 +3132,7 @@ void perform_random_action(roster_t & r, apply_cset_and_do_testing(r, c, nis); } +const node_id first_node = 1; testing_node_id_source::testing_node_id_source() : curr(first_node) {} ============================================================ --- roster.hh 61c885cf725e7b20bbbe5a8e595904a4edc38c14 +++ roster.hh ef1b40df5043af074ee007ea5efdb726e1b4112f @@ -363,6 +363,35 @@ mark_merge_roster(roster_t const & left_ roster_t const & merge, marking_map & new_markings); +// These functions are an internal interface between ancestry.cc and +// roster.cc; unless you know exactly what you're doing you probably want +// something else. + +void +make_roster_for_merge(revision_id const & left_rid, + roster_t const & left_roster, + marking_map const & left_markings, + cset const & left_cs, + std::set const & left_uncommon_ancestors, + + revision_id const & right_rid, + roster_t const & right_roster, + marking_map const & right_markings, + cset const & right_cs, + std::set const & right_uncommon_ancestors, + + revision_id const & new_rid, + roster_t & new_roster, + marking_map & new_markings, + node_id_source & nis); + +void +make_roster_for_nonmerge(cset const & cs, + revision_id const & new_rid, + roster_t & new_roster, marking_map & new_markings, + node_id_source & nis); + + // This is for revisions that are being written to the db, only. It assigns // permanent node ids. void @@ -404,6 +433,51 @@ void parse_marking(basic_io::parser & pa void push_marking(basic_io::stanza & st, bool is_file, marking_t const & mark); void parse_marking(basic_io::parser & pa, marking_t & marking); +// Parent maps are used in a number of places to keep track of all the +// parent rosters of a given revision. + +inline revision_id const & parent_id(parent_entry const & p) +{ + return p.first; +} + +inline revision_id const & parent_id(parent_map::const_iterator i) +{ + return i->first; +} + +inline cached_roster const & +parent_cached_roster(parent_entry const & p) +{ + return p.second; +} + +inline cached_roster const & +parent_cached_roster(parent_map::const_iterator i) +{ + return i->second; +} + +inline roster_t const & parent_roster(parent_entry const & p) +{ + return *(p.second.first); +} + +inline roster_t const & parent_roster(parent_map::const_iterator i) +{ + return *(i->second.first); +} + +inline marking_map const & parent_marking(parent_entry const & p) +{ + return *(p.second.second); +} + +inline marking_map const & parent_marking(parent_map::const_iterator i) +{ + return *(i->second.second); +} + #ifdef BUILD_UNIT_TESTS struct testing_node_id_source