# 
# 
# patch "database.cc"
#  from [c609818513960b330c4d833ba6abe267fa635c47]
#    to [3a4df9126b98bf1a5e863f32cc8cb83357d39ee2]
# 
# patch "database.hh"
#  from [dbcf6a4985c8319fe05b5d012a4772316d00a07b]
#    to [aa0ee865053a7c27df33ab9906ce681db1e9d6c8]
# 
# patch "graph.cc"
#  from [31b3824a0397177e00fddfb9e29c7134161a44e5]
#    to [8c67407d007b2849a6f287d086d0a82bd885329b]
# 
# patch "graph.hh"
#  from [82374ad3b2f3c2da39e2c9f15018c309459114f1]
#    to [9791121bb0fb8d3063c97b32329fa513b51cd216]
# 
# patch "revision.cc"
#  from [94ffffda8ea5623d05fef8d062b2a4401599578d]
#    to [3892413bb144d335f6ef06001d7f9a573792392a]
# 
============================================================
--- database.cc c609818513960b330c4d833ba6abe267fa635c47
+++ database.cc 3a4df9126b98bf1a5e863f32cc8cb83357d39ee2
@@ -1791,7 +1791,7 @@ void
 
 
 void
-database::get_revision_ancestry(multimap<revision_id, revision_id> & graph)
+database::get_revision_ancestry(rev_ancestry_map & graph)
 {
   results res;
   graph.clear();
@@ -3241,40 +3241,26 @@ database::put_roster(revision_id const &
   guard.commit();
 }
 
-// helper for get_uncommon_ancestors
-void
-database::do_step_ancestor(set<height_rev_pair> & this_frontier,
-                           set<revision_id> & this_seen,
-                           set<revision_id> const & other_seen,
-                           set<revision_id> & this_uncommon_ancs)
+// for get_uncommon_ancestors
+struct rev_height_graph : rev_graph
 {
-  const height_rev_pair h_rev = *this_frontier.rbegin();
-  const revision_id & rid(h_rev.second);
-
-  this_frontier.erase(h_rev);
-
-  if (other_seen.find(rid) == other_seen.end())
+  rev_height_graph(database & db) : db(db) {}
+  virtual void get_parents(revision_id const & rev, set<revision_id> & parents) const
   {
-    this_uncommon_ancs.insert(rid);
+    db.get_revision_parents(rev, parents);
   }
-
-  // extend the frontier with parents
-  results res;
-  fetch(res, 2, any_rows,
-        query("SELECT parent, height FROM revision_ancestry r, heights h"
-              " WHERE child = ? AND r.parent = h.revision")
-        % text(rid.inner()()));
-
-  for (size_t i = 0; i < res.size(); ++i)
+  virtual void get_children(revision_id const & rev, set<revision_id> & parents) const
   {
-    revision_id par_rid(res[i][0]);
-    if (this_seen.find(par_rid) == this_seen.end())
-      {
-        this_frontier.insert(make_pair(rev_height(res[i][1]), par_rid));
-        this_seen.insert(par_rid);
-      }
+    // not required
+    I(false);
   }
-}
+  virtual void get_height(revision_id const & rev, rev_height & h) const
+  {
+    db.get_rev_height(rev, h);
+  }
+
+  database & db;
+};
 
 void
 database::get_uncommon_ancestors(revision_id const & a,
@@ -3282,65 +3268,9 @@ database::get_uncommon_ancestors(revisio
                                  set<revision_id> & a_uncommon_ancs,
                                  set<revision_id> & b_uncommon_ancs)
 {
-  a_uncommon_ancs.clear();
-  b_uncommon_ancs.clear();
-
-  // We extend a frontier from each revision until it reaches
-  // a revision that has been seen by the other frontier. By
-  // traversing in ascending height order we can ensure that
-  // any common ancestor will have been 'seen' by both sides
-  // before it is traversed.
-
-  set<height_rev_pair> a_frontier, b_frontier;
-  rev_height height;
-  get_rev_height(a, height);
-  a_frontier.insert(make_pair(height, a));
-  get_rev_height(b, height);
-  b_frontier.insert(make_pair(height, b));
-  set<revision_id> a_seen, b_seen;
-  a_seen.insert(a);
-  b_seen.insert(b);
-
-  while (!a_frontier.empty() || !b_frontier.empty())
-    {
-      // We take the leaf-most (ie highest) height entry from either
-      // a_frontier or b_frontier.
-      // If one of them is empty, we take entries from the other.
-      bool step_a;
-      if (a_frontier.empty())
-        {
-          step_a = false;
-        }
-      else
-        {
-          if (!b_frontier.empty())
-            {
-              if (a_frontier == b_frontier)
-                {
-                  // if both frontiers are the same, then we can safely say that
-                  // we've found all uncommon ancestors. This stopping condition
-                  // can result in traversing more nodes than required, but is simple.
-                  break;
-                }
-              step_a = (*a_frontier.rbegin() > *b_frontier.rbegin());
-            }
-          else
-            {
-              // b is empty
-              step_a = true;
-            }
-        }
-
-      if (step_a)
-        {
-          do_step_ancestor(a_frontier, a_seen, b_seen, a_uncommon_ancs);
-        }
-      else
-        {
-          do_step_ancestor(b_frontier, b_seen, a_seen, b_uncommon_ancs);
-        }
-    }
+  rev_height_graph graph(*this);
+  ::get_uncommon_ancestors(a, b, graph, a_uncommon_ancs, b_uncommon_ancs);
 }
 
============================================================
--- database.hh dbcf6a4985c8319fe05b5d012a4772316d00a07b
+++ database.hh aa0ee865053a7c27df33ab9906ce681db1e9d6c8
@@ -26,6 +26,7 @@ int sqlite3_finalize(sqlite3_stmt *);
 #include "cleanup.hh"
 #include "roster.hh"
 #include "selectors.hh"
+#include "graph.hh" // FIXME: would be better not to include this everywhere
 
 #include "outdated_indicator.hh"
 
@@ -287,7 +288,7 @@ public:
   //  --== The ancestry graph ==--
   //
 public:
-  void get_revision_ancestry(std::multimap<revision_id, revision_id> & graph);
+  void get_revision_ancestry(rev_ancestry_map & graph);
 
   void get_revision_parents(revision_id const & ident,
                             std::set<revision_id> & parents);
@@ -552,12 +553,6 @@ private:
                             hexenc<id> const & new_id,
                             delta const & del,
                             database & db);
-  typedef std::pair<rev_height, revision_id> height_rev_pair;
-  void do_step_ancestor(std::set<height_rev_pair> & this_frontier,
-                        std::set<revision_id> & this_seen,
-                        std::set<revision_id> const & other_seen,
-                        std::set<revision_id> & this_uncommon_ancs);
-
 public:
   // branches
   outdated_indicator get_branches(std::vector<std::string> & names);
 
============================================================
--- graph.cc 31b3824a0397177e00fddfb9e29c7134161a44e5
+++ graph.cc 8c67407d007b2849a6f287d086d0a82bd885329b
@@ -1,12 +1,22 @@
+#include <map>
+#include <utility>
+#include <list>
 #include <boost/shared_ptr.hpp>
 
 #include "sanity.hh"
 #include "graph.hh"
+#include "safe_map.hh"
+#include "numeric_vocab.hh"
 
 using boost::shared_ptr;
 using std::string;
 using std::vector;
 using std::set;
+using std::pair;
+using std::map;
+using std::multimap;
+using std::make_pair;
+using std::list;
 
 void
 get_reconstruction_path(std::string const & start,
@@ -115,6 +125,7 @@ get_reconstruction_path(std::string cons
   path = *selected_path;
 }
 
+
 #ifdef BUILD_UNIT_TESTS
 
 #include 
@@ -229,6 +240,435 @@ UNIT_TEST(graph, random_get_reconstructi
 
 #endif // BUILD_UNIT_TESTS
 
+
+// graph is a parent->child map
+void toposort_rev_ancestry(rev_ancestry_map const & graph,
+                           vector<revision_id> & revisions)
+{
+  typedef multimap<revision_id, revision_id>::const_iterator gi;
+  typedef map<revision_id, int>::iterator pi;
+
+  revisions.clear();
+  // determine the number of parents for each rev
+  map<revision_id, int> pcount;
+  for (gi i = graph.begin(); i != graph.end(); ++i)
+    pcount.insert(make_pair(i->first, 0));
+  for (gi i = graph.begin(); i != graph.end(); ++i)
+    ++(pcount[i->second]);
+
+  // find the set of graph roots
+  list<revision_id> roots;
+  for (pi i = pcount.begin(); i != pcount.end(); ++i)
+    if(i->second==0)
+      roots.push_back(i->first);
+
+  while (!roots.empty())
+    {
+      revision_id cur = roots.front();
+      roots.pop_front();
+      if (!null_id(cur))
+        revisions.push_back(cur);
+
+      for(gi i = graph.lower_bound(cur);
+          i != graph.upper_bound(cur); i++)
+        if(--(pcount[i->second]) == 0)
+          roots.push_back(i->second);
+    }
+}
+
+
+// get_uncommon_ancestors
+typedef std::pair<rev_height, revision_id> height_rev_pair;
+
+static void
+do_step_ancestor(set<height_rev_pair> & this_frontier,
+                 set<revision_id> & this_seen,
+                 set<revision_id> const & other_seen,
+                 set<revision_id> & this_uncommon_ancs,
+                 rev_graph const & rg)
+{
+  const height_rev_pair h_node = *this_frontier.rbegin();
+  const revision_id & node(h_node.second);
+  this_frontier.erase(h_node);
+  if (other_seen.find(node) == other_seen.end())
+    {
+      this_uncommon_ancs.insert(node);
+    }
+
+  // extend the frontier with parents
+  set<revision_id> parents;
+  rg.get_parents(node, parents);
+  for (set<revision_id>::const_iterator r = parents.begin();
+       r != parents.end(); r++)
+    {
+      if (this_seen.find(*r) == this_seen.end())
+        {
+          rev_height h;
+          rg.get_height(*r, h);
+          this_frontier.insert(make_pair(h, *r));
+          this_seen.insert(*r);
+        }
+    }
+}
+
+
+void
+get_uncommon_ancestors(revision_id const & a,
+                       revision_id const & b,
+                       rev_graph const & rg,
+                       set<revision_id> & a_uncommon_ancs,
+                       set<revision_id> & b_uncommon_ancs)
+{
+  a_uncommon_ancs.clear();
+  b_uncommon_ancs.clear();
+
+  // We extend a frontier from each revision until it reaches
+  // a revision that has been seen by the other frontier. By
+  // traversing in ascending height order we can ensure that
+  // any common ancestor will have been 'seen' by both sides
+  // before it is traversed.
+
+  set<height_rev_pair> a_frontier, b_frontier;
+  {
+    rev_height h;
+    rg.get_height(a, h);
+    a_frontier.insert(make_pair(h, a));
+    rg.get_height(b, h);
+    b_frontier.insert(make_pair(h, b));
+  }
+
+  set<revision_id> a_seen, b_seen;
+  a_seen.insert(a);
+  b_seen.insert(b);
+
+  while (!a_frontier.empty() || !b_frontier.empty())
+    {
+      // We take the leaf-most (ie highest) height entry from either
+      // a_frontier or b_frontier.
+      // If one of them is empty, we take entries from the other.
+      bool step_a;
+      if (a_frontier.empty())
+        {
+          step_a = false;
+        }
+      else
+        {
+          if (!b_frontier.empty())
+            {
+              if (a_frontier == b_frontier)
+                {
+                  // if both frontiers are the same, then we can safely say that
+                  // we've found all uncommon ancestors. This stopping condition
+                  // can result in traversing more nodes than required, but is simple.
+                  break;
+                }
+              step_a = (*a_frontier.rbegin() > *b_frontier.rbegin());
+            }
+          else
+            {
+              // b is empty
+              step_a = true;
+            }
+        }
+
+      if (step_a)
+        {
+          do_step_ancestor(a_frontier, a_seen, b_seen, a_uncommon_ancs, rg);
+        }
+      else
+        {
+          do_step_ancestor(b_frontier, b_seen, a_seen, b_uncommon_ancs, rg);
+        }
+    }
+}
+
+#ifdef BUILD_UNIT_TESTS
+
+#include 
+#include "unit_tests.hh"
+#include "randomizer.hh"
+#include "roster.hh"
+
+
+static void
+get_all_ancestors(revision_id const & start, rev_ancestry_map const & child_to_parent_map,
+                  set<revision_id> & ancestors)
+{
+  ancestors.clear();
+  vector<revision_id> frontier;
+  frontier.push_back(start);
+  while (!frontier.empty())
+    {
+      revision_id rid = frontier.back();
+      frontier.pop_back();
+      if (ancestors.find(rid) != ancestors.end())
+        continue;
+      safe_insert(ancestors, rid);
+      typedef rev_ancestry_map::const_iterator ci;
+      pair<ci, ci> range = child_to_parent_map.equal_range(rid);
+      for (ci i = range.first; i != range.second; ++i)
+        frontier.push_back(i->second);
+    }
+}
+
+struct mock_rev_graph : rev_graph
+{
+  mock_rev_graph(rev_ancestry_map const & child_to_parent_map)
+    : child_to_parent_map(child_to_parent_map)
+  {
+    // assign sensible heights
+    height_map.clear();
+
+    // toposort expects parent->child
+    rev_ancestry_map parent_to_child;
+    for (rev_ancestry_map::const_iterator i = child_to_parent_map.begin();
+         i != child_to_parent_map.end(); i++)
+      {
+        parent_to_child.insert(make_pair(i->second, i->first));
+      }
+    vector<revision_id> topo_revs;
+    toposort_rev_ancestry(parent_to_child, topo_revs);
+
+    // this is ugly but works. just give each one a sequential number.
+    rev_height top = rev_height::root_height();
+    u32 num = 1;
+    for (vector<revision_id>::const_iterator r = topo_revs.begin();
+         r != topo_revs.end(); r++, num++)
+      {
+        height_map.insert(make_pair(*r, top.child_height(num)));
+      }
+  }
+
+  virtual void get_parents(revision_id const & node, set<revision_id> & parents) const
+  {
+    parents.clear();
+    for (rev_ancestry_map::const_iterator i = child_to_parent_map.lower_bound(node);
+         i != child_to_parent_map.upper_bound(node); i++)
+      {
+        if (!null_id(i->second))
+          safe_insert(parents, i->second);
+      }
+  }
+
+  virtual void get_children(revision_id const & node, set<revision_id> & parents) const
+  {
+    // not required
+    I(false);
+  }
+
+  virtual void get_height(revision_id const & rev, rev_height & h) const
+  {
+    MM(rev);
+    h = safe_get(height_map, rev);
+  }
+
+
+  rev_ancestry_map const & child_to_parent_map;
+  map<revision_id, rev_height> height_map;
+};
+
+
+static void
+run_a_get_uncommon_ancestors_test(rev_ancestry_map const & child_to_parent_map,
+                                  revision_id const & left, revision_id const & right)
+{
+  set<revision_id> true_left_ancestors, true_right_ancestors;
+  get_all_ancestors(left, child_to_parent_map, true_left_ancestors);
+  get_all_ancestors(right, child_to_parent_map, true_right_ancestors);
+  set<revision_id> true_left_uncommon_ancestors, true_right_uncommon_ancestors;
+  MM(true_left_uncommon_ancestors);
+  MM(true_right_uncommon_ancestors);
+  set_difference(true_left_ancestors.begin(), true_left_ancestors.end(),
+                 true_right_ancestors.begin(), true_right_ancestors.end(),
+                 inserter(true_left_uncommon_ancestors, true_left_uncommon_ancestors.begin()));
+  set_difference(true_right_ancestors.begin(), true_right_ancestors.end(),
+                 true_left_ancestors.begin(), true_left_ancestors.end(),
+                 inserter(true_right_uncommon_ancestors, true_right_uncommon_ancestors.begin()));
+
+  set<revision_id> calculated_left_uncommon_ancestors, calculated_right_uncommon_ancestors;
+  MM(calculated_left_uncommon_ancestors);
+  MM(calculated_right_uncommon_ancestors);
+  mock_rev_graph rg(child_to_parent_map);
+  get_uncommon_ancestors(left, right, rg,
+                         calculated_left_uncommon_ancestors,
+                         calculated_right_uncommon_ancestors);
+  I(calculated_left_uncommon_ancestors == true_left_uncommon_ancestors);
+  I(calculated_right_uncommon_ancestors == true_right_uncommon_ancestors);
+  get_uncommon_ancestors(right, left, rg,
+                         calculated_right_uncommon_ancestors,
+                         calculated_left_uncommon_ancestors);
+  I(calculated_left_uncommon_ancestors == true_left_uncommon_ancestors);
+  I(calculated_right_uncommon_ancestors == true_right_uncommon_ancestors);
+}
+
+UNIT_TEST(graph, get_uncommon_ancestors_nasty_convexity_case)
+{
+  // This tests the nasty case described in the giant comment above
+  // get_uncommon_ancestors:
+  //
+  //              9
+  //              |\                  . Extraneous dots brought to you by the
+  //              8 \                 . Committee to Shut Up the C Preprocessor
+  //             /|  \                . (CSUCPP), and viewers like you and me.
+  //            / |   |
+  //           /  7   |
+  //          |   |   |
+  //          |   6   |
+  //          |   |   |
+  //          |   5   |
+  //          |   |   |
+  //          |   4   |
+  //          |   |   |
+  //          |   :   |  <-- insert arbitrarily many revisions at the ellipsis
+  //          |   :   |
+  //          |   |   |
+  //          1   2   3
+  //           \ / \ /
+  //            L   R
+
+  rev_ancestry_map child_to_parent_map;
+  revision_id left(fake_id()), right(fake_id());
+  revision_id one(fake_id()), two(fake_id()), eight(fake_id()), three(fake_id()), nine(fake_id());
+  MM(left);
+  MM(right);
+  MM(one);
+  MM(two);
+  MM(three);
+  MM(eight);
+  MM(nine);
+  child_to_parent_map.insert(make_pair(left, one));
+  child_to_parent_map.insert(make_pair(one, eight));
+  child_to_parent_map.insert(make_pair(eight, nine));
+  child_to_parent_map.insert(make_pair(right, three));
+  child_to_parent_map.insert(make_pair(three, nine));
+
+  revision_id middle(fake_id());
+  child_to_parent_map.insert(make_pair(left, two));
+  child_to_parent_map.insert(make_pair(right, two));
+  // link two into the chain of revisions at the ellipsis
+  child_to_parent_map.insert(make_pair(two, middle));
+  // We insert a _lot_ of revisions at the ellipsis, to make sure that
+  // whatever sort of step-size is used on the expansion, we can't take the
+  // entire middle portion in one big gulp and make the test pointless.
+  for (int i = 0; i != 1000; ++i)
+    {
+      revision_id next(fake_id());
+      child_to_parent_map.insert(make_pair(middle, next));
+      middle = next;
+    }
+  child_to_parent_map.insert(make_pair(middle, eight));
+
+  run_a_get_uncommon_ancestors_test(child_to_parent_map, left, right);
+}
+
+double const new_root_freq = 0.05;
+double const merge_node_freq = 0.2;
+double const skip_up_freq = 0.5;
+
+static revision_id
+pick_node_from_set(set<revision_id> const & heads,
+                   randomizer & rng)
+{
+  I(!heads.empty());
+  size_t which_start = rng.uniform(heads.size());
+  set<revision_id>::const_iterator i = heads.begin();
+  for (size_t j = 0; j != which_start; ++j)
+    ++i;
+  return *i;
+}
+
+static revision_id
+pick_node_or_ancestor(set<revision_id> const & heads,
+                      rev_ancestry_map const & child_to_parent_map,
+                      randomizer & rng)
+{
+  revision_id rev = pick_node_from_set(heads, rng);
+  // now we recurse up from this starting point
+  while (rng.bernoulli(skip_up_freq))
+    {
+      typedef rev_ancestry_map::const_iterator ci;
+      pair<ci, ci> range = child_to_parent_map.equal_range(rev);
+      if (range.first == range.second)
+        break;
+      ci second = range.first;
+      ++second;
+      if (second == range.second)
+        // there was only one parent
+        rev = range.first->second;
+      else
+        {
+          // there are two parents, pick one randomly
+          if (rng.flip())
+            rev = range.first->second;
+          else
+            rev = second->second;
+        }
+    }
+  return rev;
+}
+
+static void
+make_random_graph(size_t num_nodes,
+                  rev_ancestry_map & child_to_parent_map,
+                  vector<revision_id> & nodes,
+                  randomizer & rng)
+{
+  set<revision_id> heads;
+
+  for (size_t i = 0; i != num_nodes; ++i)
+    {
+      revision_id new_rid = revision_id(fake_id());
+      nodes.push_back(new_rid);
+      set<revision_id> parents;
+      if (heads.empty() || rng.bernoulli(new_root_freq))
+        parents.insert(revision_id());
+      else if (rng.bernoulli(merge_node_freq) && heads.size() > 1)
+        {
+          // maybe we'll pick the same node twice and end up not doing a
+          // merge, oh well...
+          parents.insert(pick_node_from_set(heads, rng));
+          parents.insert(pick_node_from_set(heads, rng));
+        }
+      else
+        {
+          parents.insert(pick_node_or_ancestor(heads, child_to_parent_map, rng));
+        }
+      for (set<revision_id>::const_iterator j = parents.begin();
+           j != parents.end(); ++j)
+        {
+          heads.erase(*j);
+          child_to_parent_map.insert(std::make_pair(new_rid, *j));
+        }
+      safe_insert(heads, new_rid);
+    }
+}
+
+static void
+run_a_get_uncommon_ancestors_random_test(size_t num_nodes,
+                                         size_t iterations,
+                                         randomizer & rng)
+{
+  rev_ancestry_map child_to_parent_map;
+  vector<revision_id> nodes;
+  make_random_graph(num_nodes, child_to_parent_map, nodes, rng);
+  for (size_t i = 0; i != iterations; ++i)
+    {
+      L(FL("get_uncommon_ancestors: random test %s-%s") % num_nodes % i);
+      revision_id left = idx(nodes, rng.uniform(nodes.size()));
+      revision_id right = idx(nodes, rng.uniform(nodes.size()));
+      run_a_get_uncommon_ancestors_test(child_to_parent_map, left, right);
+    }
+}
+
+UNIT_TEST(graph, get_uncommon_ancestors_randomly)
+{
+  randomizer rng;
+  run_a_get_uncommon_ancestors_random_test(100, 100, rng);
+  run_a_get_uncommon_ancestors_random_test(1000, 100, rng);
+  run_a_get_uncommon_ancestors_random_test(10000, 1000, rng);
+}
+
+
+#endif // BUILD_UNIT_TESTS
+
 // Local Variables:
 // mode: C++
 // fill-column: 76
============================================================
--- graph.hh 82374ad3b2f3c2da39e2c9f15018c309459114f1
+++ graph.hh 9791121bb0fb8d3063c97b32329fa513b51cd216
@@ -1,4 +1,5 @@
 #ifndef __GRAPH__HH__
+#define __GRAPH__HH__
 
 // Copyright (C) 2006 Nathaniel Smith <njs@pobox.com>
 //
@@ -19,7 +20,11 @@
 #include <string>
 #include <vector>
 #include <set>
+#include <map>
 
+#include "vocab.hh"
+#include "rev_height.hh"
+
 struct reconstruction_graph
 {
   virtual bool is_base(std::string const & node) const = 0;
@@ -34,6 +39,28 @@ get_reconstruction_path(std::string cons
                         reconstruction_graph const & graph,
                         reconstruction_path & path);
 
+typedef std::multimap<revision_id, revision_id> rev_ancestry_map;
+
+void toposort_rev_ancestry(rev_ancestry_map const & graph,
+                           std::vector<revision_id> & revisions);
+
+struct rev_graph
+{
+  virtual void get_parents(revision_id const & rev, std::set<revision_id> & parents) const = 0;
+  virtual void get_children(revision_id const & rev, std::set<revision_id> & children) const = 0;
+  virtual void get_height(revision_id const & rev, rev_height & h) const = 0;
+  virtual ~rev_graph() {};
+};
+
+void
+get_uncommon_ancestors(revision_id const & a,
+                       revision_id const & b,
+                       rev_graph const & hg,
+                       std::set<revision_id> & a_uncommon_ancs,
+                       std::set<revision_id> & b_uncommon_ancs);
+
+
+
 #endif // __GRAPH__HH__
============================================================
--- revision.cc 94ffffda8ea5623d05fef8d062b2a4401599578d
+++ revision.cc 3892413bb144d335f6ef06001d7f9a573792392a
@@ -1733,41 +1733,10 @@ allrevs_toposorted(vector<revision_id> &
 allrevs_toposorted(vector<revision_id> & revisions,
                    app_state & app)
 {
-
-  typedef multimap<revision_id, revision_id>::const_iterator gi;
-  typedef map<revision_id, int>::iterator pi;
-
-  revisions.clear();
-  // get the complete ancestry
-  multimap<revision_id, revision_id> graph;
+  rev_ancestry_map graph;
   app.db.get_revision_ancestry(graph);
-
-  // determine the number of parents for each rev
-  map<revision_id, int> pcount;
-  for (gi i = graph.begin(); i != graph.end(); ++i)
-    pcount.insert(make_pair(i->first, 0));
-  for (gi i = graph.begin(); i != graph.end(); ++i)
-    ++(pcount[i->second]);
-
-  // find the set of graph roots
-  list<revision_id> roots;
-  for (pi i = pcount.begin(); i != pcount.end(); ++i)
-    if(i->second==0)
-      roots.push_back(i->first);
-
-  while (!roots.empty())
-    {
-      revision_id cur = roots.front();
-      roots.pop_front();
-      if (!null_id(cur))
-        revisions.push_back(cur);
-
-      for(gi i = graph.lower_bound(cur);
-          i != graph.upper_bound(cur); i++)
-        if(--(pcount[i->second]) == 0)
-          roots.push_back(i->second);
-    }
+  toposort_rev_ancestry(graph, revisions);
 }
 
 void
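
For readers tracing the algorithm rather than the diff itself, here is a small standalone sketch of the same height-ordered frontier walk that get_uncommon_ancestors performs above. It is not part of the patch: node ids are plain ints, the height table is filled in by hand, and the names (node, ancestry_map, step) are invented for the illustration. In the patch, the height property comes from rev_height and the parent lookup from rev_graph::get_parents.

// Standalone sketch of the height-ordered frontier walk used by
// get_uncommon_ancestors above.  Not part of the patch: node ids are plain
// ints and the height table is filled in by hand, but the invariant is the
// same one rev_height provides -- every parent has a strictly smaller
// height than any of its children.
#include <iostream>
#include <map>
#include <set>
#include <utility>

typedef int node;
typedef std::multimap<node, node> ancestry_map;   // child -> parent edges
typedef std::pair<int, node> height_node_pair;    // (height, node), ordered by height first

// One step of the walk: pop the highest entry from this side's frontier,
// record it if the other side has not seen it, and push its unseen parents.
static void
step(std::set<height_node_pair> & frontier,
     std::set<node> & seen,
     std::set<node> const & other_seen,
     std::set<node> & uncommon,
     ancestry_map const & graph,
     std::map<node, int> const & height)
{
  height_node_pair hn = *frontier.rbegin();
  frontier.erase(hn);
  if (other_seen.find(hn.second) == other_seen.end())
    uncommon.insert(hn.second);

  typedef ancestry_map::const_iterator ci;
  std::pair<ci, ci> range = graph.equal_range(hn.second);
  for (ci i = range.first; i != range.second; ++i)
    if (seen.insert(i->second).second)
      frontier.insert(std::make_pair(height.find(i->second)->second, i->second));
}

int main()
{
  //        0
  //       / \          heights: 0 -> 0, 1 -> 1, 2 -> 1, 3 -> 2, 4 -> 2
  //      1   2
  //      |   |         expected: uncommon(3) = {1, 3}, uncommon(4) = {2, 4}
  //      3   4
  ancestry_map graph;
  graph.insert(std::make_pair(1, 0));
  graph.insert(std::make_pair(2, 0));
  graph.insert(std::make_pair(3, 1));
  graph.insert(std::make_pair(4, 2));
  std::map<node, int> height;
  height[0] = 0; height[1] = 1; height[2] = 1; height[3] = 2; height[4] = 2;

  node a = 3, b = 4;
  std::set<height_node_pair> fa, fb;
  std::set<node> seen_a, seen_b, unc_a, unc_b;
  fa.insert(std::make_pair(height[a], a)); seen_a.insert(a);
  fb.insert(std::make_pair(height[b], b)); seen_b.insert(b);

  while (!fa.empty() || !fb.empty())
    {
      // identical frontiers: every remaining node is a common ancestor
      if (fa == fb)
        break;
      // step whichever side currently holds the leaf-most (highest) entry
      bool step_a = !fa.empty() && (fb.empty() || *fa.rbegin() > *fb.rbegin());
      if (step_a)
        step(fa, seen_a, seen_b, unc_a, graph, height);
      else
        step(fb, seen_b, seen_a, unc_b, graph, height);
    }

  for (std::set<node>::const_iterator i = unc_a.begin(); i != unc_a.end(); ++i)
    std::cout << "uncommon to a: " << *i << "\n";
  for (std::set<node>::const_iterator i = unc_b.begin(); i != unc_b.end(); ++i)
    std::cout << "uncommon to b: " << *i << "\n";
  return 0;
}

Run on the five-node DAG in main(), the walk reports 1 and 3 as uncommon to a and 2 and 4 as uncommon to b, and it never reports the common ancestor 0: once both frontiers hold only (0, 0) they compare equal, which is exactly the a_frontier == b_frontier stopping condition used in the patch.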