#
#
# add_file "ancestry.cc"
#  content [a8362c21538ff7576166507a9b87fd4ebde69a00]
# 
# patch "Makefile.am"
#  from [61aa922983ae89a8bb50cf5ad203cbadf1e943fb]
#    to [607163f9d88dec6858dfada0e5b0169fa5ec7bb4]
# 
# patch "database.hh"
#  from [31ba75343ca21813eb814b814d6576440f4f0cfc]
#    to [a4a07097a63f5877e7d4b5ca36f39ad263ff685c]
# 
# patch "rev_types.hh"
#  from [99327382c7c617bc5965569a55d7789d95b0fa99]
#    to [af3c5cf58a78153d7b6a515a25dd4d6d584b851f]
# 
# patch "revision.cc"
#  from [c265c4dfdc6ad23d44811a145d47cc43c263fa3b]
#    to [5b5f6c11cf218a22285d6939f14a1d5d64d3e482]
# 
# patch "roster.cc"
#  from [98ea42f99e59a884a7f00dfab6a76334c5b89772]
#    to [2ffc18c20c2e53c540b7312a125681d60a6778f9]
# 
# patch "roster.hh"
#  from [61c885cf725e7b20bbbe5a8e595904a4edc38c14]
#    to [ef1b40df5043af074ee007ea5efdb726e1b4112f]
#
============================================================
--- ancestry.cc	a8362c21538ff7576166507a9b87fd4ebde69a00
+++ ancestry.cc	a8362c21538ff7576166507a9b87fd4ebde69a00
@@ -0,0 +1,575 @@
+// Copyright (C) 2004 Graydon Hoare <address@hidden>
+//
+// This program is made available under the GNU GPL version 2.0 or
+// greater. See the accompanying file COPYING for details.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the
+// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+// PURPOSE.
+
+#include "base.hh"
+#include "sanity.hh"
+#include "revision.hh"
+#include "rev_height.hh"
+#include "roster.hh"
+
+#include "database.hh"
+#include "interner.hh"
+
+#include "safe_map.hh"
+#include <stack>
+#include <boost/shared_ptr.hpp>
+#include <boost/dynamic_bitset.hpp>
+
+using std::make_pair;
+using std::map;
+using std::max;
+using std::multimap;
+using std::pair;
+using std::set;
+using std::stack;
+using std::vector;
+
+using boost::shared_ptr;
+using boost::dynamic_bitset;
+
+// For a surprisingly long time, we have been using an algorithm which
+// is nonsense, based on a misunderstanding of what "LCA" means. The
+// LCA of two nodes is *not* the first common ancestor which you find
+// when iteratively expanding their ancestor sets. Instead, the LCA is
+// the common ancestor which is a descendent of all other common
+// ancestors.
+//
+// In general, a set of nodes in a DAG doesn't always have an
+// LCA. There might be multiple common ancestors which are not parents
+// of one another. So we implement something which is "functionally
+// useful" for finding a merge point (and moreover, which always
+// terminates): we find an LCA of the input set if it exists,
+// otherwise we replace the input set with the nodes we did find and
+// repeat.
+//
+// All previous discussions in monotone-land, before say August 2005,
+// of LCA (and LCAD) are essentially wrong due to our silly
+// misunderstanding. It's unfortunate, but our half-baked
+// approximations worked almost well enough to take us through 3 years
+// of deployed use. Hopefully this more accurate new use will serve us
+// even longer.
+
+typedef unsigned long ctx;
+typedef dynamic_bitset<> bitmap;
+typedef shared_ptr<bitmap> shared_bitmap;
+
+static void
+calculate_ancestors_from_graph(interner<ctx> & intern,
+                               revision_id const & init,
+                               multimap<revision_id, revision_id> const & graph,
+                               map< ctx, shared_bitmap > & ancestors,
+                               shared_bitmap & total_union);
+
+void
+find_common_ancestor_for_merge(database & db,
+                               revision_id const & left,
+                               revision_id const & right,
+                               revision_id & anc)
+{
+  interner<ctx> intern;
+  set<ctx> leaves;
+  map<ctx, shared_bitmap> ancestors;
+
+  shared_bitmap isect = shared_bitmap(new bitmap());
+  shared_bitmap isect_ancs = shared_bitmap(new bitmap());
+
+  leaves.insert(intern.intern(left.inner()()));
+  leaves.insert(intern.intern(right.inner()()));
+
+
+  multimap<revision_id, revision_id> inverse_graph;
+  {
+    multimap<revision_id, revision_id> graph;
+    db.get_revision_ancestry(graph);
+    typedef multimap<revision_id, revision_id>::const_iterator gi;
+    for (gi i = graph.begin(); i != graph.end(); ++i)
+      inverse_graph.insert(make_pair(i->second, i->first));
+  }
+
+
+  while (leaves.size() != 1)
+    {
+      isect->clear();
+      isect_ancs->clear();
+
+      // First intersect all ancestors of current leaf set
+      for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
+        {
+          ctx curr_leaf = *i;
+          shared_bitmap curr_leaf_ancestors;
+          map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
+          if (j != ancestors.end())
+            curr_leaf_ancestors = j->second;
+          else
+            {
+              curr_leaf_ancestors = shared_bitmap(new bitmap());
+              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf),
+                                                                 origin::internal),
+                                             inverse_graph, ancestors,
+                                             curr_leaf_ancestors);
+            }
+          if (isect->size() > curr_leaf_ancestors->size())
+            curr_leaf_ancestors->resize(isect->size());
+
+          if (curr_leaf_ancestors->size() > isect->size())
+            isect->resize(curr_leaf_ancestors->size());
+
+          if (i == leaves.begin())
+            *isect = *curr_leaf_ancestors;
+          else
+            (*isect) &= (*curr_leaf_ancestors);
+        }
+
+      // isect is now the set of common ancestors of leaves, but that is not enough.
+      // We need the set of leaves of isect; to do that we calculate the set of
+      // ancestors of isect, in order to subtract it from isect (below).
+      set<ctx> new_leaves;
+      for (ctx i = 0; i < isect->size(); ++i)
+        {
+          if (isect->test(i))
+            {
+              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i),
+                                                                 origin::internal),
+                                             inverse_graph, ancestors, isect_ancs);
+            }
+        }
+
+      // Finally, the subtraction step: for any element i of isect, if
+      // it's *not* in isect_ancs, it survives as a new leaf.
+      leaves.clear();
+      for (ctx i = 0; i < isect->size(); ++i)
+        {
+          if (!isect->test(i))
+            continue;
+          if (i < isect_ancs->size() && isect_ancs->test(i))
+            continue;
+          safe_insert(leaves, i);
+        }
+    }
+
+  I(leaves.size() == 1);
+  anc = revision_id(intern.lookup(*leaves.begin()), origin::internal);
+}
+
+static void
+add_bitset_to_union(shared_bitmap src,
+                    shared_bitmap dst)
+{
+  if (dst->size() > src->size())
+    src->resize(dst->size());
+  if (src->size() > dst->size())
+    dst->resize(src->size());
+  *dst |= *src;
+}
+
+
+static void
+calculate_ancestors_from_graph(interner<ctx> & intern,
+                               revision_id const & init,
+                               multimap<revision_id, revision_id> const & graph,
+                               map< ctx, shared_bitmap > & ancestors,
+                               shared_bitmap & total_union)
+{
+  typedef multimap<revision_id, revision_id>::const_iterator gi;
+  stack<ctx> stk;
+
+  stk.push(intern.intern(init.inner()()));
+
+  while (! stk.empty())
+    {
+      ctx us = stk.top();
+      revision_id rev(intern.lookup(us), origin::internal);
+
+      pair<gi,gi> parents = graph.equal_range(rev);
+      bool pushed = false;
+
+      // first make sure all parents are done
+      for (gi i = parents.first; i != parents.second; ++i)
+        {
+          ctx parent = intern.intern(i->second.inner()());
+          if (ancestors.find(parent) == ancestors.end())
+            {
+              stk.push(parent);
+              pushed = true;
+              break;
+            }
+        }
+
+      // if we pushed anything we stop now. we'll come back later when all
+      // the parents are done.
+      if (pushed)
+        continue;
+
+      shared_bitmap b = shared_bitmap(new bitmap());
+
+      for (gi i = parents.first; i != parents.second; ++i)
+        {
+          ctx parent = intern.intern(i->second.inner()());
+
+          // set all parents
+          if (b->size() <= parent)
+            b->resize(parent + 1);
+          b->set(parent);
+
+          // ensure all parents are loaded into the ancestor map
+          I(ancestors.find(parent) != ancestors.end());
+
+          // union them into our map
+          map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
+          I(j != ancestors.end());
+          add_bitset_to_union(j->second, b);
+        }
+
+      add_bitset_to_union(b, total_union);
+      ancestors.insert(make_pair(us, b));
+      stk.pop();
+    }
+}
+
+void
+toposort(database & db,
+         set<revision_id> const & revisions,
+         vector<revision_id> & sorted)
+{
+  map<rev_height, revision_id> work;
+
+  for (set<revision_id>::const_iterator i = revisions.begin();
+       i != revisions.end(); ++i) 
+    {
+      rev_height height;
+      db.get_rev_height(*i, height);
+      work.insert(make_pair(height, *i));
+    }
+
+  sorted.clear();
+  
+  for (map<rev_height, revision_id>::const_iterator i = work.begin();
+       i != work.end(); ++i)
+    {
+      sorted.push_back(i->second);
+    }
+}
+
+static void
+accumulate_strict_ancestors(database & db,
+                            revision_id const & start,
+                            set<revision_id> & all_ancestors,
+                            multimap<revision_id, revision_id> const & inverse_graph,
+                            rev_height const & min_height)
+{
+  typedef multimap<revision_id, revision_id>::const_iterator gi;
+
+  vector<revision_id> frontier;
+  frontier.push_back(start);
+
+  while (!frontier.empty())
+    {
+      revision_id rid = frontier.back();
+      frontier.pop_back();
+      pair<gi, gi> parents = inverse_graph.equal_range(rid);
+      for (gi i = parents.first; i != parents.second; ++i)
+        {
+          revision_id const & parent = i->second;
+          if (all_ancestors.find(parent) == all_ancestors.end())
+            {
+              // prune if we're below min_height
+              rev_height h;
+              db.get_rev_height(parent, h);
+              if (h >= min_height)
+                {
+                  all_ancestors.insert(parent);
+                  frontier.push_back(parent);
+                }
+            }
+        }
+    }
+}
+
+// this call is equivalent to running:
+//   erase(remove_if(candidates.begin(), candidates.end(), p));
+//   erase_ancestors(candidates, db);
+// however, by interleaving the two operations, it can in common cases make
+// many fewer calls to the predicate, which can be a significant speed win.
+
+void
+erase_ancestors_and_failures(database & db,
+                             std::set<revision_id> & candidates,
+                             is_failure & p,
+                             multimap<revision_id, revision_id> *inverse_graph_cache_ptr)
+{
+  // Load up the ancestry graph
+  multimap<revision_id, revision_id> inverse_graph;
+  
+  if (candidates.empty())
+    return;
+  
+  if (inverse_graph_cache_ptr == NULL)
+    inverse_graph_cache_ptr = &inverse_graph;
+  if (inverse_graph_cache_ptr->empty())
+  {
+    multimap<revision_id, revision_id> graph;
+    db.get_revision_ancestry(graph);
+    for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
+         i != graph.end(); ++i)
+      inverse_graph_cache_ptr->insert(make_pair(i->second, i->first));
+  }
+
+  // Keep a set of all ancestors that we've traversed -- to avoid
+  // combinatorial explosion.
+  set<revision_id> all_ancestors;
+
+  rev_height min_height;
+  db.get_rev_height(*candidates.begin(), min_height);
+  for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
+    {
+      rev_height h;
+      db.get_rev_height(*it, h);
+      if (h < min_height)
+        min_height = h;
+    }
+
+  vector<revision_id> todo(candidates.begin(), candidates.end());
+  std::random_shuffle(todo.begin(), todo.end());
+
+  size_t predicates = 0;
+  while (!todo.empty())
+    {
+      revision_id rid = todo.back();
+      todo.pop_back();
+      // check if this one has already been eliminated
+      if (all_ancestors.find(rid) != all_ancestors.end())
+        continue;
+      // and then whether it actually should stay in the running:
+      ++predicates;
+      if (p(rid))
+        {
+          candidates.erase(rid);
+          continue;
+        }
+      // okay, it is good enough that all its ancestors should be
+      // eliminated
+      accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height);
+    }
+
+  // now go and eliminate the ancestors
+  for (set<revision_id>::const_iterator i = all_ancestors.begin();
+       i != all_ancestors.end(); ++i)
+    candidates.erase(*i);
+
+  L(FL("called predicate %s times") % predicates);
+}
+
+// This function looks at a set of revisions, and for every pair A, B in that
+// set such that A is an ancestor of B, it erases A.
+
+namespace
+{
+  struct no_failures : public is_failure
+  {
+    virtual bool operator()(revision_id const & rid)
+    {
+      return false;
+    }
+  };
+}
+void
+erase_ancestors(database & db, set<revision_id> & revisions)
+{
+  no_failures p;
+  erase_ancestors_and_failures(db, revisions, p);
+}
+
+// This function takes a revision A and a set of revision Bs, calculates the
+// ancestry of each, and returns the set of revisions that are in A's ancestry
+// but not in the ancestry of any of the Bs.  It tells you 'what's new' in A
+// that's not in the Bs.  If the output set if non-empty, then A will
+// certainly be in it; but the output set might be empty.
+void
+ancestry_difference(database & db, revision_id const & a,
+                    set<revision_id> const & bs,
+                    set<revision_id> & new_stuff)
+{
+  new_stuff.clear();
+  typedef multimap<revision_id, revision_id>::const_iterator gi;
+  multimap<revision_id, revision_id> graph;
+  multimap<revision_id, revision_id> inverse_graph;
+
+  db.get_revision_ancestry(graph);
+  for (gi i = graph.begin(); i != graph.end(); ++i)
+    inverse_graph.insert(make_pair(i->second, i->first));
+
+  interner<ctx> intern;
+  map< ctx, shared_bitmap > ancestors;
+
+  shared_bitmap u = shared_bitmap(new bitmap());
+
+  for (set<revision_id>::const_iterator i = bs.begin();
+       i != bs.end(); ++i)
+    {
+      calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
+      ctx c = intern.intern(i->inner()());
+      if (u->size() <= c)
+        u->resize(c + 1);
+      u->set(c);
+    }
+
+  shared_bitmap au = shared_bitmap(new bitmap());
+  calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
+  {
+    ctx c = intern.intern(a.inner()());
+    if (au->size() <= c)
+      au->resize(c + 1);
+    au->set(c);
+  }
+
+  au->resize(max(au->size(), u->size()));
+  u->resize(max(au->size(), u->size()));
+
+  *au -= *u;
+
+  for (unsigned int i = 0; i != au->size(); ++i)
+  {
+    if (au->test(i))
+      {
+        revision_id rid(intern.lookup(i), origin::internal);
+        if (!null_id(rid))
+          new_stuff.insert(rid);
+      }
+  }
+}
+
+void
+select_nodes_modified_by_rev(database & db,
+                             revision_t const & rev,
+                             roster_t const new_roster,
+                             set<node_id> & nodes_modified)
+{
+  nodes_modified.clear();
+
+  for (edge_map::const_iterator i = rev.edges.begin();
+       i != rev.edges.end(); ++i)
+    {
+      set<node_id> edge_nodes_modified;
+      roster_t old_roster;
+      db.get_roster(edge_old_revision(i), old_roster);
+      select_nodes_modified_by_cset(edge_changes(i),
+                                    old_roster,
+                                    new_roster,
+                                    edge_nodes_modified);
+
+      copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
+                inserter(nodes_modified, nodes_modified.begin()));
+    }
+}
+
+// These functions create new ancestry!
+
+namespace {
+  struct true_node_id_source
+    : public node_id_source
+  {
+    true_node_id_source(database & db) : db(db) {}
+    virtual node_id next()
+    {
+      node_id n = db.next_node_id();
+      I(!temp_node(n));
+      return n;
+    }
+    database & db;
+  };
+}
+
+// WARNING: these functions have no unit tests.  All the real work
+// should be done in the alternative overloads in roster.cc, where it
+// can be unit tested.  (See comments in that file for further explanation.)
+static void
+make_roster_for_merge(revision_t const & rev, revision_id const & new_rid,
+		      roster_t & new_roster, marking_map & new_markings,
+		      database & db, node_id_source & nis)
+{
+  edge_map::const_iterator i = rev.edges.begin();
+  revision_id const & left_rid = edge_old_revision(i);
+  cset const & left_cs = edge_changes(i);
+  ++i;
+  revision_id const & right_rid = edge_old_revision(i);
+  cset const & right_cs = edge_changes(i);
+
+  I(!null_id(left_rid) && !null_id(right_rid));
+  cached_roster left_cached, right_cached;
+  db.get_roster(left_rid, left_cached);
+  db.get_roster(right_rid, right_cached);
+
+  set<revision_id> left_uncommon_ancestors, right_uncommon_ancestors;
+  db.get_uncommon_ancestors(left_rid, right_rid,
+			    left_uncommon_ancestors,
+			    right_uncommon_ancestors);
+
+  make_roster_for_merge(left_rid, *left_cached.first, *left_cached.second,
+			left_cs, left_uncommon_ancestors,
+			right_rid, *right_cached.first, *right_cached.second,
+			right_cs, right_uncommon_ancestors,
+			new_rid,
+			new_roster, new_markings,
+			nis);
+}
+
+static void
+make_roster_for_nonmerge(revision_t const & rev,
+                         revision_id const & new_rid,
+                         roster_t & new_roster, marking_map & new_markings,
+                         database & db, node_id_source & nis)
+{
+  revision_id const & parent_rid = edge_old_revision(rev.edges.begin());
+  cset const & parent_cs = edge_changes(rev.edges.begin());
+  db.get_roster(parent_rid, new_roster, new_markings);
+  make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis);
+}
+
+void
+make_roster_for_revision(database & db, node_id_source & nis,
+                         revision_t const & rev, revision_id const & new_rid,
+                         roster_t & new_roster, marking_map & new_markings)
+{
+  MM(rev);
+  MM(new_rid);
+  MM(new_roster);
+  MM(new_markings);
+  if (rev.edges.size() == 1)
+    make_roster_for_nonmerge(rev, new_rid, new_roster, new_markings, db, nis);
+  else if (rev.edges.size() == 2)
+    make_roster_for_merge(rev, new_rid, new_roster, new_markings, db, nis);
+  else
+    I(false);
+
+  // If nis is not a true_node_id_source, we have to assume we can get temp
+  // node ids out of it.  ??? Provide a predicate method on node_id_sources
+  // instead of doing a typeinfo comparison.
+  new_roster.check_sane_against(new_markings,
+                                typeid(nis) != typeid(true_node_id_source));
+}
+
+void
+make_roster_for_revision(database & db,
+                         revision_t const & rev, revision_id const & new_rid,
+                         roster_t & new_roster, marking_map & new_markings)
+{
+  true_node_id_source nis(db);
+  make_roster_for_revision(db, nis, rev, new_rid, new_roster, new_markings);
+}
+
+
+
+
+// Local Variables:
+// mode: C++
+// fill-column: 76
+// c-file-style: "gnu"
+// indent-tabs-mode: nil
+// End:
+// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
============================================================
--- Makefile.am	61aa922983ae89a8bb50cf5ad203cbadf1e943fb
+++ Makefile.am	607163f9d88dec6858dfada0e5b0169fa5ec7bb4
@@ -53,7 +53,7 @@ MOST_SOURCES = 								\
 	merkle_tree.cc merkle_tree.hh 					\
 	lcs.cc lcs.hh							\
 	rcs_import.cc rcs_import.hh 					\
-	revision.cc revision.hh						\
+	revision.cc ancestry.cc revision.hh				\
 	cset.cc cset.hh							\
 	roster.cc roster.hh 						\
 	mt_version.cc mt_version.hh					\
@@ -313,11 +313,11 @@ UNIT_TEST_OBJ_SUPPORT =							\
 # testing, but can be used "as is" from the main build.  (many of
 # these _should_ have unit tests, but they haven't been written yet.)
 UNIT_TEST_OBJ_SUPPORT =							\
-	mtn-cert.$(OBJEXT) mtn-constants.$(OBJEXT)			\
-	mtn-database.$(OBJEXT) mtn-epoch.$(OBJEXT)			\
-	mtn-file_io.$(OBJEXT) mtn-gzip.$(OBJEXT) mtn-hmac.$(OBJEXT)	\
-	mtn-inodeprint.$(OBJEXT) mtn-key_store.$(OBJEXT)		\
-	mtn-keys.$(OBJEXT) mtn-lcs.$(OBJEXT) mtn-legacy.$(OBJEXT)	\
+	mtn-ancestry.$(OBJEXT) mtn-cert.$(OBJEXT)			\
+	mtn-constants.$(OBJEXT) mtn-database.$(OBJEXT)			\
+	mtn-epoch.$(OBJEXT) mtn-file_io.$(OBJEXT) mtn-gzip.$(OBJEXT)	\
+	mtn-hmac.$(OBJEXT) mtn-inodeprint.$(OBJEXT)			\
+	mtn-key_store.$(OBJEXT) mtn-keys.$(OBJEXT) mtn-lcs.$(OBJEXT)	\
 	mtn-lua.$(OBJEXT) mtn-lua_hooks.$(OBJEXT)			\
 	mtn-merkle_tree.$(OBJEXT) mtn-pcrewrap.$(OBJEXT)		\
 	mtn-project.$(OBJEXT) mtn-sanity.$(OBJEXT)			\
============================================================
--- database.hh	31ba75343ca21813eb814b814d6576440f4f0cfc
+++ database.hh	a4a07097a63f5877e7d4b5ca36f39ad263ff685c
@@ -443,51 +443,6 @@ void check_db(database & db);
 // not a member function, defined in database_check.cc
 void check_db(database & db);
 
-// Parent maps are used in a number of places to keep track of all the
-// parent rosters of a given revision.
-
-inline revision_id const & parent_id(parent_entry const & p)
-{
-  return p.first;
-}
-
-inline revision_id const & parent_id(parent_map::const_iterator i)
-{
-  return i->first;
-}
-
-inline cached_roster const &
-parent_cached_roster(parent_entry const & p)
-{
-  return p.second;
-}
-
-inline cached_roster const &
-parent_cached_roster(parent_map::const_iterator i)
-{
-  return i->second;
-}
-
-inline roster_t const & parent_roster(parent_entry const & p)
-{
-  return *(p.second.first);
-}
-
-inline roster_t const & parent_roster(parent_map::const_iterator i)
-{
-  return *(i->second.first);
-}
-
-inline marking_map const & parent_marking(parent_entry const & p)
-{
-  return *(p.second.second);
-}
-
-inline marking_map const & parent_marking(parent_map::const_iterator i)
-{
-  return *(i->second.second);
-}
-
 // Transaction guards nest. Acquire one in any scope you'd like
 // transaction-protected, and it'll make sure the db aborts a transaction
 // if there's any exception before you call commit().
============================================================
--- rev_types.hh	99327382c7c617bc5965569a55d7789d95b0fa99
+++ rev_types.hh	af3c5cf58a78153d7b6a515a25dd4d6d584b851f
@@ -35,6 +35,12 @@ struct editable_tree;
 struct cset;
 struct editable_tree;
 
+const node_id first_temp_node = 1U << (sizeof(node_id) * 8 - 1);
+inline bool temp_node(node_id n)
+{
+  return n & first_temp_node;
+}
+
 // full definitions in graph.hh
 struct rev_graph;
 struct reconstruction_graph;
============================================================
--- revision.cc	c265c4dfdc6ad23d44811a145d47cc43c263fa3b
+++ revision.cc	5b5f6c11cf218a22285d6939f14a1d5d64d3e482
@@ -8,35 +8,19 @@
 // PURPOSE.
 
 #include "base.hh"
-#include "sanity.hh"
 #include "revision.hh"
-#include "cset.hh"
-#include "rev_height.hh"
 #include "roster.hh"
-#include "database.hh"
 
-#include "interner.hh"
+#include "sanity.hh"
 #include "basic_io.hh"
 #include "transforms.hh"
 
 #include "safe_map.hh"
-#include "vector.hh"
-#include <set>
-#include <stack>
-#include <boost/dynamic_bitset.hpp>
 #include <boost/shared_ptr.hpp>
 
 using std::make_pair;
 using std::map;
-using std::max;
-using std::multimap;
-using std::pair;
-using std::set;
-using std::stack;
 using std::string;
-using std::vector;
-
-using boost::dynamic_bitset;
 using boost::shared_ptr;
 
 void revision_t::check_sane() const
@@ -106,444 +90,7 @@ revision_t::operator=(revision_t const &
   return *this;
 }
 
-
-// For a surprisingly long time, we have been using an algorithm which
-// is nonsense, based on a misunderstanding of what "LCA" means. The
-// LCA of two nodes is *not* the first common ancestor which you find
-// when iteratively expanding their ancestor sets. Instead, the LCA is
-// the common ancestor which is a descendent of all other common
-// ancestors.
-//
-// In general, a set of nodes in a DAG doesn't always have an
-// LCA. There might be multiple common ancestors which are not parents
-// of one another. So we implement something which is "functionally
-// useful" for finding a merge point (and moreover, which always
-// terminates): we find an LCA of the input set if it exists,
-// otherwise we replace the input set with the nodes we did find and
-// repeat.
-//
-// All previous discussions in monotone-land, before say August 2005,
-// of LCA (and LCAD) are essentially wrong due to our silly
-// misunderstanding. It's unfortunate, but our half-baked
-// approximations worked almost well enough to take us through 3 years
-// of deployed use. Hopefully this more accurate new use will serve us
-// even longer.
-
-typedef unsigned long ctx;
-typedef dynamic_bitset<> bitmap;
-typedef shared_ptr<bitmap> shared_bitmap;
-
-static void
-calculate_ancestors_from_graph(interner<ctx> & intern,
-                               revision_id const & init,
-                               multimap<revision_id, revision_id> const & graph,
-                               map< ctx, shared_bitmap > & ancestors,
-                               shared_bitmap & total_union);
-
 void
-find_common_ancestor_for_merge(database & db,
-                               revision_id const & left,
-                               revision_id const & right,
-                               revision_id & anc)
-{
-  interner<ctx> intern;
-  set<ctx> leaves;
-  map<ctx, shared_bitmap> ancestors;
-
-  shared_bitmap isect = shared_bitmap(new bitmap());
-  shared_bitmap isect_ancs = shared_bitmap(new bitmap());
-
-  leaves.insert(intern.intern(left.inner()()));
-  leaves.insert(intern.intern(right.inner()()));
-
-
-  multimap<revision_id, revision_id> inverse_graph;
-  {
-    multimap<revision_id, revision_id> graph;
-    db.get_revision_ancestry(graph);
-    typedef multimap<revision_id, revision_id>::const_iterator gi;
-    for (gi i = graph.begin(); i != graph.end(); ++i)
-      inverse_graph.insert(make_pair(i->second, i->first));
-  }
-
-
-  while (leaves.size() != 1)
-    {
-      isect->clear();
-      isect_ancs->clear();
-
-      // First intersect all ancestors of current leaf set
-      for (set<ctx>::const_iterator i = leaves.begin(); i != leaves.end(); ++i)
-        {
-          ctx curr_leaf = *i;
-          shared_bitmap curr_leaf_ancestors;
-          map<ctx, shared_bitmap >::const_iterator j = ancestors.find(*i);
-          if (j != ancestors.end())
-            curr_leaf_ancestors = j->second;
-          else
-            {
-              curr_leaf_ancestors = shared_bitmap(new bitmap());
-              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(curr_leaf),
-                                                                 origin::internal),
-                                             inverse_graph, ancestors,
-                                             curr_leaf_ancestors);
-            }
-          if (isect->size() > curr_leaf_ancestors->size())
-            curr_leaf_ancestors->resize(isect->size());
-
-          if (curr_leaf_ancestors->size() > isect->size())
-            isect->resize(curr_leaf_ancestors->size());
-
-          if (i == leaves.begin())
-            *isect = *curr_leaf_ancestors;
-          else
-            (*isect) &= (*curr_leaf_ancestors);
-        }
-
-      // isect is now the set of common ancestors of leaves, but that is not enough.
-      // We need the set of leaves of isect; to do that we calculate the set of
-      // ancestors of isect, in order to subtract it from isect (below).
-      set<ctx> new_leaves;
-      for (ctx i = 0; i < isect->size(); ++i)
-        {
-          if (isect->test(i))
-            {
-              calculate_ancestors_from_graph(intern, revision_id(intern.lookup(i),
-                                                                 origin::internal),
-                                             inverse_graph, ancestors, isect_ancs);
-            }
-        }
-
-      // Finally, the subtraction step: for any element i of isect, if
-      // it's *not* in isect_ancs, it survives as a new leaf.
-      leaves.clear();
-      for (ctx i = 0; i < isect->size(); ++i)
-        {
-          if (!isect->test(i))
-            continue;
-          if (i < isect_ancs->size() && isect_ancs->test(i))
-            continue;
-          safe_insert(leaves, i);
-        }
-    }
-
-  I(leaves.size() == 1);
-  anc = revision_id(intern.lookup(*leaves.begin()), origin::internal);
-}
-
-static void
-add_bitset_to_union(shared_bitmap src,
-                    shared_bitmap dst)
-{
-  if (dst->size() > src->size())
-    src->resize(dst->size());
-  if (src->size() > dst->size())
-    dst->resize(src->size());
-  *dst |= *src;
-}
-
-
-static void
-calculate_ancestors_from_graph(interner<ctx> & intern,
-                               revision_id const & init,
-                               multimap<revision_id, revision_id> const & graph,
-                               map< ctx, shared_bitmap > & ancestors,
-                               shared_bitmap & total_union)
-{
-  typedef multimap<revision_id, revision_id>::const_iterator gi;
-  stack<ctx> stk;
-
-  stk.push(intern.intern(init.inner()()));
-
-  while (! stk.empty())
-    {
-      ctx us = stk.top();
-      revision_id rev(intern.lookup(us), origin::internal);
-
-      pair<gi,gi> parents = graph.equal_range(rev);
-      bool pushed = false;
-
-      // first make sure all parents are done
-      for (gi i = parents.first; i != parents.second; ++i)
-        {
-          ctx parent = intern.intern(i->second.inner()());
-          if (ancestors.find(parent) == ancestors.end())
-            {
-              stk.push(parent);
-              pushed = true;
-              break;
-            }
-        }
-
-      // if we pushed anything we stop now. we'll come back later when all
-      // the parents are done.
-      if (pushed)
-        continue;
-
-      shared_bitmap b = shared_bitmap(new bitmap());
-
-      for (gi i = parents.first; i != parents.second; ++i)
-        {
-          ctx parent = intern.intern(i->second.inner()());
-
-          // set all parents
-          if (b->size() <= parent)
-            b->resize(parent + 1);
-          b->set(parent);
-
-          // ensure all parents are loaded into the ancestor map
-          I(ancestors.find(parent) != ancestors.end());
-
-          // union them into our map
-          map< ctx, shared_bitmap >::const_iterator j = ancestors.find(parent);
-          I(j != ancestors.end());
-          add_bitset_to_union(j->second, b);
-        }
-
-      add_bitset_to_union(b, total_union);
-      ancestors.insert(make_pair(us, b));
-      stk.pop();
-    }
-}
-
-void
-toposort(database & db,
-         set<revision_id> const & revisions,
-         vector<revision_id> & sorted)
-{
-  map<rev_height, revision_id> work;
-
-  for (set<revision_id>::const_iterator i = revisions.begin();
-       i != revisions.end(); ++i) 
-    {
-      rev_height height;
-      db.get_rev_height(*i, height);
-      work.insert(make_pair(height, *i));
-    }
-
-  sorted.clear();
-  
-  for (map<rev_height, revision_id>::const_iterator i = work.begin();
-       i != work.end(); ++i)
-    {
-      sorted.push_back(i->second);
-    }
-}
-
-static void
-accumulate_strict_ancestors(database & db,
-                            revision_id const & start,
-                            set<revision_id> & all_ancestors,
-                            multimap<revision_id, revision_id> const & inverse_graph,
-                            rev_height const & min_height)
-{
-  typedef multimap<revision_id, revision_id>::const_iterator gi;
-
-  vector<revision_id> frontier;
-  frontier.push_back(start);
-
-  while (!frontier.empty())
-    {
-      revision_id rid = frontier.back();
-      frontier.pop_back();
-      pair<gi, gi> parents = inverse_graph.equal_range(rid);
-      for (gi i = parents.first; i != parents.second; ++i)
-        {
-          revision_id const & parent = i->second;
-          if (all_ancestors.find(parent) == all_ancestors.end())
-            {
-              // prune if we're below min_height
-              rev_height h;
-              db.get_rev_height(parent, h);
-              if (h >= min_height)
-                {
-                  all_ancestors.insert(parent);
-                  frontier.push_back(parent);
-                }
-            }
-        }
-    }
-}
-
-// this call is equivalent to running:
-//   erase(remove_if(candidates.begin(), candidates.end(), p));
-//   erase_ancestors(candidates, db);
-// however, by interleaving the two operations, it can in common cases make
-// many fewer calls to the predicate, which can be a significant speed win.
-
-void
-erase_ancestors_and_failures(database & db,
-                             std::set<revision_id> & candidates,
-                             is_failure & p,
-                             multimap<revision_id, revision_id> *inverse_graph_cache_ptr)
-{
-  // Load up the ancestry graph
-  multimap<revision_id, revision_id> inverse_graph;
-  
-  if (candidates.empty())
-    return;
-  
-  if (inverse_graph_cache_ptr == NULL)
-    inverse_graph_cache_ptr = &inverse_graph;
-  if (inverse_graph_cache_ptr->empty())
-  {
-    multimap<revision_id, revision_id> graph;
-    db.get_revision_ancestry(graph);
-    for (multimap<revision_id, revision_id>::const_iterator i = graph.begin();
-         i != graph.end(); ++i)
-      inverse_graph_cache_ptr->insert(make_pair(i->second, i->first));
-  }
-
-  // Keep a set of all ancestors that we've traversed -- to avoid
-  // combinatorial explosion.
-  set<revision_id> all_ancestors;
-
-  rev_height min_height;
-  db.get_rev_height(*candidates.begin(), min_height);
-  for (std::set<revision_id>::const_iterator it = candidates.begin(); it != candidates.end(); it++)
-    {
-      rev_height h;
-      db.get_rev_height(*it, h);
-      if (h < min_height)
-        min_height = h;
-    }
-
-  vector<revision_id> todo(candidates.begin(), candidates.end());
-  std::random_shuffle(todo.begin(), todo.end());
-
-  size_t predicates = 0;
-  while (!todo.empty())
-    {
-      revision_id rid = todo.back();
-      todo.pop_back();
-      // check if this one has already been eliminated
-      if (all_ancestors.find(rid) != all_ancestors.end())
-        continue;
-      // and then whether it actually should stay in the running:
-      ++predicates;
-      if (p(rid))
-        {
-          candidates.erase(rid);
-          continue;
-        }
-      // okay, it is good enough that all its ancestors should be
-      // eliminated
-      accumulate_strict_ancestors(db, rid, all_ancestors, *inverse_graph_cache_ptr, min_height);
-    }
-
-  // now go and eliminate the ancestors
-  for (set<revision_id>::const_iterator i = all_ancestors.begin();
-       i != all_ancestors.end(); ++i)
-    candidates.erase(*i);
-
-  L(FL("called predicate %s times") % predicates);
-}
-
-// This function looks at a set of revisions, and for every pair A, B in that
-// set such that A is an ancestor of B, it erases A.
-
-namespace
-{
-  struct no_failures : public is_failure
-  {
-    virtual bool operator()(revision_id const & rid)
-    {
-      return false;
-    }
-  };
-}
-void
-erase_ancestors(database & db, set<revision_id> & revisions)
-{
-  no_failures p;
-  erase_ancestors_and_failures(db, revisions, p);
-}
-
-// This function takes a revision A and a set of revision Bs, calculates the
-// ancestry of each, and returns the set of revisions that are in A's ancestry
-// but not in the ancestry of any of the Bs.  It tells you 'what's new' in A
-// that's not in the Bs.  If the output set if non-empty, then A will
-// certainly be in it; but the output set might be empty.
-void
-ancestry_difference(database & db, revision_id const & a,
-                    set<revision_id> const & bs,
-                    set<revision_id> & new_stuff)
-{
-  new_stuff.clear();
-  typedef multimap<revision_id, revision_id>::const_iterator gi;
-  multimap<revision_id, revision_id> graph;
-  multimap<revision_id, revision_id> inverse_graph;
-
-  db.get_revision_ancestry(graph);
-  for (gi i = graph.begin(); i != graph.end(); ++i)
-    inverse_graph.insert(make_pair(i->second, i->first));
-
-  interner<ctx> intern;
-  map< ctx, shared_bitmap > ancestors;
-
-  shared_bitmap u = shared_bitmap(new bitmap());
-
-  for (set<revision_id>::const_iterator i = bs.begin();
-       i != bs.end(); ++i)
-    {
-      calculate_ancestors_from_graph(intern, *i, inverse_graph, ancestors, u);
-      ctx c = intern.intern(i->inner()());
-      if (u->size() <= c)
-        u->resize(c + 1);
-      u->set(c);
-    }
-
-  shared_bitmap au = shared_bitmap(new bitmap());
-  calculate_ancestors_from_graph(intern, a, inverse_graph, ancestors, au);
-  {
-    ctx c = intern.intern(a.inner()());
-    if (au->size() <= c)
-      au->resize(c + 1);
-    au->set(c);
-  }
-
-  au->resize(max(au->size(), u->size()));
-  u->resize(max(au->size(), u->size()));
-
-  *au -= *u;
-
-  for (unsigned int i = 0; i != au->size(); ++i)
-  {
-    if (au->test(i))
-      {
-        revision_id rid(intern.lookup(i), origin::internal);
-        if (!null_id(rid))
-          new_stuff.insert(rid);
-      }
-  }
-}
-
-void
-select_nodes_modified_by_rev(database & db,
-                             revision_t const & rev,
-                             roster_t const new_roster,
-                             set<node_id> & nodes_modified)
-{
-  nodes_modified.clear();
-
-  for (edge_map::const_iterator i = rev.edges.begin();
-       i != rev.edges.end(); ++i)
-    {
-      set<node_id> edge_nodes_modified;
-      roster_t old_roster;
-      db.get_roster(edge_old_revision(i), old_roster);
-      select_nodes_modified_by_cset(edge_changes(i),
-                                    old_roster,
-                                    new_roster,
-                                    edge_nodes_modified);
-
-      copy(edge_nodes_modified.begin(), edge_nodes_modified.end(),
-                inserter(nodes_modified, nodes_modified.begin()));
-    }
-}
-
-
-void
 make_revision(revision_id const & old_rev_id,
               roster_t const & old_roster,
               roster_t const & new_roster,
============================================================
--- roster.cc	98ea42f99e59a884a7f00dfab6a76334c5b89772
+++ roster.cc	2ffc18c20c2e53c540b7312a125681d60a6778f9
@@ -130,43 +130,32 @@ dump(marking_map const & markings, strin
   out = oss.str();
 }
 
-namespace
-{
-  //
-  // We have a few concepts of "nullness" here:
-  //
-  // - the_null_node is a node_id. It does not correspond to a real node;
-  //   it's an id you use for the parent of the root, or of any node which
-  //   is detached.
-  //
-  // - the root node has a real node id, just like any other directory.
-  //
-  // - the path_component whose string representation is "", the empty
-  //   string, is the *name* of the root node.  write it as
-  //   path_component() and test for it with component.empty().
-  //
-  // - similarly, the file_path whose string representation is "" also
-  //   names the root node.  write it as file_path() and test for it
-  //   with path.empty().
-  //
-  // - there is no file_path or path_component corresponding to the_null_node.
-  //
-  // We do this in order to support the notion of moving the root directory
-  // around, or applying attributes to the root directory.  Note that the
-  // only supported way to move the root is with the 'pivot_root' operation,
-  // which atomically turns the root directory into a subdirectory and some
-  // existing subdirectory into the root directory.  This is an UI constraint,
-  // not a constraint at this level.
+//
+// We have a few concepts of "nullness" here:
+//
+// - the_null_node is a node_id. It does not correspond to a real node;
+//   it's an id you use for the parent of the root, or of any node which
+//   is detached.
+//
+// - the root node has a real node id, just like any other directory.
+//
+// - the path_component whose string representation is "", the empty
+//   string, is the *name* of the root node.  write it as
+//   path_component() and test for it with component.empty().
+//
+// - similarly, the file_path whose string representation is "" also
+//   names the root node.  write it as file_path() and test for it
+//   with path.empty().
+//
+// - there is no file_path or path_component corresponding to the_null_node.
+//
+// We do this in order to support the notion of moving the root directory
+// around, or applying attributes to the root directory.  Note that the
+// only supported way to move the root is with the 'pivot_root' operation,
+// which atomically turns the root directory into a subdirectory and some
+// existing subdirectory into the root directory.  This is an UI constraint,
+// not a constraint at this level.
 
-  const node_id first_node = 1;
-  const node_id first_temp_node = widen<node_id, int>(1) << (sizeof(node_id) * 8 - 1);
-  inline bool temp_node(node_id n)
-  {
-    return n & first_temp_node;
-  }
-}
-
-
 node::node(node_id i)
   : self(i),
     parent(the_null_node),
@@ -1213,20 +1202,6 @@ namespace
 
 namespace
 {
-  struct true_node_id_source
-    : public node_id_source
-  {
-    true_node_id_source(database & db) : db(db) {}
-    virtual node_id next()
-    {
-      node_id n = db.next_node_id();
-      I(!temp_node(n));
-      return n;
-    }
-    database & db;
-  };
-
-
   class editable_roster_for_merge
     : public editable_roster_base
   {
@@ -1811,158 +1786,116 @@ namespace {
     marking_map & markings;
   };
 
-  // Interface note: make_roster_for_merge and make_roster_for_nonmerge
-  // each exist in two variants:
-  //
-  // 1. A variant that does all of the actual work, taking every single
-  //    relevant base-level data object as a separate argument.  This
-  //    variant is called directly by the unit tests, and also by variant 2.
-  //
-  // 2. A variant that takes a revision object, a revision ID, a database,
-  //    and a node_id_source.  This variant uses those four things to look
-  //    up all of the low-level data required by variant 1, then calls
-  //    variant 1 to get the real work done.  This is the one called by
-  //    (one variant of) make_roster_for_revision.
+} // anonymous namespace
 
-  // yes, this function takes 14 arguments.  I'm very sorry.
-  void
-  make_roster_for_merge(revision_id const & left_rid,
-                        roster_t const & left_roster,
-                        marking_map const & left_markings,
-                        cset const & left_cs,
-                        set<revision_id> const & left_uncommon_ancestors,
+// Interface note: make_roster_for_merge and make_roster_for_nonmerge
+// each exist in two variants:
+//
+// 1. A variant that does all of the actual work, taking every single
+//    relevant base-level data object as a separate argument.  This
+//    variant is called directly by the unit tests, and also by variant 2.
+//    It is defined in this file.
+//
+// 2. A variant that takes a revision object, a revision ID, a database,
+//    and a node_id_source.  This variant uses those four things to look
+//    up all of the low-level data required by variant 1, then calls
+//    variant 1 to get the real work done.  This is the one called by
+//    (one variant of) make_roster_for_revision.
+//    It, and make_roster_for_revision, is defined in ancestry.cc.
 
-                        revision_id const & right_rid,
-                        roster_t const & right_roster,
-                        marking_map const & right_markings,
-                        cset const & right_cs,
-                        set<revision_id> const & right_uncommon_ancestors,
+// yes, this function takes 14 arguments.  I'm very sorry.
+void
+make_roster_for_merge(revision_id const & left_rid,
+                      roster_t const & left_roster,
+                      marking_map const & left_markings,
+                      cset const & left_cs,
+                      set<revision_id> const & left_uncommon_ancestors,
 
-                        revision_id const & new_rid,
-                        roster_t & new_roster,
-                        marking_map & new_markings,
-                        node_id_source & nis)
+                      revision_id const & right_rid,
+                      roster_t const & right_roster,
+                      marking_map const & right_markings,
+                      cset const & right_cs,
+                      set<revision_id> const & right_uncommon_ancestors,
+
+                      revision_id const & new_rid,
+                      roster_t & new_roster,
+                      marking_map & new_markings,
+                      node_id_source & nis)
+{
+  I(!null_id(left_rid) && !null_id(right_rid));
+  I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end());
+  I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end());
+  I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end());
+  I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end());
+  MM(left_rid);
+  MM(left_roster);
+  MM(left_markings);
+  MM(left_cs);
+  MM(left_uncommon_ancestors);
+  MM(right_rid);
+  MM(right_roster);
+  MM(right_markings);
+  MM(right_cs);
+  MM(right_uncommon_ancestors);
+  MM(new_rid);
+  MM(new_roster);
+  MM(new_markings);
   {
-    I(!null_id(left_rid) && !null_id(right_rid));
-    I(left_uncommon_ancestors.find(left_rid) != left_uncommon_ancestors.end());
-    I(left_uncommon_ancestors.find(right_rid) == left_uncommon_ancestors.end());
-    I(right_uncommon_ancestors.find(right_rid) != right_uncommon_ancestors.end());
-    I(right_uncommon_ancestors.find(left_rid) == right_uncommon_ancestors.end());
-    MM(left_rid);
-    MM(left_roster);
-    MM(left_markings);
-    MM(left_cs);
-    MM(left_uncommon_ancestors);
-    MM(right_rid);
-    MM(right_roster);
-    MM(right_markings);
-    MM(right_cs);
-    MM(right_uncommon_ancestors);
-    MM(new_rid);
-    MM(new_roster);
-    MM(new_markings);
-    {
-      temp_node_id_source temp_nis;
-      new_roster = left_roster;
-      roster_t from_right_r(right_roster);
-      MM(from_right_r);
+    temp_node_id_source temp_nis;
+    new_roster = left_roster;
+    roster_t from_right_r(right_roster);
+    MM(from_right_r);
 
-      editable_roster_for_merge from_left_er(new_roster, temp_nis);
-      editable_roster_for_merge from_right_er(from_right_r, temp_nis);
+    editable_roster_for_merge from_left_er(new_roster, temp_nis);
+    editable_roster_for_merge from_right_er(from_right_r, temp_nis);
 
-      left_cs.apply_to(from_left_er);
-      right_cs.apply_to(from_right_er);
+    left_cs.apply_to(from_left_er);
+    right_cs.apply_to(from_right_er);
 
-      unify_rosters(new_roster, from_left_er.new_nodes,
-                    from_right_r, from_right_er.new_nodes,
-                    nis);
+    unify_rosters(new_roster, from_left_er.new_nodes,
+                  from_right_r, from_right_er.new_nodes,
+                  nis);
 
-      // Kluge: If both csets have no content changes, and the node_id_source
-      // passed to this function is a temp_node_id_source, then we are being
-      // called from get_current_roster_shape, and we should not attempt to
-      // verify that these rosters match as far as content IDs.
-      if (left_cs.deltas_applied.empty()
-          && right_cs.deltas_applied.empty()
-          && typeid(nis) == typeid(temp_node_id_source))
-        I(equal_shapes(new_roster, from_right_r));
-      else
-        I(new_roster == from_right_r);
-    }
-
-    // SPEEDUP?: instead of constructing new marking from scratch, track which
-    // nodes were modified, and scan only them
-    // load one of the parent markings directly into the new marking map
-    new_markings.clear();
-    mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors,
-                      right_roster, right_markings, right_uncommon_ancestors,
-                      new_rid, new_roster, new_markings);
+    // Kluge: If both csets have no content changes, and the node_id_source
+    // passed to this function is a temp_node_id_source, then we are being
+    // called from get_current_roster_shape, and we should not attempt to
+    // verify that these rosters match as far as content IDs.
+    if (left_cs.deltas_applied.empty()
+        && right_cs.deltas_applied.empty()
+        && typeid(nis) == typeid(temp_node_id_source))
+      I(equal_shapes(new_roster, from_right_r));
+    else
+      I(new_roster == from_right_r);
   }
 
+  // SPEEDUP?: instead of constructing new marking from scratch, track which
+  // nodes were modified, and scan only them
+  // load one of the parent markings directly into the new marking map
+  new_markings.clear();
+  mark_merge_roster(left_roster, left_markings, left_uncommon_ancestors,
+                    right_roster, right_markings, right_uncommon_ancestors,
+                    new_rid, new_roster, new_markings);
+}
 
-  // WARNING: this function is not tested directly (no unit tests).  Do not
-  // put real logic in it.
-  void
-  make_roster_for_merge(revision_t const & rev, revision_id const & new_rid,
-                        roster_t & new_roster, marking_map & new_markings,
-                        database & db, node_id_source & nis)
-  {
-    edge_map::const_iterator i = rev.edges.begin();
-    revision_id const & left_rid = edge_old_revision(i);
-    cset const & left_cs = edge_changes(i);
-    ++i;
-    revision_id const & right_rid = edge_old_revision(i);
-    cset const & right_cs = edge_changes(i);
 
-    I(!null_id(left_rid) && !null_id(right_rid));
-    cached_roster left_cached, right_cached;
-    db.get_roster(left_rid, left_cached);
-    db.get_roster(right_rid, right_cached);
 
-    set<revision_id> left_uncommon_ancestors, right_uncommon_ancestors;
-    db.get_uncommon_ancestors(left_rid, right_rid,
-                                  left_uncommon_ancestors,
-                                  right_uncommon_ancestors);
-
-    make_roster_for_merge(left_rid, *left_cached.first, *left_cached.second,
-                          left_cs, left_uncommon_ancestors,
-                          right_rid, *right_cached.first, *right_cached.second,
-                          right_cs, right_uncommon_ancestors,
-                          new_rid,
-                          new_roster, new_markings,
-                          nis);
-  }
-
-  // Warning: this function expects the parent's roster and markings in the
-  // 'new_roster' and 'new_markings' parameters, and they are modified
-  // destructively!
-  // This function performs an almost identical task to
-  // mark_roster_with_one_parent; however, for efficiency, it is implemented
-  // in a different, destructive way.
-  void
-  make_roster_for_nonmerge(cset const & cs,
-                           revision_id const & new_rid,
-                           roster_t & new_roster, marking_map & new_markings,
-                           node_id_source & nis)
-  {
-    editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings);
-    cs.apply_to(er);
-  }
-
-  // WARNING: this function is not tested directly (no unit tests).  Do not
-  // put real logic in it.
-  void
-  make_roster_for_nonmerge(revision_t const & rev,
-                           revision_id const & new_rid,
-                           roster_t & new_roster, marking_map & new_markings,
-                           database & db, node_id_source & nis)
-  {
-    revision_id const & parent_rid = edge_old_revision(rev.edges.begin());
-    cset const & parent_cs = edge_changes(rev.edges.begin());
-    db.get_roster(parent_rid, new_roster, new_markings);
-    make_roster_for_nonmerge(parent_cs, new_rid, new_roster, new_markings, nis);
-  }
+// Warning: this function expects the parent's roster and markings in the
+// 'new_roster' and 'new_markings' parameters, and they are modified
+// destructively!
+// This function performs an almost identical task to
+// mark_roster_with_one_parent; however, for efficiency, it is implemented
+// in a different, destructive way.
+void
+make_roster_for_nonmerge(cset const & cs,
+                         revision_id const & new_rid,
+                         roster_t & new_roster, marking_map & new_markings,
+                         node_id_source & nis)
+{
+  editable_roster_for_nonmerge er(new_roster, nis, new_rid, new_markings);
+  cs.apply_to(er);
 }
 
+
 void
 mark_roster_with_no_parents(revision_id const & rid,
                             roster_t const & roster,
@@ -2006,41 +1939,7 @@ mark_roster_with_one_parent(roster_t con
   child.check_sane_against(child_markings, true);
 }
 
-// WARNING: this function is not tested directly (no unit tests).  Do not put
-// real logic in it.
-void
-make_roster_for_revision(database & db, node_id_source & nis,
-                         revision_t const & rev, revision_id const & new_rid,
-                         roster_t & new_roster, marking_map & new_markings)
-{
-  MM(rev);
-  MM(new_rid);
-  MM(new_roster);
-  MM(new_markings);
-  if (rev.edges.size() == 1)
-    make_roster_for_nonmerge(rev, new_rid, new_roster, new_markings, db, nis);
-  else if (rev.edges.size() == 2)
-    make_roster_for_merge(rev, new_rid, new_roster, new_markings, db, nis);
-  else
-    I(false);
 
-  // If nis is not a true_node_id_source, we have to assume we can get temp
-  // node ids out of it.  ??? Provide a predicate method on node_id_sources
-  // instead of doing a typeinfo comparison.
-  new_roster.check_sane_against(new_markings,
-                                typeid(nis) != typeid(true_node_id_source));
-}
-
-void
-make_roster_for_revision(database & db,
-                         revision_t const & rev, revision_id const & new_rid,
-                         roster_t & new_roster, marking_map & new_markings)
-{
-  true_node_id_source nis(db);
-  make_roster_for_revision(db, nis, rev, new_rid, new_roster, new_markings);
-}
-
-
 ////////////////////////////////////////////////////////////////////
 //   Calculation of a cset
 ////////////////////////////////////////////////////////////////////
@@ -3233,6 +3132,7 @@ void perform_random_action(roster_t & r,
   apply_cset_and_do_testing(r, c, nis);
 }
 
+const node_id first_node = 1;
 testing_node_id_source::testing_node_id_source()
   : curr(first_node)
 {}
============================================================
--- roster.hh	61c885cf725e7b20bbbe5a8e595904a4edc38c14
+++ roster.hh	ef1b40df5043af074ee007ea5efdb726e1b4112f
@@ -363,6 +363,35 @@ mark_merge_roster(roster_t const & left_
                   roster_t const & merge,
                   marking_map & new_markings);
 
+// These functions are an internal interface between ancestry.cc and
+// roster.cc; unless you know exactly what you're doing you probably want
+// something else.
+
+void
+make_roster_for_merge(revision_id const & left_rid,
+                      roster_t const & left_roster,
+                      marking_map const & left_markings,
+                      cset const & left_cs,
+                      std::set<revision_id> const & left_uncommon_ancestors,
+
+                      revision_id const & right_rid,
+                      roster_t const & right_roster,
+                      marking_map const & right_markings,
+                      cset const & right_cs,
+                      std::set<revision_id> const & right_uncommon_ancestors,
+
+                      revision_id const & new_rid,
+                      roster_t & new_roster,
+                      marking_map & new_markings,
+                      node_id_source & nis);
+
+void
+make_roster_for_nonmerge(cset const & cs,
+                         revision_id const & new_rid,
+                         roster_t & new_roster, marking_map & new_markings,
+                         node_id_source & nis);
+
+
 // This is for revisions that are being written to the db, only.  It assigns
 // permanent node ids.
 void
@@ -404,6 +433,51 @@ void parse_marking(basic_io::parser & pa
 void push_marking(basic_io::stanza & st, bool is_file, marking_t const & mark);
 void parse_marking(basic_io::parser & pa, marking_t & marking);
 
+// Parent maps are used in a number of places to keep track of all the
+// parent rosters of a given revision.
+
+inline revision_id const & parent_id(parent_entry const & p)
+{
+  return p.first;
+}
+
+inline revision_id const & parent_id(parent_map::const_iterator i)
+{
+  return i->first;
+}
+
+inline cached_roster const &
+parent_cached_roster(parent_entry const & p)
+{
+  return p.second;
+}
+
+inline cached_roster const &
+parent_cached_roster(parent_map::const_iterator i)
+{
+  return i->second;
+}
+
+inline roster_t const & parent_roster(parent_entry const & p)
+{
+  return *(p.second.first);
+}
+
+inline roster_t const & parent_roster(parent_map::const_iterator i)
+{
+  return *(i->second.first);
+}
+
+inline marking_map const & parent_marking(parent_entry const & p)
+{
+  return *(p.second.second);
+}
+
+inline marking_map const & parent_marking(parent_map::const_iterator i)
+{
+  return *(i->second.second);
+}
+
 #ifdef BUILD_UNIT_TESTS
 
 struct testing_node_id_source