# # # add_dir "tests/(minor)_update_cleans_emptied_directories" # # add_dir "tests/merge(<>,_)" # # add_dir "tests/merge(<>,_)" # # add_dir "tests/merge(<>,_)" # # add_dir "tests/merge(,_)" # # add_dir "tests/merging__with_" # # add_dir "tests/merging_an_add_edge" # # add_dir "tests/mtn_add_." # # add_dir "tests/tags_and_tagging_of_revisions" # # add_dir "tests/test_a_merge_2" # # add_file "tests/(minor)_update_cleans_emptied_directories/__driver__.lua" # content [41b21eae0ae20ea501388ca9c1a90710ea5cf501] # # add_file "tests/merge(<>,_)/__driver__.lua" # content [b0c2c29a5d98eff77f2fc62a12987ef53a05420f] # # add_file "tests/merge(<>,_)/__driver__.lua" # content [7a40015398252bda3fca04a1af6ba108f5633b65] # # add_file "tests/merge(<>,_)/__driver__.lua" # content [606e29b2a9ee0908c8e6cd49deca0f4924b9e48f] # # add_file "tests/merge(,_)/__driver__.lua" # content [91b4b480be1eb7445c73c4a9d889c1237a7700bb] # # add_file "tests/merging__with_/__driver__.lua" # content [a85e07e9d549c8faa150e10957f529a8cead3948] # # add_file "tests/merging_an_add_edge/__driver__.lua" # content [73d4d648e483f04fe35c9d70f127b080870f2bed] # # add_file "tests/mtn_add_./__driver__.lua" # content [f415932a2ab7d4b107f73369c67c58be9c6d05e3] # # add_file "tests/tags_and_tagging_of_revisions/__driver__.lua" # content [6bfccf13721299af333b1a8abea99594ca9d562c] # # add_file "tests/test_a_merge_2/__driver__.lua" # content [36bed683bb6f955ae6ed03f9f8b4f507ab0c3cca] # # add_file "tests/test_a_merge_2/correct" # content [1bd5e72a777a6e207f85fb9f86c48c4b9737325a] # # add_file "tests/test_a_merge_2/left" # content [20aea360d9048851ce84102050b4c0d5f05ea589] # # add_file "tests/test_a_merge_2/parent" # content [fec273dc32b623917fe772d21278826fbd9f1368] # # add_file "tests/test_a_merge_2/right" # content [6100e250e2a4b26af3bd65df1b86278a264f2deb] # # patch "tester.lua" # from [fd12e0c5032a3d5fe98c1ba1186c580c24822421] # to [69bd2108dd3803934fca93b26a40ba8f46b746e8] # # patch "testsuite.at" # from [477a9f90a62d5f5b3861e368988bd194ad681d49] # to [e20997eff0240c5470652d33fb39242490d63040] # # patch "testsuite.lua" # from [e3316fe979a171964970a1676312a20da63e399e] # to [c7dc38733d0fc0a03252b55c4fa9583422e83eea] # ============================================================ --- tests/(minor)_update_cleans_emptied_directories/__driver__.lua 41b21eae0ae20ea501388ca9c1a90710ea5cf501 +++ tests/(minor)_update_cleans_emptied_directories/__driver__.lua 41b21eae0ae20ea501388ca9c1a90710ea5cf501 @@ -0,0 +1,20 @@ + +mtn_setup() + +mkdir("testdir") +addfile("testdir/foo", "blah blah blah") +commit() +base = base_revision() + +remove_recursive("testdir") +check(cmd(mtn("drop", "testdir/foo", "testdir")), 0, false, false) +commit() + +revert_to(base) + +check(exists("testdir")) +check(cmd(mtn("update")), 0, false, false) + +check(base ~= base_revision()) + +check(not exists("testdir")) ============================================================ --- tests/merge(<>,_)/__driver__.lua b0c2c29a5d98eff77f2fc62a12987ef53a05420f +++ tests/merge(<>,_)/__driver__.lua b0c2c29a5d98eff77f2fc62a12987ef53a05420f @@ -0,0 +1,30 @@ + +mtn_setup() + +writefile("v1", "foo blah") +writefile("v2", "baz blah") + +addfile("randomfile", "blah blah blah") +commit() +base = base_revision() + +copyfile("v1", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +remove("testfile") +check(cmd(mtn("drop", "testfile")), 0, false, false) +commit() + +copyfile("v2", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +revert_to(base) + 
+addfile("otherfile", "this space for rent") +commit() + +check(cmd(mtn("merge")), 0, false, false) +check(cmd(mtn("update")), 0, false, false) +check(samefile("testfile", "v2")) ============================================================ --- tests/merge(<>,_)/__driver__.lua 7a40015398252bda3fca04a1af6ba108f5633b65 +++ tests/merge(<>,_)/__driver__.lua 7a40015398252bda3fca04a1af6ba108f5633b65 @@ -0,0 +1,34 @@ + +mtn_setup() + +writefile("v1a", "foo blah") +writefile("v1b", "bar blah") +writefile("v2a", "baz blah") + +addfile("randomfile", "blah blah blah") +commit() +base = base_revision() + +copyfile("v1a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +copyfile("v1b", "testfile") +commit() + +remove("testfile") +check(cmd(mtn("drop", "testfile")), 0, false, false) +commit() + +copyfile("v2a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +revert_to(base) + +addfile("otherfile", "this space for rent") +commit() + +check(cmd(mtn("merge")), 0, false, false) +check(cmd(mtn("update")), 0, false, false) +check(samefile("testfile", "v2a")) ============================================================ --- tests/merge(<>,_)/__driver__.lua 606e29b2a9ee0908c8e6cd49deca0f4924b9e48f +++ tests/merge(<>,_)/__driver__.lua 606e29b2a9ee0908c8e6cd49deca0f4924b9e48f @@ -0,0 +1,31 @@ + +mtn_setup() + +writefile("v1a", "foo blah") +writefile("v1b", "bar blah") +writefile("v2a", "baz blah") + +copyfile("v1a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() +base = base_revision() + +copyfile("v1b", "testfile") +commit() + +remove("testfile") +check(cmd(mtn("drop", "testfile")), 0, false, false) +commit() + +copyfile("v2a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +revert_to(base) + +addfile("otherfile", "this space for rent") +commit() + +check(cmd(mtn("merge")), 0, false, false) +check(cmd(mtn("update")), 0, false, false) +check(samefile("testfile", "v2a")) ============================================================ --- tests/merge(,_)/__driver__.lua 91b4b480be1eb7445c73c4a9d889c1237a7700bb +++ tests/merge(,_)/__driver__.lua 91b4b480be1eb7445c73c4a9d889c1237a7700bb @@ -0,0 +1,39 @@ + +mtn_setup() + +-- This test relies on file-suturing + +writefile("right_1_a", "foo blah") +writefile("right_1_b", "bar blah") +writefile("right_2_a", "baz blah") + +writefile("left", "quux blah") + +addfile("otherfile", "this space for rent") +commit() +base = base_revision() + +copyfile("right_1_a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +copyfile("right_1_b", "testfile") +commit() + +remove("testfile") +check(cmd(mtn("drop", "testfile")), 0, false, false) +commit() + +copyfile("right_2_a", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +revert_to(base) + +copyfile("left", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() + +xfail_if(true, cmd(mtn("merge")), 0, false, false) +check(cmd(mtn("update")), 0, false, false) +check(samefile("testfile", "right_2_a") or samefile("testfile", "left")) ============================================================ --- tests/merging__with_/__driver__.lua a85e07e9d549c8faa150e10957f529a8cead3948 +++ tests/merging__with_/__driver__.lua a85e07e9d549c8faa150e10957f529a8cead3948 @@ -0,0 +1,76 @@ + +mtn_setup() + +-- We want a graph which looks like: +-- A +-- / | \ +-- F C G +-- |/ | | +-- D E | +-- \ / +-- B + +-- B and D are heads of branch.main and branch.fork respectively, we 
want to +-- "propagate branch.main branch.fork". + +-- The revs F, C, and D are members of branch.fork. +-- A, C, E, G, and B are members of branch.main (C is shared) + +-- C is "add bar", E is "drop bar", other revisions involve non-conflicting +-- file additions or merges. + +writefile("foo", "extra blah blah foo") +writefile("bar", "extra blah blah bar") +writefile("quux", "extra blah blah quux") +writefile("iced", "stuff here") + +revs = {} + +-- produce state A +check(cmd(mtn("add", "foo")), 0, false, false) +commit("branch.main") +revs.a = base_revision() + +-- produce state C +check(cmd(mtn("add", "bar")), 0, false, false) +commit("branch.main") +revs.c = base_revision() +check(cmd(mtn("cert", revs.c, "branch", "branch.fork"))) + +-- produce state F +revert_to(revs.a) +check(cmd(mtn("add", "iced")), 0, false, false) +commit("branch.fork") +revs.f = base_revision() + +-- merge heads of branch.fork to make D +check(cmd(mtn("--branch=branch.fork", "merge")), 0, false, false) + +-- produce state E +revert_to(revs.c, "branch.main") +check(cmd(mtn("drop", "bar")), 0, false, false) +commit("branch.main") +revs.e = base_revision() + +-- state G +revert_to(revs.a) +check(cmd(mtn("add", "quux")), 0, false, false) +commit("branch.main") +revs.g = base_revision() + +-- merge to get state B +check(cmd(mtn("--branch=branch.main", "merge")), 0, false, false) + +-- now try the propagate +check(cmd(mtn("propagate", "branch.main", "branch.fork")), 0, false, false) + +-- check +remove_recursive("_MTN") +check(cmd(mtn("--branch=branch.fork", "checkout", "."))) + +check(cmd(mtn("automate", "get_manifest_of")), 0, true) +rename("stdout", "manifest") +check(not qgrep("bar", "manifest")) +check(qgrep("quux", "manifest")) +check(qgrep("foo", "manifest")) +check(qgrep("iced", "manifest")) ============================================================ --- tests/merging_an_add_edge/__driver__.lua 73d4d648e483f04fe35c9d70f127b080870f2bed +++ tests/merging_an_add_edge/__driver__.lua 73d4d648e483f04fe35c9d70f127b080870f2bed @@ -0,0 +1,32 @@ + +mtn_setup() + +-- This test relies on file-suturing + +mkdir("zz") + +addfile("ancfile", "ancestral file") +addfile("zz/testfile0", "added file") +commit() +anc = base_revision() + + +addfile("zz/testfile1", "added file") +commit() + +revert_to(anc) + +writefile("ancfile", "changed anc") + +addfile("zz/testfile1", "added file") + +commit() + +xfail_if(true, cmd(mtn("--branch=testbranch", "merge")), 0, false, false) +check(cmd(mtn("update")), 0, false, false) + +merged = base_revision() + +check(cmd(mtn("automate", "get_revision", merged)), 0, true) +rename("stdout", "rev") +check(not qgrep("add_file", "rev")) ============================================================ --- tests/mtn_add_./__driver__.lua f415932a2ab7d4b107f73369c67c58be9c6d05e3 +++ tests/mtn_add_./__driver__.lua f415932a2ab7d4b107f73369c67c58be9c6d05e3 @@ -0,0 +1,32 @@ + +mtn_setup() + +mkdir("subdir") + +writefile("subdir/testfile1", "foo") +writefile("subdir/testfile2", "bar") +mkdir("subdir/testdir1") +writefile("subdir/testdir1/subfile1", "baz") +writefile("subdir/testdir1/subfile2", "quux") + +check(cmd(mtn("setup", "--branch=testbranch", "subdir")), 0, false, false) + +-- Make sure that "add ." 
works, even at the root of the tree +chdir("subdir") +check(cmd(mtn("add", ".")), 0, false, false) + +-- Make sure that it took +check(cmd(mtn("commit", "--message=foo")), 0, false, false) +chdir("..") + +remove("subdir/testfile1") +remove("subdir/testfile2") +remove_recursive("subdir/testdir1") +chdir("subdir") +check(cmd(mtn("revert", ".")), 0, false, false) +chdir("..") + +check(exists("subdir/testfile1")) +check(exists("subdir/testfile2")) +check(exists("subdir/testdir1/subfile1")) +check(exists("subdir/testdir1/subfile2")) ============================================================ --- tests/tags_and_tagging_of_revisions/__driver__.lua 6bfccf13721299af333b1a8abea99594ca9d562c +++ tests/tags_and_tagging_of_revisions/__driver__.lua 6bfccf13721299af333b1a8abea99594ca9d562c @@ -0,0 +1,62 @@ + +mtn_setup() + +writefile("file1", "file 1") +writefile("file2", "file 2") +writefile("file3", "file 3") + +-- make sure a tag of a nonexistent revision fails +check(cmd(mtn("tag", "af2f6c1f3b7892672357a1018124ee80c752475d", "foo")), 1, false, false) + +revs = {} + +-- make and tag revision 1 + +check(cmd(mtn("add", "file1")), 0, false, false) +commit() +revs[1] = base_revision() +check(cmd(mtn("tag", revs[1], "tag1")), 0, false, false) + +-- make and tag revision 2 + +check(cmd(mtn("add", "file2")), 0, false, false) +commit() +revs[2] = base_revision() +check(cmd(mtn("tag", revs[2], "tag2")), 0, false, false) + +-- make and tag revision 3 + +check(cmd(mtn("add", "file3")), 0, false, false) +commit() +revs[3] = base_revision() +check(cmd(mtn("tag", revs[3], "tag3")), 0, true, true) + +-- check tags created above + +check(cmd(mtn("ls", "tags")), 0, true, false) +check(qgrep("tag1", "stdout")) +check(qgrep("tag2", "stdout")) +check(qgrep("tag3", "stdout")) + +-- make sure 'ls tags' output is sorted +if existsonpath("sort") then + canonicalize("stdout") + copyfile("stdout", "stdin") + rename("stdout", "stdout-orig") + check(cmd("sort"), 0, readfile("stdout-orig"), false, true) +end + +for i,x in {{true, false, false}, + {true, true, false}, + {true, true, true}} do + remove_recursive("_MTN") + remove("file1") + remove("file2") + remove("file3") + + check(cmd(mtn("co", "--revision=tag"..i, ".")), 0, false, false) + check(base_revision() == revs[i]) + for j = 1,3 do + check(exists("file"..j) == x[j]) + end +end ============================================================ --- tests/test_a_merge_2/__driver__.lua 36bed683bb6f955ae6ed03f9f8b4f507ab0c3cca +++ tests/test_a_merge_2/__driver__.lua 36bed683bb6f955ae6ed03f9f8b4f507ab0c3cca @@ -0,0 +1,43 @@ + +mtn_setup() + +-- Another real merge error. 
This is the diff between the correct +-- result and the file produced by the merge: +-- +-- --- correct +-- +++ testfile +-- @@ -336,10 +336,10 @@ +-- { +-- L(F("found node %d, ancestor of left %s and right %s\n") +-- % anc % left % right); +-- - return true; +-- } +-- } +-- // dump_bitset_map("ancestors", ancestors); +-- +// dump_bitset_map("dominators", dominators); +-- // dump_bitset_map("parents", parents); +-- return false; +-- } + +getfile("parent") +getfile("left") +getfile("right") +getfile("correct") + +copyfile("parent", "testfile") +check(cmd(mtn("add", "testfile")), 0, false, false) +commit() +parent = base_revision() + +copyfile("left", "testfile") +commit() + +revert_to(parent) + +copyfile("right", "testfile") +commit() + +check(cmd(mtn("merge")), 0, false, false) + +check(cmd(mtn("update")), 0, false, false) +check(samefile("testfile", "correct")) ============================================================ --- tests/test_a_merge_2/correct 1bd5e72a777a6e207f85fb9f86c48c4b9737325a +++ tests/test_a_merge_2/correct 1bd5e72a777a6e207f85fb9f86c48c4b9737325a @@ -0,0 +1,746 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "revision.hh" +#include "sanity.hh" +#include "transforms.hh" +#include "vocab.hh" + + +// calculating least common ancestors is a delicate thing. +// +// it turns out that we cannot choose the simple "least common ancestor" +// for purposes of a merge, because it is possible that there are two +// equally reachable common ancestors, and this produces ambiguity in the +// merge. the result -- in a pathological case -- is silently accepting one +// set of edits while discarding another; not exactly what you want a +// version control tool to do. +// +// a conservative approximation is what we'll call a "subgraph recurring" +// LCA algorithm. this is somewhat like locating the least common dominator +// node, but not quite. it is actually just a vanilla LCA search, except +// that any time there's a fork (a historical merge looks like a fork from +// our perspective, working backwards from children to parents) it reduces +// the fork to a common parent via a sequence of pairwise recursive calls +// to itself before proceeding. this will always resolve to a common parent +// with no ambiguity, unless it falls off the root of the graph. +// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. 
+ +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. + for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(std::string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor_for_merge(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +bool +find_least_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + + L(F("searching for least common ancestor, left=%s right=%s\n") % left % right); + + while 
(expand_ancestors(parents, ancestors, intern, app)) + { + L(F("least common ancestor scan [par=%d,anc=%d]\n") % + parents.size() % ancestors.size()); + + if (find_intersecting_node(*lanc, *ranc, intern, anc)) + { + L(F("found node %d, ancestor of left %s and right %s\n") + % anc % left % right); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("parents", parents); + return false; +} + + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. +// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes) +{ + + if (ancestor == child) + return true; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. 
+ if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::map > partial; + calculate_change_sets_recursive(ancestor, child, app, composed, partial, visited); +} + +// migration stuff +// +// FIXME: these are temporary functions, once we've done the migration to +// revisions / changesets, we can remove them. + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + app.db.get_manifest(parent, m_parent); + app.db.get_manifest(child, m_child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! (manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + +static revision_id +construct_revisions(app_state & app, + manifest_id const & child, + std::multimap< manifest_id, manifest_id > const & ancestry, + std::map & mapped) +{ + revision_set rev; + typedef std::multimap< manifest_id, manifest_id >::const_iterator ci; + std::pair range = ancestry.equal_range(child); + for (ci i = range.first; i != range.second; ++i) + { + manifest_id parent(i->second); + revision_id parent_rid; + std::map::const_iterator j = mapped.find(parent); + + if (j != mapped.end()) + parent_rid = j->second; + else + { + parent_rid = construct_revisions(app, parent, ancestry, mapped); + P(F("inserting mapping %d : %s -> %s\n") % mapped.size() % parent % parent_rid);; + mapped.insert(std::make_pair(parent, parent_rid)); + } + + change_set cs; + analyze_manifest_changes(app, parent, child, cs); + rev.edges.insert(std::make_pair(parent_rid, + std::make_pair(parent, cs))); + } + + revision_id rid; + if (rev.edges.empty()) + { + P(F("ignoring empty revision for manifest %s\n") % child); + return rid; + } + + rev.new_manifest = child; + calculate_ident(rev, rid); + + if (!app.db.revision_exists (rid)) + { + P(F("mapping manifest %s to revision %s\n") % child % rid); + app.db.put_revision(rid, rev); + } + else + { + P(F("skipping additional path to revision %s\n") % rid); + } + + // now hoist all the interesting certs up to the revision + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + std::vector< manifest > tmp; + app.db.get_manifest_certs(child, tmp); + erase_bogus_certs(tmp, app); + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + if (cnames.find(i->inner().name) == cnames.end()) + continue; + 
cert new_cert; + cert_value tv; + decode_base64(i->inner().value, tv); + make_simple_cert(rid.inner(), i->inner().name, tv, app, new_cert); + if (! app.db.revision_cert_exists(revision(new_cert))) + app.db.put_revision_cert(revision(new_cert)); + } + return rid; +} + + +void +build_changesets(app_state & app) +{ + std::vector< manifest > tmp; + app.db.get_manifest_certs(cert_name("ancestor"), tmp); + erase_bogus_certs(tmp, app); + + std::multimap< manifest_id, manifest_id > ancestry; + std::set heads; + std::set total; + + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + cert_value tv; + decode_base64(i->inner().value, tv); + manifest_id child, parent; + child = i->inner().ident; + parent = hexenc(tv()); + heads.insert(child); + heads.erase(parent); + total.insert(child); + total.insert(parent); + ancestry.insert(std::make_pair(child, parent)); + } + + P(F("found a total of %d manifests\n") % total.size()); + + transaction_guard guard(app.db); + std::map mapped; + for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revisions(app, *i, ancestry, mapped); + } + guard.commit(); +} + + +// i/o stuff + +std::string revision_file_name("revision"); + +namespace +{ + namespace syms + { + std::string const old_revision("old_revision"); + std::string const new_manifest("new_manifest"); + std::string const old_manifest("old_manifest"); + } +} + + +void +print_edge(basic_io::printer & printer, + edge_entry const & e) +{ + basic_io::stanza st; + st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()()); + st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()()); + printer.print_stanza(st); + print_change_set(printer, edge_changes(e)); +} + + +void +print_revision(basic_io::printer & printer, + revision_set const & rev) +{ + basic_io::stanza st; + st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()()); + printer.print_stanza(st); + for (edge_map::const_iterator edge = rev.edges.begin(); + edge != rev.edges.end(); ++edge) + print_edge(printer, *edge); +} + + +void +parse_edge(basic_io::parser & parser, + edge_map & es) +{ + change_set cs; + manifest_id old_man; + revision_id old_rev; + std::string tmp; + + parser.esym(syms::old_revision); + parser.hex(tmp); + old_rev = revision_id(tmp); + + parser.esym(syms::old_manifest); + parser.hex(tmp); + old_man = manifest_id(tmp); + + parse_change_set(parser, cs); + + es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs))); +} + + +void +parse_revision(basic_io::parser & parser, + revision_set & rev) +{ + rev.edges.clear(); + std::string tmp; + parser.esym(syms::new_manifest); + parser.hex(tmp); + rev.new_manifest = manifest_id(tmp); + while (parser.symp(syms::old_revision)) + parse_edge(parser, rev.edges); +} + +void +read_revision_set(data const & dat, + revision_set & rev) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "revision"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_revision(pars, rev); + I(src.lookahead == EOF); +} + +void +read_revision_set(revision_data const & dat, + revision_set & rev) +{ + data unpacked; + unpack(dat.inner(), unpacked); + read_revision_set(unpacked, rev); +} + +void +write_revision_set(revision_set const & rev, + data & dat) +{ + std::ostringstream oss; + basic_io::printer pr(oss); + print_revision(pr, rev); + dat = data(oss.str()); +} + +void +write_revision_set(revision_set const & rev, + revision_data & dat) +{ + data d; + write_revision_set(rev, d); + base64< 
gzip > packed; + pack(d, packed); + dat = revision_data(packed); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + +static void +revision_test() +{ +} + +void +add_revision_tests(test_suite * suite) +{ + I(suite); + suite->add(BOOST_TEST_CASE(&revision_test)); +} + + +#endif // BUILD_UNIT_TESTS ============================================================ --- tests/test_a_merge_2/left 20aea360d9048851ce84102050b4c0d5f05ea589 +++ tests/test_a_merge_2/left 20aea360d9048851ce84102050b4c0d5f05ea589 @@ -0,0 +1,707 @@ +// copyright (C) 2004 graydon hoare +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "basic_io.hh" +#include "change_set.hh" +#include "constants.hh" +#include "revision.hh" +#include "sanity.hh" +#include "transforms.hh" +#include "vocab.hh" + + +// calculating least common ancestors is a delicate thing. +// +// it turns out that we cannot choose the simple "least common ancestor" +// for purposes of a merge, because it is possible that there are two +// equally reachable common ancestors, and this produces ambiguity in the +// merge. the result -- in a pathological case -- is silently accepting one +// set of edits while discarding another; not exactly what you want a +// version control tool to do. +// +// a conservative approximation is what we'll call a "subgraph recurring" +// LCA algorithm. this is somewhat like locating the least common dominator +// node, but not quite. it is actually just a vanilla LCA search, except +// that any time there's a fork (a historical merge looks like a fork from +// our perspective, working backwards from children to parents) it reduces +// the fork to a common parent via a sequence of pairwise recursive calls +// to itself before proceeding. this will always resolve to a common parent +// with no ambiguity, unless it falls off the root of the graph. +// +// unfortunately the subgraph recurring algorithm sometimes goes too far +// back in history -- for example if there is an unambiguous propagate from +// one branch to another, the entire subgraph preceeding the propagate on +// the recipient branch is elided, since it is a merge. +// +// our current hypothesis is that the *exact* condition we're looking for, +// when doing a merge, is the least node which dominates one side of the +// merge and is an ancestor of the other. + +typedef unsigned long ctx; +typedef boost::dynamic_bitset<> bitmap; +typedef boost::shared_ptr shared_bitmap; + +static void +ensure_parents_loaded(ctx child, + std::map & parents, + interner & intern, + app_state & app) +{ + if (parents.find(child) != parents.end()) + return; + + L(F("loading parents for node %d\n") % child); + + std::set imm_parents; + app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents); + + // The null revision is not a parent for purposes of finding common + // ancestors. 
+ for (std::set::iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + if (null_id(*p)) + imm_parents.erase(p); + } + + shared_bitmap bits = shared_bitmap(new bitmap(parents.size())); + + for (std::set::const_iterator p = imm_parents.begin(); + p != imm_parents.end(); ++p) + { + ctx pn = intern.intern(p->inner()()); + L(F("parent %s -> node %d\n") % *p % pn); + if (pn >= bits->size()) + bits->resize(pn+1); + bits->set(pn); + } + + parents.insert(std::make_pair(child, bits)); +} + +static bool +expand_dominators(std::map & parents, + std::map & dominators, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(dominators.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = dominators.begin(); + e != dominators.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the dominator entries we can + for (std::vector::const_iterator n = nodes.begin(); + n != nodes.end(); ++n) + { + shared_bitmap bits = dominators[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + + bitmap intersection(bits->size()); + + bool first = true; + for (unsigned long parent = 0; + parent != n_parents->size(); ++parent) + { + if (! n_parents->test(parent)) + continue; + + if (dominators.find(parent) == dominators.end()) + dominators.insert(std::make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = dominators[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + if (first) + { + intersection = (*pbits); + first = false; + } + else + intersection &= (*pbits); + } + + (*bits) |= intersection; + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + + +static bool +expand_ancestors(std::map & parents, + std::map & ancestors, + interner & intern, + app_state & app) +{ + bool something_changed = false; + std::vector nodes; + + nodes.reserve(ancestors.size()); + + // pass 1, pull out all the node numbers we're going to scan this time around + for (std::map::const_iterator e = ancestors.begin(); + e != ancestors.end(); ++e) + nodes.push_back(e->first); + + // pass 2, update any of the ancestor entries we can + for (std::vector::const_iterator n = nodes.begin(); n != nodes.end(); ++n) + { + shared_bitmap bits = ancestors[*n]; + bitmap saved(*bits); + if (bits->size() <= *n) + bits->resize(*n + 1); + bits->set(*n); + + ensure_parents_loaded(*n, parents, intern, app); + shared_bitmap n_parents = parents[*n]; + for (ctx parent = 0; parent != n_parents->size(); ++parent) + { + if (! 
n_parents->test(parent)) + continue; + + if (bits->size() <= parent) + bits->resize(parent + 1); + bits->set(parent); + + if (ancestors.find(parent) == ancestors.end()) + ancestors.insert(make_pair(parent, + shared_bitmap(new bitmap()))); + shared_bitmap pbits = ancestors[parent]; + + if (bits->size() > pbits->size()) + pbits->resize(bits->size()); + + if (pbits->size() > bits->size()) + bits->resize(pbits->size()); + + (*bits) |= (*pbits); + } + if (*bits != saved) + something_changed = true; + } + return something_changed; +} + +static bool +find_intersecting_node(bitmap & fst, + bitmap & snd, + interner const & intern, + revision_id & anc) +{ + + if (fst.size() > snd.size()) + snd.resize(fst.size()); + else if (snd.size() > fst.size()) + fst.resize(snd.size()); + + bitmap intersection = fst & snd; + if (intersection.any()) + { + L(F("found %d intersecting nodes\n") % intersection.count()); + for (ctx i = 0; i < intersection.size(); ++i) + { + if (intersection.test(i)) + { + anc = revision_id(intern.lookup(i)); + return true; + } + } + } + return false; +} + +// static void +// dump_bitset_map(string const & hdr, +// std::map< ctx, shared_bitmap > const & mm) +// { +// L(F("dumping [%s] (%d entries)\n") % hdr % mm.size()); +// for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin(); +// i != mm.end(); ++i) +// { +// L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second))); +// } +// } + +bool +find_common_ancestor(revision_id const & left, + revision_id const & right, + revision_id & anc, + app_state & app) +{ + interner intern; + std::map< ctx, shared_bitmap > + parents, ancestors, dominators; + + ctx ln = intern.intern(left.inner()()); + ctx rn = intern.intern(right.inner()()); + + shared_bitmap lanc = shared_bitmap(new bitmap()); + shared_bitmap ranc = shared_bitmap(new bitmap()); + shared_bitmap ldom = shared_bitmap(new bitmap()); + shared_bitmap rdom = shared_bitmap(new bitmap()); + + ancestors.insert(make_pair(ln, lanc)); + ancestors.insert(make_pair(rn, ranc)); + dominators.insert(make_pair(ln, ldom)); + dominators.insert(make_pair(rn, rdom)); + + L(F("searching for common ancestor, left=%s right=%s\n") % left % right); + + while (expand_ancestors(parents, ancestors, intern, app) || + expand_dominators(parents, dominators, intern, app)) + { + L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") % + parents.size() % ancestors.size() % dominators.size()); + + if (find_intersecting_node(*lanc, *rdom, intern, anc)) + { + L(F("found node %d, ancestor of left %s and dominating right %s\n") + % anc % left % right); + return true; + } + + else if (find_intersecting_node(*ranc, *ldom, intern, anc)) + { + L(F("found node %d, ancestor of right %s and dominating left %s\n") + % anc % right % left); + return true; + } + } +// dump_bitset_map("ancestors", ancestors); +// dump_bitset_map("dominators", dominators); +// dump_bitset_map("parents", parents); + return false; +} + + +// +// The idea with this algorithm is to walk from child up to ancestor, +// recursively, accumulating all the change_sets associated with +// intermediate nodes into *one big change_set*. +// +// clever readers will realize this is an overlapping-subproblem type +// situation and thus needs to keep a dynamic programming map to keep +// itself in linear complexity. +// +// in fact, we keep two: one which maps to computed results (partial_csets) +// and one which just keeps a set of all nodes we traversed +// (visited_nodes). 
in theory it could be one map with an extra bool stuck +// on each entry, but I think that would make it even less readable. it's +// already quite ugly. +// + +static bool +calculate_change_sets_recursive(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & cumulative_cset, + std::map > & partial_csets, + std::set & visited_nodes) +{ + + if (ancestor == child) + return true; + + visited_nodes.insert(child); + + bool relevant_child = false; + + revision_set rev; + app.db.get_revision(child, rev); + + L(F("exploring changesets from parents of %s, seeking towards %s\n") + % child % ancestor); + + for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i) + { + bool relevant_parent = false; + revision_id curr_parent = edge_old_revision(i); + + if (curr_parent.inner()().empty()) + continue; + + change_set cset_to_curr_parent; + + L(F("considering parent %s of %s\n") % curr_parent % child); + + std::map >::const_iterator j = + partial_csets.find(curr_parent); + if (j != partial_csets.end()) + { + // a recursive call has traversed this parent before and built an + // existing cset. just reuse that rather than re-traversing + cset_to_curr_parent = *(j->second); + relevant_parent = true; + } + else if (visited_nodes.find(curr_parent) != visited_nodes.end()) + { + // a recursive call has traversed this parent, but there was no + // path from it to the root, so the parent is irrelevant. skip. + relevant_parent = false; + } + else + relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app, + cset_to_curr_parent, + partial_csets, + visited_nodes); + + if (relevant_parent) + { + L(F("revision %s is relevant, composing with edge to %s\n") + % curr_parent % child); + concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset); + relevant_child = true; + break; + } + else + L(F("parent %s of %s is not relevant\n") % curr_parent % child); + } + + // store the partial edge from ancestor -> child, so that if anyone + // re-traverses this edge they'll just fetch from the partial_edges + // cache. + if (relevant_child) + partial_csets.insert(std::make_pair(child, + boost::shared_ptr + (new change_set(cumulative_cset)))); + + return relevant_child; +} + +void +calculate_composite_change_set(revision_id const & ancestor, + revision_id const & child, + app_state & app, + change_set & composed) +{ + L(F("calculating composite changeset between %s and %s\n") + % ancestor % child); + std::set visited; + std::map > partial; + calculate_change_sets_recursive(ancestor, child, app, composed, partial, visited); +} + +// migration stuff +// +// FIXME: these are temporary functions, once we've done the migration to +// revisions / changesets, we can remove them. + +static void +analyze_manifest_changes(app_state & app, + manifest_id const & parent, + manifest_id const & child, + change_set & cs) +{ + manifest_map m_parent, m_child; + app.db.get_manifest(parent, m_parent); + app.db.get_manifest(child, m_child); + + for (manifest_map::const_iterator i = m_parent.begin(); + i != m_parent.end(); ++i) + { + manifest_map::const_iterator j = m_child.find(manifest_entry_path(i)); + if (j == m_child.end()) + cs.delete_file(manifest_entry_path(i)); + else if (! 
(manifest_entry_id(i) == manifest_entry_id(j))) + { + cs.apply_delta(manifest_entry_path(i), + manifest_entry_id(i), + manifest_entry_id(j)); + } + } + for (manifest_map::const_iterator i = m_child.begin(); + i != m_child.end(); ++i) + { + manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i)); + if (j == m_parent.end()) + cs.add_file(manifest_entry_path(i), + manifest_entry_id(i)); + } +} + +static revision_id +construct_revisions(app_state & app, + manifest_id const & child, + std::multimap< manifest_id, manifest_id > const & ancestry, + std::map & mapped) +{ + revision_set rev; + typedef std::multimap< manifest_id, manifest_id >::const_iterator ci; + std::pair range = ancestry.equal_range(child); + for (ci i = range.first; i != range.second; ++i) + { + manifest_id parent(i->second); + revision_id parent_rid; + std::map::const_iterator j = mapped.find(parent); + + if (j != mapped.end()) + parent_rid = j->second; + else + { + parent_rid = construct_revisions(app, parent, ancestry, mapped); + P(F("inserting mapping %d : %s -> %s\n") % mapped.size() % parent % parent_rid);; + mapped.insert(std::make_pair(parent, parent_rid)); + } + + change_set cs; + analyze_manifest_changes(app, parent, child, cs); + rev.edges.insert(std::make_pair(parent_rid, + std::make_pair(parent, cs))); + } + + revision_id rid; + if (rev.edges.empty()) + { + P(F("ignoring empty revision for manifest %s\n") % child); + return rid; + } + + rev.new_manifest = child; + calculate_ident(rev, rid); + + if (!app.db.revision_exists (rid)) + { + P(F("mapping manifest %s to revision %s\n") % child % rid); + app.db.put_revision(rid, rev); + } + else + { + P(F("skipping additional path to revision %s\n") % rid); + } + + // now hoist all the interesting certs up to the revision + std::set cnames; + cnames.insert(cert_name(branch_cert_name)); + cnames.insert(cert_name(date_cert_name)); + cnames.insert(cert_name(author_cert_name)); + cnames.insert(cert_name(tag_cert_name)); + cnames.insert(cert_name(changelog_cert_name)); + cnames.insert(cert_name(comment_cert_name)); + cnames.insert(cert_name(testresult_cert_name)); + + std::vector< manifest > tmp; + app.db.get_manifest_certs(child, tmp); + erase_bogus_certs(tmp, app); + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + if (cnames.find(i->inner().name) == cnames.end()) + continue; + cert new_cert; + cert_value tv; + decode_base64(i->inner().value, tv); + make_simple_cert(rid.inner(), i->inner().name, tv, app, new_cert); + if (! 
app.db.revision_cert_exists(revision(new_cert))) + app.db.put_revision_cert(revision(new_cert)); + } + return rid; +} + + +void +build_changesets(app_state & app) +{ + std::vector< manifest > tmp; + app.db.get_manifest_certs(cert_name("ancestor"), tmp); + erase_bogus_certs(tmp, app); + + std::multimap< manifest_id, manifest_id > ancestry; + std::set heads; + std::set total; + + for (std::vector< manifest >::const_iterator i = tmp.begin(); + i != tmp.end(); ++i) + { + cert_value tv; + decode_base64(i->inner().value, tv); + manifest_id child, parent; + child = i->inner().ident; + parent = hexenc(tv()); + heads.insert(child); + heads.erase(parent); + total.insert(child); + total.insert(parent); + ancestry.insert(std::make_pair(child, parent)); + } + + P(F("found a total of %d manifests\n") % total.size()); + + transaction_guard guard(app.db); + std::map mapped; + for (std::set::const_iterator i = heads.begin(); + i != heads.end(); ++i) + { + construct_revisions(app, *i, ancestry, mapped); + } + guard.commit(); +} + + +// i/o stuff + +std::string revision_file_name("revision"); + +namespace +{ + namespace syms + { + std::string const old_revision("old_revision"); + std::string const new_manifest("new_manifest"); + std::string const old_manifest("old_manifest"); + } +} + + +void +print_edge(basic_io::printer & printer, + edge_entry const & e) +{ + basic_io::stanza st; + st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()()); + st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()()); + printer.print_stanza(st); + print_change_set(printer, edge_changes(e)); +} + + +void +print_revision(basic_io::printer & printer, + revision_set const & rev) +{ + basic_io::stanza st; + st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()()); + printer.print_stanza(st); + for (edge_map::const_iterator edge = rev.edges.begin(); + edge != rev.edges.end(); ++edge) + print_edge(printer, *edge); +} + + +void +parse_edge(basic_io::parser & parser, + edge_map & es) +{ + change_set cs; + manifest_id old_man; + revision_id old_rev; + std::string tmp; + + parser.esym(syms::old_revision); + parser.hex(tmp); + old_rev = revision_id(tmp); + + parser.esym(syms::old_manifest); + parser.hex(tmp); + old_man = manifest_id(tmp); + + parse_change_set(parser, cs); + + es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs))); +} + + +void +parse_revision(basic_io::parser & parser, + revision_set & rev) +{ + rev.edges.clear(); + std::string tmp; + parser.esym(syms::new_manifest); + parser.hex(tmp); + rev.new_manifest = manifest_id(tmp); + while (parser.symp(syms::old_revision)) + parse_edge(parser, rev.edges); +} + +void +read_revision_set(data const & dat, + revision_set & rev) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "revision"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + parse_revision(pars, rev); + I(src.lookahead == EOF); +} + +void +read_revision_set(revision_data const & dat, + revision_set & rev) +{ + data unpacked; + unpack(dat.inner(), unpacked); + read_revision_set(unpacked, rev); +} + +void +write_revision_set(revision_set const & rev, + data & dat) +{ + std::ostringstream oss; + basic_io::printer pr(oss); + print_revision(pr, rev); + dat = data(oss.str()); +} + +void +write_revision_set(revision_set const & rev, + revision_data & dat) +{ + data d; + write_revision_set(rev, d); + base64< gzip > packed; + pack(d, packed); + dat = revision_data(packed); +} + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "sanity.hh" + 
+static void
+revision_test()
+{
+}
+
+void
+add_revision_tests(test_suite * suite)
+{
+  I(suite);
+  suite->add(BOOST_TEST_CASE(&revision_test));
+}
+
+
+#endif // BUILD_UNIT_TESTS
============================================================
--- tests/test_a_merge_2/parent fec273dc32b623917fe772d21278826fbd9f1368
+++ tests/test_a_merge_2/parent fec273dc32b623917fe772d21278826fbd9f1368
@@ -0,0 +1,706 @@
+// copyright (C) 2004 graydon hoare <graydon@pobox.com>
+// all rights reserved.
+// licensed to the public under the terms of the GNU GPL (>= 2)
+// see the file COPYING for details
+
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <iterator>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "basic_io.hh"
+#include "change_set.hh"
+#include "constants.hh"
+#include "revision.hh"
+#include "sanity.hh"
+#include "transforms.hh"
+#include "vocab.hh"
+
+
+// calculating least common ancestors is a delicate thing.
+//
+// it turns out that we cannot choose the simple "least common ancestor"
+// for purposes of a merge, because it is possible that there are two
+// equally reachable common ancestors, and this produces ambiguity in the
+// merge. the result -- in a pathological case -- is silently accepting one
+// set of edits while discarding another; not exactly what you want a
+// version control tool to do.
+//
+// a conservative approximation is what we'll call a "subgraph recurring"
+// LCA algorithm. this is somewhat like locating the least common dominator
+// node, but not quite. it is actually just a vanilla LCA search, except
+// that any time there's a fork (a historical merge looks like a fork from
+// our perspective, working backwards from children to parents) it reduces
+// the fork to a common parent via a sequence of pairwise recursive calls
+// to itself before proceeding. this will always resolve to a common parent
+// with no ambiguity, unless it falls off the root of the graph.
+//
+// unfortunately the subgraph recurring algorithm sometimes goes too far
+// back in history -- for example if there is an unambiguous propagate from
+// one branch to another, the entire subgraph preceeding the propagate on
+// the recipient branch is elided, since it is a merge.
+//
+// our current hypothesis is that the *exact* condition we're looking for,
+// when doing a merge, is the least node which dominates one side of the
+// merge and is an ancestor of the other.
+
+typedef unsigned long ctx;
+typedef boost::dynamic_bitset<> bitmap;
+typedef boost::shared_ptr<bitmap> shared_bitmap;
+
+static void
+ensure_parents_loaded(ctx child,
+                      std::map<ctx, shared_bitmap> & parents,
+                      interner<ctx> & intern,
+                      app_state & app)
+{
+  if (parents.find(child) != parents.end())
+    return;
+
+  L(F("loading parents for node %d\n") % child);
+
+  std::set<revision_id> imm_parents;
+  app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents);
+
+  // The null revision is not a parent for purposes of finding common
+  // ancestors.
+  for (std::set<revision_id>::iterator p = imm_parents.begin();
+       p != imm_parents.end(); ++p)
+    {
+      if (null_id(*p))
+        imm_parents.erase(p);
+    }
+
+  shared_bitmap bits = shared_bitmap(new bitmap(parents.size()));
+
+  for (std::set<revision_id>::const_iterator p = imm_parents.begin();
+       p != imm_parents.end(); ++p)
+    {
+      ctx pn = intern.intern(p->inner()());
+      L(F("parent %s -> node %d\n") % *p % pn);
+      if (pn >= bits->size())
+        bits->resize(pn+1);
+      bits->set(pn);
+    }
+
+  parents.insert(std::make_pair(child, bits));
+}
+
+static bool
+expand_dominators(std::map<ctx, shared_bitmap> & parents,
+                  std::map<ctx, shared_bitmap> & dominators,
+                  interner<ctx> & intern,
+                  app_state & app)
+{
+  bool something_changed = false;
+  std::vector<ctx> nodes;
+
+  nodes.reserve(dominators.size());
+
+  // pass 1, pull out all the node numbers we're going to scan this time around
+  for (std::map<ctx, shared_bitmap>::const_iterator e = dominators.begin();
+       e != dominators.end(); ++e)
+    nodes.push_back(e->first);
+
+  // pass 2, update any of the dominator entries we can
+  for (std::vector<ctx>::const_iterator n = nodes.begin();
+       n != nodes.end(); ++n)
+    {
+      shared_bitmap bits = dominators[*n];
+      bitmap saved(*bits);
+      if (bits->size() <= *n)
+        bits->resize(*n + 1);
+      bits->set(*n);
+
+      ensure_parents_loaded(*n, parents, intern, app);
+      shared_bitmap n_parents = parents[*n];
+
+      bitmap intersection(bits->size());
+
+      bool first = true;
+      for (unsigned long parent = 0;
+           parent != n_parents->size(); ++parent)
+        {
+          if (! n_parents->test(parent))
+            continue;
+
+          if (dominators.find(parent) == dominators.end())
+            dominators.insert(std::make_pair(parent,
+                                             shared_bitmap(new bitmap())));
+          shared_bitmap pbits = dominators[parent];
+
+          if (bits->size() > pbits->size())
+            pbits->resize(bits->size());
+
+          if (pbits->size() > bits->size())
+            bits->resize(pbits->size());
+
+          if (first)
+            {
+              intersection = (*pbits);
+              first = false;
+            }
+          else
+            intersection &= (*pbits);
+        }
+
+      (*bits) |= intersection;
+      if (*bits != saved)
+        something_changed = true;
+    }
+  return something_changed;
+}
+
+
+static bool
+expand_ancestors(std::map<ctx, shared_bitmap> & parents,
+                 std::map<ctx, shared_bitmap> & ancestors,
+                 interner<ctx> & intern,
+                 app_state & app)
+{
+  bool something_changed = false;
+  std::vector<ctx> nodes;
+
+  nodes.reserve(ancestors.size());
+
+  // pass 1, pull out all the node numbers we're going to scan this time around
+  for (std::map<ctx, shared_bitmap>::const_iterator e = ancestors.begin();
+       e != ancestors.end(); ++e)
+    nodes.push_back(e->first);
+
+  // pass 2, update any of the ancestor entries we can
+  for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n)
+    {
+      shared_bitmap bits = ancestors[*n];
+      bitmap saved(*bits);
+      if (bits->size() <= *n)
+        bits->resize(*n + 1);
+      bits->set(*n);
+
+      ensure_parents_loaded(*n, parents, intern, app);
+      shared_bitmap n_parents = parents[*n];
+      for (ctx parent = 0; parent != n_parents->size(); ++parent)
+        {
+          if (! n_parents->test(parent))
+            continue;
+
+          if (bits->size() <= parent)
+            bits->resize(parent + 1);
+          bits->set(parent);
+
+          if (ancestors.find(parent) == ancestors.end())
+            ancestors.insert(make_pair(parent,
+                                       shared_bitmap(new bitmap())));
+          shared_bitmap pbits = ancestors[parent];
+
+          if (bits->size() > pbits->size())
+            pbits->resize(bits->size());
+
+          if (pbits->size() > bits->size())
+            bits->resize(pbits->size());
+
+          (*bits) |= (*pbits);
+        }
+      if (*bits != saved)
+        something_changed = true;
+    }
+  return something_changed;
+}
+
+static bool
+find_intersecting_node(bitmap & fst,
+                       bitmap & snd,
+                       interner<ctx> const & intern,
+                       revision_id & anc)
+{
+
+  if (fst.size() > snd.size())
+    snd.resize(fst.size());
+  else if (snd.size() > fst.size())
+    fst.resize(snd.size());
+
+  bitmap intersection = fst & snd;
+  if (intersection.any())
+    {
+      L(F("found %d intersecting nodes\n") % intersection.count());
+      for (ctx i = 0; i < intersection.size(); ++i)
+        {
+          if (intersection.test(i))
+            {
+              anc = revision_id(intern.lookup(i));
+              return true;
+            }
+        }
+    }
+  return false;
+}
+
+// static void
+// dump_bitset_map(string const & hdr,
+//                 std::map< ctx, shared_bitmap > const & mm)
+// {
+//   L(F("dumping [%s] (%d entries)\n") % hdr % mm.size());
+//   for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin();
+//        i != mm.end(); ++i)
+//     {
+//       L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second)));
+//     }
+// }
+
+bool
+find_common_ancestor(revision_id const & left,
+                     revision_id const & right,
+                     revision_id & anc,
+                     app_state & app)
+{
+  interner<ctx> intern;
+  std::map< ctx, shared_bitmap >
+    parents, ancestors, dominators;
+
+  ctx ln = intern.intern(left.inner()());
+  ctx rn = intern.intern(right.inner()());
+
+  shared_bitmap lanc = shared_bitmap(new bitmap());
+  shared_bitmap ranc = shared_bitmap(new bitmap());
+  shared_bitmap ldom = shared_bitmap(new bitmap());
+  shared_bitmap rdom = shared_bitmap(new bitmap());
+
+  ancestors.insert(make_pair(ln, lanc));
+  ancestors.insert(make_pair(rn, ranc));
+  dominators.insert(make_pair(ln, ldom));
+  dominators.insert(make_pair(rn, rdom));
+
+  L(F("searching for common ancestor, left=%s right=%s\n") % left % right);
+
+  while (expand_ancestors(parents, ancestors, intern, app) ||
+         expand_dominators(parents, dominators, intern, app))
+    {
+      L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") %
+        parents.size() % ancestors.size() % dominators.size());
+
+      if (find_intersecting_node(*lanc, *rdom, intern, anc))
+        {
+          L(F("found node %d, ancestor of left %s and dominating right %s\n")
+            % anc % left % right);
+          return true;
+        }
+
+      else if (find_intersecting_node(*ranc, *ldom, intern, anc))
+        {
+          L(F("found node %d, ancestor of right %s and dominating left %s\n")
+            % anc % right % left);
+          return true;
+        }
+    }
+//   dump_bitset_map("ancestors", ancestors);
+//   dump_bitset_map("dominators", dominators);
+//   dump_bitset_map("parents", parents);
+  return false;
+}
+
+
+//
+// The idea with this algorithm is to walk from child up to ancestor,
+// recursively, accumulating all the change_sets associated with
+// intermediate nodes into *one big change_set*.
+//
+// clever readers will realize this is an overlapping-subproblem type
+// situation and thus needs to keep a dynamic programming map to keep
+// itself in linear complexity.
+//
+// in fact, we keep two: one which maps to computed results (partial_csets)
+// and one which just keeps a set of all nodes we traversed
+// (visited_nodes). in theory it could be one map with an extra bool stuck
+// on each entry, but I think that would make it even less readable. it's
+// already quite ugly.
+//
+
+static bool
+calculate_change_sets_recursive(revision_id const & ancestor,
+                                revision_id const & child,
+                                app_state & app,
+                                change_set & cumulative_cset,
+                                std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
+                                std::set<revision_id> & visited_nodes)
+{
+
+  if (ancestor == child)
+    return true;
+
+  visited_nodes.insert(child);
+
+  bool relevant_child = false;
+
+  revision_set rev;
+  app.db.get_revision(child, rev);
+
+  L(F("exploring changesets from parents of %s, seeking towards %s\n")
+    % child % ancestor);
+
+  for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
+    {
+      bool relevant_parent = false;
+      revision_id curr_parent = edge_old_revision(i);
+
+      if (curr_parent.inner()().empty())
+        continue;
+
+      change_set cset_to_curr_parent;
+
+      L(F("considering parent %s of %s\n") % curr_parent % child);
+
+      std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
+        partial_csets.find(curr_parent);
+      if (j != partial_csets.end())
+        {
+          // a recursive call has traversed this parent before and built an
+          // existing cset. just reuse that rather than re-traversing
+          cset_to_curr_parent = *(j->second);
+          relevant_parent = true;
+        }
+      else if (visited_nodes.find(curr_parent) != visited_nodes.end())
+        {
+          // a recursive call has traversed this parent, but there was no
+          // path from it to the root, so the parent is irrelevant. skip.
+          relevant_parent = false;
+        }
+      else
+        relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
+                                                          cset_to_curr_parent,
+                                                          partial_csets,
+                                                          visited_nodes);
+
+      if (relevant_parent)
+        {
+          L(F("revision %s is relevant, composing with edge to %s\n")
+            % curr_parent % child);
+          concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
+          relevant_child = true;
+          break;
+        }
+      else
+        L(F("parent %s of %s is not relevant\n") % curr_parent % child);
+    }
+
+  // store the partial edge from ancestor -> child, so that if anyone
+  // re-traverses this edge they'll just fetch from the partial_edges
+  // cache.
+  if (relevant_child)
+    partial_csets.insert(std::make_pair(child,
+                                        boost::shared_ptr<change_set>
+                                        (new change_set(cumulative_cset))));
+
+  return relevant_child;
+}
+
+void
+calculate_composite_change_set(revision_id const & ancestor,
+                               revision_id const & child,
+                               app_state & app,
+                               change_set & composed)
+{
+  L(F("calculating composite changeset between %s and %s\n")
+    % ancestor % child);
+  std::set<revision_id> visited;
+  std::map<revision_id, boost::shared_ptr<change_set> > partial;
+  calculate_change_sets_recursive(ancestor, child, app, composed, partial, visited);
+}
+
+// migration stuff
+//
+// FIXME: these are temporary functions, once we've done the migration to
+// revisions / changesets, we can remove them.
+
+static void
+analyze_manifest_changes(app_state & app,
+                         manifest_id const & parent,
+                         manifest_id const & child,
+                         change_set & cs)
+{
+  manifest_map m_parent, m_child;
+  app.db.get_manifest(parent, m_parent);
+  app.db.get_manifest(child, m_child);
+
+  for (manifest_map::const_iterator i = m_parent.begin();
+       i != m_parent.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_child.find(manifest_entry_path(i));
+      if (j == m_child.end())
+        cs.delete_file(manifest_entry_path(i));
+      else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
+        {
+          cs.apply_delta(manifest_entry_path(i),
+                         manifest_entry_id(i),
+                         manifest_entry_id(j));
+        }
+    }
+  for (manifest_map::const_iterator i = m_child.begin();
+       i != m_child.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
+      if (j == m_parent.end())
+        cs.add_file(manifest_entry_path(i),
+                    manifest_entry_id(i));
+    }
+}
+
+static revision_id
+construct_revisions(app_state & app,
+                    manifest_id const & child,
+                    std::multimap< manifest_id, manifest_id > const & ancestry,
+                    std::map<manifest_id, revision_id> & mapped)
+{
+  revision_set rev;
+  typedef std::multimap< manifest_id, manifest_id >::const_iterator ci;
+  std::pair<ci,ci> range = ancestry.equal_range(child);
+  for (ci i = range.first; i != range.second; ++i)
+    {
+      manifest_id parent(i->second);
+      revision_id parent_rid;
+      std::map<manifest_id, revision_id>::const_iterator j = mapped.find(parent);
+
+      if (j != mapped.end())
+        parent_rid = j->second;
+      else
+        {
+          parent_rid = construct_revisions(app, parent, ancestry, mapped);
+          P(F("inserting mapping %d : %s -> %s\n") % mapped.size() % parent % parent_rid);;
+          mapped.insert(std::make_pair(parent, parent_rid));
+        }
+
+      change_set cs;
+      analyze_manifest_changes(app, parent, child, cs);
+      rev.edges.insert(std::make_pair(parent_rid,
+                                      std::make_pair(parent, cs)));
+    }
+
+  revision_id rid;
+  if (rev.edges.empty())
+    {
+      P(F("ignoring empty revision for manifest %s\n") % child);
+      return rid;
+    }
+
+  rev.new_manifest = child;
+  calculate_ident(rev, rid);
+
+  if (!app.db.revision_exists (rid))
+    {
+      P(F("mapping manifest %s to revision %s\n") % child % rid);
+      app.db.put_revision(rid, rev);
+    }
+  else
+    {
+      P(F("skipping additional path to revision %s\n") % rid);
+    }
+
+  // now hoist all the interesting certs up to the revision
+  std::set<cert_name> cnames;
+  cnames.insert(cert_name(branch_cert_name));
+  cnames.insert(cert_name(date_cert_name));
+  cnames.insert(cert_name(author_cert_name));
+  cnames.insert(cert_name(tag_cert_name));
+  cnames.insert(cert_name(changelog_cert_name));
+  cnames.insert(cert_name(comment_cert_name));
+  cnames.insert(cert_name(testresult_cert_name));
+
+  std::vector< manifest<cert> > tmp;
+  app.db.get_manifest_certs(child, tmp);
+  erase_bogus_certs(tmp, app);
+  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      if (cnames.find(i->inner().name) == cnames.end())
+        continue;
+      cert new_cert;
+      cert_value tv;
+      decode_base64(i->inner().value, tv);
+      make_simple_cert(rid.inner(), i->inner().name, tv, app, new_cert);
+      if (! app.db.revision_cert_exists(revision<cert>(new_cert)))
+        app.db.put_revision_cert(revision<cert>(new_cert));
+    }
+  return rid;
+}
+
+
+void
+build_changesets(app_state & app)
+{
+  std::vector< manifest<cert> > tmp;
+  app.db.get_manifest_certs(cert_name("ancestor"), tmp);
+  erase_bogus_certs(tmp, app);
+
+  std::multimap< manifest_id, manifest_id > ancestry;
+  std::set<manifest_id> heads;
+  std::set<manifest_id> total;
+
+  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      cert_value tv;
+      decode_base64(i->inner().value, tv);
+      manifest_id child, parent;
+      child = i->inner().ident;
+      parent = hexenc<id>(tv());
+      heads.insert(child);
+      heads.erase(parent);
+      total.insert(child);
+      total.insert(parent);
+      ancestry.insert(std::make_pair(child, parent));
+    }
+
+  P(F("found a total of %d manifests\n") % total.size());
+
+  transaction_guard guard(app.db);
+  std::map<manifest_id, revision_id> mapped;
+  for (std::set<manifest_id>::const_iterator i = heads.begin();
+       i != heads.end(); ++i)
+    {
+      construct_revisions(app, *i, ancestry, mapped);
+    }
+  guard.commit();
+}
+
+
+// i/o stuff
+
+std::string revision_file_name("revision");
+
+namespace
+{
+  namespace syms
+  {
+    std::string const old_revision("old_revision");
+    std::string const new_manifest("new_manifest");
+    std::string const old_manifest("old_manifest");
+  }
+}
+
+
+void
+print_edge(basic_io::printer & printer,
+           edge_entry const & e)
+{
+  basic_io::stanza st;
+  st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
+  st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
+  printer.print_stanza(st);
+  print_change_set(printer, edge_changes(e));
+}
+
+
+void
+print_revision(basic_io::printer & printer,
+               revision_set const & rev)
+{
+  basic_io::stanza st;
+  st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
+  printer.print_stanza(st);
+  for (edge_map::const_iterator edge = rev.edges.begin();
+       edge != rev.edges.end(); ++edge)
+    print_edge(printer, *edge);
+}
+
+
+void
+parse_edge(basic_io::parser & parser,
+           edge_map & es)
+{
+  change_set cs;
+  manifest_id old_man;
+  revision_id old_rev;
+  std::string tmp;
+
+  parser.esym(syms::old_revision);
+  parser.hex(tmp);
+  old_rev = revision_id(tmp);
+
+  parser.esym(syms::old_manifest);
+  parser.hex(tmp);
+  old_man = manifest_id(tmp);
+
+  parse_change_set(parser, cs);
+
+  es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
+}
+
+
+void
+parse_revision(basic_io::parser & parser,
+               revision_set & rev)
+{
+  rev.edges.clear();
+  std::string tmp;
+  parser.esym(syms::new_manifest);
+  parser.hex(tmp);
+  rev.new_manifest = manifest_id(tmp);
+  while (parser.symp(syms::old_revision))
+    parse_edge(parser, rev.edges);
+}
+
+void
+read_revision_set(data const & dat,
+                  revision_set & rev)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "revision");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_revision(pars, rev);
+}
+
+void
+read_revision_set(revision_data const & dat,
+                  revision_set & rev)
+{
+  data unpacked;
+  unpack(dat.inner(), unpacked);
+  read_revision_set(unpacked, rev);
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   data & dat)
+{
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_revision(pr, rev);
+  dat = data(oss.str());
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   revision_data & dat)
+{
+  data d;
+  write_revision_set(rev, d);
+  base64< gzip<data> > packed;
+  pack(d, packed);
+  dat = revision_data(packed);
+}
+
+#ifdef BUILD_UNIT_TESTS
+#include "unit_tests.hh"
+#include "sanity.hh"
+
+static void
+revision_test()
+{
+}
+
+void
+add_revision_tests(test_suite * suite)
+{
+  I(suite);
+  suite->add(BOOST_TEST_CASE(&revision_test));
+}
+
+
+#endif // BUILD_UNIT_TESTS
============================================================
--- tests/test_a_merge_2/right 6100e250e2a4b26af3bd65df1b86278a264f2deb
+++ tests/test_a_merge_2/right 6100e250e2a4b26af3bd65df1b86278a264f2deb
@@ -0,0 +1,745 @@
+// copyright (C) 2004 graydon hoare <graydon@pobox.com>
+// all rights reserved.
+// licensed to the public under the terms of the GNU GPL (>= 2)
+// see the file COPYING for details
+
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <iterator>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "basic_io.hh"
+#include "change_set.hh"
+#include "constants.hh"
+#include "revision.hh"
+#include "sanity.hh"
+#include "transforms.hh"
+#include "vocab.hh"
+
+
+// calculating least common ancestors is a delicate thing.
+//
+// it turns out that we cannot choose the simple "least common ancestor"
+// for purposes of a merge, because it is possible that there are two
+// equally reachable common ancestors, and this produces ambiguity in the
+// merge. the result -- in a pathological case -- is silently accepting one
+// set of edits while discarding another; not exactly what you want a
+// version control tool to do.
+//
+// a conservative approximation is what we'll call a "subgraph recurring"
+// LCA algorithm. this is somewhat like locating the least common dominator
+// node, but not quite. it is actually just a vanilla LCA search, except
+// that any time there's a fork (a historical merge looks like a fork from
+// our perspective, working backwards from children to parents) it reduces
+// the fork to a common parent via a sequence of pairwise recursive calls
+// to itself before proceeding. this will always resolve to a common parent
+// with no ambiguity, unless it falls off the root of the graph.
+//
+// unfortunately the subgraph recurring algorithm sometimes goes too far
+// back in history -- for example if there is an unambiguous propagate from
+// one branch to another, the entire subgraph preceeding the propagate on
+// the recipient branch is elided, since it is a merge.
+//
+// our current hypothesis is that the *exact* condition we're looking for,
+// when doing a merge, is the least node which dominates one side of the
+// merge and is an ancestor of the other.
+
+typedef unsigned long ctx;
+typedef boost::dynamic_bitset<> bitmap;
+typedef boost::shared_ptr<bitmap> shared_bitmap;
+
+static void
+ensure_parents_loaded(ctx child,
+                      std::map<ctx, shared_bitmap> & parents,
+                      interner<ctx> & intern,
+                      app_state & app)
+{
+  if (parents.find(child) != parents.end())
+    return;
+
+  L(F("loading parents for node %d\n") % child);
+
+  std::set<revision_id> imm_parents;
+  app.db.get_revision_parents(revision_id(intern.lookup(child)), imm_parents);
+
+  // The null revision is not a parent for purposes of finding common
+  // ancestors.
+  for (std::set<revision_id>::iterator p = imm_parents.begin();
+       p != imm_parents.end(); ++p)
+    {
+      if (null_id(*p))
+        imm_parents.erase(p);
+    }
+
+  shared_bitmap bits = shared_bitmap(new bitmap(parents.size()));
+
+  for (std::set<revision_id>::const_iterator p = imm_parents.begin();
+       p != imm_parents.end(); ++p)
+    {
+      ctx pn = intern.intern(p->inner()());
+      L(F("parent %s -> node %d\n") % *p % pn);
+      if (pn >= bits->size())
+        bits->resize(pn+1);
+      bits->set(pn);
+    }
+
+  parents.insert(std::make_pair(child, bits));
+}
+
+static bool
+expand_dominators(std::map<ctx, shared_bitmap> & parents,
+                  std::map<ctx, shared_bitmap> & dominators,
+                  interner<ctx> & intern,
+                  app_state & app)
+{
+  bool something_changed = false;
+  std::vector<ctx> nodes;
+
+  nodes.reserve(dominators.size());
+
+  // pass 1, pull out all the node numbers we're going to scan this time around
+  for (std::map<ctx, shared_bitmap>::const_iterator e = dominators.begin();
+       e != dominators.end(); ++e)
+    nodes.push_back(e->first);
+
+  // pass 2, update any of the dominator entries we can
+  for (std::vector<ctx>::const_iterator n = nodes.begin();
+       n != nodes.end(); ++n)
+    {
+      shared_bitmap bits = dominators[*n];
+      bitmap saved(*bits);
+      if (bits->size() <= *n)
+        bits->resize(*n + 1);
+      bits->set(*n);
+
+      ensure_parents_loaded(*n, parents, intern, app);
+      shared_bitmap n_parents = parents[*n];
+
+      bitmap intersection(bits->size());
+
+      bool first = true;
+      for (unsigned long parent = 0;
+           parent != n_parents->size(); ++parent)
+        {
+          if (! n_parents->test(parent))
+            continue;
+
+          if (dominators.find(parent) == dominators.end())
+            dominators.insert(std::make_pair(parent,
+                                             shared_bitmap(new bitmap())));
+          shared_bitmap pbits = dominators[parent];
+
+          if (bits->size() > pbits->size())
+            pbits->resize(bits->size());
+
+          if (pbits->size() > bits->size())
+            bits->resize(pbits->size());
+
+          if (first)
+            {
+              intersection = (*pbits);
+              first = false;
+            }
+          else
+            intersection &= (*pbits);
+        }
+
+      (*bits) |= intersection;
+      if (*bits != saved)
+        something_changed = true;
+    }
+  return something_changed;
+}
+
+
+static bool
+expand_ancestors(std::map<ctx, shared_bitmap> & parents,
+                 std::map<ctx, shared_bitmap> & ancestors,
+                 interner<ctx> & intern,
+                 app_state & app)
+{
+  bool something_changed = false;
+  std::vector<ctx> nodes;
+
+  nodes.reserve(ancestors.size());
+
+  // pass 1, pull out all the node numbers we're going to scan this time around
+  for (std::map<ctx, shared_bitmap>::const_iterator e = ancestors.begin();
+       e != ancestors.end(); ++e)
+    nodes.push_back(e->first);
+
+  // pass 2, update any of the ancestor entries we can
+  for (std::vector<ctx>::const_iterator n = nodes.begin(); n != nodes.end(); ++n)
+    {
+      shared_bitmap bits = ancestors[*n];
+      bitmap saved(*bits);
+      if (bits->size() <= *n)
+        bits->resize(*n + 1);
+      bits->set(*n);
+
+      ensure_parents_loaded(*n, parents, intern, app);
+      shared_bitmap n_parents = parents[*n];
+      for (ctx parent = 0; parent != n_parents->size(); ++parent)
+        {
+          if (! n_parents->test(parent))
+            continue;
+
+          if (bits->size() <= parent)
+            bits->resize(parent + 1);
+          bits->set(parent);
+
+          if (ancestors.find(parent) == ancestors.end())
+            ancestors.insert(make_pair(parent,
+                                       shared_bitmap(new bitmap())));
+          shared_bitmap pbits = ancestors[parent];
+
+          if (bits->size() > pbits->size())
+            pbits->resize(bits->size());
+
+          if (pbits->size() > bits->size())
+            bits->resize(pbits->size());
+
+          (*bits) |= (*pbits);
+        }
+      if (*bits != saved)
+        something_changed = true;
+    }
+  return something_changed;
+}
+
+static bool
+find_intersecting_node(bitmap & fst,
+                       bitmap & snd,
+                       interner<ctx> const & intern,
+                       revision_id & anc)
+{
+
+  if (fst.size() > snd.size())
+    snd.resize(fst.size());
+  else if (snd.size() > fst.size())
+    fst.resize(snd.size());
+
+  bitmap intersection = fst & snd;
+  if (intersection.any())
+    {
+      L(F("found %d intersecting nodes\n") % intersection.count());
+      for (ctx i = 0; i < intersection.size(); ++i)
+        {
+          if (intersection.test(i))
+            {
+              anc = revision_id(intern.lookup(i));
+              return true;
+            }
+        }
+    }
+  return false;
+}
+
+// static void
+// dump_bitset_map(std::string const & hdr,
+//                 std::map< ctx, shared_bitmap > const & mm)
+// {
+//   L(F("dumping [%s] (%d entries)\n") % hdr % mm.size());
+//   for (std::map< ctx, shared_bitmap >::const_iterator i = mm.begin();
+//        i != mm.end(); ++i)
+//     {
+//       L(F("dump [%s]: %d -> %s\n") % hdr % i->first % (*(i->second)));
+//     }
+// }
+
+bool
+find_common_ancestor_for_merge(revision_id const & left,
+                               revision_id const & right,
+                               revision_id & anc,
+                               app_state & app)
+{
+  interner<ctx> intern;
+  std::map< ctx, shared_bitmap >
+    parents, ancestors, dominators;
+
+  ctx ln = intern.intern(left.inner()());
+  ctx rn = intern.intern(right.inner()());
+
+  shared_bitmap lanc = shared_bitmap(new bitmap());
+  shared_bitmap ranc = shared_bitmap(new bitmap());
+  shared_bitmap ldom = shared_bitmap(new bitmap());
+  shared_bitmap rdom = shared_bitmap(new bitmap());
+
+  ancestors.insert(make_pair(ln, lanc));
+  ancestors.insert(make_pair(rn, ranc));
+  dominators.insert(make_pair(ln, ldom));
+  dominators.insert(make_pair(rn, rdom));
+
+  L(F("searching for common ancestor, left=%s right=%s\n") % left % right);
+
+  while (expand_ancestors(parents, ancestors, intern, app) ||
+         expand_dominators(parents, dominators, intern, app))
+    {
+      L(F("common ancestor scan [par=%d,anc=%d,dom=%d]\n") %
+        parents.size() % ancestors.size() % dominators.size());
+
+      if (find_intersecting_node(*lanc, *rdom, intern, anc))
+        {
+          L(F("found node %d, ancestor of left %s and dominating right %s\n")
+            % anc % left % right);
+          return true;
+        }
+
+      else if (find_intersecting_node(*ranc, *ldom, intern, anc))
+        {
+          L(F("found node %d, ancestor of right %s and dominating left %s\n")
+            % anc % right % left);
+          return true;
+        }
+    }
+//   dump_bitset_map("ancestors", ancestors);
+//   dump_bitset_map("dominators", dominators);
+//   dump_bitset_map("parents", parents);
+  return false;
+}
+
+
+bool
+find_least_common_ancestor(revision_id const & left,
+                           revision_id const & right,
+                           revision_id & anc,
+                           app_state & app)
+{
+  interner<ctx> intern;
+  std::map< ctx, shared_bitmap >
+    parents, ancestors, dominators;
+
+  ctx ln = intern.intern(left.inner()());
+  ctx rn = intern.intern(right.inner()());
+
+  shared_bitmap lanc = shared_bitmap(new bitmap());
+  shared_bitmap ranc = shared_bitmap(new bitmap());
+
+  ancestors.insert(make_pair(ln, lanc));
+  ancestors.insert(make_pair(rn, ranc));
+
+  L(F("searching for least common ancestor, left=%s right=%s\n") % left % right);
+
+  while (expand_ancestors(parents, ancestors, intern, app))
+    {
+      L(F("least common ancestor scan [par=%d,anc=%d]\n") %
+        parents.size() % ancestors.size());
+
+      if (find_intersecting_node(*lanc, *ranc, intern, anc))
+        {
+          L(F("found node %d, ancestor of left %s and right %s\n")
+            % anc % left % right);
+          return true;
+        }
+    }
+//   dump_bitset_map("ancestors", ancestors);
+//   dump_bitset_map("parents", parents);
+  return false;
+}
+
+
+//
+// The idea with this algorithm is to walk from child up to ancestor,
+// recursively, accumulating all the change_sets associated with
+// intermediate nodes into *one big change_set*.
+//
+// clever readers will realize this is an overlapping-subproblem type
+// situation and thus needs to keep a dynamic programming map to keep
+// itself in linear complexity.
+//
+// in fact, we keep two: one which maps to computed results (partial_csets)
+// and one which just keeps a set of all nodes we traversed
+// (visited_nodes). in theory it could be one map with an extra bool stuck
+// on each entry, but I think that would make it even less readable. it's
+// already quite ugly.
+//
+
+static bool
+calculate_change_sets_recursive(revision_id const & ancestor,
+                                revision_id const & child,
+                                app_state & app,
+                                change_set & cumulative_cset,
+                                std::map<revision_id, boost::shared_ptr<change_set> > & partial_csets,
+                                std::set<revision_id> & visited_nodes)
+{
+
+  if (ancestor == child)
+    return true;
+
+  visited_nodes.insert(child);
+
+  bool relevant_child = false;
+
+  revision_set rev;
+  app.db.get_revision(child, rev);
+
+  L(F("exploring changesets from parents of %s, seeking towards %s\n")
+    % child % ancestor);
+
+  for(edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
+    {
+      bool relevant_parent = false;
+      revision_id curr_parent = edge_old_revision(i);
+
+      if (curr_parent.inner()().empty())
+        continue;
+
+      change_set cset_to_curr_parent;
+
+      L(F("considering parent %s of %s\n") % curr_parent % child);
+
+      std::map<revision_id, boost::shared_ptr<change_set> >::const_iterator j =
+        partial_csets.find(curr_parent);
+      if (j != partial_csets.end())
+        {
+          // a recursive call has traversed this parent before and built an
+          // existing cset. just reuse that rather than re-traversing
+          cset_to_curr_parent = *(j->second);
+          relevant_parent = true;
+        }
+      else if (visited_nodes.find(curr_parent) != visited_nodes.end())
+        {
+          // a recursive call has traversed this parent, but there was no
+          // path from it to the root, so the parent is irrelevant. skip.
+          relevant_parent = false;
+        }
+      else
+        relevant_parent = calculate_change_sets_recursive(ancestor, curr_parent, app,
+                                                          cset_to_curr_parent,
+                                                          partial_csets,
+                                                          visited_nodes);
+
+      if (relevant_parent)
+        {
+          L(F("revision %s is relevant, composing with edge to %s\n")
+            % curr_parent % child);
+          concatenate_change_sets(cset_to_curr_parent, edge_changes(i), cumulative_cset);
+          relevant_child = true;
+          break;
+        }
+      else
+        L(F("parent %s of %s is not relevant\n") % curr_parent % child);
+    }
+
+  // store the partial edge from ancestor -> child, so that if anyone
+  // re-traverses this edge they'll just fetch from the partial_edges
+  // cache.
+  if (relevant_child)
+    partial_csets.insert(std::make_pair(child,
+                                        boost::shared_ptr<change_set>
+                                        (new change_set(cumulative_cset))));
+
+  return relevant_child;
+}
+
+void
+calculate_composite_change_set(revision_id const & ancestor,
+                               revision_id const & child,
+                               app_state & app,
+                               change_set & composed)
+{
+  L(F("calculating composite changeset between %s and %s\n")
+    % ancestor % child);
+  std::set<revision_id> visited;
+  std::map<revision_id, boost::shared_ptr<change_set> > partial;
+  calculate_change_sets_recursive(ancestor, child, app, composed, partial, visited);
+}
+
+// migration stuff
+//
+// FIXME: these are temporary functions, once we've done the migration to
+// revisions / changesets, we can remove them.
+
+static void
+analyze_manifest_changes(app_state & app,
+                         manifest_id const & parent,
+                         manifest_id const & child,
+                         change_set & cs)
+{
+  manifest_map m_parent, m_child;
+  app.db.get_manifest(parent, m_parent);
+  app.db.get_manifest(child, m_child);
+
+  for (manifest_map::const_iterator i = m_parent.begin();
+       i != m_parent.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_child.find(manifest_entry_path(i));
+      if (j == m_child.end())
+        cs.delete_file(manifest_entry_path(i));
+      else if (! (manifest_entry_id(i) == manifest_entry_id(j)))
+        {
+          cs.apply_delta(manifest_entry_path(i),
+                         manifest_entry_id(i),
+                         manifest_entry_id(j));
+        }
+    }
+  for (manifest_map::const_iterator i = m_child.begin();
+       i != m_child.end(); ++i)
+    {
+      manifest_map::const_iterator j = m_parent.find(manifest_entry_path(i));
+      if (j == m_parent.end())
+        cs.add_file(manifest_entry_path(i),
+                    manifest_entry_id(i));
+    }
+}
+
+static revision_id
+construct_revisions(app_state & app,
+                    manifest_id const & child,
+                    std::multimap< manifest_id, manifest_id > const & ancestry,
+                    std::map<manifest_id, revision_id> & mapped)
+{
+  revision_set rev;
+  typedef std::multimap< manifest_id, manifest_id >::const_iterator ci;
+  std::pair<ci,ci> range = ancestry.equal_range(child);
+  for (ci i = range.first; i != range.second; ++i)
+    {
+      manifest_id parent(i->second);
+      revision_id parent_rid;
+      std::map<manifest_id, revision_id>::const_iterator j = mapped.find(parent);
+
+      if (j != mapped.end())
+        parent_rid = j->second;
+      else
+        {
+          parent_rid = construct_revisions(app, parent, ancestry, mapped);
+          P(F("inserting mapping %d : %s -> %s\n") % mapped.size() % parent % parent_rid);;
+          mapped.insert(std::make_pair(parent, parent_rid));
+        }
+
+      change_set cs;
+      analyze_manifest_changes(app, parent, child, cs);
+      rev.edges.insert(std::make_pair(parent_rid,
+                                      std::make_pair(parent, cs)));
+    }
+
+  revision_id rid;
+  if (rev.edges.empty())
+    {
+      P(F("ignoring empty revision for manifest %s\n") % child);
+      return rid;
+    }
+
+  rev.new_manifest = child;
+  calculate_ident(rev, rid);
+
+  if (!app.db.revision_exists (rid))
+    {
+      P(F("mapping manifest %s to revision %s\n") % child % rid);
+      app.db.put_revision(rid, rev);
+    }
+  else
+    {
+      P(F("skipping additional path to revision %s\n") % rid);
+    }
+
+  // now hoist all the interesting certs up to the revision
+  std::set<cert_name> cnames;
+  cnames.insert(cert_name(branch_cert_name));
+  cnames.insert(cert_name(date_cert_name));
+  cnames.insert(cert_name(author_cert_name));
+  cnames.insert(cert_name(tag_cert_name));
+  cnames.insert(cert_name(changelog_cert_name));
+  cnames.insert(cert_name(comment_cert_name));
+  cnames.insert(cert_name(testresult_cert_name));
+
+  std::vector< manifest<cert> > tmp;
+  app.db.get_manifest_certs(child, tmp);
+  erase_bogus_certs(tmp, app);
+  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      if (cnames.find(i->inner().name) == cnames.end())
+        continue;
+      cert new_cert;
+      cert_value tv;
+      decode_base64(i->inner().value, tv);
+      make_simple_cert(rid.inner(), i->inner().name, tv, app, new_cert);
+      if (! app.db.revision_cert_exists(revision<cert>(new_cert)))
+        app.db.put_revision_cert(revision<cert>(new_cert));
+    }
+  return rid;
+}
+
+
+void
+build_changesets(app_state & app)
+{
+  std::vector< manifest<cert> > tmp;
+  app.db.get_manifest_certs(cert_name("ancestor"), tmp);
+  erase_bogus_certs(tmp, app);
+
+  std::multimap< manifest_id, manifest_id > ancestry;
+  std::set<manifest_id> heads;
+  std::set<manifest_id> total;
+
+  for (std::vector< manifest<cert> >::const_iterator i = tmp.begin();
+       i != tmp.end(); ++i)
+    {
+      cert_value tv;
+      decode_base64(i->inner().value, tv);
+      manifest_id child, parent;
+      child = i->inner().ident;
+      parent = hexenc<id>(tv());
+      heads.insert(child);
+      heads.erase(parent);
+      total.insert(child);
+      total.insert(parent);
+      ancestry.insert(std::make_pair(child, parent));
+    }
+
+  P(F("found a total of %d manifests\n") % total.size());
+
+  transaction_guard guard(app.db);
+  std::map<manifest_id, revision_id> mapped;
+  for (std::set<manifest_id>::const_iterator i = heads.begin();
+       i != heads.end(); ++i)
+    {
+      construct_revisions(app, *i, ancestry, mapped);
+    }
+  guard.commit();
+}
+
+
+// i/o stuff
+
+std::string revision_file_name("revision");
+
+namespace
+{
+  namespace syms
+  {
+    std::string const old_revision("old_revision");
+    std::string const new_manifest("new_manifest");
+    std::string const old_manifest("old_manifest");
+  }
+}
+
+
+void
+print_edge(basic_io::printer & printer,
+           edge_entry const & e)
+{
+  basic_io::stanza st;
+  st.push_hex_pair(syms::old_revision, edge_old_revision(e).inner()());
+  st.push_hex_pair(syms::old_manifest, edge_old_manifest(e).inner()());
+  printer.print_stanza(st);
+  print_change_set(printer, edge_changes(e));
+}
+
+
+void
+print_revision(basic_io::printer & printer,
+               revision_set const & rev)
+{
+  basic_io::stanza st;
+  st.push_hex_pair(syms::new_manifest, rev.new_manifest.inner()());
+  printer.print_stanza(st);
+  for (edge_map::const_iterator edge = rev.edges.begin();
+       edge != rev.edges.end(); ++edge)
+    print_edge(printer, *edge);
+}
+
+
+void
+parse_edge(basic_io::parser & parser,
+           edge_map & es)
+{
+  change_set cs;
+  manifest_id old_man;
+  revision_id old_rev;
+  std::string tmp;
+
+  parser.esym(syms::old_revision);
+  parser.hex(tmp);
+  old_rev = revision_id(tmp);
+
+  parser.esym(syms::old_manifest);
+  parser.hex(tmp);
+  old_man = manifest_id(tmp);
+
+  parse_change_set(parser, cs);
+
+  es.insert(std::make_pair(old_rev, std::make_pair(old_man, cs)));
+}
+
+
+void
+parse_revision(basic_io::parser & parser,
+               revision_set & rev)
+{
+  rev.edges.clear();
+  std::string tmp;
+  parser.esym(syms::new_manifest);
+  parser.hex(tmp);
+  rev.new_manifest = manifest_id(tmp);
+  while (parser.symp(syms::old_revision))
+    parse_edge(parser, rev.edges);
+}
+
+void
+read_revision_set(data const & dat,
+                  revision_set & rev)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "revision");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_revision(pars, rev);
+}
+
+void
+read_revision_set(revision_data const & dat,
+                  revision_set & rev)
+{
+  data unpacked;
+  unpack(dat.inner(), unpacked);
+  read_revision_set(unpacked, rev);
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   data & dat)
+{
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  print_revision(pr, rev);
+  dat = data(oss.str());
+}
+
+void
+write_revision_set(revision_set const & rev,
+                   revision_data & dat)
+{
+  data d;
+  write_revision_set(rev, d);
+  base64< gzip<data> > packed;
+  pack(d, packed);
+  dat = revision_data(packed);
+}
+
+#ifdef BUILD_UNIT_TESTS
+#include "unit_tests.hh"
+#include "sanity.hh"
+
+static void
+revision_test()
+{
+}
+
+void
+add_revision_tests(test_suite * suite)
+{
+  I(suite);
+  suite->add(BOOST_TEST_CASE(&revision_test));
+}
+
+
+#endif // BUILD_UNIT_TESTS
============================================================
--- tester.lua fd12e0c5032a3d5fe98c1ba1186c580c24822421
+++ tester.lua 69bd2108dd3803934fca93b26a40ba8f46b746e8
@@ -317,7 +317,7 @@
       err("Check failed (stdout): not empty", 3)
     end
   elseif type(stdout) == "string" then
-    local realout = io.open("stdout")
+    local realout = io.open(ident .. "stdout")
     local contents = realout:read("*a")
     realout:close()
     if contents ~= stdout then
@@ -333,7 +333,7 @@
       err("Check failed (stderr): not empty", 3)
     end
   elseif type(stderr) == "string" then
-    local realerr = io.open("stderr")
+    local realerr = io.open(ident .. "stderr")
     local contents = realerr:read("*a")
     realerr:close()
     if contents ~= stderr then
============================================================
--- testsuite.at 477a9f90a62d5f5b3861e368988bd194ad681d49
+++ testsuite.at e20997eff0240c5470652d33fb39242490d63040
@@ -658,16 +658,16 @@
 m4_include(tests/t_heads.at)#
 m4_include(tests/t_heads_discontinuous_branch.at)#
 m4_include(tests/t_merge_1.at)#
-m4_include(tests/t_merge_2.at)
-m4_include(tests/t_tags.at)
-m4_include(tests/t_add_dot.at)
-m4_include(tests/t_cleanup_empty_dir.at)
-m4_include(tests/t_merge_add_del.at)
-m4_include(tests/t_add_edge.at)
-m4_include(tests/t_patch_drop_add.at)
-m4_include(tests/t_add_drop_add.at)
-m4_include(tests/t_merge2_add_drop_add.at)
-m4_include(tests/t_add_patch_drop_add.at)
+m4_include(tests/t_merge_2.at)#
+m4_include(tests/t_tags.at)#
+m4_include(tests/t_add_dot.at)#
+m4_include(tests/t_cleanup_empty_dir.at)#
+m4_include(tests/t_merge_add_del.at)#
+m4_include(tests/t_add_edge.at)#
+m4_include(tests/t_patch_drop_add.at)#
+m4_include(tests/t_add_drop_add.at)#
+m4_include(tests/t_merge2_add_drop_add.at)#
+m4_include(tests/t_add_patch_drop_add.at)#
 m4_include(tests/t_patch_vs_drop_add.at)
 m4_include(tests/t_explicit_merge.at)
 m4_include(tests/t_ambig_update.at)
============================================================
--- testsuite.lua e3316fe979a171964970a1676312a20da63e399e
+++ testsuite.lua c7dc38733d0fc0a03252b55c4fa9583422e83eea
@@ -234,3 +234,13 @@
 table.insert(tests, "tests/'heads'")
 table.insert(tests, "tests/'heads'_with_discontinuous_branches")
 table.insert(tests, "tests/test_a_merge")
+table.insert(tests, "tests/test_a_merge_2")
+table.insert(tests, "tests/tags_and_tagging_of_revisions")
+table.insert(tests, "tests/mtn_add_.")
+table.insert(tests, "tests/(minor)_update_cleans_emptied_directories")
+table.insert(tests, "tests/merging__with_")
+table.insert(tests, "tests/merging_an_add_edge")
+table.insert(tests, "tests/merge(<>,_)")
+table.insert(tests, "tests/merge(<>,_)")
+table.insert(tests, "tests/merge(,_)")
+table.insert(tests, "tests/merge(<>,_)")
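============================================================
# Appendix sketch (illustrative, not part of the patch): the revision.cc
# snapshots above stop their fixed-point scan as soon as some node is an
# ancestor of one merge head and a dominator of the other.  The fragment
# below restates that stopping condition with plain std::set in place of
# the interned dynamic_bitsets; "graph", "collect_ancestors" and
# "merge_ancestor_exists" are invented names for illustration, not
# monotone's API, and dominator computation is assumed done elsewhere.

// lca_sketch.cc -- standalone, compiles on its own.
#include <algorithm>
#include <iterator>
#include <map>
#include <set>

typedef int node;
typedef std::map<node, std::set<node> > graph;   // child -> immediate parents

// All ancestors of n, n included (what expand_ancestors accumulates per node).
static void
collect_ancestors(graph const & g, node n, std::set<node> & out)
{
  if (!out.insert(n).second)
    return;                                      // already expanded
  graph::const_iterator i = g.find(n);
  if (i == g.end())
    return;                                      // root: no parents recorded
  for (std::set<node>::const_iterator p = i->second.begin();
       p != i->second.end(); ++p)
    collect_ancestors(g, *p, out);
}

// The merge condition: true iff some node is an ancestor of `a` and a
// dominator of `b`.  dominators_of_b stands in for the bitmap fixed point
// that expand_dominators iterates toward.
static bool
merge_ancestor_exists(graph const & g, node a,
                      std::set<node> const & dominators_of_b,
                      node & anc)
{
  std::set<node> anc_a, meet;
  collect_ancestors(g, a, anc_a);
  std::set_intersection(anc_a.begin(), anc_a.end(),
                        dominators_of_b.begin(), dominators_of_b.end(),
                        std::inserter(meet, meet.begin()));
  if (meet.empty())
    return false;
  anc = *meet.begin();                           // the real code scans bit order
  return true;
}

# find_common_ancestor applies this test in both directions (left-ancestor /
# right-dominator and vice versa) after every expansion round, so the search
# terminates on the first node satisfying either orientation.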
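============================================================
# A second illustrative sketch: calculate_change_sets_recursive composes
# per-edge deltas from a child revision back toward the ancestor, memoising
# partial compositions and remembering dead ends so each node is expanded
# at most once.  Deltas are modelled as strings here purely so composition
# is concatenation; "compose_to" and "revgraph" are invented names standing
# in for change_set and concatenate_change_sets.

// compose_sketch.cc -- standalone, compiles on its own.
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::string delta;
struct edge { int parent; delta change; };
typedef std::map<int, std::vector<edge> > revgraph;  // child -> in-edges

static bool
compose_to(revgraph const & g, int ancestor, int child, delta & out,
           std::map<int, delta> & partial,  // memo: node -> composed delta
           std::set<int> & visited)         // every node seen, incl. dead ends
{
  if (ancestor == child)
    return true;
  visited.insert(child);
  revgraph::const_iterator r = g.find(child);
  if (r == g.end())
    return false;                           // fell off the root of the graph
  for (std::vector<edge>::const_iterator e = r->second.begin();
       e != r->second.end(); ++e)
    {
      delta up;
      std::map<int, delta>::const_iterator m = partial.find(e->parent);
      if (m != partial.end())
        up = m->second;                     // reuse a cached partial result
      else if (visited.count(e->parent))
        continue;                           // previously found irrelevant
      else if (!compose_to(g, ancestor, e->parent, up, partial, visited))
        continue;
      out = up + e->change;                 // compose parent path with edge
      partial[child] = out;                 // memoise for later traversals
      return true;
    }
  return false;
}

# The two maps mirror partial_csets and visited_nodes in the fixtures above:
# the memo keeps the walk linear in the number of edges, and the visited set
# prevents re-exploring parents already known not to reach the ancestor.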