#
# add_file "cset.cc"
#
# patch "ChangeLog"
#  from [c5b6af776442ffcda8b25b0b94a9785795da0817]
#    to [461c25a92640c41f5d01de41ee652f4ba900980b]
#
# patch "Makefile.am"
#  from [bedd826d1991853466ab57d77d80be1635014d17]
#    to [052baab7e5e3fdc87b29094be9d3f8383c6797f1]
#
# patch "cset.cc"
#  from []
#    to [1fb425ad8e367a70b3cd9668c69bcf37b4d4418e]
#
# patch "unit_tests.cc"
#  from [230b4227d08bed323a2625f593073427bca9fe9b]
#    to [8eb81e81294b5e8ac18540b19826656903a725cf]
#
# patch "unit_tests.hh"
#  from [b4b20ac190077a08db547eb1dcd79f40c58b2ebc]
#    to [0bb9b3b8b1320afaf40c88a254bdd5f5d9546ef4]
#
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,7 @@
+2005-06-19  graydon hoare
+
+        * cset.cc: Rewrite-in-progress on change_set.cc.
+
 2005-06-14  Richard Levitte
 
         * std_hooks.lua (get_preferred_merge2_command,
--- Makefile.am
+++ Makefile.am
@@ -31,6 +31,7 @@
         rcs_import.cc rcs_import.hh \
         revision.cc revision.hh \
         change_set.cc change_set.hh \
+        cset.cc \
         mt_version.cc mt_version.hh \
         automate.cc automate.hh \
         database_check.cc database_check.hh \
--- cset.cc
+++ cset.cc
@@ -0,0 +1,1762 @@
+/*
+
+  an mfest is a collection of nodes, indexed by ident number and
+  also held in a tree of nodes. the bucket of identified nodes is
+  possibly larger than the tree of nodes.
+
+  a change_set is a pair of mfests. every entry in each mfest exists in
+  the other, though some might be detached from the file tree and just
+  known by ident.
+
+  a change_consumer -- and the serial form of a change_set -- is the
+  way a human wants to read about some work: organized into a set of
+  deletes, moves, and adds.
+
+  there is ambiguity in a change_consumer though, regarding the
+  resolution of names and the simultaneity of operations. specifically,
+  each entry in a change_set is either looked up in the pre state or
+  post state mfest of a change. a delete is looked up in the pre
+  state. a rename's source in the pre state. a rename's destination in
+  the post state. an add is looked up in the post state.
+
+  when playing back a change_set, there is a canonical order in which
+  entries are emitted: {delete,rename,add}{file,dir}.
+
+  furthermore within each type of entry, the order in which the entries
+  are emitted is specified: all entries are emitted in lexicographic
+  order, which happens to induce topological order as well (A is always
+  emitted before A/B).
+
+  crucially, deletes are ordered by *source space* and adds and renames
+  are ordered by *destination space*. we replay these by walking the
+  tree rooted in each mfest.
+
+*/
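+
+/*
+  as an illustrative sketch (file names hypothetical), a small cset in
+  the serial form -- using the stanza symbols defined at the bottom of
+  this file -- might read roughly as:
+
+     delete_file "README.old"
+
+     rename_file "src/foo.c"
+              to "src/bar.c"
+
+        add_file "doc/notes.txt"
+
+           patch "doc/notes.txt"
+            from []
+              to [adc83b19e793491b1c6ea0fd8b46cd9f32e592fc]
+
+  note the canonical order: the delete (named in pre-space), then the
+  rename and add (named in destination space), then the deltas.
+*/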
+
+/*
+
+  aliases, splits, joins, nursery, graveyard:
+
+  each node has a set of alias nodes.
+
+  if N is a node and A is an alias node of N in the old_manifest of a
+  cset C, we say that A split from N in C. if A is an alias of N in
+  the new_manifest of C, we say that A joined N in C.
+
+  if N is not an alias, but actually present in a manifest, we say that N
+  is *live* in that manifest.
+
+  - if N is live in the old_manifest of C, we say N is live-in in C.
+  - if N is live in the new_manifest of C, we say N is live-out in C.
+
+  two special nodes exist *always* in every manifest:
+
+  1. the nursery
+  2. the graveyard
+
+  - if A was split from the nursery in C, we say that A was added in C.
+  - if A was joined to the graveyard in C, we say that A was deleted in C.
+
+  for any nodes M, N in C, we must have a proper lifecycle nesting.
+  that is:
+
+  - if M splits from N in C then N must be live-in in C
+  - if M joins to N in C, then N must be live-out in C
+
+  this is normally trivially satisfied by splitting and joining from the
+  nursery and graveyard, respectively.
+
+  splits exist only in order to serve as the inverse of joins. joins
+  exist for only one purpose: when merging two trees which have
+  different (non-identified) files occupying the same name, and the
+  user decides they *want* to identify the files (eg. 2-way merge). in
+  this case we want to join one file to the other, to give future edits
+  on the joiner a target to apply to.
+
+  yes? hmm.. we want to avoid this:
+  ---------------------------------
+
+  bob makes a tree and syncs it with jane.
+  bob adds file foo
+  bob syncs with sid
+  jane adds file foo
+  jane syncs with bob
+  jane merges with bob. jane's foo is not the same as bob's foo. jane's foo wins.
+  sid makes a change to bob's foo.
+  sid syncs with jane.
+  sid's change is silently eaten.
+
+
+  a point: every file *does* have a GUID. the (rev,path) pair it was added in.
+  or at least it ought to. the problem is you can add a file in two (rev,path)
+  pairs, and then later decide you *wanted* them to behave -- for the sake of
+  merging -- as though they are the same file. iow, behave like they had a
+  common ancestor immediately before the states they were both added in.
+
+  so how do joins help here? simply this: when you are working on a merge
+  between A and B, the first step is to decide which files will be live
+  in the merge. to do this you take all the files which are live in A and
+  live in B. then for each file you've decided will be live, you pull out
+  a subgraph for it, containing all the nodes in which it's alive. then you
+  add to this subgraph all the nodes *joined* to the live one. that's the
+  central trick: you will be doing graph contraction on the joiner as well,
+  so that you do not lose any of its deltas.
+
+  further restriction: foo/bar can only ever join foo/bar, and this can
+  only happen in a merge in which one of the incoming edges is
+  providing foo/bar already; it's actually written (unambiguously) in
+  the joiner's changeset as 'join "foo/bar"' (inverse: 'split
+  "foo/bar"'). so, scratch previous idea: adds and deletes are still
+  expressed as being renamed to or from the nursery or graveyard, not
+  split/joined with them.
+
+
+  further: suppose in A->C we have "join foo/bar", where there's a B->C
+  edge containing a foo/bar to join with. all well and good. then
+  suppose we append D="rename foo/bar foo/baz" to C. then the B->C->D
+  edge has "rename foo/bar foo/baz", and the A->C->D edge has ...
+  "join foo/baz"? that makes no sense.
+
+  we know a join is essentially a "delete with a forwarding address";
+  so like a delete it has to take effect *before* anything else in a
+  cset. which means it is written in the pre-space of the cset. the
+  problem is that the name it refers to is clearly a name in the
+  post-space of the delta: the name it's colliding with!
+
+  perhaps "join" is not the right word. perhaps we want something more
+  like "doom foo/bar; rename foo/bar foo/baz" in the composite cset: an
+  acknowledgment that the node named by foo/bar at the *beginning* of
+  the cset is doomed, even if it gets moved around thereafter: the
+  proviso of a doomed node existing in a revision is that there's a
+  non-doomed node occupying the same name. a doomed node is not yet
+  dead, but it adheres to some other node until that other node dies.
+
+  subsequently if you doom the node foo/baz (which requires some other
+  incoming node in a merge, using that name) then the composite cset
+  is "doom foo/bar; doom foo/baz; rename foo/bar foo/baz" and you have
+  *two* doomed nodes in the doomed set of the third node. this should
+  continue to work until such time as the live node carrying the doomed
+  ones actually dies.
+
+  actually that reads terribly. it would be better to have "delete_foo"
+  have a secondary clause, "with_heir", where the heir is a name in the
+  post-space of the cset. this way the heir can get moved around; and
+  the natural inverse is "add_foo" ... "with_sire", where the sire is
+  in the pre-space of the cset. that feels more likely to work.
+
+ */
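+
+/*
+  purely as a sketch of the "with_heir" / "with_sire" idea above --
+  nothing below implements this syntax -- such a cset entry might
+  serialize as:
+
+     delete_file "foo/bar"
+       with_heir "foo/baz"
+
+  with the inverse:
+
+        add_file "foo/bar"
+       with_sire "foo/baz"
+
+  where the heir name is resolved in the post-space of the cset and
+  the sire name in the pre-space, as argued above.
+*/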
+
+#define __STDC_CONSTANT_MACROS
+
+#include <stdint.h>
+#include <algorithm>
+#include <deque>
+#include <iterator>
+#include <map>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <vector>
+
+#include <ext/hash_map>
+#include <ext/hash_set>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/filesystem/path.hpp>
+
+#include "basic_io.hh"
+#include "sanity.hh"
+#include "vocab.hh"
+
+namespace fs = boost::filesystem;
+
+using std::deque;
+using std::map;
+using std::pair;
+using std::stack;
+using std::string;
+using std::vector;
+using std::inserter;
+using std::copy;
+using std::make_pair;
+using boost::shared_ptr;
+using boost::dynamic_pointer_cast;
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+
+struct node;
+struct dir_node;
+struct file_node;
+struct dir_entry;
+struct dirent_t_cmp;
+struct dirent_hash;
+struct dirent_eq;
+
+typedef uint32_t ident_t;
+typedef shared_ptr<node> node_t;
+typedef shared_ptr<dir_node> dir_t;
+typedef shared_ptr<file_node> file_t;
+typedef shared_ptr<dir_entry> dirent_t;
+// dirmap *must* be a sorted map, do not change to a hashed map
+typedef map<dirent_t, node_t, dirent_t_cmp> dirmap_t;
+typedef hash_set<dirent_t, dirent_hash, dirent_eq> dirset_t;
+typedef string attr_name;
+typedef string attr_val;
+
+static ident_t
+nursery_ident = 0;
+
+static ident_t
+graveyard_ident = 1;
+
+struct
+ident_source
+{
+  ident_t ctr;
+  ident_source() : ctr(graveyard_ident + 1) {}
+  ident_t next() { I(ctr != UINT32_C(0xffffffff)); return ctr++; }
+};
+
+// directory entries are globally interned components
+// of a file_path
+
+struct
+dir_entry
+{
+  string val;
+
+  explicit dir_entry(string s)
+    : val(s)
+  {}
+
+  bool operator<(dir_entry const & other) const
+  {
+    return val < other.val;
+  }
+
+  bool operator==(dir_entry const & other) const
+  {
+    return val == other.val;
+  }
+};
+
+
+struct
+dirent_t_cmp
+{
+  bool operator()(dirent_t const & a,
+                  dirent_t const & b) const
+  {
+    return *a < *b;
+  }
+};
+
+struct
+dirent_hash
+{
+  size_t operator()(dirent_t const & x) const
+  {
+    return __gnu_cxx::__stl_hash_string(x->val.c_str());
+  }
+};
+
+struct
+dirent_eq
+{
+  bool operator()(dirent_t const & a,
+                  dirent_t const & b) const
+  {
+    return *a == *b;
+  }
+};
+
+// this helper class represents an "unpacked" view of the path
+// through a tree of directory nodes to some specific leaf (either
+// dir or file); it is just a faster form of a non-empty file_path
+
+struct
+path_vec_t
+{
+  static dirset_t dset;
+  vector<dirent_t> dir;
+  dirent_t leaf;
+
+  path_vec_t()
+  {
+  }
+
+  path_vec_t(file_path const & f)
+  {
+    fs::path fp(f());
+    for (fs::path::iterator i = fp.begin();
+         i != fp.end(); ++i)
+      {
+        dirent_t tmp(new dir_entry(*i));
+        dirset_t::const_iterator j = dset.find(tmp);
+        if (j == dset.end())
+          {
+            dset.insert(tmp);
+          }
+        else
+          {
+            tmp = *j;
+          }
+        dir.push_back(tmp);
+      }
+    I(!dir.empty());
+    leaf = dir.back();
+    dir.pop_back();
+  }
+
+  file_path to_file_path() const
+  {
+    fs::path fp;
+    for (vector<dirent_t>::const_iterator i = dir.begin();
+         i != dir.end(); ++i)
+      {
+        fp /= (*i)->val;
+      }
+    fp /= leaf->val;
+    return file_path(fp.string());
+  }
+
+};
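+
+// for example (hypothetical values, assuming interning behaves as above):
+//
+//   path_vec_t v(file_path("usr/bin/cat"));
+//   // v.dir  holds interned entries for "usr" and "bin"
+//   // v.leaf is the interned entry for "cat"
+//   I(v.to_file_path() == file_path("usr/bin/cat"));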
+
+// static bucket of shared pointers
+dirset_t
+path_vec_t::dset;
+
+struct
+node
+{
+  ident_t ident;
+  ident_t parent;
+  dirent_t name;
+  map<attr_name, attr_val> attributes;
+
+  bool unborn() const { return parent == nursery_ident; }
+  bool live() const { return ! (unborn() || killed()); }
+  bool killed() const { return parent == graveyard_ident; }
+
+  node() : name(new dir_entry("")) {}
+  virtual void flush_clobbered() = 0;
+  virtual node_t clone() const = 0;
+  virtual ~node() {}
+};
+
+struct
+file_node
+  : public node
+{
+  file_id content;
+
+  virtual void flush_clobbered() {}
+  virtual node_t clone() const;
+  virtual ~file_node() {}
+};
+
+node_t
+file_node::clone() const
+{
+  file_t f = file_t(new file_node());
+  f->ident = ident;
+  f->parent = parent;
+  f->name = name;
+  f->content = content;
+  copy(attributes.begin(), attributes.end(),
+       inserter(f->attributes, f->attributes.begin()));
+  return f;
+}
+
+
+struct
+dir_node
+  : public node
+{
+  dirmap_t entries;
+  dirmap_t clobbered_entries;
+
+  // informative
+  bool contains_entry(dirent_t p) const;
+  bool contains_clobbered_entry(dirent_t p) const;
+  bool contains_dir(dirent_t p) const;
+  bool contains_file(dirent_t p) const;
+
+  // imperative (fault on lookup failures)
+  node_t get_entry(dirent_t p) const;
+  dir_t get_dir_entry(dirent_t p) const;
+  file_t get_file_entry(dirent_t p) const;
+
+  node_t get_clobbered_entry(dirent_t p) const;
+  dir_t get_clobbered_dir_entry(dirent_t p) const;
+  file_t get_clobbered_file_entry(dirent_t p) const;
+
+  void add_child(dirent_t name, node_t n, bool clobber = false);
+  void drop_dir(dirent_t c, bool clobbered_ok = false);
+  void drop_file(dirent_t c, bool clobbered_ok = false);
+
+  virtual void flush_clobbered();
+  virtual node_t clone() const;
+  virtual ~dir_node() {}
+};
+
+
+void
+dir_node::flush_clobbered()
+{
+  dirmap_t::const_iterator
+    i = clobbered_entries.begin(),
+    j = clobbered_entries.end();
+  while (i != j)
+    {
+      i->second->flush_clobbered();
+      ++i;
+    }
+  clobbered_entries.clear();
+}
+
+void
+dir_node::add_child(dirent_t name, node_t n, bool clobber)
+{
+  // this is a bit odd: it is possible that, while processing renames
+  // (only), we have actually clobbered a directory entry name in the
+  // destination mfest of a cset. for example if we have "rename_file
+  // A/X B/Y", where the file in question has ident T. we can find the
+  // current parent P of T in the second mfest, but we can't tell what
+  // name to drop from P in order to get T properly detached and moved
+  // over to its new home in Q=B.
+  //
+  // if we tell P to drop X, we risk dropping an entry which is not T;
+  // it is possible some other entry has already clobbered that name
+  // in P. so when an entry is added with clobbering turned on (i.e. when
+  // we are performing a rename) the new entry moves any old entry to
+  // the clobber map, where future drops (with clobbers accepted) will
+  // short-circuit on it.
+  //
+  // note that in general (in particular, with respect to resolving
+  // the parent directory of the target of an add or rename) there is
+  // no risk of clobbers because we emit renames in topological order
+  // in the target space: by the time we are processing renames
+  // landing on foo/bar/baz, we have already resolved which node
+  // "owns" the name bar in foo.
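+  //
+  // a concrete (hypothetical) case: a cset which swaps two files,
+  // "rename_file a b" plus "rename_file b a". whichever rename is
+  // processed first lands on a name still held by the other file and
+  // displaces it into clobbered_entries; the second rename then finds
+  // its node under the clobbered name and drops it from there with
+  // clobbered_ok = true.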
+
+  if (clobber && contains_entry(name))
+    {
+      I(!contains_clobbered_entry(name));
+      clobbered_entries.insert(make_pair(name, get_entry(name)));
+      entries.erase(name);
+    }
+  I(!contains_entry(name));
+  L(F("parent %d adding child %d = '%s'\n") % ident % n->ident % name->val);
+  n->name = name;
+  n->parent = ident;
+  entries.insert(make_pair(name, n));
+}
+
+void
+dir_node::drop_dir(dirent_t c, bool clobbered_ok)
+{
+  if (clobbered_ok && contains_clobbered_entry(c))
+    {
+      dir_t tmp = get_clobbered_dir_entry(c);
+      clobbered_entries.erase(c);
+    }
+  else
+    {
+      dir_t tmp = get_dir_entry(c);
+      entries.erase(c);
+    }
+}
+
+void
+dir_node::drop_file(dirent_t c, bool clobbered_ok)
+{
+  if (clobbered_ok && contains_clobbered_entry(c))
+    {
+      file_t tmp = get_clobbered_file_entry(c);
+      clobbered_entries.erase(c);
+    }
+  else
+    {
+      file_t tmp = get_file_entry(c);
+      entries.erase(c);
+    }
+}
+
+
+static inline bool
+is_dir_t(node_t n)
+{
+  dir_t d = dynamic_pointer_cast<dir_node>(n);
+  return static_cast<bool>(d);
+}
+
+static inline bool
+is_file_t(node_t n)
+{
+  file_t f = dynamic_pointer_cast<file_node>(n);
+  return static_cast<bool>(f);
+}
+
+static inline dir_t
+downcast_to_dir_t(node_t const n)
+{
+  dir_t d = dynamic_pointer_cast<dir_node>(n);
+  I(static_cast<bool>(d));
+  return d;
+}
+
+static inline file_t
+downcast_to_file_t(node_t const n)
+{
+  file_t f = dynamic_pointer_cast<file_node>(n);
+  I(static_cast<bool>(f));
+  return f;
+}
+
+static bool
+equal_nodes(node_t const & n1,
+            node_t const & n2,
+            bool compare_children = false)
+{
+  deque<node_t> q1;
+  deque<node_t> q2;
+
+  q1.push_back(n1);
+  q2.push_back(n2);
+
+  while (!q1.empty())
+    {
+
+      if (q2.empty())
+        return false;
+
+      node_t a = q1.front();
+      node_t b = q2.front();
+      q1.pop_front();
+      q2.pop_front();
+
+      if (!(a->ident == b->ident))
+        return false;
+
+      if (!(a->parent == b->parent))
+        return false;
+
+      if (! (*(a->name) == *(b->name)))
+        return false;
+
+      if (!(a->attributes == b->attributes))
+        return false;
+
+      if (is_file_t(a))
+        {
+          if (! is_file_t(b))
+            return false;
+
+          file_t af = downcast_to_file_t(a);
+          file_t bf = downcast_to_file_t(b);
+
+          if (!(af->content == bf->content))
+            return false;
+        }
+      else
+        {
+          I(is_dir_t(a));
+
+          if (! is_dir_t(b))
+            return false;
+
+          dir_t ad = downcast_to_dir_t(a);
+          dir_t bd = downcast_to_dir_t(b);
+
+          if (ad->entries.size() != bd->entries.size())
+            return false;
+
+          dirmap_t::const_iterator i = ad->entries.begin();
+          dirmap_t::const_iterator j = bd->entries.begin();
+
+          while (i != ad->entries.end())
+            {
+              I(j != bd->entries.end());
+
+              if (! (*(i->first) == *(j->first)))
+                return false;
+
+              if (compare_children)
+                {
+                  q1.push_back(i->second);
+                  q2.push_back(j->second);
+                }
+              else
+                {
+                  if (i->second->ident != j->second->ident)
+                    return false;
+                }
+              ++i;
+              ++j;
+            }
+        }
+    }
+  return true;
+}
+
+bool
+dir_node::contains_entry(dirent_t p) const
+{
+  dirmap_t::const_iterator i = entries.find(p);
+  if (i == entries.end())
+    return false;
+  return true;
+}
+
+bool
+dir_node::contains_clobbered_entry(dirent_t p) const
+{
+  dirmap_t::const_iterator i = clobbered_entries.find(p);
+  if (i == clobbered_entries.end())
+    return false;
+  return true;
+}
+
+bool
+dir_node::contains_dir(dirent_t p) const
+{
+  dirmap_t::const_iterator i = entries.find(p);
+  if (i == entries.end())
+    return false;
+  return is_dir_t(i->second);
+}
+
+bool
+dir_node::contains_file(dirent_t p) const
+{
+  dirmap_t::const_iterator i = entries.find(p);
+  if (i == entries.end())
+    return false;
+  return is_file_t(i->second);
+}
+
+node_t
+dir_node::get_entry(dirent_t p) const
+{
+  L(F("dir_node::get_entry(\"%s\")\n") % p->val);
+  dirmap_t::const_iterator i = entries.find(p);
+  I(i != entries.end());
+  return i->second;
+}
+
+dir_t
+dir_node::get_dir_entry(dirent_t p) const
+{
+  return downcast_to_dir_t(get_entry(p));
+}
+
+file_t
+dir_node::get_file_entry(dirent_t p) const
+{
+  return downcast_to_file_t(get_entry(p));
+}
+
+node_t
+dir_node::get_clobbered_entry(dirent_t p) const
+{
+  dirmap_t::const_iterator i = clobbered_entries.find(p);
+  I(i != clobbered_entries.end());
+  return i->second;
+}
+
+dir_t
+dir_node::get_clobbered_dir_entry(dirent_t p) const
+{
+  return downcast_to_dir_t(get_clobbered_entry(p));
+}
+
+file_t
+dir_node::get_clobbered_file_entry(dirent_t p) const
+{
+  return downcast_to_file_t(get_clobbered_entry(p));
+}
+
+node_t
+dir_node::clone() const
+{
+  dir_t d = dir_t(new dir_node());
+  d->ident = ident;
+  d->parent = parent;
+  d->name = name;
+  copy(attributes.begin(), attributes.end(),
+       inserter(d->attributes, d->attributes.begin()));
+  for (dirmap_t::const_iterator i = entries.begin();
+       i != entries.end(); ++i)
+    {
+      d->entries.insert(make_pair(i->first, i->second->clone()));
+    }
+  for (dirmap_t::const_iterator i = clobbered_entries.begin();
+       i != clobbered_entries.end(); ++i)
+    {
+      d->clobbered_entries.insert(make_pair(i->first, i->second->clone()));
+    }
+  return d;
+}
+
+
+typedef hash_map<ident_t, node_t> node_map_t;
+
+static void
+insert_into_map(dir_t d,
+                node_map_t & nodes)
+{
+  deque<node_t> work;
+  work.push_back(d);
+  while (!work.empty())
+    {
+      node_t n = work.front();
+      work.pop_front();
+
+      nodes.insert(make_pair(n->ident, n));
+
+      if (is_dir_t(n))
+        {
+          dir_t tmp = downcast_to_dir_t(n);
+          for (dirmap_t::const_iterator i = tmp->entries.begin();
+               i != tmp->entries.end(); ++i)
+            {
+              work.push_back(i->second);
+            }
+        }
+    }
+}
+
+struct
+mfest
+{
+  ident_t max_ident;
+  node_map_t nodes;
+  dir_t root;
+
+  dir_t make_dir();
+  file_t make_file();
+
+  mfest() :
+    max_ident(graveyard_ident + 1),
+    root(make_dir())
+  {
+    insert_into_map(root, nodes);
+  }
+
+  mfest(mfest const & other);
+  void check_sane() const;
+
+  // informative
+  bool file_exists(file_path fp) const;
+  bool dir_exists(file_path dp) const;
+  void get_path(node_t const & n, path_vec_t & path) const;
+
+  // imperative (fault on lookup failures)
+  node_t get_node(ident_t i) const;
+  dir_t get_dir_node(ident_t i) const;
+  file_t get_file_node(ident_t i) const;
+
+  node_t get_node(path_vec_t const & n) const;
+  dir_t get_dir_node(path_vec_t const & d) const;
+  file_t get_file_node(path_vec_t const & f) const;
+
+  dir_t get_containing_dir_node(path_vec_t const & d) const;
+
+  bool operator==(mfest const & other) const;
+};
+
+
+mfest::mfest(mfest const & other) :
+  max_ident(other.max_ident),
+  root(downcast_to_dir_t(other.root->clone()))
+{
+  insert_into_map(root, nodes);
+}
+
+dir_t
+mfest::make_dir()
+{
+  dir_t n(new dir_node());
+  n->ident = ++max_ident;
+  n->parent = nursery_ident;
+  nodes.insert(make_pair(n->ident, n));
+  return n;
+}
+
+file_t
+mfest::make_file()
+{
+  file_t n(new file_node());
+  n->ident = ++max_ident;
+  n->parent = nursery_ident;
+  nodes.insert(make_pair(n->ident, n));
+  return n;
+}
+
+void
+mfest::check_sane() const
+{
+
+  deque<ident_t> work;
+  hash_set<ident_t> seen;
+  work.push_back(root->ident);
+
+  // first go from the top of the tree to the bottom checking each
+  // directory for absence of cycle-forming edges and agreement
+  // between names.
+
+  while (!work.empty())
+    {
+      ident_t tmp = work.front();
+      work.pop_front();
+
+      I(seen.find(tmp) == seen.end());
+      seen.insert(tmp);
+
+      node_t tnode = get_node(tmp);
+      if (is_dir_t(tnode))
+        {
+          dir_t tdir = downcast_to_dir_t(tnode);
+          for (dirmap_t::const_iterator i = tdir->entries.begin();
+               i != tdir->entries.end(); ++i)
+            {
+              node_t child = i->second;
+              I(i->first == child->name);
+              I(child->parent == tdir->ident);
+              work.push_back(i->second->ident);
+            }
+        }
+    }
+
+  // now go through the node map and make sure that we both found
+  // every node, and also that the nodes have the same belief about
+  // their ident that the node map has
+
+  for (node_map_t::const_iterator i = nodes.begin();
+       i != nodes.end(); ++i)
+    {
+      if (i->second->parent != nursery_ident
+          && i->second->parent != graveyard_ident)
+        {
+          I(seen.find(i->first) != seen.end());
+        }
+      I(i->first == i->second->ident);
+    }
+}
+
+bool
+mfest::file_exists(file_path fp) const
+{
+  path_vec_t v(fp);
+  dir_t d = root;
+  vector<dirent_t>::const_iterator i = v.dir.begin(), j = v.dir.end();
+  while (i != j)
+    {
+      if (!d->contains_dir(*i))
+        return false;
+
+      d = d->get_dir_entry(*i++);
+    }
+  return d->contains_file(v.leaf);
+}
+
+bool
+mfest::dir_exists(file_path dp) const
+{
+  path_vec_t v(dp);
+  dir_t d = root;
+  vector<dirent_t>::const_iterator i = v.dir.begin(), j = v.dir.end();
+  while (i != j)
+    {
+      if (!d->contains_dir(*i))
+        return false;
+
+      d = d->get_dir_entry(*i++);
+    }
+  return d->contains_dir(v.leaf);
+}
+
+node_t
+mfest::get_node(ident_t i) const
+{
+  L(F("mfest::get_node(%s)\n") % i);
+  node_map_t::const_iterator j = nodes.find(i);
+  I(j != nodes.end());
+  return j->second;
+}
+
+dir_t
+mfest::get_dir_node(ident_t i) const
+{
+  return downcast_to_dir_t(get_node(i));
+}
+
+file_t
+mfest::get_file_node(ident_t i) const
+{
+  return downcast_to_file_t(get_node(i));
+}
+
+dir_t
+mfest::get_dir_node(path_vec_t const & d) const
+{
+  return downcast_to_dir_t(get_node(d));
+}
+
+file_t
+mfest::get_file_node(path_vec_t const & f) const
+{
+  return downcast_to_file_t(get_node(f));
+}
+
+dir_t
+mfest::get_containing_dir_node(path_vec_t const & v) const
+{
+  dir_t d = root;
+  vector<dirent_t>::const_iterator i = v.dir.begin(), j = v.dir.end();
+  while (i != j)
+    {
+      d = d->get_dir_entry(*i++);
+    }
+  return d;
+}
+
+node_t
+mfest::get_node(path_vec_t const & n) const
+{
+  return get_containing_dir_node(n)->get_entry(n.leaf);
+}
+
+void
+mfest::get_path(node_t const & n,
+                path_vec_t & path) const
+{
+  path.dir.clear();
+  path.leaf = n->name;
+  ident_t i = n->parent;
+  while (i != root->ident)
+    {
+      node_t tmp = get_node(i);
+      path.dir.push_back(tmp->name);
+      i = tmp->parent;
+    }
+  reverse(path.dir.begin(), path.dir.end());
+}
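+
+// hypothetical use of the lookup interface above:
+//
+//   mfest m;
+//   if (m.file_exists(file_path("usr/bin/cat")))
+//     {
+//       path_vec_t v(file_path("usr/bin/cat"));
+//       file_t f = m.get_file_node(v);
+//       path_vec_t back;
+//       m.get_path(f, back);
+//       I(back.to_file_path() == file_path("usr/bin/cat"));
+//     }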
+
+bool
+mfest::operator==(mfest const & other) const
+{
+  if (max_ident != other.max_ident)
+    return false;
+
+  if (!equal_nodes(root, other.root, true))
+    return false;
+
+  for (node_map_t::const_iterator i = nodes.begin();
+       i != nodes.end(); ++i)
+    {
+      node_map_t::const_iterator j = other.nodes.find(i->first);
+
+      if (j == other.nodes.end())
+        return false;
+
+      if (!equal_nodes(i->second, j->second, false))
+        return false;
+    }
+  return true;
+}
+
+
+struct
+change_consumer
+{
+  void delete_dir(file_path const & dp);
+  void delete_file(file_path const & fp);
+  void rename_dir(file_path const & src, file_path const & dst);
+  void rename_file(file_path const & src, file_path const & dst);
+  void add_dir(file_path const & dp);
+  void add_file(file_path const & fp);
+  void apply_delta(file_path const & path,
+                   file_id const & src,
+                   file_id const & dst);
+
+  virtual void delete_dir(path_vec_t const & dp) = 0;
+  virtual void delete_file(path_vec_t const & fp) = 0;
+  virtual void rename_dir(path_vec_t const & src, path_vec_t const & dst) = 0;
+  virtual void rename_file(path_vec_t const & src, path_vec_t const & dst) = 0;
+  virtual void add_dir(path_vec_t const & dp) = 0;
+  virtual void add_file(path_vec_t const & fp) = 0;
+  virtual void apply_delta(path_vec_t const & path,
+                           file_id const & src,
+                           file_id const & dst) = 0;
+};
+
+void
+change_consumer::delete_dir(file_path const & dp)
+{
+  L(F("delete_dir('%s')\n") % dp);
+  this->delete_dir(path_vec_t(dp));
+}
+
+void
+change_consumer::delete_file(file_path const & fp)
+{
+  L(F("delete_file('%s')\n") % fp);
+  this->delete_file(path_vec_t(fp));
+}
+
+void
+change_consumer::rename_dir(file_path const & src,
+                            file_path const & dst)
+{
+  L(F("rename_dir('%s', '%s')\n") % src % dst);
+  this->rename_dir(path_vec_t(src), path_vec_t(dst));
+}
+
+void
+change_consumer::rename_file(file_path const & src,
+                             file_path const & dst)
+{
+  L(F("rename_file('%s', '%s')\n") % src % dst);
+  this->rename_file(path_vec_t(src), path_vec_t(dst));
+}
+
+void
+change_consumer::add_dir(file_path const & dp)
+{
+  L(F("add_dir('%s')\n") % dp);
+  this->add_dir(path_vec_t(dp));
+}
+
+void
+change_consumer::add_file(file_path const & fp)
+{
+  L(F("add_file('%s')\n") % fp);
+  this->add_file(path_vec_t(fp));
+}
+
+void
+change_consumer::apply_delta(file_path const & path,
+                             file_id const & src,
+                             file_id const & dst)
+{
+  L(F("apply_delta('%s', [%s], [%s])\n") % path % src % dst);
+  this->apply_delta(path_vec_t(path), src, dst);
+}
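+
+// as a sketch of the interface above (hypothetical, not used in this
+// file): a consumer which merely counts the operations played back.
+//
+//   struct op_counter : public change_consumer
+//   {
+//     size_t n;
+//     op_counter() : n(0) {}
+//     virtual void delete_dir(path_vec_t const &) { ++n; }
+//     virtual void delete_file(path_vec_t const &) { ++n; }
+//     virtual void rename_dir(path_vec_t const &, path_vec_t const &) { ++n; }
+//     virtual void rename_file(path_vec_t const &, path_vec_t const &) { ++n; }
+//     virtual void add_dir(path_vec_t const &) { ++n; }
+//     virtual void add_file(path_vec_t const &) { ++n; }
+//     virtual void apply_delta(path_vec_t const &,
+//                              file_id const &, file_id const &) { ++n; }
+//   };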
+
+struct
+cset
+  : public change_consumer
+{
+  mfest old_mfest;
+  mfest new_mfest;
+
+  cset(mfest const & base) :
+    old_mfest(base),
+    new_mfest(base)
+  {}
+
+  cset()
+  {}
+
+  virtual ~cset() {}
+
+  void reset(mfest const & m);
+  void check_sane() const;
+
+  void replay_changes(change_consumer & cc) const;
+  void replay_inverse_changes(change_consumer & cc) const;
+
+  virtual void delete_dir(path_vec_t const & dp);
+  virtual void delete_file(path_vec_t const & fp);
+  virtual void rename_dir(path_vec_t const & src, path_vec_t const & dst);
+  virtual void rename_file(path_vec_t const & src, path_vec_t const & dst);
+  virtual void add_dir(path_vec_t const & dp);
+  virtual void add_file(path_vec_t const & fp);
+  virtual void apply_delta(path_vec_t const & path,
+                           file_id const & src,
+                           file_id const & dst);
+
+  bool operator==(cset const & other) const
+  {
+    if (! (old_mfest == other.old_mfest))
+      return false;
+    if (! (new_mfest == other.new_mfest))
+      return false;
+    return true;
+  }
+
+};
+
+
+void
+cset::reset(mfest const & m)
+{
+  old_mfest = m;
+  new_mfest = m;
+}
+
+
+static void
+check_hash_inclusion(mfest const & a,
+                     mfest const & b)
+{
+  for (node_map_t::const_iterator i = a.nodes.begin();
+       i != a.nodes.end(); ++i)
+    {
+      node_map_t::const_iterator j = b.nodes.find(i->first);
+      I(j != b.nodes.end());
+    }
+}
+
+static void
+check_mfests_agree(mfest const & a,
+                   mfest const & b)
+{
+  check_hash_inclusion(a, b);
+  check_hash_inclusion(b, a);
+}
+
+
+void
+cset::check_sane() const
+{
+  old_mfest.check_sane();
+  new_mfest.check_sane();
+  check_mfests_agree(old_mfest, new_mfest);
+}
+
+void
+cset::delete_dir(path_vec_t const & dp)
+{
+  dir_t src = old_mfest.get_dir_node(dp);
+  dir_t dst = new_mfest.get_dir_node(src->ident);
+  dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent);
+  parent_of_dst->drop_dir(dp.leaf);
+  dst->parent = graveyard_ident;
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::delete_file(path_vec_t const & fp)
+{
+  file_t src = old_mfest.get_file_node(fp);
+  file_t dst = new_mfest.get_file_node(src->ident);
+  dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent);
+  parent_of_dst->drop_file(fp.leaf);
+  dst->parent = graveyard_ident;
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::rename_dir(path_vec_t const & s,
+                 path_vec_t const & d)
+{
+  dir_t src = old_mfest.get_dir_node(s);
+  dir_t dst = new_mfest.get_dir_node(src->ident);
+  dir_t old_dst_parent = new_mfest.get_dir_node(dst->parent);
+  dir_t new_dst_parent = new_mfest.get_containing_dir_node(d);
+  old_dst_parent->drop_dir(dst->name, true);
+  new_dst_parent->add_child(d.leaf, dst, true);
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::rename_file(path_vec_t const & s,
+                  path_vec_t const & d)
+{
+  file_t src = old_mfest.get_file_node(s);
+  file_t dst = new_mfest.get_file_node(src->ident);
+  dir_t old_dst_parent = new_mfest.get_dir_node(dst->parent);
+  dir_t new_dst_parent = new_mfest.get_containing_dir_node(d);
+  old_dst_parent->drop_file(dst->name, true);
+  new_dst_parent->add_child(d.leaf, dst, true);
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::add_dir(path_vec_t const & dp)
+{
+  dir_t new_dst_parent = new_mfest.get_containing_dir_node(dp);
+  dir_t new_dir = old_mfest.make_dir();
+  dir_t new_clone = downcast_to_dir_t(new_dir->clone());
+  new_dst_parent->add_child(dp.leaf, new_clone);
+  new_mfest.nodes.insert(make_pair(new_dir->ident, new_clone));
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::add_file(path_vec_t const & fp)
+{
+  dir_t new_dst_parent = new_mfest.get_containing_dir_node(fp);
+  file_t new_file = old_mfest.make_file();
+  file_t new_clone = downcast_to_file_t(new_file->clone());
+  new_dst_parent->add_child(fp.leaf, new_clone);
+  new_mfest.nodes.insert(make_pair(new_file->ident, new_clone));
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+void
+cset::apply_delta(path_vec_t const & path,
+                  file_id const & s,
+                  file_id const & d)
+{
+  file_t dst = new_mfest.get_file_node(path);
+  file_t src = old_mfest.get_file_node(dst->ident);
+  src->content = s;
+  dst->content = d;
+  if (global_sanity.debug)
+    {
+      check_sane();
+    }
+}
+
+static void
+play_changes(change_consumer & cc,
+             mfest const & a,
+             mfest const & b);
+
+void
+cset::replay_changes(change_consumer & cc) const
+{
+  check_sane();
+  play_changes(cc, old_mfest, new_mfest);
+}
+
+void
+cset::replay_inverse_changes(change_consumer & cc) const
+{
+  check_sane();
+  play_changes(cc, new_mfest, old_mfest);
+}
+
+enum
+replay_mode_t
+  {
+    replay_delete_dir,
+    replay_delete_file,
+    replay_rename_dir,
+    replay_rename_file,
+    replay_add_dir,
+    replay_add_file,
+    replay_apply_delta
+  };
+
+
+static void
+play_changes_from_dir(change_consumer & cc,
+                      mfest const & src,
+                      mfest const & dst,
+                      replay_mode_t mode,
+                      dir_t cwd)
+{
+
+  typedef pair<dir_t, dirmap_t::const_iterator> state;
+  stack<state> stk;
+
+  stk.push(make_pair(cwd, cwd->entries.begin()));
+
+  while (! stk.empty())
+    {
+      bool pushed = false;
+
+      for (state & curr = stk.top();
+           curr.second != curr.first->entries.end() && !pushed;
+           ++curr.second)
+        {
+
+          cwd = curr.first;
+          L(F("replaying %s = %d\n") % cwd->name->val % cwd->ident);
+          node_t self = curr.second->second, other;
+          path_vec_t v1, v2;
+
+          switch (mode)
+            {
+            case replay_delete_dir:
+              // cwd is a directory in src space
+              other = dst.get_node(self->ident);
+              if (is_dir_t(self)
+                  && self->live()
+                  && other->killed())
+                {
+                  I(is_dir_t(other));
+                  src.get_path(self, v1);
+                  cc.delete_dir(v1);
+                }
+              break;
+
+            case replay_delete_file:
+              // cwd is a directory in src space
+              other = dst.get_node(self->ident);
+              if (is_file_t(self)
+                  && self->live()
+                  && other->killed())
+                {
+                  I(is_file_t(other));
+                  src.get_path(self, v1);
+                  cc.delete_file(v1);
+                }
+              break;
+
+            case replay_rename_dir:
+              // cwd is a directory in dst space
+              other = src.get_node(self->ident);
+              if (is_dir_t(self)
+                  && self->live()
+                  && other->live()
+                  && ((other->name->val != self->name->val)
+                      || (other->parent != self->parent)))
+                {
+                  I(is_dir_t(other));
+                  src.get_path(other, v1);
+                  dst.get_path(self, v2);
+                  cc.rename_dir(v1, v2);
+                }
+              break;
+
+            case replay_rename_file:
+              // cwd is a directory in dst space
+              other = src.get_node(self->ident);
+              if (is_file_t(self)
+                  && self->live()
+                  && other->live()
+                  && ((other->name->val != self->name->val)
+                      || (other->parent != self->parent)))
+                {
+                  I(is_file_t(other));
+                  src.get_path(other, v1);
+                  dst.get_path(self, v2);
+                  cc.rename_file(v1, v2);
+                }
+              break;
+
+            case replay_add_dir:
+              // cwd is a directory in dst space
+              other = src.get_node(self->ident);
+              if (is_dir_t(self)
+                  && other->unborn()
+                  && self->live())
+                {
+                  I(is_dir_t(other));
+                  dst.get_path(self, v2);
+                  cc.add_dir(v2);
+                }
+              break;
+
+            case replay_add_file:
+              // cwd is a directory in dst space
+              other = src.get_node(self->ident);
+              if (is_file_t(self)
+                  && other->unborn()
+                  && self->live())
+                {
+                  I(is_file_t(other));
+                  dst.get_path(self, v2);
+                  cc.add_file(v2);
+                }
+              break;
+
+            case replay_apply_delta:
+              // cwd is a directory in dst space
+              other = src.get_node(self->ident);
+              if (is_file_t(self))
+                {
+                  file_t self_f = downcast_to_file_t(self);
+                  file_t other_f = downcast_to_file_t(other);
+                  dst.get_path(self, v2);
+
+                  if (other->unborn()
+                      && self->live())
+                    {
+                      // deltas accompanying additions
+                      cc.apply_delta(v2, file_id(), self_f->content);
+                    }
+                  else if (self->live()
+                           && other->live()
+                           && !(self_f->content == other_f->content))
+                    {
+                      // normal deltas
+                      cc.apply_delta(v2,
+                                     other_f->content,
+                                     self_f->content);
+                    }
+                }
+              break;
+            }
+
+          if (is_dir_t(self))
+            {
+              dir_t dtmp = downcast_to_dir_t(self);
+              stk.push(make_pair(dtmp, dtmp->entries.begin()));
+              pushed = true;
+              L(F("pushed %s = %d\n") % self->name->val % self->ident);
+            }
+        }
+
+      if (!pushed)
+        stk.pop();
+    }
+}
+
+static void
+play_changes(change_consumer & cc,
+             mfest const & a,
+             mfest const & b)
+{
+  play_changes_from_dir(cc, a, b, replay_delete_dir, a.root);
+  play_changes_from_dir(cc, a, b, replay_delete_file, a.root);
+
+  play_changes_from_dir(cc, a, b, replay_rename_dir, b.root);
+  play_changes_from_dir(cc, a, b, replay_rename_file, b.root);
+
+  play_changes_from_dir(cc, a, b, replay_add_dir, b.root);
+  play_changes_from_dir(cc, a, b, replay_add_file, b.root);
+
+  play_changes_from_dir(cc, a, b, replay_apply_delta, b.root);
+}
+
+
+void
+concatenate_changesets(cset const & a,
+                       cset const & b,
+                       cset & c)
+{
+  a.check_sane();
+  b.check_sane();
+  c.reset(a.new_mfest);
+  b.replay_changes(c);
+  c.old_mfest.root->flush_clobbered();
+  c.new_mfest.root->flush_clobbered();
+  c.check_sane();
+}
+
+
+// merging changes is done on a file-by-file basis (within a
+// worst-case subgraph established at the revision level).
+//
+// - start with revisions (and thus mfests) A and B, which we want to
+// merge. there's an LCAD C, which is the worst-case common ancestor.
+//
+// - extract the subgraph S bound by A+B at the bottom and C at the top.
+//
+// - take the files in A and the files in B, and try to identify them
+// via their history structure. there is a possibility for
+// mis-identification here if there were mutual adds of "the same
+// file" on both edges; the essential thing to achieve is that A and B
+// each have only *one* entry for each identified file. there cannot
+// be two. if there are two, due to post-add identification along two
+// different history lines (suturing), ask the user to serve as a
+// tie-breaker.
+//
+// - for each "aspect" X (name, parent (+ lifecycle-state),
+// data-content, any attributes) of each identified file F in A union
+// B, check to see if the aspect is identical in A and B. if so, it's not
+// affected by the merge, no problem.
+//
+// if it differs, copy the graph S into G[F.X] and perform a sequence
+// of contractions on G[F.X] by checking each graph node until there
+// is a full pass over the graph w/o contractions. the contraction
+// rules are:
+//
+// - contract X -> X ==> X
+// - contract Y <- X -> Y ==> X
+// - contract X -> Y -> Z ==> X -> Z
+// (where Y has no other children)
+//
+// - if we have contracted A[F.X] and B[F.X] into the same node M[F.X]
+// in G[F.X] (or more likely, if one node is contracted into a parent
+// of the other, such that there is only one head), then we have
+// merged F.X for free: there were no conflicts on it.
+//
+// - otherwise, with a contracted version of G[F.X], we let C[F.X] =
+// LCAD(G[F.X]) -- an aspect-specific common-ancestor -- and ask the
+// user for help on A[F.X], C[F.X], B[F.X]. if they refuse, we fail.
+//
+
+// TODO: implement this algorithm!
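+
+// a rough sketch of the contraction loop described above (pseudocode
+// only -- the per-aspect graph representation G[F.X] is not defined
+// yet, so none of this is implementable as written):
+//
+//   bool contracted;
+//   do
+//     {
+//       contracted = false;
+//       for each node Y in G[F.X]:
+//         if Y -> Y                              // X -> X ==> X
+//           { contract the pair; contracted = true; }
+//         else if Y <- X -> Y                    // Y <- X -> Y ==> X
+//           { contract both edges to X; contracted = true; }
+//         else if X -> Y -> Z and Y has no other children
+//           { splice Y out, leaving X -> Z; contracted = true; }
+//     }
+//   while (contracted);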
+
+
+namespace
+{
+  namespace syms
+  {
+    std::string const patch("patch");
+    std::string const from("from");
+    std::string const to("to");
+    std::string const add_file("add_file");
+    std::string const add_dir("add_dir");
+    std::string const delete_file("delete_file");
+    std::string const delete_dir("delete_dir");
+    std::string const rename_file("rename_file");
+    std::string const rename_dir("rename_dir");
+  }
+}
+
+
+static void
+parse_cset(basic_io::parser & parser,
+           change_consumer & cc)
+{
+  while (parser.symp())
+    {
+      std::string t1, t2, t3;
+      if (parser.symp(syms::add_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          cc.add_file(file_path(t1));
+        }
+      else if (parser.symp(syms::add_dir))
+        {
+          parser.sym();
+          parser.str(t1);
+          cc.add_dir(file_path(t1));
+        }
+      else if (parser.symp(syms::delete_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          cc.delete_file(file_path(t1));
+        }
+      else if (parser.symp(syms::delete_dir))
+        {
+          parser.sym();
+          parser.str(t1);
+          cc.delete_dir(file_path(t1));
+        }
+      else if (parser.symp(syms::rename_file))
+        {
+          parser.sym();
+          parser.str(t1);
+          parser.esym(syms::to);
+          parser.str(t2);
+          cc.rename_file(file_path(t1),
+                         file_path(t2));
+        }
+      else if (parser.symp(syms::rename_dir))
+        {
+          parser.sym();
+          parser.str(t1);
+          parser.esym(syms::to);
+          parser.str(t2);
+          cc.rename_dir(file_path(t1),
+                        file_path(t2));
+        }
+      else if (parser.symp(syms::patch))
+        {
+          parser.sym();
+          parser.str(t1);
+          parser.esym(syms::from);
+          parser.hex(t2);
+          parser.esym(syms::to);
+          parser.hex(t3);
+          cc.apply_delta(file_path(t1), file_id(t2), file_id(t3));
+        }
+      else
+        break;
+    }
+}
+
+struct
+cset_printer
+  : public change_consumer
+{
+  basic_io::printer & printer;
+  cset_printer(basic_io::printer & p) : printer(p) {}
+  virtual void delete_dir(path_vec_t const & dp);
+  virtual void delete_file(path_vec_t const & fp);
+  virtual void rename_dir(path_vec_t const & src,
+                          path_vec_t const & dst);
+  virtual void rename_file(path_vec_t const & src,
+                           path_vec_t const & dst);
+  virtual void add_dir(path_vec_t const & dp);
+  virtual void add_file(path_vec_t const & fp);
+  virtual void apply_delta(path_vec_t const & path,
+                           file_id const & src,
+                           file_id const & dst);
+};
+
+void
+cset_printer::delete_dir(path_vec_t const & dp)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::delete_dir, dp.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::delete_file(path_vec_t const & fp)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::delete_file, fp.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::rename_dir(path_vec_t const & src,
+                         path_vec_t const & dst)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::rename_dir, src.to_file_path()());
+  st.push_str_pair(syms::to, dst.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::rename_file(path_vec_t const & src,
+                          path_vec_t const & dst)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::rename_file, src.to_file_path()());
+  st.push_str_pair(syms::to, dst.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::add_dir(path_vec_t const & dp)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::add_dir, dp.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::add_file(path_vec_t const & fp)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::add_file, fp.to_file_path()());
+  printer.print_stanza(st);
+}
+
+void
+cset_printer::apply_delta(path_vec_t const & path,
+                          file_id const & src,
+                          file_id const & dst)
+{
+  basic_io::stanza st;
+  st.push_str_pair(syms::patch, path.to_file_path()());
+  st.push_hex_pair(syms::from, src.inner()());
+  st.push_hex_pair(syms::to, dst.inner()());
+  printer.print_stanza(st);
+}
+
+
+void
+read_cset(data const & dat,
+          cset & cs)
+{
+  std::istringstream iss(dat());
+  basic_io::input_source src(iss, "cset");
+  basic_io::tokenizer tok(src);
+  basic_io::parser pars(tok);
+  parse_cset(pars, cs);
+  I(src.lookahead == EOF);
+  cs.check_sane();
+}
+
+void
+write_cset(cset const & cs,
+           data & dat)
+{
+  cs.check_sane();
+  std::ostringstream oss;
+  basic_io::printer pr(oss);
+  cset_printer cpr(pr);
+  cs.replay_changes(cpr);
+  dat = data(oss.str());
+}
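+
+// a sketch of the expected round trip through the functions above
+// (this is essentially what spin_cset() in the unit tests below does):
+//
+//   cset cs, cs2;
+//   cs.add_file(file_path("a"));    // build up some change
+//   data d;
+//   write_cset(cs, d);
+//   read_cset(d, cs2);
+//   I(cs == cs2);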
+
+
+#ifdef BUILD_UNIT_TESTS
+#include "unit_tests.hh"
+#include "sanity.hh"
+#include "transforms.hh"
+
+// TODO: copy more tests from change_set.cc into here, adapt to the
+// new circumstances
+
+file_id null_file_id;
+
+static void
+spin_cset(cset const & cs)
+{
+  data tmp1;
+  cset cs1;
+  write_cset(cs, tmp1);
+  read_cset(tmp1, cs1);
+  for (int i = 0; i < 5; ++i)
+    {
+      data tmp2;
+      cset cs2;
+      write_cset(cs1, tmp2);
+      BOOST_CHECK(tmp1 == tmp2);
+      read_cset(tmp2, cs2);
+      BOOST_CHECK(cs1 == cs2);
+      cs1 = cs2;
+    }
+}
+
+static void
+basic_cset_test()
+{
+  try
+    {
+
+      cset cs;
+      cs.add_dir(file_path("usr"));
+      cs.add_dir(file_path("usr/bin"));
+      cs.add_file(file_path("usr/bin/cat"));
+      cs.apply_delta(file_path("usr/bin/cat"),
+                     null_file_id,
+                     file_id(hexenc<id>("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc")));
+
+      cs.add_dir(file_path("usr/local"));
+      cs.add_dir(file_path("usr/local/bin"));
+      cs.add_file(file_path("usr/local/bin/dog"));
+      cs.apply_delta(file_path("usr/local/bin/dog"),
+                     null_file_id,
+                     file_id(hexenc<id>("adc83b19e793491b1c6ea0fd8b46cd9f32e592fc")));
+
+      spin_cset(cs);
+    }
+  catch (informative_failure & exn)
+    {
+      L(F("informative failure: %s\n") % exn.what);
+    }
+  catch (std::runtime_error & exn)
+    {
+      L(F("runtime error: %s\n") % exn.what());
+    }
+}
+
+void
+add_cset_tests(test_suite * suite)
+{
+  I(suite);
+  suite->add(BOOST_TEST_CASE(&basic_cset_test));
+}
+
+
+#endif // BUILD_UNIT_TESTS
--- unit_tests.cc
+++ unit_tests.cc
@@ -57,6 +57,9 @@
   if (t.empty() || t.find("change_set") != t.end())
     add_change_set_tests(suite);
 
+  if (t.empty() || t.find("cset") != t.end())
+    add_cset_tests(suite);
+
   if (t.empty() || t.find("diff_patch") != t.end())
     add_diff_patch_tests(suite);
 
@@ -69,7 +72,7 @@
   if (t.empty() || t.find("netcmd") != t.end())
     add_netcmd_tests(suite);
 
-  if (t.empty() || t.find("netcmd") != t.end())
+  if (t.empty() || t.find("path_component") != t.end())
     add_path_component_tests(suite);
--- unit_tests.hh
+++ unit_tests.hh
@@ -27,6 +27,7 @@
 void add_transform_tests(test_suite * suite);
 void add_vocab_tests(test_suite * suite);
 void add_change_set_tests(test_suite * suite);
+void add_cset_tests(test_suite * suite);
 void add_revision_tests(test_suite * suite);
 void add_xdelta_tests(test_suite * suite);
 void add_packet_tests(test_suite * suite);