# # patch "ChangeLog" # from [21414944e3a84c4ed2a2cf4a1a7d024a3ed23b61] # to [3cb5390b59bb8d08b7c00dc21d686eb71c9c5763] # # patch "cset.cc" # from [ab94105104e735388f967b1920c969f647115d4c] # to [a7829d5eaa13b2a677ab15420e73f0ca3ae36c47] # =============================================== --- ChangeLog 21414944e3a84c4ed2a2cf4a1a7d024a3ed23b61 +++ ChangeLog 3cb5390b59bb8d08b7c00dc21d686eb71c9c5763 @@ -1,3 +1,8 @@ +2005-08-01 graydon hoare + + * cset.cc: Steps towards integrating feedback from njs: much + smaller and tighter than before, minimal state in nodes. + 2005-07-25 graydon hoare * cset.cc: Random tester seems to be able to run forever now. =============================================== --- cset.cc ab94105104e735388f967b1920c969f647115d4c +++ cset.cc a7829d5eaa13b2a677ab15420e73f0ca3ae36c47 @@ -1,123 +1,90 @@ /* -nodes: +Nodes: ~~~~~~ - a node is either a file or a directory. nodes have attributes, an ident, a - parent-ident, possibly an heir, and a set of sires. directory nodes have a - map of children and a map of clobbered-children. file nodes have a content - hash. see below for the definitions of these members. + A node is either a file or a directory. Nodes have a name and + attributes. Directory nodes have a map of children. File nodes have a + content hash. See below for the definitions of these members. -mfests: +Mfests: ~~~~~~~ - an mfest is an index-set of nodes X and a tree T of nodes starting from a - root. the index X maps ident numbers to shared pointers into T. there may - be entries in X which are not in T. T must always be a well-formed tree. + An mfest is a structure containing a directory node, with a sanity + checking routine to confirm that the tree is acyclic. Since mfests + implement copy-on-write (see below; essentially you have to be rather + careful about touching their insides as a result) we will probably keep + mfests opaque outside this module. - an mfest has a normal form, in which: - - - a node is in X iff it is in T - - all nodes in the tree have empty heirs and sires - (see below for definition of these) - serializing and re-reading an mfest normalizes it (though there is a - faster, in-place function to normalize them as well). - - -csets: +Csets: ~~~~~~ - a cset is a pair of mfests A, B. the mfests in a cset are *not* - normalized. it is an invariant that idents(A.X) = idents(B.X), but it is - only sometimes true that idents(A.T) = idents(B.T); some nodes might be - present in one mfest's tree but absent from the other's (if they were - added or deleted). + A cset is a pair of mfests OLD, NEW, an "incubator" set of unborn nodes, a + "graveyard" set of killed nodes, and a pair of maps relating copied nodes + in mfest B to nodes in A. We should have that: + {OLD union incubator} == {NEW union graveyard}. -change_consumers: + +Change_consumers: ~~~~~~~~~~~~~~~~~ - a change_consumer -- and the serial form of a change_set -- is the + A change_consumer -- and the serial form of a change_set -- is the way a human wants to read about some work: organized into a set of - deletes, moves, adds, deltas, and attribute set/clear operations. + deletes, renames, adds, deltas, and attribute set/clear operations. - there is ambiguity in a change_consumer regarding the resolution of - names and the simultineity of operations. specifically, each entry in - a change_set is either looked up in the pre state or post state mfest - of a change. a delete is looked up in the pre state. a rename's - source in the pre state. a rename's destination in the post state. an - add is looked up in the post state. + There is ambiguity in a change_consumer regarding the resolution of + names and the simultineity of operations. Specifically, each entry in + a change_set is either looked up in the pre-state or post-state mfest + of a change. A delete is looked up in the pre-state. a rename's + source in the pre-state. A rename's destination in the post-state. An + add is looked up in the post-state. - when playing back a change_set, there is a canonical order in which - entries are emitted: set_heir, delete, rename, add, set_sire, - apply_delta, clear_attr, set_attr. + When playing back a change_set, there is a canonical order in which + entries are emitted: delete, rename, add, apply_delta, clear_attr, + set_attr. - furthermore within each type of entry, the order in which the entries + Furthermore within each type of entry, the order in which the entries are emitted is specified: all entries are emitted in lexicographic order, which happens to induce topological order as well (A is always emitted before A/B). - crucially, set_heir and delete are ordered by *source space*; rename, - add, set_sire, apply_delta, clear_attr, and set_attr are ordered by - *destination space*. we replay these by walking the tree rooted in - each mfest. + Crucially, delete is ordered by *source space*; rename, add, apply_delta, + clear_attr, and set_attr are ordered by *destination space*. wW replay + these by walking the tree rooted in each mfest. -heirs and sires: -~~~~~~~~~~~~~~~~ +Copy-on-write: +~~~~~~~~~~~~~~ - nodes may have heirs or sires. only nodes being deleted in a cset may have - an heir; only nodes being added in a cset may have a sire. a node may have - at most one heir. the heir of a node is a target to send future content - deltas and attributes to; it is a "forwarding address" used in cases where - two files with separate histories are considered identical in a merge and - "sutured": one node is deleted, and it marks the other node as an - heir. the name of the heir is looked up in the new manifest. + When copying an mfest, only a shallow copy of the root shared_ptr is made; + When writing to the copy (say inside a cset), you must check the + shared_ptr you're writing to (and all its parents) to see that they are + shared_ptr::unique, and if not then you must make a stepwise copy down to + the child in question. This means that often an mfest contains pointers + into other mfests; the new mfest in a cset usually contains pointers into + the old one. The cset::get_writable_*() family of helpers should do the + right thing, as far as preparing a node in the new_mfest for writing. - only attribute and content-merging passes care about heirs and sires. - they do not affect lifecycle decisions during merging. - an added node A has a node S as sire in cset C iff there is a cset C' in - which A was being deleted with heir S, and C' = inverse(C). in other - words, sires exist *only* to preserve information about heirs during cset - inversion. there are no user-accessible primitives for creating sire - relationships. a node S may be the sire of many other nodes N1,...,Nk, if - multiple Nk consider S their heir. the sire relationship is therefore a - set. - - -generation numbers: -~~~~~~~~~~~~~~~~~~~ - - every node in an mfest has a generation number. this exists to - implement copy-on-write semantics. the cset has a "write_generation" - number representing the generation which nodes should be updated to - when written to in the new mfest. when you want a node for writing, - you request it from the new mfest and the node-finder walks down the tree - looking. any time it hits an intermediate node which is less than the - write_generation, it makes a copy of that node and replaces the owning - pointer before walking into it. this ensures that the path you modify - is a truly new object, not a pointer into to the old tree. - - -attach and detach lists: +Attach and detach lists: ~~~~~~~~~~~~~~~~~~~~~~~~ - this is a subtle point. read it until it's clear. + This is a subtle point. Read it until it's clear. - csets contain an implicit order to their operations: a block of - set_heir actions, then deletes, renames, adds, set_sires, deltas, and - finally attribute changes. moreover as we've seen above, within each - block there is a topological order: renames landing on A/B happen - before renames landing on A/B/C, for example. + Csets contain an implicit order to their operations: a block of delete + actions, then renames, adds, deltas, and finally attribute + changes. Moreover as we've seen above, within each block there is a + topological order: renames landing on A/B happen before renames landing on + A/B/C, for example. - there is still, however, an ordering hazard when you apply renames + There is still, however, an ordering hazard when you apply renames "in-place", either to an mfest or a filesystem: it's possible that a rename-target will land on a name which is *in the process* of being moved elsewhere -- by another rename in the cset -- but has not yet - been moved due to the serial order chosen. consider this cset: + been moved due to the serial order chosen. Consider this cset: rename_file "A/B" to "P/Q" @@ -125,41 +92,40 @@ rename_file "P/Q" to "Y/Z" - this is the order they will be emitted in. because these names exist - in the same cset, they are to be executed "simultaneously" in theory; - but programs execute sequentially, so naively we would perform the + This is the lexicographic order they will be emitted in. Because these + names exist in the same cset, they are to be executed "simultaneously" in + theory; but programs execute sequentially, so naively we would perform the first rename, then the second. - suppose we executed them "sequentially". for the first rename we + Suppose we executed them "sequentially". For the first rename we would do this: - - find the node A/B in the OLD mfest, its file ident is file_0. - - find file_0 in the NEW mfest, its parent is parent_0. - - detach B from parent_0 in the NEW mfest. - - find P in the NEW mfest, its ident is parent_1. - - attach file_0 to parent_1 under the name Q. + - find the node "A/B" in the OLD mfest, call it file_0. + - find file_0 in the NEW mfest, call its parent parent_0. + - detach "B" from parent_0 in the NEW mfest. + - find "P" in the NEW mfest, call it parent_1. + - attach file_0 to parent_1 under the name "Q". - this last action will fail. it will fail because parent_1 already has + This last action will fail. It will fail because parent_1 already has a child called Q in the second mfest (it's the source of the next - rename, and it hasn't been moved elsewhere yet). we the readers know + rename, and it hasn't been moved elsewhere yet). We the readers know that by the time the cset is finished applying, Q will have been - detached from parent_1 in the NEW mfest. but in the meantime we have + detached from parent_1 in the NEW mfest. But in the meantime we have a problem. - so what we do instead is execute the first "half" of the renames in a - preliminary pass, then the second "half" of them in a second - pass. the change_consumer interface has a special - "finalize_renames()" method which triggers the second half; it is - called after the last rename is processed. here's how we *actually* - process the renames: + So what we do instead is execute the first "half" of the renames in a + preliminary pass, then the second "half" of them in a second pass. The + change_consumer interface has a special "finalize_renames()" method which + triggers the second half; it is called after the last rename is + processed. Here's how we *actually* process the renames: - buffer all the renames into a vector of src/dst pairs, in order. - when the user calls finalize_renames(): - - build a temporary vector of idents with the same length as the buffer + - build a temporary vector of nodes with the same length as the buffer - walk the vector from end to beginning, detaching each node from - the NEW tree and saving its ident in the temporary vector + the NEW tree and saving it in the temporary vector ("bottom-up") - walk the vector from beginning to end, attaching each node to - the NEW tree, using the saved idents + the NEW tree ("top-down") */ @@ -177,17 +143,14 @@ #include #include - #include #include - #include -#include #include +#include #include - #include "basic_io.hh" #include "constants.hh" #include "numeric_vocab.hh" @@ -207,136 +170,141 @@ using std::copy; using std::make_pair; using boost::lexical_cast; -using boost::scoped_ptr; using boost::shared_ptr; using boost::dynamic_pointer_cast; +using boost::weak_ptr; using __gnu_cxx::hash_map; using __gnu_cxx::hash_set; -struct node; -struct dir_node; -struct file_node; -struct dir_entry; -struct dirent_t_cmp; -struct dirent_hash; -struct dirent_eq; +//================================================================== +// dirents and paths +//================================================================== - -namespace __gnu_cxx +struct +dirent_t { - template<> - struct hash + // A dirent is a special "kind of pointer": it's one which has an + // operator< that obeys the lexicographic order of the strings it points + // to, and which considers itself "equal" to a dirent_t based on equality + // of the pointed-to string. + // + // Storage for the stable strings is *never reclaimed*; they just + // accumulate in the dirent_t::stable_strings set forever through the + // life of the program. They're expected to be small and very frequently + // reused. + + struct + hash + { + size_t operator()(string const * const & x) const + { + return __gnu_cxx::__stl_hash_string(x->c_str()); + } + }; + + struct + eq { - size_t - operator()(u64 __x) const - { return static_cast(__x); } + bool operator()(string const * const & x, + string const * const & y) const + { + return *x == *y; + } }; -} + string const * stable_string; + typedef hash_set stable_string_set; + static stable_string_set stable_strings; -typedef uint64_t ident_t; -typedef uint64_t gen_t; -typedef shared_ptr node_t; -typedef shared_ptr dir_t; -typedef shared_ptr file_t; -typedef shared_ptr dirent_t; -// dirmap *must* be a sorted map, do not change to a hashed map -typedef map dirmap_t; -typedef hash_set dirset_t; -typedef string attr_name; -typedef string attr_val; + string const & val() const + { + return *stable_string; + } + void set(string const & s) + { + I(s != ".."); + I(s != "."); + I(s.find('/') == string::npos); -static ident_t -null_ident = 0; + stable_string = NULL; + stable_string_set::const_iterator i = stable_strings.find(&s); + if (i != stable_strings.end()) + stable_string = *i; + else + { + stable_string = new string(s); + stable_strings.insert(stable_string); + } + } + + dirent_t() + { + set(""); + } + dirent_t(string const & s) + { + set(s); + } -static ident_t -nursery_ident = 1; - - -static ident_t -graveyard_ident = 2; - - -struct -ident_source -{ - ident_t ctr; - ident_source() : ctr(graveyard_ident + 1) {} - ident_t next() { I(ctr != UINT64_MAX); return ctr++; } + dirent_t(dirent_t const & other) + : stable_string(other.stable_string) + {} }; -//================================================================== -// dirents and paths -//================================================================== +dirent_t::stable_string_set +dirent_t::stable_strings; -// directory entries are globally interned components -// of a file_path -struct -dir_entry +bool +operator<(dirent_t const & a, + dirent_t const & b) { - string val; + return *(a.stable_string) < *(b.stable_string); +} - explicit dir_entry(string s) - : val(s) - {} - bool operator<(dir_entry const & other) const - { - return val < other.val; - } +bool +operator==(dirent_t const & a, + dirent_t const & b) +{ + return *(a.stable_string) == *(b.stable_string); +} - bool operator==(dir_entry const & other) const - { - return val == other.val; - } -}; - -dirent_t -null_dirent(new dir_entry("")); - - bool -operator<(dirent_t const & a, - dirent_t const & b) +operator!=(dirent_t const & a, + dirent_t const & b) { - return *a < *b; + return *(a.stable_string) != *(b.stable_string); } ostream & -operator<<(ostream & o, dirent_t const & d) +operator<<(ostream & o, + dirent_t const & d) { - return o << d->val; + return o << *(d.stable_string); } -struct -dirent_hash -{ - size_t operator()(dirent_t const & x) const - { - return __gnu_cxx::__stl_hash_string(x->val.c_str()); - } -}; +struct node; +struct dir_node; +struct file_node; +typedef shared_ptr node_t; +typedef weak_ptr weak_node_t; +typedef shared_ptr dir_t; +typedef shared_ptr file_t; -struct -dirent_eq -{ - bool operator()(dirent_t const & a, - dirent_t const & b) const - { - return *a == *b; - } -}; +// dirmap *must* be a sorted map, do not change to a hashed map +typedef map dirmap_t; +typedef string attr_name; +typedef string attr_val; - // this helper class represents an "unpacked" view of the path // through a tree of directory nodes to some specific leaf (either // dir or file); it is just a faster form of a non-empty file_path @@ -344,29 +312,9 @@ struct path_vec_t { - static dirset_t dset; vector dir; dirent_t leaf; - path_vec_t() - { - } - - static dirent_t intern_component(string const & s) - { - dirent_t tmp(new dir_entry(s)); - dirset_t::const_iterator i = dset.find(tmp); - if (i == dset.end()) - { - dset.insert(tmp); - } - else - { - tmp = *i; - } - return tmp; - } - static path_vec_t from_file_path(file_path const & f) { fs::path fp(f()); @@ -374,8 +322,7 @@ for (fs::path::iterator i = fp.begin(); i != fp.end(); ++i) { - dirent_t tmp = intern_component(*i); - pv.dir.push_back(tmp); + pv.dir.push_back(dirent_t(*i)); } I(!pv.dir.empty()); pv.leaf = pv.dir.back(); @@ -395,9 +342,9 @@ for (vector::const_iterator i = dir.begin(); i != dir.end(); ++i) { - fp /= (*i)->val; + fp /= i->val(); } - fp /= leaf->val; + fp /= leaf.val(); return file_path(fp.string()); } @@ -408,130 +355,39 @@ // nodes //================================================================== -// static bucket of shared pointers -dirset_t -path_vec_t::dset; - - struct node { - gen_t generation; - ident_t ident; - ident_t parent; - dirent_t name; + map attrs; - // very few nodes have any attributes, heirs, or sires. a node with - // any of these is "fancy", and we keep fancy parts in a secondary - // struture, under a scoped pointer, to save memory. it only gets a - // value if someone assigns to one of the fancy parts. - - struct fancy_part - { - ident_t sire; - ident_t heir; - map attrs; - }; - - scoped_ptr fancy; - void ensure_fancy() - { - if (!fancy) - { - fancy.reset(new fancy_part()); - fancy->sire = null_ident; - fancy->heir = null_ident; - } - } - - bool has_sire() const - { - return fancy && (fancy->sire != null_ident); - } - - bool has_heir() const - { - return fancy && (fancy->heir != null_ident); - } - - ident_t get_sire() const - { - I(fancy); - return fancy->sire; - } - - ident_t get_heir() const - { - I(fancy); - return fancy->heir; - } - - void set_sire(ident_t s) - { - ensure_fancy(); - fancy->sire = s; - } - - void set_heir(ident_t h) - { - ensure_fancy(); - fancy->heir = h; - } - bool has_attrs() { - return fancy && !fancy->attrs.empty(); + return !attrs.empty(); } bool has_attr(attr_name const & name) { - return fancy && (fancy->attrs.find(name) != - fancy->attrs.end()); + return attrs.find(name) != attrs.end(); } attr_val const & get_attr(attr_name const & name) { - I(fancy); - map::const_iterator i = fancy->attrs.find(name); - I(i != fancy->attrs.end()); + map::const_iterator i = attrs.find(name); + I(i != attrs.end()); return i->second; } void set_attr(attr_name const & name, attr_val const & val) { - ensure_fancy(); - fancy->attrs[name] = val; + attrs[name] = val; } void clear_attr(attr_name const & name) { - ensure_fancy(); - fancy->attrs.erase(name); + attrs.erase(name); } - bool unborn() const - { - return parent == nursery_ident; - } - - bool live() const - { - return ! (unborn() || killed()); - } - - bool killed() const - { - return parent == graveyard_ident; - } - - node() - : generation(0), - ident(null_ident), - parent(null_ident), - name(null_dirent) - {} - virtual node_t shallow_copy() const = 0; virtual ~node() {} }; @@ -552,18 +408,8 @@ file_node::shallow_copy() const { file_t f = file_t(new file_node()); - f->generation = 0; - f->ident = ident; - f->parent = parent; - f->name = name; - - if (fancy) - f->fancy.reset(new fancy_part(*fancy)); - + f->attrs = attrs; f->content = content; - - // L(F("shallow-copied file node %d='%s'\n") % ident % name); - return f; } @@ -591,9 +437,6 @@ dir_node::add_child(dirent_t name, node_t n) { I(!contains_entry(name)); - // L(F("parent %d adding child %d = '%s'\n") % ident % n->ident % name->val); - n->name = name; - n->parent = ident; entries.insert(make_pair(name,n)); } @@ -644,9 +487,10 @@ dfs_iter { // NB: the dfs_iter struct *does not return* the node it's - // constructed on, as part of its iteration + // constructed on, as part of its iteration stack< pair > stk; + vector prefix; dfs_iter(dir_t root) { @@ -659,20 +503,31 @@ return stk.empty(); } - dir_t cwd() + void cwd(dir_t & d) { I(!stk.empty()); - return stk.top().first; + d = stk.top().first; } node_t operator*() { I(!stk.empty()); - // L(F("dfs_iter dereferencing node in '%s'\n") - // % stk.top().first->name); return stk.top().second->second; } + void path(path_vec_t & pv) + { + I(!finished()); + pv.dir = prefix; + leaf(pv.leaf); + } + + void leaf(dirent_t & d) + { + I(!finished()); + d = stk.top().second->first; + } + void operator++() { if (stk.empty()) @@ -681,6 +536,7 @@ node_t ntmp = stk.top().second->second; if (is_dir_t(ntmp)) { + prefix.push_back(stk.top().second->first); dir_t dtmp = downcast_to_dir_t(ntmp); stk.push(make_pair(dtmp, dtmp->entries.begin())); } @@ -690,7 +546,9 @@ while (!stk.empty() && stk.top().second == stk.top().first->entries.end()) { + I(!prefix.empty()); stk.pop(); + prefix.pop_back(); if (!stk.empty()) ++stk.top().second; } @@ -704,12 +562,6 @@ deque q; bfs_iter(node_t root) { - // if (is_dir_t(root)) - // L(F("bfs_iter starting with node %d (%d children)\n") - // % root->ident % downcast_to_dir_t(root)->entries.size()); - // else - // L(F("bfs_iter starting with node %d\n") - // % root->ident); q.push_back(root); } @@ -735,8 +587,6 @@ for (dirmap_t::const_iterator i = tmp->entries.begin(); i != tmp->entries.end(); ++i) { - // L(F("bfs_iter adding node %d, child of %d\n") - // % i->second->ident % tmp->ident); q.push_back(i->second); } } @@ -758,7 +608,6 @@ node_t dir_node::get_entry(dirent_t p) const { - // L(F("dir_node::get_entry(\"%s\")\n") % p->val); dirmap_t::const_iterator i = entries.find(p); I(i != entries.end()); return i->second; @@ -769,119 +618,109 @@ dir_node::shallow_copy() const { dir_t d = dir_t(new dir_node()); - d->generation = 0; - d->ident = ident; - d->parent = parent; - d->name = name; - - if (fancy) - d->fancy.reset(new fancy_part(*fancy)); - + d->attrs = attrs; d->entries = entries; - - // L(F("shallow-copied directory node %d='%s'\n") % ident % name); - return d; } -static node_t -deep_copy(node_t n) + +namespace __gnu_cxx { - if (is_file_t(n)) - { - return n->shallow_copy(); + template <> + struct hash + { + size_t + operator()(node_t const & x) const + { + return reinterpret_cast(x.get()); } - else - { - I(is_dir_t(n)); - deque work; - dir_t new_root = downcast_to_dir_t(n->shallow_copy()); - work.push_back(new_root); - - while (!work.empty()) - { - dir_t curr = work.front(); - work.pop_front(); - dirmap_t new_dirmap; - for (dirmap_t::const_iterator i = curr->entries.begin(); - i != curr->entries.end(); ++i) - { - node_t tmp = i->second->shallow_copy(); - new_dirmap.insert(make_pair(i->first, tmp)); - if (is_dir_t(tmp)) - work.push_back(downcast_to_dir_t(tmp)); - } - curr->entries = new_dirmap; - } - return new_root; + }; + + template <> + struct hash + { + size_t + operator()(node * const & x) const + { + return reinterpret_cast(x); } + }; + + template <> + struct hash + { + size_t + operator()(weak_node_t const & x) const + { + return reinterpret_cast(x.lock().get()); + } + }; } +bool +operator==(weak_node_t const & a, + weak_node_t const & b) +{ + return a.lock().get() == b.lock().get(); +} + //================================================================== // mfests //================================================================== -typedef hash_map node_map_t; - -static void -index_nodes(dir_t d, node_map_t & nodes) -{ - for (bfs_iter i(d); !i.finished(); ++i) - nodes.insert(make_pair((*i)->ident, *i)); -} - - struct mfest { - ident_t max_ident; - node_map_t nodes; dir_t root; - dir_t make_dir(); - file_t make_file(); - - mfest() : - max_ident(graveyard_ident+1), - root(new dir_node()) - { - index_nodes(root, nodes); - } - + mfest() : root(new dir_node()) + {} + mfest(mfest const & other); void reset(mfest const & other); void check_sane() const; - // informative + // Informative parts. bool file_exists(file_path fp) const; bool dir_exists(file_path dp) const; - void get_path(node_t const & n, path_vec_t & path) const; - // imperative (fault on lookup failures) - node_t get_node(ident_t i) const; - dir_t get_dir_node(ident_t i) const; - file_t get_file_node(ident_t i) const; + // Imperative parts (fault on lookup failures). + void path_to_node(path_vec_t const & pth, dir_t & parent, node_t & leaf); + void node_to_path(node_t const & n, path_vec_t & pth); +}; - node_t get_node(path_vec_t const & n) const; - dir_t get_dir_node(path_vec_t const & d) const; - file_t get_file_node(path_vec_t const & f) const; - node_t get_node(ident_t i, gen_t write_generation); - dir_t get_dir_node(ident_t i, gen_t write_generation); - file_t get_file_node(ident_t i, gen_t write_generation); +void +mfest::path_to_node(path_vec_t const & pth, dir_t & parent, node_t & leaf) +{ + parent = root; + for (vector::const_iterator i = pth.dir.begin(); + i != pth.dir.end(); ++i) + { + parent = downcast_to_dir_t(parent->get_entry(*i)); + } + leaf = parent->get_entry(pth.leaf); +} - node_t get_node(path_vec_t const & n, gen_t write_generation); - dir_t get_dir_node(path_vec_t const & d, gen_t write_generation); - file_t get_file_node(path_vec_t const & f, gen_t write_generation); - dir_t get_containing_dir_node(path_vec_t const & d) const; - dir_t get_containing_dir_node(path_vec_t const & d, - gen_t write_generation); +void +mfest::node_to_path(node_t const & n, path_vec_t & path) +{ + for (dfs_iter i(root); !i.finished(); ++i) + { + node_t nn = *i; + if (nn.get() == n.get()) + { + i.path(path); + return; + } + } - bool operator==(mfest const & other) const; -}; + // Don't request a node which doesn't exist. + I(false); +} mfest::mfest(mfest const & other) @@ -893,96 +732,22 @@ void mfest::reset(mfest const & other) { - nodes.clear(); root = other.root; - max_ident = other.max_ident; - index_nodes(root, nodes); + check_sane(); } -dir_t -mfest::make_dir() -{ - dir_t n(new dir_node()); - n->ident = ++max_ident; - n->parent = nursery_ident; - // L(F("produced new dir %d\n") % n->ident); - nodes.insert(make_pair(n->ident, n)); - return n; -} - - -file_t -mfest::make_file() -{ - file_t n(new file_node()); - n->ident = ++max_ident; - n->parent = nursery_ident; - // L(F("produced new file %d\n") % n->ident); - nodes.insert(make_pair(n->ident, n)); - return n; -} - - void mfest::check_sane() const { - - hash_set seen; - - // first go from the top of the tree to the bottom checking each - // directory for absence of cycle-forming edges and agreement - // between names. - - // L(F("mfest sanity check beginning...\n")); + // Check for absence of cycles. + hash_set seen; for(bfs_iter i(root); !i.finished(); ++i) { - // if ((*i)->live()) - // { - // path_vec_t v; - // get_path(*i, v); - // L(F("tree iter visiting live node %d = '%s'\n") - // % (*i)->ident - // % v.to_file_path()); - // } - // else - // { - // L(F("tree iter visiting %s node %d\n") - // % ((*i)->unborn() ? "unborn" : "killed") - // % (*i)->ident); - // } - I(seen.find((*i)->ident) == seen.end()); - seen.insert((*i)->ident); + node *n = (*i).get(); + I(seen.find(n) == seen.end()); + seen.insert(n); } - - // now go through the node map and make sure that we both found - // every node, and also that the nodes have the same belief about - // their ident that the node map has - - for (node_map_t::const_iterator i = nodes.begin(); - i != nodes.end(); ++i) - { - // if (i->second->live()) - // { - // path_vec_t v; - // get_path(i->second, v); - // L(F("set iter visiting live node %d = '%s'\n") - // % i->first - // % v.to_file_path()); - // } - // else - // { - // L(F("set iter visiting %s node %d\n") - // % (i->second->unborn() ? "unborn" : "killed") - // % i->first); - // } - I(i->first == i->second->ident); - if (i->second->live()) - { - I(seen.find(i->first) != seen.end()); - } - } - // L(F("mfest sanity check done")); } @@ -1032,211 +797,38 @@ } -node_t -mfest::get_node(ident_t i) const +bool +operator==(mfest const & ma, + mfest const & mb) { - // L(F("mfest::get_node(%s)\n") % i); - node_map_t::const_iterator j = nodes.find(i); - I(j != nodes.end()); - return j->second; -} + // This function could use bfs_iter, but we want it to go *very fast*, so + // it actually short-circuits any time it sees identical node pointers + // rather than structurally comparing their subtrees. + if (ma.root.get() == mb.root.get()) + return true; -dir_t -mfest::get_dir_node(ident_t i) const -{ - return downcast_to_dir_t(get_node(i)); -} + deque qa, qb; + qa.push_back(ma.root); + qb.push_back(mb.root); - -file_t -mfest::get_file_node(ident_t i) const -{ - return downcast_to_file_t(get_node(i)); -} - - -dir_t -mfest::get_dir_node(path_vec_t const & d) const -{ - return downcast_to_dir_t(get_node(d)); -} - - -file_t -mfest::get_file_node(path_vec_t const & f) const -{ - return downcast_to_file_t(get_node(f)); -} - - -dir_t -mfest::get_containing_dir_node(path_vec_t const & v) const -{ - dir_t d = root; - - for (vector::const_iterator i = v.dir.begin(); - i != v.dir.end(); ++i) + while (! (qa.empty() || qb.empty())) { - d = downcast_to_dir_t(d->get_entry(*i)); - } - return d; -} + node_t a = qa.front(); + node_t b = qb.front(); -node_t -mfest::get_node(path_vec_t const & n) const -{ - return get_containing_dir_node(n)->get_entry(n.leaf); -} + qa.pop_front(); + qb.pop_front(); + // Short-circuit comparisons when we actually + // have the same pointers here. -void -mfest::get_path(node_t const & n, - path_vec_t & path) const -{ - I(n->live()); - path.dir.clear(); - path.leaf = n->name; - ident_t i = n->parent; - while(i != root->ident) - { - node_t tmp = get_node(i); - path.dir.push_back(tmp->name); - i = tmp->parent; - } - reverse(path.dir.begin(), path.dir.end()); -} + if (a.get() == b.get()) + continue; - -// these versions implement copy-on-write, updating the -// tree to point to nodes meeting write_generation - -static void -ensure_node_meets_generation(mfest & m, - dir_t & d, - node_t & n, - gen_t write_generation) -{ - if (n->generation < write_generation) - { - n = n->shallow_copy(); - n->generation = write_generation; - - m.nodes.erase(n->ident); - m.nodes.insert(make_pair(n->ident, n)); - - d->entries.erase(n->name); - d->entries.insert(make_pair(n->name, n)); - } -} - - -dir_t -mfest::get_containing_dir_node(path_vec_t const & v, - gen_t write_generation) -{ - if (root->generation < write_generation) - { - // L(F("upgrading root to write generation %d\n") % write_generation); - root = downcast_to_dir_t(root->shallow_copy()); - root->generation = write_generation; - nodes.erase(root->ident); - nodes.insert(make_pair(root->ident, root)); - } - - dir_t d = root; - for (vector::const_iterator i = v.dir.begin(); - i != v.dir.end(); ++i) - { - node_t n = d->get_entry(*i); - ensure_node_meets_generation(*this, d, n, write_generation); - d = downcast_to_dir_t(n); - } - return d; -} - - -node_t -mfest::get_node(path_vec_t const & pth, - gen_t write_generation) -{ - dir_t d = get_containing_dir_node(pth, write_generation); - node_t n = d->get_entry(pth.leaf); - ensure_node_meets_generation(*this, d, n, write_generation); - return n; -} - - -dir_t -mfest::get_dir_node(path_vec_t const & pth, - gen_t write_generation) -{ - return downcast_to_dir_t(get_node(pth, write_generation)); -} - - -file_t -mfest::get_file_node(path_vec_t const & pth, - gen_t write_generation) -{ - return downcast_to_file_t(get_node(pth, write_generation)); -} - - -node_t -mfest::get_node(ident_t i, gen_t write_generation) -{ - path_vec_t pth; - if (i == root->ident) - return root; - node_t n = get_node(i); - get_path(n, pth); - return get_node(pth, write_generation); -} - - -dir_t -mfest::get_dir_node(ident_t i, gen_t write_generation) -{ - return downcast_to_dir_t(get_node(i, write_generation)); -} - - -file_t -mfest::get_file_node(ident_t i, gen_t write_generation) -{ - return downcast_to_file_t(get_node(i, write_generation)); -} - - -bool -equal_up_to_renumbering(mfest const & ma, - mfest const & mb) -{ - // NB: this function compares mfests for structural equality over the - // abstract filesystem; it ignores differences which may exist between - // the mfests' node sets, ident numbers, and any sire/heir relationships - // (which are only relevant in the context of csets) - - bfs_iter pa(ma.root), pb(mb.root); - - while (!(pa.finished() || pb.finished())) - { - - node_t a = *pa; - node_t b = *pb; - - if ((a->name != b->name)) + if (a->attrs != b->attrs) return false; - - if (a->fancy || b->fancy) - { - if (!(a->fancy && b->fancy)) - return false; - if (a->fancy->attrs != b->fancy->attrs) - return false; - } if (is_file_t(a) && is_file_t(b)) { @@ -1245,22 +837,32 @@ if (! (fa->content == fb->content)) return false; } - + else if (is_dir_t(a) && is_dir_t(b)) { dir_t da = downcast_to_dir_t(a); dir_t db = downcast_to_dir_t(b); + if (da->entries.size() != da->entries.size()) return false; + + for (dirmap_t::const_iterator + i = da->entries.begin(), + j = db->entries.begin(); + i != da->entries.end(); ++i, ++j) + { + I(j != db->entries.end()); + if (i->first != j->first) + return false; + qa.push_back(i->second); + qb.push_back(j->second); + } } else return false; - - ++pa; - ++pb; } - if (! (pa.finished() && pb.finished())) + if (! (qa.empty() && qb.empty())) return false; return true; @@ -1277,9 +879,6 @@ virtual ~change_consumer() {} - void set_heir(file_path const & dying, - file_path const & heir); - void delete_node(file_path const & pth); void rename_node(file_path const & src, file_path const & dst); @@ -1287,9 +886,6 @@ void add_dir(file_path const & dp); void add_file(file_path const & fp); - void set_sire(file_path const & newborn, - file_path const & sire); - void apply_delta(file_path const & path, file_id const & src, file_id const & dst); @@ -1303,12 +899,9 @@ virtual void finalize_renames() {} - // this part is just a lower-level form of the API above, to avoid - // composing or parsing file_paths in their string-y form + // This part is just a lower-level form of the API above, to avoid + // composing or parsing file_paths in their string-y form. - virtual void set_heir(path_vec_t const & dying, - path_vec_t const & heir) = 0; - virtual void delete_node(path_vec_t const & path) = 0; virtual void rename_node(path_vec_t const & src, @@ -1317,9 +910,6 @@ virtual void add_dir(path_vec_t const & dp) = 0; virtual void add_file(path_vec_t const & fp) = 0; - virtual void set_sire(path_vec_t const & newborn, - path_vec_t const & sire) = 0; - virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst) = 0; @@ -1333,16 +923,7 @@ }; -void -change_consumer::set_heir(file_path const & dying, - file_path const & heir) -{ - L(F("set_heir('%s', '%s')\n") % dying % heir); - this->set_heir(path_vec_t::from_file_path(dying), - path_vec_t::from_file_path(heir)); -} - void change_consumer::delete_node(file_path const & dp) { @@ -1378,16 +959,6 @@ void -change_consumer::set_sire(file_path const & newborn, - file_path const & sire) -{ - L(F("set_sire('%s', '%s')\n") % newborn % sire); - this->set_sire(path_vec_t::from_file_path(newborn), - path_vec_t::from_file_path(sire)); -} - - -void change_consumer::apply_delta(file_path const & path, file_id const & src, file_id const & dst) @@ -1426,10 +997,10 @@ { virtual ~attach_detach_change_consumer() {} - virtual ident_t detach(path_vec_t const & path) = 0; - virtual void attach(path_vec_t const & path, ident_t id) = 0; + virtual node_t detach(path_vec_t const & path) = 0; + virtual void attach(path_vec_t const & path, node_t id) = 0; - // renames are directed into a temporary buffer, which + // Renames are directed into a temporary buffer, which // then executes them in two steps using the lower-level API // "attach" and "detach". @@ -1452,16 +1023,15 @@ void attach_detach_change_consumer::finalize_renames() { - vector > tmp; + vector > tmp; for (vector< pair >::reverse_iterator i = pending_renames.rbegin(); i != pending_renames.rend(); ++i) { - ident_t t = detach(i->first); - tmp.push_back(make_pair(i->second, t)); + tmp.push_back(make_pair(i->second, detach(i->first))); } - for (vector< pair >::const_iterator i = tmp.begin(); + for (vector< pair >::const_iterator i = tmp.begin(); i != tmp.end(); ++i) { attach(i->first, i->second); @@ -1479,13 +1049,36 @@ cset : public attach_detach_change_consumer { + + hash_set incubator; mfest old_mfest; mfest new_mfest; - gen_t write_generation; + hash_set graveyard; + hash_map old_to_new; + hash_map new_to_old; + + // Accessors related the new <-> old bijection. + void update_old_new_mapping(weak_node_t old, + weak_node_t modified_new); + void get_corresponding_new_weak_node(weak_node_t old_or_unborn, + weak_node_t & new_or_killed) const; + void get_corresponding_old_weak_node(weak_node_t new_or_killed, + weak_node_t & old_or_unborn) const; + void get_corresponding_new_node(weak_node_t old_or_unborn, + node_t & new_or_killed) const; + void get_corresponding_old_node(weak_node_t new_or_killed, + node_t & old_or_unborn) const; + + // Accessors related to copy-on-write. + node_t unique_entry_in_new_dir(dir_t new_dir, dirent_t entry); + node_t get_writable_node(path_vec_t const & pth); + dir_t get_writable_dir_containing(path_vec_t const & pth); + dir_t get_writable_dir(path_vec_t const & pth); + file_t get_writable_file(path_vec_t const & pth); + cset() { - write_generation = 1; } cset(mfest const & base) @@ -1498,25 +1091,23 @@ void reset(mfest const & m); void check_sane() const; + bool is_unborn(node_t const & n) const; + bool is_live(node_t const & n) const; + bool is_killed(node_t const & n) const; + void replay_changes(change_consumer & cc) const; void replay_inverse_changes(change_consumer & cc) const; virtual void finalize_renames(); - virtual void set_heir(path_vec_t const & dying, - path_vec_t const & heir); - virtual void delete_node(path_vec_t const & dp); - virtual ident_t detach(path_vec_t const & path); - virtual void attach(path_vec_t const & path, ident_t id); + virtual node_t detach(path_vec_t const & path); + virtual void attach(path_vec_t const & path, node_t id); virtual void add_dir(path_vec_t const & dp); virtual void add_file(path_vec_t const & fp); - virtual void set_sire(path_vec_t const & newborn, - path_vec_t const & sire); - virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst); @@ -1527,97 +1118,246 @@ virtual void clear_attr(path_vec_t const & path, attr_name const & name); + }; -static gen_t -find_max_write_generation(dir_t d) +void +cset::reset(mfest const & m) { - gen_t m = d->generation; - for (dfs_iter i(d); !i.finished(); ++i) - if ((*i)->generation > m) - m = (*i)->generation; - return m; -} + incubator.clear(); + graveyard.clear(); + new_to_old.clear(); + old_to_new.clear(); -void -cset::reset(mfest const & m) -{ old_mfest.reset(m); new_mfest.reset(m); - write_generation = find_max_write_generation(m.root); - I(write_generation != UINT64_MAX); - ++write_generation; - // L(F("cset write generation is %d\n") % write_generation); } +bool +cset::is_unborn(node_t const & n) const +{ + return incubator.find(n) != incubator.end(); +} -static void -check_hash_inclusion(mfest const & a, - mfest const & b) +bool +cset::is_live(node_t const & n) const { - for (node_map_t::const_iterator i = a.nodes.begin(); - i != a.nodes.end(); ++i) - { - node_map_t::const_iterator j = b.nodes.find(i->first); - I(j != b.nodes.end()); - } + return !(is_unborn(n) || is_killed(n)); } +bool +cset::is_killed(node_t const & n) const +{ + return graveyard.find(n) != graveyard.end(); +} static void -check_mfests_agree(mfest const & a, - mfest const & b) +confirm_weak_maps_agree(hash_map const & a, + hash_map const & b) { - check_hash_inclusion(a,b); - check_hash_inclusion(b,a); + for (hash_map::const_iterator i = a.begin(); + i != a.end(); ++i) + { + I(!i->first.expired()); + I(!i->second.expired()); + hash_map::const_iterator j = b.find(i->second); + I(j != b.end()); + I(!j->first.expired()); + I(!j->second.expired()); + I(j->first.lock().get() == i->second.lock().get()); + I(j->second.lock().get() == i->first.lock().get()); + } } - + void cset::check_sane() const { - // L(F("cset::check_sane checking prestate manifest sanity...\n")); old_mfest.check_sane(); - // L(F("cset::check_sane checking poststate manifest sanity...\n")); new_mfest.check_sane(); - // L(F("cset::check_sane checking pre/post agreement...\n")); - check_mfests_agree(old_mfest, new_mfest); - // L(F("cset::check_sane OK\n")); + + confirm_weak_maps_agree(new_to_old, old_to_new); + confirm_weak_maps_agree(old_to_new, new_to_old); + + for (bfs_iter i(old_mfest.root); !i.finished(); ++i) + I(is_live(*i)); + + for (bfs_iter i(new_mfest.root); !i.finished(); ++i) + I(is_live(*i)); + + for (hash_set::const_iterator i = graveyard.begin(); + i != graveyard.end(); ++i) + { + I(!is_unborn(*i)); + I(old_to_new.find(weak_node_t(*i)) != old_to_new.end()); + } + + for (hash_set::const_iterator i = incubator.begin(); + i != incubator.end(); ++i) + { + I(!is_killed(*i)); + I(new_to_old.find(weak_node_t(*i)) != new_to_old.end()); + } } -void -cset::finalize_renames() + +void +cset::update_old_new_mapping(weak_node_t old, weak_node_t modified_new) { - attach_detach_change_consumer::finalize_renames(); + // We must call this when we might have changed the "new" node associated + // with an "old" (or unborn) node; namely when we've done a + // copy-for-write. - if (global_sanity.debug) + weak_node_t existing_new; + get_corresponding_new_weak_node(old, existing_new); + new_to_old.erase(existing_new); + old_to_new[old] = modified_new; + new_to_old[modified_new] = old; +} + + +node_t +cset::unique_entry_in_new_dir(dir_t new_dir, dirent_t entry) +{ + // NB: We do not use "get_foo_entry" here because that reproduces the + // shared_ptr, and we're using "unique" to check that the shared_ptr + // is non-duplicated. + + dirmap_t::const_iterator e = new_dir->entries.find(entry); + I(e != new_dir->entries.end()); + node_t result; + if (e->second.unique()) + result = e->second; + else { - check_sane(); + weak_node_t old_weak_node; + get_corresponding_old_weak_node(e->second, old_weak_node); + result = e->second->shallow_copy(); + I(result.unique()); + update_old_new_mapping(old_weak_node, weak_node_t(result)); + new_dir->entries[entry] = result; } + + // The node should now have exactly 2 references: + // 'new_dir->entries[entry]' and 'result'. + + I(result.use_count() == 2); + return result; } + -void -cset::delete_node(path_vec_t const & dp) +node_t +cset::get_writable_node(path_vec_t const & pth) { - this->detach(dp); + dir_t d = get_writable_dir_containing(pth); + return unique_entry_in_new_dir(d, pth.leaf); +} - if (global_sanity.debug) + +dir_t +cset::get_writable_dir_containing(path_vec_t const & pth) +{ + // We want to return a dir_t which: + // + // - is present at the provided path, in the tree rooted at + // new_mfest.root + // + // - is shared_ptr::unique + // + // - has only shared_ptr::unique parents all the way up to + // new_mfest.root + // + // We can do this "top down": we simply have to duplicate non-unique + // nodes as we go. We know that in any existing unique dir D we're safe + // to replace any existing slot S with a clone because D is itself + // unique; nobody else is going to see us damaging it. + + if (!new_mfest.root.unique()) + new_mfest.root = downcast_to_dir_t(new_mfest.root->shallow_copy()); + + I(new_mfest.root.unique()); + + dir_t d = new_mfest.root; + for (vector::const_iterator i = pth.dir.begin(); + i != pth.dir.end(); ++i) { - check_sane(); - } + d = downcast_to_dir_t(unique_entry_in_new_dir(d,*i)); + } + return d; } +dir_t +cset::get_writable_dir(path_vec_t const & pth) +{ + return downcast_to_dir_t(get_writable_node(pth)); +} + + +file_t +cset::get_writable_file(path_vec_t const & pth) +{ + return downcast_to_file_t(get_writable_node(pth)); +} + +void +cset::get_corresponding_new_weak_node(weak_node_t old_or_unborn, + weak_node_t & n) const +{ + hash_map::const_iterator i + = old_to_new.find(old_or_unborn); + + if (i == old_to_new.end()) + n = old_or_unborn; + else + n = i->second; + + I(!n.expired()); +} + + +void +cset::get_corresponding_old_weak_node(weak_node_t new_or_killed, + weak_node_t & n) const +{ + hash_map::const_iterator i + = new_to_old.find(new_or_killed); + + if (i == old_to_new.end()) + n = new_or_killed; + else + n = i->second; + + I(!n.expired()); +} + + +void +cset::get_corresponding_new_node(weak_node_t old_or_unborn, + node_t & n) const +{ + weak_node_t tmp; + get_corresponding_new_weak_node(old_or_unborn, tmp); + n = tmp.lock(); +} + + +void +cset::get_corresponding_old_node(weak_node_t new_or_killed, + node_t & n) const +{ + weak_node_t tmp; + get_corresponding_old_weak_node(new_or_killed, tmp); + n = tmp.lock(); +} + + void -cset::set_heir(path_vec_t const & dying, - path_vec_t const & heir) +cset::finalize_renames() { - node_t n = new_mfest.get_node(dying, write_generation); - node_t h = new_mfest.get_node(heir); - n->ensure_fancy(); - n->fancy->heir = h->ident; + attach_detach_change_consumer::finalize_renames(); if (global_sanity.debug) { @@ -1627,46 +1367,45 @@ void -cset::set_sire(path_vec_t const & newborn, - path_vec_t const & sire) +cset::delete_node(path_vec_t const & dp) { - node_t n = new_mfest.get_node(newborn, write_generation); - node_t s = new_mfest.get_node(sire); - n->ensure_fancy(); - n->fancy->sire = s->ident; + graveyard.insert(this->detach(dp)); if (global_sanity.debug) { check_sane(); - } + } } -ident_t +node_t cset::detach(path_vec_t const & path) { - node_t src = old_mfest.get_node(path); - node_t dst = new_mfest.get_node(src->ident, write_generation); - dir_t parent_of_dst = new_mfest.get_dir_node(dst->parent, write_generation); + dir_t old_parent; + node_t old_node, new_node; + path_vec_t new_path; - parent_of_dst->drop_child(dst->name); - dst->parent = graveyard_ident; + old_mfest.path_to_node(path, old_parent, old_node); + get_corresponding_new_node(old_node, new_node); + new_mfest.node_to_path(new_node, new_path); + dir_t new_parent = get_writable_dir_containing(new_path); + new_node = unique_entry_in_new_dir(new_parent, new_path.leaf); + new_parent->drop_child(new_path.leaf); + return new_node; + // NB: it is important *not* to check sanity here, as we might have // entered a transient state where detached grandchildren are // not pointing to the graveyard because they're about to be // re-attached. - - return dst->ident; } void -cset::attach(path_vec_t const & path, ident_t id) +cset::attach(path_vec_t const & path, node_t n) { - node_t n = new_mfest.get_node(id); - dir_t d = new_mfest.get_containing_dir_node(path, write_generation); - d->add_child(path.leaf, n); + dir_t new_dir = get_writable_dir_containing(path); + new_dir->add_child(path.leaf, n); // NB: it is important *not* to check sanity here, as we might still // be in a transient state where detached grandchildren are not @@ -1678,15 +1417,16 @@ void cset::add_dir(path_vec_t const & dp) { - dir_t new_dst_parent = new_mfest.get_containing_dir_node(dp, write_generation); - dir_t new_dir = old_mfest.make_dir(); - if (new_dir->ident > new_mfest.max_ident) - new_mfest.max_ident = new_dir->ident; - node_t new_dir_in_new_mfest = new_dir->shallow_copy(); + dir_t old_dir(new dir_node()); + incubator.insert(old_dir); - new_dst_parent->add_child(dp.leaf, new_dir_in_new_mfest); - new_mfest.nodes.insert(make_pair(new_dir->ident, new_dir_in_new_mfest)); + dir_t new_dir(downcast_to_dir_t(old_dir->shallow_copy())); + dir_t new_parent = get_writable_dir_containing(dp); + new_parent->add_child(dp.leaf, new_dir); + old_to_new[weak_node_t(old_dir)] = weak_node_t(new_dir); + new_to_old[weak_node_t(new_dir)] = weak_node_t(old_dir); + if (global_sanity.debug) { check_sane(); @@ -1697,15 +1437,16 @@ void cset::add_file(path_vec_t const & fp) { - dir_t new_dst_parent = new_mfest.get_containing_dir_node(fp, write_generation); - file_t new_file = old_mfest.make_file(); - if (new_file->ident > new_mfest.max_ident) - new_mfest.max_ident = new_file->ident; - node_t new_file_in_new_mfest = new_file->shallow_copy(); + file_t old_file(new file_node()); + incubator.insert(old_file); - new_dst_parent->add_child(fp.leaf, new_file_in_new_mfest); - new_mfest.nodes.insert(make_pair(new_file->ident, new_file_in_new_mfest)); + file_t new_file(downcast_to_file_t(old_file->shallow_copy())); + dir_t new_parent = get_writable_dir_containing(fp); + new_parent->add_child(fp.leaf, new_file); + old_to_new[weak_node_t(old_file)] = weak_node_t(new_file); + new_to_old[weak_node_t(new_file)] = weak_node_t(old_file); + if (global_sanity.debug) { check_sane(); @@ -1718,12 +1459,16 @@ file_id const & s, file_id const & d) { - file_t dst = new_mfest.get_file_node(path, write_generation); - file_t src = old_mfest.get_file_node(dst->ident); + node_t old_node; + file_t old_file, new_file; - I(src->content == s); - dst->content = d; + new_file = get_writable_file(path); + get_corresponding_old_node(weak_node_t(new_file), old_node); + old_file = downcast_to_file_t(old_node); + I(old_file->content == s); + new_file->content = d; + if (global_sanity.debug) { check_sane(); @@ -1736,8 +1481,12 @@ attr_name const & name, attr_val const & val) { - node_t n = new_mfest.get_node(path, write_generation); - n->set_attr(name, val); + get_writable_node(path)->set_attr(name, val); + + if (global_sanity.debug) + { + check_sane(); + } } @@ -1745,32 +1494,34 @@ cset::clear_attr(path_vec_t const & path, string const & name) { - node_t n = new_mfest.get_node(path, write_generation); - n->clear_attr(name); -} + get_writable_node(path)->clear_attr(name); + if (global_sanity.debug) + { + check_sane(); + } +} -typedef vector > node_pair_vec; -typedef vector > file_pair_vec; -typedef vector, +typedef vector > path_vec_pair; +typedef vector > > > -node_attr_name_vec; +path_pair_attr_name_vec; struct replay_record { - node_pair_vec heirs_set; - node_pair_vec dirs_deleted; - node_pair_vec files_deleted; - node_pair_vec nodes_renamed; - node_pair_vec dirs_added; - node_pair_vec files_added; - node_pair_vec sires_set; - file_pair_vec deltas_applied; - node_attr_name_vec attrs_changed; + path_vec_pair dirs_deleted; + path_vec_pair files_deleted; + path_vec_pair nodes_renamed; + path_vec_pair dirs_added; + path_vec_pair files_added; + path_vec_pair deltas_applied; + path_pair_attr_name_vec attrs_changed; }; +/* + static void play_back_replay_record(replay_record const & rr, mfest const & src, @@ -1779,14 +1530,6 @@ { path_vec_t v1, v2; - for (node_pair_vec::const_iterator i = rr.heirs_set.begin(); - i != rr.heirs_set.end(); ++i) - { - src.get_path(i->first, v1); - dst.get_path(i->second, v2); - cc.set_heir(v1, v2); - } - for (node_pair_vec::const_iterator i = rr.files_deleted.begin(); i != rr.files_deleted.end(); ++i) { @@ -1825,14 +1568,6 @@ cc.add_file(v2); } - for (node_pair_vec::const_iterator i = rr.sires_set.begin(); - i != rr.sires_set.end(); ++i) - { - src.get_path(i->first, v1); - dst.get_path(i->second, v2); - cc.set_sire(v2, v1); - } - for (file_pair_vec::const_iterator i = rr.deltas_applied.begin(); i != rr.deltas_applied.end(); ++i) { @@ -1893,20 +1628,6 @@ { path_vec_t v1, v2; - for (node_pair_vec::const_iterator i = rr.sires_set.begin(); - i != rr.sires_set.end(); ++i) - { - // the forward cset goes from src->dst, and had - // set_sire(newborn, sire) with newborn in dst and sire in src - // - // the inverse cset goes from src<-dst, and has - // set_heir(dying, heir) with dying in dst and heir in src - - src.get_path(i->first, v1); - dst.get_path(i->second, v2); - cc.set_heir(v2, v1); - } - for (node_pair_vec::const_iterator i = rr.files_added.begin(); i != rr.files_added.end(); ++i) { @@ -1949,7 +1670,6 @@ cc.finalize_renames(); - for (node_pair_vec::const_iterator i = rr.dirs_deleted.begin(); i != rr.dirs_deleted.end(); ++i) { @@ -1976,20 +1696,6 @@ cc.add_file(v1); } - for (node_pair_vec::const_iterator i = rr.heirs_set.begin(); - i != rr.heirs_set.end(); ++i) - { - // the forward cset goes from src->dst, and had - // set_heir(dying, heir) with dying in src and heir in dst - // - // the inverse cset goes from src<-dst, and has - // set_sire(newborn, sire) with newborn in src and sire in dst - - src.get_path(i->first, v1); - dst.get_path(i->second, v2); - cc.set_sire(v1, v2); - } - for (file_pair_vec::const_iterator i = rr.deltas_applied.begin(); i != rr.deltas_applied.end(); ++i) { @@ -2005,7 +1711,6 @@ i->first->content); } - // attr pass #1: emit all the cleared attrs for (node_attr_name_vec::const_iterator i = rr.attrs_changed.begin(); i != rr.attrs_changed.end(); ++i) @@ -2074,8 +1779,8 @@ for (dfs_iter i(src.root); !i.finished(); ++i) { - // in this pass we accumulate heirs_set and nodes_deleted. - // the "self" node is a member of the "src" directory tree. + // in this pass we accumulate nodes_deleted. the "self" node is a + // member of the "src" directory tree. node_t self = *i; node_t other = dst.get_node(self->ident); @@ -2086,21 +1791,14 @@ if (is_dir_t(self)) rr.dirs_deleted.push_back(make_pair(self, other)); else - rr.files_deleted.push_back(make_pair(self, other)); - - if (self->has_heir()) - { - node_t heir = dst.get_node(self->fancy->heir); - rr.heirs_set.push_back(make_pair(self, heir)); - } + rr.files_deleted.push_back(make_pair(self, other)); } } for (dfs_iter i(dst.root); !i.finished(); ++i) { - // in this pass we accumulate nodes_renamed, files_added, - // dirs_added, sires_set, deltas_applied, attrs_cleared, - // and attrs_set. + // in this pass we accumulate nodes_renamed, files_added, dirs_added, + // deltas_applied, attrs_cleared, and attrs_set. // // the "self" node is a member of the "dst" directory tree. node_t self = *i; @@ -2121,11 +1819,6 @@ I(is_file_t(other)); rr.files_added.push_back(make_pair(other, self)); } - if (self->has_sire()) - { - node_t sire = src.get_node(self->fancy->sire); - rr.sires_set.push_back(make_pair(self, sire)); - } } else if (other->live()) { @@ -2272,12 +1965,10 @@ std::string const content("content"); // cset symbols - std::string const set_heir("set_heir"); std::string const delete_node("delete"); std::string const rename_node("rename"); std::string const add_file("add_file"); std::string const add_dir("add_dir"); - std::string const set_sire("set_sire"); std::string const patch("patch"); std::string const from("from"); std::string const to("to"); @@ -2296,19 +1987,10 @@ while (parser.symp()) { std::string t1, t2, t3; - if (parser.symp(syms::set_heir)) + if (parser.symp(syms::delete_node)) { parser.sym(); parser.str(t1); - parser.esym(syms::to); - parser.str(t2); - cc.set_heir(file_path(t1), - file_path(t2)); - } - else if (parser.symp(syms::delete_node)) - { - parser.sym(); - parser.str(t1); cc.delete_node(file_path(t1)); } else if (parser.symp(syms::rename_node)) @@ -2332,15 +2014,6 @@ parser.str(t1); cc.add_dir(file_path(t1)); } - else if (parser.symp(syms::set_sire)) - { - parser.sym(); - parser.str(t1); - parser.esym(syms::from); - parser.str(t2); - cc.set_sire(file_path(t1), - file_path(t2)); - } else if (parser.symp(syms::patch)) { parser.sym(); @@ -2383,15 +2056,11 @@ { basic_io::printer & printer; cset_printer(basic_io::printer & p) : printer(p) {} - virtual void set_heir(path_vec_t const & dying, - path_vec_t const & heir); virtual void delete_node(path_vec_t const & fp); virtual void rename_node(path_vec_t const & src, path_vec_t const & dst); virtual void add_dir(path_vec_t const & dp); virtual void add_file(path_vec_t const & fp); - virtual void set_sire(path_vec_t const & newborn, - path_vec_t const & sire); virtual void apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst); @@ -2404,17 +2073,6 @@ void -cset_printer::set_heir(path_vec_t const & dying, - path_vec_t const & heir) -{ - basic_io::stanza st; - st.push_str_pair(syms::set_heir, dying.to_file_path()()); - st.push_str_pair(syms::to, heir.to_file_path()()); - printer.print_stanza(st); -} - - -void cset_printer::delete_node(path_vec_t const & dp) { basic_io::stanza st; @@ -2453,17 +2111,6 @@ void -cset_printer::set_sire(path_vec_t const & newborn, - path_vec_t const & sire) -{ - basic_io::stanza st; - st.push_str_pair(syms::set_sire, newborn.to_file_path()()); - st.push_str_pair(syms::from, sire.to_file_path()()); - printer.print_stanza(st); -} - - -void cset_printer::apply_delta(path_vec_t const & path, file_id const & src, file_id const & dst) @@ -2634,6 +2281,7 @@ dat = data(oss.str()); } +*/ #ifdef BUILD_UNIT_TESTS @@ -2723,9 +2371,9 @@ for (bfs_iter i(m.root); !i.finished(); ++i) { - if ((*i)->ident != m.root->ident) + if ((*i)->has_parent()) has_nonroot_nodes = true; - + if ((*i)->has_attrs()) has_attrs = true; @@ -2737,7 +2385,7 @@ void perform_random_action(cset & c) { - set parents; + hash_set parents; path_vec_t pv_a, pv_b; file_path fp_a, fp_b; @@ -2940,7 +2588,7 @@ write_mfest(m1, tmp); L(F("wrote mfest: [[%s]]\n") % tmp); read_mfest(tmp, m2); - BOOST_CHECK(equal_up_to_renumbering(m1, m2)); + BOOST_CHECK(structurally_equal(m1, m2)); m1.reset(m2); } }