# # patch "cset.cc" # from [d08253f6ba29d649d6f5dfa2d3798cd741b0716d] # to [9cb02d42216e2d8a3d14af6bde479fb3c5bbd237] # # patch "cset.hh" # from [35dd18413c314261b8869dae3455958329aeed1d] # to [53da65e733d3d316a68899a74c70c47819ee4b07] # # patch "roster4.cc" # from [7bbd28a5912b7e855df56cfb0bff32cf5d4715c3] # to [00c679d031f2dd70b7b108dc8d42f5d9b8aa782d] # # patch "roster4.hh" # from [3103b059c8adac3b1dd2f1315f33284e8f3c811d] # to [a9ca6e008ebf3d1c6f1c084d495f6f9411106bcf] # ======================================================================== --- cset.cc d08253f6ba29d649d6f5dfa2d3798cd741b0716d +++ cset.cc 9cb02d42216e2d8a3d14af6bde479fb3c5bbd237 @@ -312,13 +312,6 @@ } } -inline split_path -string_to_path(string const & str) -{ - split_path sp; - file_path_internal(str).split(sp); - return sp; -} void parse_cset(basic_io::parser & parser, @@ -332,7 +325,7 @@ { parser.sym(); parser.str(t1); - safe_insert(cs.nodes_deleted, string_to_path(t1)); + safe_insert(cs.nodes_deleted, internal_string_to_split_path(t1)); } else if (parser.symp(syms::rename_node)) { @@ -340,14 +333,14 @@ parser.str(t1); parser.esym(syms::to); parser.str(t2); - safe_insert(cs.nodes_renamed, make_pair(string_to_path(t1), - string_to_path(t2))); + safe_insert(cs.nodes_renamed, make_pair(internal_string_to_split_path(t1), + internal_string_to_split_path(t2))); } else if (parser.symp(syms::add_dir)) { parser.sym(); parser.str(t1); - safe_insert(cs.dirs_added, string_to_path(t1)); + safe_insert(cs.dirs_added, internal_string_to_split_path(t1)); } else if (parser.symp(syms::add_file)) { @@ -355,7 +348,7 @@ parser.str(t1); parser.esym(syms::content); parser.hex(t2); - safe_insert(cs.files_added, make_pair(string_to_path(t1), + safe_insert(cs.files_added, make_pair(internal_string_to_split_path(t1), file_id(t2))); } else if (parser.symp(syms::patch)) @@ -367,7 +360,7 @@ parser.esym(syms::to); parser.hex(t3); safe_insert(cs.deltas_applied, - make_pair(string_to_path(t1), + make_pair(internal_string_to_split_path(t1), make_pair(file_id(t2), file_id(t3)))); } @@ -378,7 +371,7 @@ parser.esym(syms::attr); parser.str(t2); safe_insert(cs.attrs_cleared, - make_pair(string_to_path(t1), + make_pair(internal_string_to_split_path(t1), attr_key(t2))); } else if (parser.symp(syms::set)) @@ -390,7 +383,7 @@ parser.esym(syms::value); parser.str(t3); safe_insert(cs.attrs_set, - make_pair(make_pair(string_to_path(t1), + make_pair(make_pair(internal_string_to_split_path(t1), attr_key(t2)), attr_value(t3))); } ======================================================================== --- cset.hh 35dd18413c314261b8869dae3455958329aeed1d +++ cset.hh 53da65e733d3d316a68899a74c70c47819ee4b07 @@ -116,4 +116,12 @@ return i->second; } +inline split_path +internal_string_to_split_path(std::string const & str) +{ + split_path sp; + file_path_internal(str).split(sp); + return sp; +} + #endif // __CSET_HH__ ======================================================================== --- roster4.cc 7bbd28a5912b7e855df56cfb0bff32cf5d4715c3 +++ roster4.cc 00c679d031f2dd70b7b108dc8d42f5d9b8aa782d @@ -80,6 +80,15 @@ } } + +node::node(node_id i) + : self(i), + parent(the_null_node), + name(the_null_component) +{ +} + + node::node() : self(the_null_node), parent(the_null_node), @@ -87,6 +96,19 @@ { } + +dir_node::dir_node(node_id i) + : node(i) +{ +} + + +dir_node::dir_node() + : node() +{ +} + + node_t dir_node::get_child(path_component const & pc) const { @@ -115,12 +137,11 @@ return n; } + node_t dir_node::clone() { - dir_t d = dir_t(new dir_node()); - 
d->birth_revision = birth_revision; - d->self = self; + dir_t d = dir_t(new dir_node(self)); d->parent = parent; d->name = name; d->attrs = attrs; @@ -128,16 +149,27 @@ return d; } + +file_node::file_node(node_id i, file_id const & f) + : node(i), + content(f) +{ +} + + +file_node::file_node() + : node() +{ +} + + node_t file_node::clone() { - file_t f = file_t(new file_node()); - f->birth_revision = birth_revision; - f->self = self; + file_t f = file_t(new file_node(self, content)); f->parent = parent; f->name = name; f->attrs = attrs; - f->content = content; return f; } @@ -158,6 +190,7 @@ } } + struct dfs_iter { @@ -167,6 +200,7 @@ stack< pair > stk; split_path dirname; + dfs_iter(dir_t r) : root(r), return_root(true) { @@ -174,6 +208,7 @@ stk.push(make_pair(root, root->children.begin())); } + void path(split_path & pv) { I(!finished()); @@ -190,11 +225,13 @@ } } + bool finished() { return (!return_root) && stk.empty(); } + node_t operator*() { if (return_root) @@ -209,6 +246,7 @@ } } + void operator++() { if (finished()) @@ -245,12 +283,14 @@ return static_cast(root_dir); } + inline bool same_type(node_t a, node_t b) { return is_file_t(a) == is_file_t(b); } + inline bool shallow_equal(node_t a, node_t b, bool shallow_compare_dir_children) @@ -264,9 +304,6 @@ if (a->name != b->name) return false; - if (!(a->birth_revision == b->birth_revision)) - return false; - if (a->attrs != b->attrs) return false; @@ -309,6 +346,7 @@ return true; } + bool roster_t::operator==(roster_t const & other) const { @@ -341,6 +379,7 @@ return true; } + node_t roster_t::get_node(split_path const & sp) const { @@ -540,10 +579,19 @@ i->second = val; } -marking_t::marking_t(revision_id const & birth_rid, node_t n) + +marking_t::marking_t() { +} + + +marking_t::marking_t(revision_id const & birth_rid, + revision_id const & current_rid, + node_t n) + : birth_revision(birth_rid) +{ set singleton; - singleton.insert(birth_rid); + singleton.insert(current_rid); parent_name = singleton; file_content = singleton; for (full_attr_map_t::const_iterator i = n->attrs.begin(); @@ -551,7 +599,43 @@ attrs.insert(make_pair(i->first, singleton)); } +marking_t +marking_t::freshen(node_t old_node, + node_t new_node, + revision_id const & current_rid) const +{ + I(same_type(old_node, new_node)); + set singleton; + singleton.insert(current_rid); + + marking_t mk = marking_t(*this); + if (old_node->parent != new_node->parent + || old_node->name != new_node->name) + mk.parent_name = singleton; + + if (is_file_t(old_node)) + { + file_t fold = downcast_to_file_t(old_node); + file_t fnew = downcast_to_file_t(new_node); + if (!(fold->content == fnew->content)) + mk.file_content = singleton; + } + + for (full_attr_map_t::const_iterator i = new_node->attrs.begin(); + i != new_node->attrs.end(); ++i) + { + full_attr_map_t::const_iterator j = old_node->attrs.find(i->first); + I(j != old_node->attrs.end()); + map >::iterator k = mk.attrs.find(i->first); + I(k != mk.attrs.end()); + if (i->second != j->second) + k->second = singleton; + } + return mk; +} + + void roster_t::check_finite_depth() const { @@ -591,7 +675,7 @@ I(!null_name(n->name) && !null_node(n->parent)); !null_id(downcast_to_file_t(n)->content); } - I(!null_id(n->birth_revision)); + I(!null_id(mi->second.birth_revision)); for (full_attr_map_t::const_iterator i = n->attrs.begin(); i != n->attrs.end(); ++i) I(i->second.first || !i->second.second().empty()); if (n != root_dir) @@ -669,6 +753,7 @@ node_id_source & nis; }; + struct testing_node_id_source : public node_id_source { @@ 
-683,6 +768,7 @@ node_id curr; }; + struct temp_node_id_source : public node_id_source { @@ -696,6 +782,7 @@ node_id curr; }; + struct true_node_id_source : public node_id_source { @@ -709,6 +796,7 @@ app_state & app; }; + class editable_roster_for_merge : public editable_roster_base { @@ -731,7 +819,8 @@ } }; - // this handles all the stuff in a_new + + // This handles all the stuff in a_new. void unify_roster_oneway(roster_t & a, set & a_new, roster_t & b, set & b_new, set & new_ids, @@ -758,11 +847,11 @@ else { a.replace_node_id(aid, bid); - a.get_node(bid)->birth_revision = b.get_node(bid)->birth_revision; } } } + // After this, left should == right, and there should be no temporary ids. // Destroys sets, because that's handy (it has to scan over both, but it can // skip some double-scanning) @@ -777,7 +866,8 @@ unify_roster_oneway(right, right_new, left, left_new, new_ids, nis); } - // this function implements the case + + // This function implements the case. // a b1 // \ / // b2 @@ -917,7 +1007,8 @@ } } - // take care of marking a single node both of whose parents exist + + // Take care of marking a single node both of whose parents exist. void mark_nontrivial_node(node_t ln, node_t rn, @@ -1013,7 +1104,8 @@ new_rid, n->attrs, marks); } - // this function is also responsible for verifying ancestry invariants -- + + // This function is also responsible for verifying ancestry invariants -- // those invariants on a roster that involve the structure of the roster's // parents, rather than just the structure of the roster itself. void @@ -1044,16 +1136,16 @@ // If the node didn't exist at all in either right or left // side, it was added; there's no need to examine the left // and right sides further, we know all the markings for - // this node will be the set {new_rid}. - marking.insert(make_pair(i->first, marking_t(new_rid, n))); - I(n->birth_revision == new_rid); + // this node will be the set {new_rid}, and set the birth + // revision to new_rid. + marking.insert(make_pair(i->first, marking_t(new_rid, new_rid, n))); } else if (!exists_in_left && exists_in_right) { node_t const & rn = rni->second; - I(same_type(n, rn) && n->birth_revision == rn->birth_revision); - I(right_uncommon_ancestors.find(n->birth_revision) + I(same_type(n, rn) && n->self == rn->self); + I(right_uncommon_ancestors.find(rmi->second.birth_revision) != right_uncommon_ancestors.end()); // Two sub-cases: @@ -1068,33 +1160,36 @@ } else { - // 2. The right edge represents a change to the node - // -- thus a decision on the part of the user -- in - // which case we need to set the new mark-set to - // {new_rid} - marking.insert(make_pair(i->first, marking_t(new_rid, n))); + // 2. The right edge represents a change to the node -- + // thus a decision on the part of the user -- in which case + // we need to "freshen" the marks based on the changes + // which occurred along the edge. 
+ marking.insert(make_pair(i->first, + rmi->second.freshen(rn, n, new_rid))); } } else if (exists_in_left && !exists_in_right) { node_t const & ln = lni->second; - I(same_type(n, ln) && n->birth_revision == ln->birth_revision); - I(left_uncommon_ancestors.find(n->birth_revision) + I(same_type(n, ln) && n->self == ln->self); + I(left_uncommon_ancestors.find(lmi->second.birth_revision) != left_uncommon_ancestors.end()); // Same two sub-cases here as above: if (shallow_equal(n, ln, false)) marking.insert(*lmi); else - marking.insert(make_pair(i->first, marking_t(new_rid, n))); + marking.insert(make_pair(i->first, + lmi->second.freshen(ln, n, new_rid))); } else { node_t const & ln = lni->second; node_t const & rn = rni->second; - I(same_type(n, rn) && n->birth_revision == rn->birth_revision); - I(same_type(n, ln) && n->birth_revision == ln->birth_revision); - marking_t marks(new_rid, n); + I(same_type(n, rn)); + I(same_type(n, ln)); + I(lmi->second.birth_revision == rmi->second.birth_revision); + marking_t marks(lmi->second.birth_revision, new_rid, n); mark_nontrivial_node(ln, rn, lmi->second, rmi->second, left_uncommon_ancestors, right_uncommon_ancestors, new_rid, n, marks); @@ -1178,8 +1273,7 @@ node_id handle_new(node_id nid) { node_t n = r.get_node(nid); - n->birth_revision = rid; - markings.insert(make_pair(nid, marking_t(rid, n))); + markings.insert(make_pair(nid, marking_t(rid, rid, n))); return nid; } @@ -1236,7 +1330,7 @@ unify_rosters(result, from_left_er.new_nodes, from_right_r, from_right_er.new_nodes, new_ids, tnis); - + I(result == from_right_r); } // SPEEDUP?: instead of constructing new marking from scratch, track which @@ -1251,6 +1345,7 @@ new_rid, result, marking); } + void make_roster_for_nonmerge(cset const & cs, revision_id const & parent_rid, revision_id const & new_rid, @@ -1308,6 +1403,8 @@ // 'local' roster and marking symbols string const ident("ident"); string const birth("birth"); + string const dormant_attr("dormant_attr"); + string const path_mark("path_mark"); string const content_mark("content_mark"); string const attr_mark("attr_mark"); @@ -1315,14 +1412,15 @@ } -static inline void -push_local_parts(basic_io::stanza & st, - node_t curr, - marking_t const & mark) +static void +push_marking(basic_io::stanza & st, + node_t curr, + marking_t const & mark) { - st.push_str_pair(syms::ident, lexical_cast(curr->self)); - st.push_hex_pair(syms::birth, curr->birth_revision.inner()()); + I(!null_id(mark.birth_revision)); + st.push_hex_pair(syms::birth, mark.birth_revision.inner()()); + for (set::const_iterator i = mark.parent_name.begin(); i != mark.parent_name.end(); ++i) st.push_hex_pair(syms::path_mark, i->inner()()); @@ -1341,13 +1439,54 @@ { map >::const_iterator am = mark.attrs.find(i->first); I(am != mark.attrs.end()); - for (set::const_iterator i = am->second.begin(); - i != am->second.end(); ++i) - st.push_hex_pair(syms::content_mark, i->inner()()); + for (set::const_iterator j = am->second.begin(); + j != am->second.end(); ++j) + st.push_hex_triple(syms::attr_mark, i->first(), j->inner()()); } } +void +parse_marking(basic_io::parser & pa, + node_t n, + marking_t & marking) +{ + while (pa.symp()) + { + string rev; + if (pa.symp(syms::birth)) + { + pa.sym(); + pa.hex(rev); + marking.birth_revision = revision_id(rev); + } + else if (pa.symp(syms::path_mark)) + { + pa.sym(); + pa.hex(rev); + safe_insert(marking.parent_name, revision_id(rev)); + } + else if (pa.symp(syms::content_mark)) + { + pa.sym(); + pa.hex(rev); + safe_insert(marking.file_content, 
revision_id(rev)); + } + else if (pa.symp(syms::attr_mark)) + { + string k; + pa.sym(); + pa.str(k); + pa.hex(rev); + attr_key key = attr_key(k); + I(n->attrs.find(key) != n->attrs.end()); + safe_insert(marking.attrs[key], revision_id(rev)); + } + else break; + } +} + + void roster_t::print_to(basic_io::printer & pr, marking_map const & mm, @@ -1375,6 +1514,14 @@ st.push_hex_pair(syms::content, ftmp->content.inner()()); // L(F("printing file %s\n") % fp); } + + if (print_local_parts) + { + I(curr->self != the_null_node); + st.push_str_pair(syms::ident, lexical_cast(curr->self)); + } + + // Push the non-dormant part of the attr map for (full_attr_map_t::const_iterator j = curr->attrs.begin(); j != curr->attrs.end(); ++j) { @@ -1386,32 +1533,143 @@ } } - // Now, we *might* print an extended roster here, including the - // marking, assuming we're writing the internal - // (local-to-this-database) form of the roster. - if (print_local_parts) { + // Push the dormant part of the attr map + for (full_attr_map_t::const_iterator j = curr->attrs.begin(); + j != curr->attrs.end(); ++j) + { + if (!j->second.first) + { + I(j->second.second().empty()); + st.push_str_pair(syms::dormant_attr, j->first()); + } + } + marking_map::const_iterator m = mm.find(curr->self); I(m != mm.end()); - push_local_parts(st, curr, m->second); + push_marking(st, curr, m->second); } + pr.print_stanza(st); } } + void roster_t::parse_from(basic_io::parser & pa, - marking_map & mm, - bool parse_local_parts) + marking_map & mm) { - // FIXME: implement - I(false); + // We *always* parse the local part of a roster, because we do not + // actually send the non-local part over the network; the only times + // we serialize a manifest (non-local roster) is when we're printing + // it out for a user, or when we're hashing it for a manifest ID. + nodes.clear(); + root_dir.reset(); + mm.clear(); + + while(pa.symp()) + { + string pth, ident, rev; + node_t n; + + if (pa.symp(syms::file)) + { + string content; + pa.sym(); + pa.str(pth); + pa.esym(syms::content); + pa.hex(content); + pa.esym(syms::ident); + pa.str(ident); + n = file_t(new file_node(lexical_cast(ident), + file_id(content))); + } + else if (pa.symp(syms::dir)) + { + pa.sym(); + pa.str(pth); + pa.esym(syms::ident); + pa.str(ident); + n = dir_t(new dir_node(lexical_cast(ident))); + } + else + break; + + I(static_cast(n)); + + safe_insert(nodes, make_pair(n->self, n)); + if (is_dir_t(n) && pth.empty()) + { + I(! 
has_root()); + root_dir = downcast_to_dir_t(n); + } + else + { + I(!pth.empty()); + attach_node(n->self, internal_string_to_split_path(pth)); + } + + // Non-dormant attrs + while(pa.symp(syms::attr)) + { + pa.sym(); + string k, v; + pa.str(k); + pa.str(v); + safe_insert(n->attrs, make_pair(attr_key(k), + make_pair(true, attr_value(v)))); + } + + // Dormant attrs + while(pa.symp(syms::dormant_attr)) + { + pa.sym(); + string k; + pa.str(k); + safe_insert(n->attrs, make_pair(attr_key(k), + make_pair(false, attr_value()))); + } + + { + marking_t marking; + parse_marking(pa, n, marking); + safe_insert(mm, make_pair(n->self, marking)); + } + } } +void +read_roster_and_marking(data const & dat, + roster_t & ros, + marking_map & mm) +{ + std::istringstream iss(dat()); + basic_io::input_source src(iss, "roster"); + basic_io::tokenizer tok(src); + basic_io::parser pars(tok); + ros.parse_from(pars, mm); + I(src.lookahead == EOF); + ros.check_sane(mm); +} +void +write_roster_and_marking(roster_t const & ros, + marking_map const & mm, + data & dat, + bool print_local_parts) +{ + ros.check_sane(mm); + std::ostringstream oss; + basic_io::printer pr(oss); + ros.print_to(pr, mm, print_local_parts); + dat = data(oss.str()); +} + + + //////////////////////////////////////////////////////////////////// // testing //////////////////////////////////////////////////////////////////// ======================================================================== --- roster4.hh 3103b059c8adac3b1dd2f1315f33284e8f3c811d +++ roster4.hh a9ca6e008ebf3d1c6f1c084d495f6f9411106bcf @@ -43,7 +43,7 @@ struct node { node(); - revision_id birth_revision; + node(node_id i); node_id self; node_id parent; // the_null_node iff this is a root dir path_component name; // the_null_component iff this is a root dir @@ -58,6 +58,8 @@ struct dir_node : public node { + dir_node(); + dir_node(node_id i); dir_map children; node_t get_child(path_component const & pc) const; void attach_child(path_component const & pc, node_t child); @@ -72,6 +74,8 @@ struct file_node : public node { + file_node(); + file_node(node_id i, file_id const & f); file_id content; // need a virtual function to make dynamic_cast work @@ -115,10 +119,17 @@ struct marking_t { + revision_id birth_revision; std::set parent_name; std::set file_content; std::map > attrs; - marking_t(revision_id const & birth_rid, node_t n); + marking_t(); + marking_t(revision_id const & birth_rid, + revision_id const & current_rid, + node_t n); + marking_t freshen(node_t old_node, + node_t new_node, + revision_id const & current_rid) const; }; typedef std::map marking_map; @@ -170,8 +181,7 @@ bool print_local_parts) const; void parse_from(basic_io::parser & pa, - marking_map & mm, - bool parse_local_parts); + marking_map & mm); private: void check_finite_depth() const;
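========================================================================

The patch replaces the single per-node birth_revision with a per-node marking_t that records, for each component of a node (name/parent, file content, each attr), the set of revisions in which that component was last decided, and marking_t::freshen() resets only the components that actually changed along an edge to {new_rid}. The following is a minimal, self-contained C++ sketch of that rule only, under simplifying assumptions: revision ids are plain strings, a node is reduced to a name and a content hash, and attrs are omitted. It is not monotone's roster API.

// Standalone illustration of the "freshen" rule used in mark-merge.
// Revision ids are plain strings; a node is just a name and a content hash.
#include <iostream>
#include <set>
#include <string>

struct mini_node
{
  std::string name;     // stand-in for (parent, name)
  std::string content;  // stand-in for file_id
};

struct mini_marking
{
  std::string birth_revision;
  std::set<std::string> parent_name;
  std::set<std::string> file_content;
};

// Carry the parent's marks forward, but reset any component that changed
// along the edge to the singleton set {current_rid}.
mini_marking
freshen(mini_marking const & parent_marks,
        mini_node const & old_node,
        mini_node const & new_node,
        std::string const & current_rid)
{
  std::set<std::string> singleton;
  singleton.insert(current_rid);

  mini_marking mk = parent_marks;
  if (old_node.name != new_node.name)
    mk.parent_name = singleton;
  if (old_node.content != new_node.content)
    mk.file_content = singleton;
  return mk;
}

int main()
{
  mini_node old_n = { "foo.txt", "aaaa" };
  mini_node new_n = { "foo.txt", "bbbb" };   // content changed, name kept

  mini_marking parent_marks;
  parent_marks.birth_revision = "rev0";
  parent_marks.parent_name.insert("rev1");
  parent_marks.file_content.insert("rev1");

  mini_marking mk = freshen(parent_marks, old_n, new_n, "rev2");

  // Expect: name mark inherited ({rev1}); content mark freshened ({rev2}).
  std::cout << "parent_name mark: " << *mk.parent_name.begin() << "\n";
  std::cout << "file_content mark: " << *mk.file_content.begin() << "\n";
  return 0;
}

Running this prints rev1 for the unchanged name component and rev2 for the changed content component, which is the same decision mark_merged_roster makes when it calls rmi->second.freshen(rn, n, new_rid) instead of building a marking from scratch.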
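========================================================================

The new dormant_attr symbol distinguishes attrs that are currently cleared but once existed: print_to writes live attrs as attr key/value pairs and dormant ones as bare dormant_attr keys, and parse_from rebuilds the pair<bool, value> map from both. The sketch below mirrors that split using plain std types and iostream output; the names and the output layout are illustrative only, since the real code goes through basic_io stanzas rather than raw streams.

// Rough sketch of the live/dormant attr split used when printing a roster
// stanza.  Illustrative only; not monotone's basic_io printer.
#include <iostream>
#include <map>
#include <string>
#include <utility>

typedef std::map<std::string, std::pair<bool, std::string> > attr_map;

void print_attrs(attr_map const & attrs)
{
  // Live (non-dormant) attrs carry a value.
  for (attr_map::const_iterator i = attrs.begin(); i != attrs.end(); ++i)
    if (i->second.first)
      std::cout << "        attr \"" << i->first << "\" \""
                << i->second.second << "\"\n";

  // Dormant attrs record only that the key once existed; their value must
  // be empty (the roster code asserts this before pushing dormant_attr).
  for (attr_map::const_iterator i = attrs.begin(); i != attrs.end(); ++i)
    if (!i->second.first)
      std::cout << "dormant_attr \"" << i->first << "\"\n";
}

int main()
{
  attr_map attrs;
  attrs["example:execute"] = std::make_pair(true, std::string("true"));   // live
  attrs["example:cleared"] = std::make_pair(false, std::string());        // dormant
  print_attrs(attrs);
  return 0;
}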