# # patch "database.cc" # from [b38cff6e9f0d3727afb7a3fc6113d4d959929790] # to [a2c4dd49452b29eee2a1385541e55129060831be] # # patch "database.hh" # from [72206f8334e585a3866b88bc251c496f131d3515] # to [cd0bfbd78e434d93784fa21cd7e565f6e035497e] # # patch "revision.cc" # from [e52e14519a7caff8718e3679d88952b909080797] # to [abdb91c365c3e27be20f50508df16457056f3a61] # # patch "roster4.cc" # from [e972383b3dae879a54561811da0611ce7357d611] # to [49a3aad6c40cbee029d6602f295aa9141e9219c0] # # patch "roster4.hh" # from [8b007fd8271c97603857b53ff95f3aa4f8834ff4] # to [8e362ef36b54ab8eae6550a5bd47e2da5e0efba7] # # patch "schema_migration.cc" # from [38c6a6d8a82751cc80ff017b0a7f863d0b50eac6] # to [80bcca9958dc4c04852ccec8fcf9f9589f0b85c3] # # patch "transforms.cc" # from [05437d611e11ac3f6a0cbef9eb352a112c60d401] # to [ec5692a9ca5515b806c1e3b8c5ab3fe303f1f140] # # patch "transforms.hh" # from [934c5f0c68d4ee8c18a2237398d43a59107e067a] # to [ad5b4e603818a127c3b5a1c3eb365fe32d90da00] # ======================================================================== --- database.cc b38cff6e9f0d3727afb7a3fc6113d4d959929790 +++ database.cc a2c4dd49452b29eee2a1385541e55129060831be @@ -1346,6 +1346,7 @@ dat = rdat; } + void database::put_revision(revision_id const & new_id, revision_set const & rev) @@ -1353,20 +1354,36 @@ I(!null_id(new_id)); I(!revision_exists(new_id)); + + rev.check_sane(); revision_data d; + write_revision_set(rev, d); - rev.check_sane(); + // Phase 1: confirm the revision makes sense + { + revision_id tmp; + calculate_ident(d, tmp); + I(tmp == new_id); + } - write_revision_set(rev, d); - revision_id tmp; - calculate_ident(d, tmp); - I(tmp == new_id); + transaction_guard guard(*this); + + // Phase 2: construct a new roster and sanity-check its manifest_id + // against the manifest_id of the revision you're writing + roster_t ros; + marking_map mm; + { + manifest_id roster_manifest_id; + make_roster_for_revision(rev, new_id, ros, mm, *__app); + calculate_ident(ros, mm, roster_manifest_id); + I(rev.new_manifest == roster_manifest_id); + } + // Phase 3: Write the revision data + base64 > d_packed; pack(d.inner(), d_packed); - transaction_guard guard(*this); - execute("INSERT INTO revisions VALUES(?, ?)", new_id.inner()().c_str(), d_packed().c_str()); @@ -1379,14 +1396,14 @@ new_id.inner()().c_str()); } -/* -// FIXME_ROSTERS: disabled until rewritten to use rosters - check_sane_history(new_id, constants::verify_depth, *__app); -*/ + // Phase 4: write the roster data and commit + put_roster(new_id, ros, mm); + guard.commit(); } -void + +void database::put_revision(revision_id const & new_id, revision_data const & dat) { @@ -1399,6 +1416,8 @@ void database::delete_existing_revs_and_certs() { + execute("DELETE FROM rosters"); + execute("DELETE FROM roster_deltas"); execute("DELETE FROM revisions"); execute("DELETE FROM revision_ancestry"); execute("DELETE FROM revision_certs"); ======================================================================== --- database.hh 72206f8334e585a3866b88bc251c496f131d3515 +++ database.hh cd0bfbd78e434d93784fa21cd7e565f6e035497e @@ -200,6 +200,13 @@ delta const & del, database & db); + void get_roster_id_for_revision(revision_id const & rev_id, + hexenc & roster_id); + + void put_roster(revision_id const & rev_id, + roster_t & roster, + marking_map & marks); + void check_filename(); void open(); @@ -233,12 +240,6 @@ void get_file_version(file_id const & id, file_data & dat); - // get file delta if it exists, else calculate it. - // both manifests must exist. - void get_file_delta(file_id const & src, - file_id const & dst, - file_delta & del); - // put file w/o predecessor into db void put_file(file_id const & new_id, file_data const & dat); @@ -263,12 +264,6 @@ void get_manifest(manifest_id const & id, manifest_map & mm); - // get manifest delta if it exists, else calculate it. - // both manifests must exist. - void get_manifest_delta(manifest_id const & src, - manifest_id const & dst, - manifest_delta & del); - // put manifest w/o predecessor into db void put_manifest(manifest_id const & new_id, manifest_data const & dat); @@ -303,10 +298,10 @@ revision_data & dat); void put_revision(revision_id const & new_id, - revision_set const & cs); + revision_set const & rev); void put_revision(revision_id const & new_id, - revision_data const & dat); + revision_data const & dat); void delete_existing_revs_and_certs(); @@ -435,13 +430,6 @@ // roster and node_id stuff - void get_roster_id_for_revision(revision_id const & rev_id, - hexenc & roster_id); - - void put_roster(revision_id const & rev_id, - roster_t & roster, - marking_map & marks); - void get_roster(revision_id const & rid, roster_t & roster, marking_map & marks); ======================================================================== --- revision.cc e52e14519a7caff8718e3679d88952b909080797 +++ revision.cc abdb91c365c3e27be20f50508df16457056f3a61 @@ -1076,6 +1076,7 @@ concatenate_change_sets(start_to_ca, ca_to_end, composed); } +*/ // Stuff related to rebuilding the revision graph. Unfortunately this is a // real enough error case that we need support code for it. @@ -1086,7 +1087,7 @@ manifest_id const & parent, manifest_id const & child, std::set const & need_history_splitting, - change_set & cs) + cset & cs) { manifest_map m_parent, m_child; @@ -1337,26 +1338,8 @@ { transaction_guard guard(app.db); if (existing_graph) - app.db.delete_existing_revs_and_certs(); - - std::set parents, children, heads; - for (std::multimap::const_iterator i = ancestry.begin(); - i != ancestry.end(); ++i) - { - children.insert(i->first); - parents.insert(i->second); - } - set_difference(children.begin(), children.end(), - parents.begin(), parents.end(), - std::inserter(heads, heads.begin())); - - // FIXME: should do a depth-first traversal here, or something like, - // instead of being recursive. - for (std::set::const_iterator i = heads.begin(); - i != heads.end(); ++i) - { - construct_revision_from_ancestry(*i); - } + app.db.delete_existing_revs_and_certs(); + construct_revisions_from_ancestry(); write_certs(); guard.commit(); } @@ -1440,8 +1423,144 @@ return node; } -// FIXME: this is recursive -- stack depth grows as ancestry depth -- and will -// overflow the stack on large histories. +typedef std::map, + boost::shared_ptr + > > +parent_roster_map; + +static void +insert_into_roster_reusing_parent_entries(file_path const & pth, + file_id const & fid, + parent_roster_map const & parent_rosters, + temp_node_id_source & nis, + roster_t const & child_roster) +{ + node_id nid = create_file_node(i->second, nis); + split_path sp; + i->first.split(sp); + for (parent_roster_map::const_iterator j = parent_rosters.begin(); + j != parent_rosters.end(); ++j) + { + // We use a stupid heuristic: first parent who has + // a file with the same name gets the node + // identity copied forward. Anything else is + // considered a new file. + boost::shared_ptr ros = j->second.first; + if (ros->has_node(sp)) + { + node_t existing_node = ros->get_node(sp); + if (is_file_t(existing_node)) + { + child_roster.replace_node_id(nid, existing_node->self); + nid = existing_node->self; + break; + } + } + } + child_roster.attach_node(); + //... +} + +void +anc_graph::construct_revisions_from_ancestry() +{ + // This is an incredibly cheesy, and also reasonably simple sorting + // system: we put all the root nodes in the work queue. we take a + // node out of the work queue and check if its parents are done. if + // they are, we process it and insert its children. otherwise we put + // it back on the end of the work queue. This both ensures that we're + // always processing something *like* a frontier, while avoiding the + // need to worry about one side of the frontier advancing faster than + // another. + + std::multimap parent_to_child_map; + std::deque work; + std::set done; + + { + std::set parents, children; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + { + parent_to_child_map.insert(std::make_pair(i->second, i->first)); + children.insert(i->first); + parents.insert(i->second); + } + + set_difference(parents.begin(), parents.end(), + children.begin(), children.end(), + std::back_inserter(work)); + } + + while (!work.empty()) + { + + u64 child = work.front(); + work.pop_front(); + typedef std::multimap::const_iterator ci; + std::pair parent_range = ancestry.equal_range(child); + set parents; + bool parents_all_done = true; + for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i) + { + if (i->first != child) + continue; + u64 parent = i->second; + if (done.find(parent) == done.end()) + { + work.push_back(child); + parents_all_done = false; + } + else + parents.insert(parent); + } + + if (parents_all_done + && (node_to_new_rev.find(child) == node_to_new_rev.end())) + { + L(F("processing node %d\n") % child); + revision_set rev; + + manifest_id old_child_mid; + get_node_manifest(child, old_child_mid); + manifest_map old_child_man; + app.db.get_manifest(old_child_mid, old_child_man); + + // Load all the parent rosters into a temporary roster map + parent_roster_map parent_rosters; + + for (ci i = parent_range.first; parents_all_done && i != parent_range.second; ++i) + { + if (i->first != child) + continue; + u64 parent = i->second; + if (parent_rosters.find(parent) == parent_rosters.end()) + { + boost::shared_ptr ros = boost::shared_ptr(new roster_t()); + boost::shared_ptr mm = boost::shared_ptr(new marking_map()); + app.db.get_roster(safe_get(node_to_new_rev, parent), *ros, *mm); + safe_insert(parent_rosters, std::make_pair(ros, mm)); + } + } + + // Now convert the old-skool manifest into a cset adding all + // the files in question, and build a roster out of that. + roster_t child_roster; + temp_node_id_source nis; + { + for (manifest_map::const_iterator i = old_child_man.begin(); + i != old_child_man.end(); ++i) + { + insert_into_roster_reusing_parent_entries(i->first, i->second, + parent_rosters, + nis, child_roster); + } + } + } + } +} + revision_id anc_graph::construct_revision_from_ancestry(u64 child) { ======================================================================== --- roster4.cc e972383b3dae879a54561811da0611ce7357d611 +++ roster4.cc 49a3aad6c40cbee029d6602f295aa9141e9219c0 @@ -459,7 +459,32 @@ return d->get_child(basename); } +bool +roster_t::has_node(split_path const & sp) const +{ + split_path dirname; + path_component basename; + dirname_basename(sp, dirname, basename); + if (dirname.empty()) + { + I(null_name(basename)); + return has_root(); + } + + I(has_root()); + dir_t d = root_dir; + for (split_path::const_iterator i = dirname.begin()+1; i != dirname.end(); ++i) + { + if (d->children.find(*i) == d->children.end()) + return false; + d = downcast_to_dir_t(d->get_child(*i)); + } + return d->children.find(basename) != d->children.end(); +} + + + node_t roster_t::get_node(node_id nid) const { @@ -847,20 +872,6 @@ }; - struct temp_node_id_source - : public node_id_source - { - temp_node_id_source() : curr(first_temp_node) {} - virtual node_id next() - { - node_id n = curr++; - I(temp_node(n)); - return n; - } - node_id curr; - }; - - struct true_node_id_source : public node_id_source { ======================================================================== --- roster4.hh 8b007fd8271c97603857b53ff95f3aa4f8834ff4 +++ roster4.hh 8e362ef36b54ab8eae6550a5bd47e2da5e0efba7 @@ -153,6 +153,7 @@ roster_t(roster_t const & other); roster_t & operator=(roster_t const & other); bool has_root() const; + bool has_node(split_path const & sp) const; node_t get_node(split_path const & sp) const; node_t get_node(node_id n) const; void get_name(node_id n, split_path & sp) const; @@ -203,6 +204,19 @@ friend void dump(roster_t const & val, std::string & out); }; +struct temp_node_id_source + : public node_id_source +{ + temp_node_id_source() : curr(first_temp_node) {} + virtual node_id next() + { + node_id n = curr++; + I(temp_node(n)); + return n; + } + node_id curr; +}; + void dump(roster_t const & val, std::string & out); struct app_state; ======================================================================== --- schema_migration.cc 38c6a6d8a82751cc80ff017b0a7f863d0b50eac6 +++ schema_migration.cc 80bcca9958dc4c04852ccec8fcf9f9589f0b85c3 @@ -764,6 +764,41 @@ return true; } +static bool +migrate_client_to_add_rosters(sqlite3 * sql, + char ** errmsg) +{ + int res; + + res = sqlite3_exec(sql, + "CREATE TABLE rosters\n" + "(\n" + "id primary key, -- strong hash of the roster\n" + "rev_id not null unique, -- strong hash of associated revision\n" + "data not null -- compressed, encoded contents of the roster\n" + ");", + NULL, NULL, errmsg); + if (res != SQLITE_OK) + return false; + + res = sqlite3_exec(sql, + "CREATE TABLE roster_deltas\n" + "(\n" + "id not null, -- strong hash of the roster\n" + "rev_id not null, -- strong hash of associated revision\n" + "base not null, -- joins with either rosters.id or roster_deltas.id\n" + "delta not null, -- rdiff to construct current from base\n" + "unique(id, base),\n" + "unique(rev_id, base)\n" + ");", + NULL, NULL, errmsg); + if (res != SQLITE_OK) + return false; + + return true; +} + + void migrate_monotone_schema(sqlite3 *sql) { ======================================================================== --- transforms.cc 05437d611e11ac3f6a0cbef9eb352a112c60d401 +++ transforms.cc ec5692a9ca5515b806c1e3b8c5ab3fe303f1f140 @@ -322,6 +322,20 @@ ident = tid; } +// Variant which calculates the "manifest part" of a roster; this does +// not include the local sequence numbers or markings, but produces +// the manifest_id which is stored in the public revision_set object. +void calculate_ident(roster_t const & ros, + marking_map const & mm, + manifest_id & ident) +{ + data tmp; + hexenc tid; + write_roster_and_marking(ros, mm, tmp, false); + calculate_ident(tmp, tid); + ident = tid; +} + // this might reasonably go in file_io.cc too... void calculate_ident(file_path const & file, ======================================================================== --- transforms.hh 934c5f0c68d4ee8c18a2237398d43a59107e067a +++ transforms.hh ad5b4e603818a127c3b5a1c3eb365fe32d90da00 @@ -10,6 +10,7 @@ #include "lua.hh" #include "manifest.hh" #include "vocab.hh" +#include "roster4.hh" #include @@ -135,7 +136,14 @@ void calculate_ident(revision_set const & cs, revision_id & ident); +// Variant which calculates the "manifest part" of a roster; this does +// not include the local sequence numbers or markings, but produces +// the manifest_id which is stored in the public revision_set object. +void calculate_ident(roster_t const & ros, + marking_map const & mm, + manifest_id & ident); + // quick streamy variant which doesn't necessarily load the whole file void calculate_ident(file_path const & file,