# # patch "AUTHORS" # from [f99ce1623b725b9eaa0e5755761ede56a26681bc] # to [a7e91001cefb99727ab18a6899ef0743089cb251] # # patch "ChangeLog" # from [3b65978ec3897497e7ebb0f154f9749d67d1c1c8] # to [eed66c4a8a69fce7af58b67d8efbc0e641ae9fb3] # # patch "commands.cc" # from [2fcb423053b7dfe19cb23d9386e0454b3e57fec9] # to [4a88d127eba69c674a66718e8e76a5cd213098a0] # # patch "constants.cc" # from [dcbffe20599c26310957ee7c1310a44ae93dc2a0] # to [7d39c51c90d4e9be993c9cef054af6d52cb8abe6] # # patch "constants.hh" # from [94b12534ecf00b0d10056ab3edf9fad2e38eb300] # to [e44952936dcfaf9f158865b67046394e8805763a] # # patch "database.cc" # from [c81a8c8cfaf1f6e1257d85470af1cd1f19b98e11] # to [4a4bbeb4d61cf981e103a39d899b12a17abb9602] # # patch "database.hh" # from [8eff7740768dec012533217d6771853b705a3254] # to [77ee0f199a83b2174e046d007ee563ece0dc81d0] # # patch "rcs_import.cc" # from [b90ec8325f19892896d9692fc01893c9d8816594] # to [9a42135b663a3f926d78159eb765e19014ece69c] # # patch "tests/t_cvsimport_drepper2.at" # from [c0d315c7c6fd5077592b8d81ecbc8a801acc3e41] # to [530e44ff587e4ad3ce3ca95026f1990f505c8e5e] # --- AUTHORS +++ AUTHORS @@ -62,6 +62,7 @@ Brian Campbell Ethan Blanton Eric Anderson + Jordan Breeding supporting files: ----------------- --- ChangeLog +++ ChangeLog @@ -1,3 +1,25 @@ +2005-07-11 graydon hoare + + * AUTHORS: Add Jordan. + * commands.cc (ls_tags): Do not uniquify tags. + * constants.{cc,hh} (cvs_window): Change to time_t, tighten to 5 minutes. + * rcs_import.cc (window): Remove. + (note_type): Remove dead code. + (is_sbr): Add test for synthetic branch roots. + (cvs_commit::is_synthetic_branch_root): New test. + (process_branch): Skip synthetic branch roots, push new branch + before picking branch to mark, rather than after. + (cvs_history::index_branchpoint_symbols): Handle vendor branches. + (cvs_history::push_branch): Do not duplicate root on private branches. + (import_branch): Fix up cluster inference. 
+ (cluster_consumer::consume_cluster): New invariant. + * tests/t_cvsimport_drepper2.at: Modify to reflect fixes. + +2005-07-11 Jordan Breeding + + * commands.cc (db): New subcommand "kill_tag_locally" + * database.{cc,hh} (delete_tag_named): New function. + 2005-07-08 Matt Johnston * tests/t_normalized_filenames.at: expect exit code of 1 not 3 for @@ -205,7 +227,7 @@ added --no-merges option provides a means to disable the merge entries). -2005-06-26 Matthew Gregan * tests/t_automate_stdio.at, tests/t_cvsimport_drepper.at, tests/t_selector_later_earlier.at: Further canonicalisation of --- commands.cc +++ commands.cc @@ -1623,20 +1623,23 @@ vector< revision > certs; app.db.get_revision_certs(tag_cert_name, certs); - std::map > sorted_certs; + std::set< pair > > sorted_vals; - for (size_t i = 0; i < certs.size(); ++i) + for (vector< revision >::const_iterator i = certs.begin(); + i != certs.end(); ++i) { cert_value name; - decode_base64(idx(certs, i).inner().value, name); - sorted_certs.insert(std::make_pair(name, idx(certs, i))); + cert c = i->inner(); + decode_base64(c.value, name); + sorted_vals.insert(std::make_pair(name, std::make_pair(c.ident, c.key))); } - for (std::map >::const_iterator i = sorted_certs.begin(); - i != sorted_certs.end(); ++i) + for (std::set > >::const_iterator i = sorted_vals.begin(); + i != sorted_vals.end(); ++i) { cout << i->first << " " - << i->second.inner().ident << " " - << i->second.inner().key << endl; + << i->second.first << " " + << i->second.second << endl; } } @@ -2110,6 +2113,7 @@ "execute\n" "kill_rev_locally ID\n" "kill_branch_locally BRANCH\n" + "kill_tag_locally TAG\n" "check\n" "changesetify\n" "rebuild\n" @@ -2150,6 +2154,8 @@ app.db.clear_epoch(cert_value(idx(args, 1)())); else if (idx(args, 0)() == "kill_branch_locally") app.db.delete_branch_named(cert_value(idx(args, 1)())); + else if (idx(args, 0)() == "kill_tag_locally") + app.db.delete_tag_named(cert_value(idx(args, 1)())); else throw usage(name); } @@ -2970,9 
+2976,10 @@ // we have the following // - // old --- working - // \ \ - // chosen --- merged + // old --> working + // | | + // V V + // chosen --> merged // // - old is the revision specified in MT/revision // - working is based on old and includes the working copy's changes --- constants.cc +++ constants.cc @@ -32,7 +32,7 @@ // number of seconds in window, in which to consider CVS commits equivalent // if they have otherwise compatible contents (author, changelog) - size_t const cvs_window = 3600 * 3; + time_t const cvs_window = 60 * 5; // number of bytes in a password buffer. further bytes will be dropped. size_t const maxpasswd = 0xfff; --- constants.hh +++ constants.hh @@ -33,7 +33,7 @@ // number of seconds in window, in which to consider CVS commits equivalent // if they have otherwise compatible contents (author, changelog) - extern size_t const cvs_window; + extern time_t const cvs_window; // number of bytes in a password buffer. further bytes will be dropped. extern size_t const maxpasswd; --- database.cc +++ database.cc @@ -1560,6 +1560,17 @@ encoded().c_str()); } +/// Deletes all certs referring to a particular tag. 
+void +database::delete_tag_named(cert_value const & tag) +{ + base64 encoded; + encode_base64(tag, encoded); + L(F("Deleting all references to tag %s\n") % tag); + execute("DELETE FROM revision_certs WHERE name='tag' AND value ='%s'", + encoded().c_str()); +} + // crypto key management void --- database.hh +++ database.hh @@ -292,6 +292,8 @@ void delete_branch_named(cert_value const & branch); + void delete_tag_named(cert_value const & tag); + // crypto key / cert operations void get_key_ids(std::string const & pattern, --- rcs_import.cc +++ rcs_import.cc @@ -42,8 +42,6 @@ #include "transforms.hh" #include "ui.hh" -int window = 3600 * 3; - using namespace std; using boost::shared_ptr; using boost::scoped_ptr; @@ -67,6 +65,7 @@ file_id const & ident, cvs_history & cvs); + bool is_synthetic_branch_root; time_t time; bool alive; cvs_author author; @@ -92,11 +91,24 @@ map live_at_beginning; vector lineage; + cvs_branch() + : last_branchpoint(0), + first_commit(0) + { + } + void note_commit(time_t now) { + if (!has_a_commit) + { + first_commit = now; + } + else + { + if (now < first_commit) + first_commit = now; + } has_a_commit = true; - if (now < first_commit) - first_commit = now; } void note_branchpoint(time_t now) @@ -108,15 +120,22 @@ time_t beginning() const { - I(has_a_branchpoint || has_a_commit); + I(has_a_branchpoint || has_a_commit); if (has_a_commit) - return first_commit; + { + I(first_commit != 0); + return first_commit; + } else - return last_branchpoint; + { + I(last_branchpoint != 0); + return last_branchpoint; + } } void append_commit(cvs_commit const & c) { + I(c.time != 0); note_commit(c.time); lineage.push_back(c); } @@ -173,15 +192,39 @@ void index_branchpoint_symbols(rcs_file const & r); - - enum note_type { note_branchpoint, - note_branch_first_commit }; - void push_branch(string const & branch_name, bool private_branch); void pop_branch(); }; +static bool +is_sbr(shared_ptr dl, + shared_ptr dt) +{ + + // CVS abuses the RCS format a bit (ha!) 
when storing a file which + // was only added on a branch: on the root of the branch there'll be + // a commit with dead state, empty text, and a log message + // containing the string "file foo was initially added on branch + // bar". We recognize and ignore these cases, as they do not + // "really" represent commits to be clustered together. + + if (dl->state != "dead") + return false; + + if (!dt->text.empty()) + return false; + + string log_bit = "was initially added on branch"; + string::const_iterator i = search(dt->log.begin(), + dt->log.end(), + log_bit.begin(), + log_bit.end()); + + return i != dt->log.end(); +} + + cvs_commit::cvs_commit(rcs_file const & r, string const & rcs_version, file_id const & ident, @@ -217,8 +260,14 @@ time = mktime(&t); L(F("= %i\n") % time); + is_synthetic_branch_root = is_sbr(delta->second, + deltatext->second); + alive = delta->second->state != "dead"; - changelog = cvs.changelog_interner.intern(deltatext->second->log); + if (is_synthetic_branch_root) + changelog = cvs.changelog_interner.intern("synthetic branch root changelog"); + else + changelog = cvs.changelog_interner.intern(deltatext->second->log); author = cvs.author_interner.intern(delta->second->author); path = cvs.curr_file_interned; version = cvs.file_version_interner.intern(ident.inner()()); @@ -543,8 +592,11 @@ L(F("version %s has %d lines\n") % curr_version % curr_lines->size()); cvs_commit curr_commit(r, curr_version, curr_id, cvs); - cvs.stk.top()->append_commit(curr_commit); - ++cvs.n_versions; + if (!curr_commit.is_synthetic_branch_root) + { + cvs.stk.top()->append_commit(curr_commit); + ++cvs.n_versions; + } string next_version = r.deltas.find(curr_version)->second->next; @@ -569,8 +621,8 @@ { for (ity i = range.first; i != range.second; ++i) { + cvs.push_branch(i->second, false); shared_ptr b = cvs.stk.top(); - cvs.push_branch(i->second, false); if (curr_commit.alive) b->live_at_beginning[cvs.curr_file_interned] = curr_commit.version; 
b->note_branchpoint(curr_commit.time); @@ -748,25 +800,58 @@ vector components; split_version(num, components); + vector first_entry_components; + vector branchpoint_components; + if (components.size() > 2 && - components[components.size() - 2] == string("0")) + (components.size() % 2 == 1)) + { + // this is a "vendor" branch + // + // such as "1.1.1", where "1.1" is the branchpoint and + // "1.1.1.1" will be the first commit on it. + + first_entry_components = components; + first_entry_components.push_back("1"); + + branchpoint_components = components; + branchpoint_components.erase(branchpoint_components.end() - 1, + branchpoint_components.end()); + + } + + else if (components.size() > 2 && + (components.size() % 2 == 0) && + components[components.size() - 2] == string("0")) { - string first_entry_version; - components[components.size() - 2] = components[components.size() - 1]; - components[components.size() - 1] = string("1"); - join_version(components, first_entry_version); + // this is a "normal" branch + // + // such as "1.3.0.2", where "1.3" is the branchpoint and + // "1.3.2.1" will be the first commit on it. - L(F("first version in branch %s would be %s\n") - % sym % first_entry_version); - branch_first_entries.insert(make_pair(first_entry_version, sym)); + first_entry_components = components; + first_entry_components[first_entry_components.size() - 2] + = first_entry_components[first_entry_components.size() - 1]; + first_entry_components[first_entry_components.size() - 1] + = string("1"); + + branchpoint_components = components; + branchpoint_components.erase(branchpoint_components.end() - 2, + branchpoint_components.end()); + } - string branchpoint_version; - components.erase(components.end() - 2, components.end()); - join_version(components, branchpoint_version); + string first_entry_version; + join_version(first_entry_components, first_entry_version); + + L(F("first version in branch %s would be %s\n") + % sym % first_entry_version); + 
branch_first_entries.insert(make_pair(first_entry_version, sym)); - L(F("file branchpoint for %s at %s\n") % sym % branchpoint_version); - branchpoints.insert(make_pair(branchpoint_version, sym)); - } + string branchpoint_version; + join_version(branchpoint_components, branchpoint_version); + + L(F("file branchpoint for %s at %s\n") % sym % branchpoint_version); + branchpoints.insert(make_pair(branchpoint_version, sym)); } } @@ -782,8 +867,9 @@ if (private_branch) { - stk.push(stk.top()); - bstk.push(bstk.top()); + branch = shared_ptr(new cvs_branch()); + stk.push(branch); + bstk.push(branch_interner.intern("")); return; } else @@ -1010,22 +1096,25 @@ unsigned long commits_remaining = branch->lineage.size(); // step 1: sort the lineage - sort(branch->lineage.begin(), branch->lineage.end()); + stable_sort(branch->lineage.begin(), branch->lineage.end()); for (vector::const_iterator i = branch->lineage.begin(); i != branch->lineage.end(); ++i) { commits_remaining--; - L(F("examining next commit [t:%d] [a:%d] [c:%d]\n") - % i->time % i->author % i->changelog); + L(F("examining next commit [t:%d] [p:%s] [a:%s] [c:%s]\n") + % i->time + % cvs.path_interner.lookup(i->path) + % cvs.author_interner.lookup(i->author) + % cvs.changelog_interner.lookup(i->changelog)); // step 2: expire all clusters from the beginning of the set which // have passed the window size while (!clusters.empty()) { cluster_set::const_iterator j = clusters.begin(); - if ((*j)->first_time + window < i->time) + if ((*j)->first_time + constants::cvs_window < i->time) { L(F("expiring cluster\n")); cons.consume_cluster(**j, false); @@ -1037,18 +1126,32 @@ // step 3: find the last still-live cluster to have touched this // file - time_t last_modify_time = 0; + time_t time_of_last_cluster_touching_this_file = 0; + + unsigned clu = 0; for (cluster_set::const_iterator j = clusters.begin(); j != clusters.end(); ++j) - { + { + L(F("examining cluster %d to see if it touched %d\n") + % clu++ + % i->path); + 
cvs_cluster::entry_map::const_iterator k = (*j)->entries.find(i->path); - if (k != (*j)->entries.end() && - k->second.time > last_modify_time) - last_modify_time = k->second.time; + if ((k != (*j)->entries.end()) + && (k->second.time > time_of_last_cluster_touching_this_file)) + { + L(F("found cluster touching %d: [t:%d] [a:%d] [c:%d]\n") + % i->path + % (*j)->first_time + % (*j)->author + % (*j)->changelog); + time_of_last_cluster_touching_this_file = (*j)->first_time; + } } - L(F("last modification time is %d\n") % last_modify_time); + L(F("last modification time is %d\n") + % time_of_last_cluster_touching_this_file); - // step 4: find a cluster which starts after the + // step 4: find a cluster which starts on or after the // last_modify_time, which doesn't modify the file in question, // and which contains the same author and changelog as our // commit @@ -1056,12 +1159,16 @@ for (cluster_set::const_iterator j = clusters.begin(); j != clusters.end(); ++j) { - if (((*j)->first_time > last_modify_time) + if (((*j)->first_time >= time_of_last_cluster_touching_this_file) && ((*j)->author == i->author) && ((*j)->changelog == i->changelog) && ((*j)->entries.find(i->path) == (*j)->entries.end())) { - L(F("picked existing cluster target\n")); + L(F("picked existing cluster [t:%d] [a:%d] [c:%d]\n") + % (*j)->first_time + % (*j)->author + % (*j)->changelog); + target = (*j); } } @@ -1072,7 +1179,10 @@ if (!target) { L(F("building new cluster [t:%d] [a:%d] [c:%d]\n") - % i->time % i->author % i->changelog); + % i->time + % i->author + % i->changelog); + target = cluster_ptr(new cvs_cluster(i->time, i->author, i->changelog)); @@ -1232,7 +1342,7 @@ cvs.file_version_interner.lookup(i->second)); initial_cluster.entries.insert(make_pair(i->first, e)); } - consume_cluster(initial_cluster, false); + consume_cluster(initial_cluster, branch.lineage.empty()); } } @@ -1263,11 +1373,6 @@ { data tmp; write_revision_set(*(i->rev), tmp); - /* - cout << "+++WRITING REVISION" << endl; - 
cout << tmp << endl; - cout << "---WRITING REVISION" << endl; - */ app.db.put_revision(i->rid, *(i->rev)); store_auxiliary_certs(*i); ++n_revisions; @@ -1417,7 +1522,15 @@ void cluster_consumer::consume_cluster(cvs_cluster const & c, bool head_p) -{ +{ + // we should never have an empty cluster; it's *possible* to have + // an empty changeset (say on a vendor import) but every cluster + // should have been created by at least one file commit, even + // if the commit made no changes. it's a logical inconsistency if + // you have an empty cluster. + I(!c.entries.empty()); + + L(F("BEGIN consume_cluster()\n")); shared_ptr rev(new revision_set()); boost::shared_ptr cs(new change_set()); build_change_set(c, *cs); @@ -1431,11 +1544,11 @@ store_manifest_edge(head_p); - L(F("consumed cluster %s (parent '%s')\n") % child_rid % rev->edges.begin()->first); preps.push_back(prepared_revision(child_rid, rev, c)); // now apply same change set to parent_map, making parent_map == child_map apply_change_set(*cs, parent_map); parent_mid = child_mid; parent_rid = child_rid; + L(F("END consume_cluster('%s') (parent '%s')\n") % child_rid % rev->edges.begin()->first); } --- tests/t_cvsimport_drepper2.at +++ tests/t_cvsimport_drepper2.at @@ -10,9 +10,9 @@ 411cfd008f4a72e433b48d6421733b6a792ca3b7 t/libelf-po/POTFILES.in ]) -AT_DATA(test.tags, [initial f137764bdcf393ecb68b44ed2accd3e574681fcb address@hidden -portable-branch-base d57a26278cb758d02ee64d1c633008bfedc4cefd address@hidden -portable-branch-fork-20050601T0139 d57a26278cb758d02ee64d1c633008bfedc4cefd address@hidden +AT_DATA(test.tags, [initial f5e868d2572c2ae5b50fd93bc8d0cb827e416e50 address@hidden +portable-branch-base 1455aa956c2b095d476e803b8b3d85d8e8509bf7 address@hidden +portable-branch-fork-20050601T0139 1455aa956c2b095d476e803b8b3d85d8e8509bf7 address@hidden ]) AT_DATA(e.tar.gz.enc, [H4sIAEJyw0IAA+1YbW/bOBLOV+tXEOiHSw+xqhdKTuq9g4u03SuQbIumub1vAS3R