# # # patch "ChangeLog" # from [456bc664b93334a01a45ac8116d46259737f3d61] # to [74073c312a4d49453c1ddaa7963c5ea5dd85f474] # # patch "annotate.cc" # from [6670e1fa98971830139378770a1e2669894d190b] # to [dfd575fc0c005379cb8020a120c6345ead2cd451] # ============================================================ --- ChangeLog 456bc664b93334a01a45ac8116d46259737f3d61 +++ ChangeLog 74073c312a4d49453c1ddaa7963c5ea5dd85f474 @@ -1,3 +1,8 @@ +2006-11-27 Thomas Moschny + + * annotate.cc (do_annotate, do_annotate_node): use markings + together with heights in order to significantly speed up annotate. + 2006-11-22 Patrick Mauritz * rev_height.cc: add memcmp to global namespace for sunpro. ============================================================ --- annotate.cc 6670e1fa98971830139378770a1e2669894d190b +++ annotate.cc dfd575fc0c005379cb8020a120c6345ead2cd451 @@ -7,10 +7,10 @@ // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. -#include #include #include #include +#include #include @@ -28,12 +28,11 @@ #include "transforms.hh" #include "ui.hh" #include "vocab.hh" +#include "rev_height.hh" -using std::auto_ptr; using std::back_insert_iterator; using std::back_inserter; using std::cout; -using std::deque; using std::endl; using std::make_pair; using std::map; @@ -41,6 +40,8 @@ using std::vector; using std::set; using std::string; using std::vector; +using std::pair; +using std::priority_queue; using boost::shared_ptr; @@ -99,7 +100,6 @@ private: }; - /* An annotate_lineage_mapping tells you, for each line of a file, where in the ultimate descendent of interest (UDOI) the line came @@ -141,7 +141,6 @@ private: vector mapping; }; - interner annotate_lineage_mapping::in; @@ -175,51 +174,22 @@ struct annotate_node_work }; -class lineage_merge_node +struct rev_cmp { -public: - typedef shared_ptr splm; - - lineage_merge_node(lineage_merge_node const & m) - : work(m.work), - incoming_edges(m.incoming_edges), - completed_edges(m.completed_edges) - {} - - lineage_merge_node(annotate_node_work wu, size_t incoming) - : work(wu), - incoming_edges(incoming), - completed_edges(1) - {} - - void merge(splm incoming, - shared_ptr const & acp) + bool dir; + rev_cmp(bool _dir) : dir(_dir) {} + bool operator() (pair const & x, + pair const & y) const { - work.lineage->merge(*incoming, acp); completed_edges++; + return dir ? (x.first < y.first) : (x.first > y.first); } - - bool iscomplete() const - { - I(completed_edges <= incoming_edges); - return incoming_edges == completed_edges; - } - - annotate_node_work get_work() const - { - I(iscomplete()); - return work; - } - -private: - annotate_node_work work; - size_t incoming_edges; - size_t completed_edges; }; +typedef priority_queue, + vector >, + rev_cmp> rev_queue; - - annotate_context::annotate_context(file_id fid, app_state & app) : annotated_lines_completed(0) { @@ -356,11 +326,11 @@ annotate_context::is_complete() const return false; } - -string cert_string_value(vector< revision > const & certs, - string const & name, - bool from_start, bool from_end, - string const & sep) +static string +cert_string_value(vector< revision > const & certs, + string const & name, + bool from_start, bool from_end, + string const & sep) { for (vector< revision >::const_iterator i = certs.begin(); i != certs.end(); ++i) @@ -386,7 +356,6 @@ string cert_string_value(vector< revisio return ""; } - void annotate_context::build_revisions_to_annotations (app_state & app, @@ -441,7 +410,6 @@ annotate_context::build_revisions_to_ann } } - void annotate_context::dump(app_state & app, bool just_revs) const { @@ -477,7 +445,6 @@ annotate_context::dump(app_state & app, } } - annotate_lineage_mapping::annotate_lineage_mapping(file_data const & data) { // split into lines @@ -543,7 +510,6 @@ annotate_lineage_mapping::init_with_line "ending with %d entries in mapping\n") % mapping.size()); } - shared_ptr annotate_lineage_mapping::build_parent_lineage (shared_ptr acp, @@ -646,7 +612,6 @@ annotate_lineage_mapping::build_parent_l return parent_lineage; } - void annotate_lineage_mapping::merge(annotate_lineage_mapping const & other, shared_ptr const & acp) @@ -687,7 +652,6 @@ annotate_lineage_mapping::credit_mapped_ } } - void annotate_lineage_mapping::set_copied_all_mapped (shared_ptr acp) const @@ -699,60 +663,44 @@ annotate_lineage_mapping::set_copied_all } } - static void do_annotate_node (annotate_node_work const & work_unit, app_state & app, - deque & nodes_to_process, - set & nodes_complete, - map const & paths_to_nodes, - map & pending_merge_nodes) + rev_queue & revs_to_visit, + map & work_units) { L(FL("do_annotate_node for node %s") % work_unit.revision); - I(nodes_complete.find(work_unit.revision) == nodes_complete.end()); - // nodes_seen.insert(make_pair(work_unit.revision, work_unit.lineage)); roster_t roster; - marking_map markmap; - app.db.get_roster(work_unit.revision, roster, markmap); marking_t marks; - map::const_iterator mmi = - markmap.find(work_unit.fid); - I(mmi != markmap.end()); - marks = mmi->second; + { + marking_map markmap; + app.db.get_roster(work_unit.revision, roster, markmap); - if (marks.file_content.size() == 0) - { - L(FL("found empty content-mark set at rev %s") - % work_unit.revision); - work_unit.lineage->credit_mapped_lines(work_unit.annotations); - work_unit.annotations->evaluate(work_unit.revision); - nodes_complete.insert(work_unit.revision); - return; - } + map::const_iterator mmi = + markmap.find(work_unit.fid); + I(mmi != markmap.end()); + marks = mmi->second; + } - set parents; + I(marks.file_content.size() != 0); - // If we have content-marks which are *not* equal to the current - // rev, we can jump back to them directly. If we have only a - // content-mark equal to the current rev, it means we made a - // decision here, and we must move to the immediate parent revs. - // - // Unfortunately, while this algorithm *could* use the marking - // information as suggested above, it seems to work much better - // (especially wrt. merges) when it goes rev-by-rev, so we leave it - // that way for now. + set parents; // fixme: rename - // if (marks.file_content.size() == 1 - // && *(marks.file_content.begin()) == work_unit.revision) + if (marks.file_content.size() == 1 + && *(marks.file_content.begin()) == work_unit.revision) + { + // this node is marked, need to inspect the parents + app.db.get_revision_parents(work_unit.revision, parents); + } + else + { + // jump directly to the marked ancestors + parents = marks.file_content; + } - app.db.get_revision_parents(work_unit.revision, parents); - - // else - // parents = marks.file_content; - size_t added_in_parent_count = 0; for (set::const_iterator i = parents.begin(); @@ -805,10 +753,10 @@ do_annotate_node // If this parent has not yet been queued for processing, create the // work unit for it. - map::iterator lmn - = pending_merge_nodes.find(parent_revision); + map::iterator lmn + = work_units.find(parent_revision); - if (lmn == pending_merge_nodes.end()) + if (lmn == work_units.end()) // not yet seen { // Once we move on to processing the parent that this file was // renamed from, we'll need the old name @@ -817,26 +765,12 @@ do_annotate_node parent_revision, work_unit.fid); - map::const_iterator ptn - = paths_to_nodes.find(parent_revision); - - if (ptn->second > 1) - { - lineage_merge_node nmn(newunit, ptn->second); - pending_merge_nodes.insert(make_pair(parent_revision, nmn)); - L(FL("put new merge node on pending_merge_nodes " - "for parent %s\n") - % parent_revision); - // just checking... - //(pending_merge_nodes.find(parent_revision))->second.dump(); - } - else - { - L(FL("single path to node, just stick work on the queue " - "for parent %s\n") - % parent_revision); - nodes_to_process.push_back(newunit); - } + work_units.insert(make_pair(parent_revision, newunit)); + { + rev_height height; + app.db.get_rev_height(parent_revision, height); + revs_to_visit.push(make_pair(height, parent_revision)); + } } else { @@ -845,15 +779,8 @@ do_annotate_node // queue. L(FL("merging lineage from node %s to parent %s") % work_unit.revision % parent_revision); - lmn->second.merge(parent_lineage, work_unit.annotations); - //L(FL("after merging from work revision %s to parent %s" - // " lineage_merge_node is:\n") % work_unit.revision - // % parent_revision); lmn->second.dump(); - if (lmn->second.iscomplete()) - { - nodes_to_process.push_back(lmn->second.get_work()); - pending_merge_nodes.erase(lmn); - } + + lmn->second.lineage->merge(*parent_lineage, work_unit.annotations); } } @@ -863,46 +790,9 @@ do_annotate_node } work_unit.annotations->evaluate(work_unit.revision); - nodes_complete.insert(work_unit.revision); } - void -find_ancestors(app_state & app, - revision_id rid, - map & paths_to_nodes) -{ - vector frontier; - frontier.push_back(rid); - - while (!frontier.empty()) - { - revision_id rid = frontier.back(); - frontier.pop_back(); - if(!null_id(rid)) { - set parents; - app.db.get_revision_parents(rid, parents); - for (set::const_iterator i = parents.begin(); - i != parents.end(); ++i) - { - map::iterator found - = paths_to_nodes.find(*i); - - if (found == paths_to_nodes.end()) - { - frontier.push_back(*i); - paths_to_nodes.insert(make_pair(*i, 1)); - } - else - { - (found->second)++; - } - } - } - } -} - -void do_annotate (app_state &app, file_t file_node, revision_id rid, bool just_revs) { L(FL("annotating file %s with content %s in revision %s") @@ -914,29 +804,30 @@ do_annotate (app_state &app, file_t file shared_ptr lineage = acp->initial_lineage(); - set nodes_complete; - map paths_to_nodes; - map pending_merge_nodes; - find_ancestors(app, rid, paths_to_nodes); + // toposorted queue of nodes (revision ids) to visit + rev_queue revs_to_visit(rev_cmp(true)); // fixme + { + rev_height height; + app.db.get_rev_height(rid, height); + revs_to_visit.push(make_pair(height, rid)); + } - // build node work unit - deque nodes_to_process; + // maps rev_id -> workunit + map work_units; annotate_node_work workunit(acp, lineage, rid, file_node->self); - nodes_to_process.push_back(workunit); - - auto_ptr revs_ticker(new ticker(N_("revs done"), "r", 1)); - revs_ticker->set_total(paths_to_nodes.size() + 1); - - while (nodes_to_process.size() && !acp->is_complete()) + work_units.insert(make_pair(rid, workunit)); + + while (!(revs_to_visit.empty() || acp->is_complete())) { - annotate_node_work work = nodes_to_process.front(); - nodes_to_process.pop_front(); - do_annotate_node(work, app, nodes_to_process, nodes_complete, - paths_to_nodes, pending_merge_nodes); - ++(*revs_ticker); + revision_id const & rev = revs_to_visit.top().second; + + map::iterator work = + work_units.find(rev); + I(work != work_units.end()); + do_annotate_node(work->second, app, revs_to_visit, work_units); + revs_to_visit.pop(); } - I(pending_merge_nodes.size() == 0); acp->annotate_equivalent_lines(); I(acp->is_complete());