# # patch "ChangeLog" # from [33d2af6dcdcf261a25b3bd62d38f4ab7b518d9ac] # to [970b66b917a432098f4999830d8ed694b00e1788] # # patch "commands.cc" # from [3bb4a480d7b67398096823f00743f43d6eb1404a] # to [1d3b380ab21c137774d25d72d85a063cee28c53b] # # patch "pcdv.cc" # from [344d7cbbc185228a0d22d2a3db6c94bf85d828f0] # to [3013fda6802159117186282ee281185ae301b64a] # # patch "pcdv.hh" # from [144f8837c224ee7a70d28b9bda553ed00da7e4b5] # to [30c19d348e8315aa42fec1f521ea514c45da2839] # =============================================== --- ChangeLog 33d2af6dcdcf261a25b3bd62d38f4ab7b518d9ac +++ ChangeLog 970b66b917a432098f4999830d8ed694b00e1788 @@ -1,3 +1,8 @@ +2005-07-29 Timothy Brownawell + + * commands.cc (pcdv): Try to make pcdv driver more efficient. + * pcdv.{cc,hh}: Handle the interners better. + 2005-07-09 Timothy Brownawell * pcdv.{cc,hh}: More speedups; use interners to work with ints instead =============================================== --- commands.cc 3bb4a480d7b67398096823f00743f43d6eb1404a +++ commands.cc 1d3b380ab21c137774d25d72d85a063cee28c53b @@ -3794,28 +3794,119 @@ vector -get_file(revision_id const & rev, string const & filename, app_state & app) +get_file(file_id const & ident, app_state & app) { - if (null_id(rev)) - return vector(); + file_data dat; + app.db.get_file_version(ident, dat); + std::string const & in(dat.inner()()); + vector out; + + // like split_into_lines, but keep the line-end characters + // line-end is "\r", "\n", "\r\n", or "\n\r" + std::string::size_type begin = 0; + std::string::size_type end = in.find_first_of("\r\n", begin); + while (end != std::string::npos && end >= begin) + { + if (in.size() > end+1 + && (in.at(end+1) == '\n' + || in.at(end+1) == '\r')) + end += 2; + else + end += 1; + out.push_back(in.substr(begin, end-begin)); + begin = end; + if (begin >= in.size()) + break; + end = in.find_first_of("\r\n", begin); + } + if (begin < in.size()) + out.push_back(in.substr(begin, in.size() - begin)); + + return out; +} + +typedef std::map > > pmap; +typedef std::map > > cmap; + +void +get_fileids(revision_id const & start, + file_path const & fp, + std::map > & fileids, + pmap & parents, + cmap & children, + vector & roots, + app_state & app) +{ manifest_id mid; - app.db.get_revision_manifest(rev, mid); - file_path fp(filename); + app.db.get_revision_manifest(start, mid); manifest_map m; app.db.get_manifest(mid, m); manifest_map::const_iterator i = m.find(fp); - if(i == m.end()) - return vector(); + I(i != m.end()); file_id ident = manifest_entry_id(i); - file_data dat; - L(F("dumping file %s\n") % ident); - app.db.get_file_version(ident, dat); - vector lines; - split_into_lines(dat.inner()(), "utf8", lines); - for (vector::iterator i = lines.begin(); - i != lines.end(); ++i) - (*i)+='\n'; - return lines; + fileids.insert(make_pair(start, make_pair(ident, fp))); + std::deque > todo; + todo.push_back(make_pair(start, fp)); + ticker num("file_id count", "F", 1); + while (!todo.empty()) + { + file_path const & p(todo.front().second); + revision_set rs; + app.db.get_revision(todo.front().first, rs); + int child_count = 0; + for (edge_map::const_iterator i = rs.edges.begin(); + i != rs.edges.end(); ++i) + { + revision_id oldrev(edge_old_revision(i)); + { + std::pair > p(1, vector()); + p.second.push_back(oldrev); + std::pair + pr(parents.insert(make_pair(todo.front().first, p))); + if (!pr.second) + { + I(++pr.first->second.first <= 2); + pr.first->second.second.push_back(oldrev); + } + + std::pair > c(1, vector()); + c.second.push_back(todo.front().first); + std::pair + cr(children.insert(make_pair(oldrev, c))); + if (!cr.second) + { + ++cr.first->second.first; + cr.first->second.second.push_back(todo.front().first); + } + } + + // already processed it + if (fileids.find(oldrev) != fileids.end()) + continue; + // this is the beginning of time + if (edge_changes(i).rearrangement.has_added_file(p)) + continue; + std::map const & + renames(edge_changes(i).rearrangement.renamed_files); + std::map::const_iterator j = renames.begin(); + while (j != renames.end() && !(j->second == p)) + ++j; + file_path const & mfp((j == renames.end())?fp:j->first); + + manifest_map m; + app.db.get_manifest(edge_old_manifest(i), m); + manifest_map::const_iterator mi = m.find(mfp); + I(mi != m.end()); + file_id ident = manifest_entry_id(mi); + fileids.insert(make_pair(oldrev, make_pair(ident, mfp))); + todo.push_back(make_pair(oldrev, mfp)); + ++child_count; + } + if (!child_count) + roots.push_back(todo.front().first); + ++num; + todo.pop_front(); + } } CMD(pcdv, "debug", "REVISION REVISION FILENAME", @@ -3823,6 +3914,8 @@ OPT_NONE) { pcdv_test(); + if (args.size() == 0) + return; if (args.size() != 3) throw usage(name); @@ -3830,85 +3923,91 @@ revision_id left, right; complete(app, idx(args, 0)(), left); complete(app, idx(args, 1)(), right); + file_path fp(idx(args, 2)()); - typedef std::multimap::iterator gi; - typedef std::map > >::iterator pi; - std::multimap graph; - app.db.get_revision_ancestry(graph); - std::set leaves; - app.db.get_revision_ids(leaves); - std::map > > parents; - std::map child_count; - for (gi i = graph.begin(); i != graph.end(); ++i) - parents.insert(std::make_pair(i->first, - std::make_pair(0, vector()))); - for (gi i = graph.begin(); i != graph.end(); ++i) - { - std::pair > & p(parents[i->second]); - ++p.first; - p.second.push_back(i->first); - } - // first find the set of graph roots - std::list roots; - for (pi i = parents.begin(); i != parents.end(); ++i) - if(i->second.first == 0) - roots.push_back(i->first); + std::map > fileids; + pmap parents; + cmap children; + std::vector rootvect; + get_fileids(left, fp, fileids, parents, children, rootvect, app); + get_fileids(right, fp, fileids, parents, children, rootvect, app); + // now compute the merge history + std::deque roots; + for (vector::const_iterator i = rootvect.begin(); + i != rootvect.end(); ++i) + roots.push_back(*i); + ticker count("Revs in weave", "R", 1); ticker lines("Lines in weave", "L", 1); + ticker unique("Unique lines", "U", 1); map files; file_state empty; - std::set heads; file_state p(empty); - while (!roots.empty()) + bool found_right = false; + bool found_left = false; + while (!roots.empty() && !(found_right && found_left)) { - vector const & ps(parents[roots.front()].second); - if (ps.size() == 0) - p = empty; - else if (ps.size() == 1) + std::map >::const_iterator + i(fileids.find(roots.front())); + if (i != fileids.end()) { - map::iterator i = files.find(ps.front()); - I(i != files.end()); - p = i->second; - } - else - { - I(ps.size() == 2); - map::iterator i = files.find(ps.front()); - I(i != files.end()); - map::iterator j = files.find(ps.back()); - I(j != files.end()); - p = i->second.mash(j->second); - } - vector contents(get_file(roots.front(), idx(args, 2)(), app)); - string r(roots.front().inner()()); - files.insert(std::make_pair(roots.front(),p.resolve(contents, r))); + vector const & ps(parents[roots.front()].second); + if (ps.size() == 0) + p = empty; + else if (ps.size() == 1) + { + map::const_iterator + i = files.find(ps.front()); + if (i != files.end()) + p = i->second; + } + else + { + I(ps.size() == 2); + map::const_iterator + i = files.find(ps.front()); + bool l(i != files.end()); + map::const_iterator + j = files.find(ps.back()); + bool r(j != files.end()); + if (l && r) + p = i->second.mash(j->second); + else if (l) + p = i->second; + else if (r) + p = j->second; + } + vector contents(get_file(i->second.first, app)); + string r(roots.front().inner()()); + files.insert(std::make_pair(roots.front(),p.resolve(contents, r))); - ++count; - lines += (empty.weave->size() - lines.ticks); + ++count; + lines += (empty.weave->size() - lines.ticks); + unique += (empty.itx->first.rev.size() - unique.ticks); - heads.insert(roots.front()); - for (vector::const_iterator i = ps.begin(); - i != ps.end(); ++i) - { - heads.erase(*i); - if (--child_count[*i] == 0 - && left.inner()() != i->inner()() - && right.inner()() != i->inner()()) - files.erase(*i); + found_left = found_left + || (left.inner()() == roots.front().inner()()); + found_right = found_right + || (right.inner()() == roots.front().inner()()); + for (vector::const_iterator i = ps.begin(); + i != ps.end(); ++i) + { + if (--children[*i].first == 0 + && left.inner()() != i->inner()() + && right.inner()() != i->inner()()) + files.erase(*i); + } } - int children = 0; - for (gi i = graph.lower_bound(roots.front()); - i != graph.upper_bound(roots.front()); i++) + + vector const & cs(children[roots.front()].second); + for (vector::const_iterator i = cs.begin(); + i != cs.end(); i++) { - if (--(parents[i->second].first) == 0) - roots.push_back(i->second); - ++children; + if (--(parents[*i].first) == 0) + roots.push_back(*i); } - child_count.insert(make_pair(roots.front(), children)); - graph.erase(roots.front()); - leaves.erase(roots.front()); roots.pop_front(); } @@ -3916,6 +4015,9 @@ N(l != files.end(), F("Not found: %s.") % left); map::iterator r = files.find(right); N(r != files.end(), F("Not found: %s.") % right); + + P(F("Done building history.")); + vector result(l->second.conflict(r->second)); P(F("")); show_conflict(consolidate(result)); =============================================== --- pcdv.cc 344d7cbbc185228a0d22d2a3db6c94bf85d828f0 +++ pcdv.cc 3013fda6802159117186282ee281185ae301b64a @@ -445,10 +445,12 @@ states(new map()) {} -file_state::file_state(vector const & initial, string rev): - weave(new vector()), - itx(new std::pair, - interner >()), +file_state::file_state(vector const & initial, string rev, + boost::shared_ptr > _weave, + boost::shared_ptr, + interner > > _itx): + weave(_weave), + itx(_itx), states(new map()) { revid r(itx->second.intern(rev)); @@ -517,6 +519,7 @@ for (vector::const_iterator i = weave->begin(); i != weave->end(); ++i) { + std::string line(itx->first.lookup(i->line)); map::const_iterator m = states->find(i->id); map::const_iterator o = other.states->find(i->id); bool mm(m != states->end()); @@ -535,7 +538,7 @@ result.push_back(merge_section(left, right)); else result.push_back(merge_section(clean)); - result.push_back(merge_section(itx->first.lookup(i->line))); + result.push_back(merge_section(line)); left.clear(); right.clear(); clean.clear(); @@ -552,11 +555,11 @@ mustright = true; } if (mehave) - left.push_back(itx->first.lookup(i->line)); + left.push_back(line); if (otherhave) - right.push_back(itx->first.lookup(i->line)); + right.push_back(line); if (mergehave) - clean.push_back(itx->first.lookup(i->line)); + clean.push_back(line); } } if (mustright && mustleft) @@ -572,9 +575,7 @@ { if (weave->empty()) { - file_state x(result_, revision); - *weave = *x.weave; - x.weave = weave; + file_state x(result_, revision, weave, itx); return x; } revid rev(itx->second.intern(revision)); @@ -906,14 +907,16 @@ test_file_state() { { - file_state ta(vectorize("abc"), "a"); + file_state orig; + file_state ta(orig.resolve(vectorize("abc"), "a")); file_state tb(ta.resolve(vectorize("bcd"), "b")); vector res(tb.current()); I(res == vectorize("bcd")); } { - file_state ta(vectorize("abc"), "a"); + file_state orig; + file_state ta(orig.resolve(vectorize("abc"), "a")); file_state tb(ta.resolve(vectorize("dabc"), "b")); file_state tc(ta.resolve(vectorize("abce"), "c")); file_state td(tb.mash(tc)); @@ -921,7 +924,8 @@ } { - file_state ta(vectorize("abc"), "a"); + file_state orig; + file_state ta(orig.resolve(vectorize("abc"), "a")); file_state tb(ta.resolve(vectorize("adc"), "b")); file_state tc(tb.resolve(vectorize("abc"), "c")); file_state td(ta.resolve(vectorize("aec"), "d")); @@ -933,7 +937,8 @@ } { - file_state ta(vectorize("abc"), "a"); + file_state orig; + file_state ta(orig.resolve(vectorize("abc"), "a")); file_state tb(ta.resolve(vectorize("adc"), "b")); file_state tc(ta.resolve(vectorize("aec"), "c")); =============================================== --- pcdv.hh 144f8837c224ee7a70d28b9bda553ed00da7e4b5 +++ pcdv.hh 30c19d348e8315aa42fec1f521ea514c45da2839 @@ -132,7 +132,10 @@ boost::shared_ptr, interner > > _itx); file_state(); - file_state(vector const & initial, string rev); + file_state(vector const & initial, string rev, + boost::shared_ptr > _weave, + boost::shared_ptr, + interner > > _itx); ~file_state();