# # add_file "annotate.cc" # # add_file "annotate.hh" # # patch "Makefile.am" # from [e06716c8fe396a1b540b37122e303393b1f71067] # to [023e73f473930cafcc8767ee7e09b2e62b4b2ddb] # # patch "annotate.cc" # from [] # to [172ac24cce6a31e77ad1d4a4b92473d6e0f77856] # # patch "annotate.hh" # from [] # to [7567f27180290de13debcdd5e13d46d35a443ff4] # # patch "commands.cc" # from [66bf9df3449b8dec9e3df3dddd4a19150cc3fc51] # to [a38db89027771230319f4518b85386d42ed431f2] # # patch "database.cc" # from [2f0d694a3ff58c86f98ae425dba01c9f0e467578] # to [86ca32f9f4b9c948c428c2bf5b4945f93e4f80be] # --- Makefile.am +++ Makefile.am @@ -9,7 +9,7 @@ constants.cc netsync.cc netcmd.cc merkle_tree.cc basic_io.cc \ mkstemp.cc lcs.cc rcs_import.hh rcs_import.cc revision.cc \ change_set.cc mt_version.cc automate.cc database_check.cc \ - path_component.cc epoch.cc inodeprint.cc \ + path_component.cc epoch.cc inodeprint.cc annotate.cc \ \ app_state.hh commands.hh file_io.hh manifest.hh packet.hh \ sanity.hh update.hh work.hh cert.hh database.hh keys.hh \ @@ -23,7 +23,7 @@ mkstemp.hh mt_version.hh automate.hh database_check.hh smap.hh \ gettext.h package_revision.c package_full_revision.c \ path_component.hh epoch.hh package_full_revision.h \ - package_revision.h inodeprint.hh + package_revision.h inodeprint.hh annotate.hh NETXX_SOURCES = \ netxx/accept.cxx netxx/accept.h netxx/address.cxx \ --- annotate.cc +++ annotate.cc @@ -0,0 +1,463 @@ +// copyright (C) 2005 emile snyder +// all rights reserved. +// licensed to the public under the terms of the GNU GPL (>= 2) +// see the file COPYING for details + +#include +#include + +#include "platform.hh" +#include "vocab.hh" +#include "sanity.hh" +#include "revision.hh" +#include "change_set.hh" +#include "app_state.hh" +#include "manifest.hh" +#include "transforms.hh" +#include "annotate.hh" + + +annotate_context::annotate_context(file_id fid, app_state &app) +{ + // get the file data (and store it here?) + file_data fpacked; + data funpacked; + app.db.get_file_version(fid, fpacked); + //unpack(fpacked.inner(), funpacked); + funpacked = fpacked.inner(); + fdata = funpacked(); +} + + +void +annotate_context::copy_block(off_t start, off_t end) +{ + L(F("annotate_context::copy_block [%d, %d)\n") % start % end); + pending_copy_blocks.insert(std::make_pair(start, end)); +} + + +void +annotate_context::evaluate(revision_id responsible_revision) +{ + pack_blockset(pending_copy_blocks); + + // any block that is not either copied or already assigned is assigned to rev. + // So build a set of all copies and assignments and pack it, then walk over it + // to find the gaps. 
+  std::set<block, lt_block> copied_or_assigned;
+
+  std::set<block, lt_block>::const_iterator i;
+  std::set<block, lt_block>::const_iterator next;
+
+  for (i = pending_copy_blocks.begin(); i != pending_copy_blocks.end(); i++) {
+    copied_or_assigned.insert(*i);
+  }
+  std::set< std::pair<block, revision_id> >::const_iterator j;
+  for (j = assigned_blocks.begin(); j != assigned_blocks.end(); j++) {
+    copied_or_assigned.insert(j->first);
+  }
+  L(F("packing copied_or_assigned\n"));
+  pack_blockset(copied_or_assigned);
+  if (copied_or_assigned.size() > 0) {
+    i = copied_or_assigned.begin();
+    if (i->first > 0) {
+      block b(0, i->first);
+      L(F("assigning block [%d, %d) <- %s\n") % b.first % b.second % responsible_revision);
+      assigned_blocks.insert(std::make_pair(b, responsible_revision));
+    }
+    next = i;
+    next++;
+    while (next != copied_or_assigned.end()) {
+      I(i != copied_or_assigned.end());
+      I(i->second <= next->first);
+      if (i->second < next->first) {
+        block b(i->second, next->first);
+        L(F("assigning block [%d, %d) <- %s\n") % b.first % b.second % responsible_revision);
+        assigned_blocks.insert(std::make_pair(b, responsible_revision));
+      }
+      i++;
+      next++;
+    }
+
+    if (fdata.size() > i->second) {
+      block b(i->second, fdata.size());
+      L(F("assigning block [%d, %d) <- %s\n") % b.first % b.second % responsible_revision);
+      assigned_blocks.insert(std::make_pair(b, responsible_revision));
+    }
+  }
+
+  pending_copy_blocks.clear();
+}
+
+
+void
+annotate_context::complete(revision_id initial_revision)
+{
+  if (fdata.size() == 0)
+    return;
+
+  std::set< std::pair<block, revision_id> >::const_iterator i;
+  std::set< std::pair<block, revision_id> >::const_iterator next;
+
+  i = assigned_blocks.begin();
+
+  if (i == assigned_blocks.end())
+    return;
+
+  if (i->first.first > 0)
+    i = assigned_blocks.insert(i, std::make_pair(std::make_pair(0, i->first.first), initial_revision));
+
+  next = i; next++;
+  while (next != assigned_blocks.end()) {
+    I(i->first.second <= next->first.first);
+
+    if (i->first.second != next->first.first) {
+      i = assigned_blocks.insert(i, std::make_pair(std::make_pair(i->first.second, next->first.first), initial_revision));
+      next = i;
+      next++;
+      continue;
+    }
+
+    i++;
+    next++;
+  }
+
+  if (i->first.second < fdata.size())
+    assigned_blocks.insert(std::make_pair(std::make_pair(i->first.second, fdata.size()), initial_revision));
+}
+
+
+bool
+annotate_context::is_complete() const
+{
+  if (fdata.size() == 0)
+    return true;
+
+  std::set< std::pair<block, revision_id> >::const_iterator i;
+  std::set< std::pair<block, revision_id> >::const_iterator next;
+
+  i = assigned_blocks.begin();
+
+  if (i == assigned_blocks.end())
+    return false;
+  if (i->first.first > 0)
+    return false;
+
+  next = i; next++;
+  while (next != assigned_blocks.end()) {
+    I(i->first.second <= next->first.first);
+    if (i->first.second != next->first.first)
+      return false;
+    i++;
+    next++;
+  }
+
+  if (i->first.second < fdata.size())
+    return false;
+
+  return true;
+}
+
+
+void
+annotate_context::dump() const
+{
+  std::set< std::pair<block, revision_id> >::const_iterator i;
+  for (i = assigned_blocks.begin(); i != assigned_blocks.end(); i++) {
+    L(F("annotate_context::dump [%d, %d) <- %s\n") % i->first.first % i->first.second % i->second);
+  }
+}
+
+
+void
+annotate_context::pack_blockset(std::set<block, lt_block> &blocks)
+{
+  L(F("annotate_context::pack_blockset blocks.size() == %d\n") % blocks.size());
+
+  if (blocks.size() < 2)
+    return;
+
+  std::set<block, lt_block>::iterator i, next;
+  i = blocks.begin();
+  next = i;
+  next++;
+
+  while (i != blocks.end() && next != blocks.end()) {
+    L(F("annotate_context::pack_blockset test [%d, %d) and [%d, %d) for overlap\n")
+      % i->first % i->second % next->first % next->second);
+
+    if (i->second > next->first) {
+      L(F("merging\n"));
+      if (i->second < next->second) {
+        block newb(i->first, next->second);
+        L(F("new block is [%d, %d)\n") % newb.first % newb.second);
+        blocks.erase(next);
+        blocks.erase(i);
+        i = blocks.insert(i, newb);
+        next = i;
+        next++;
+        continue;
+      } else {
+        L(F("next is contained in i, deleting next\n"));
+        blocks.erase(next);
+        next = i;
+        next++;
+        continue;
+      }
+    }
+
+    L(F("incrementing\n"));
+    i++;
+    next++;
+  }
+}
+
+
+
+annotate_lineage::annotate_lineage()
+  : current_len(0)
+{
+  // do nothing, we'll get built by calls to copy() and insert() using the
+  // child lineage
+}
+
+annotate_lineage::annotate_lineage(const block &initialfileblock)
+  : current_len(initialfileblock.second)
+{
+  blocks.insert(lineage_block(initialfileblock, initialfileblock));
+}
+
+void
+annotate_lineage::copy(boost::shared_ptr<annotate_context> acp,
+                       boost::shared_ptr<annotate_lineage> child, block b)
+{
+  std::set<lineage_block, lt_lineage_block> child_block_view = child->translate_block(b);
+
+  std::set<lineage_block, lt_lineage_block>::const_iterator i;
+  for (i=child_block_view.begin(); i != child_block_view.end(); i++) {
+    off_t blen = i->local_block.second - i->local_block.first;
+    blocks.insert(lineage_block(std::make_pair(current_len, current_len+blen),
+                                i->final_block));
+
+    L(F("annotate_lineage::copy now mapping [%d, %d) -> [%d, %d)\n")
+      % current_len % (current_len + blen) % i->final_block.first % i->final_block.second);
+
+    current_len += blen;
+    if (i->final_block.second > i->final_block.first)
+      acp->copy_block(i->final_block.first, i->final_block.second);
+  }
+}
+
+void
+annotate_lineage::insert(off_t length)
+{
+  L(F("annotate::insert called with length %d and current_len %d\n") % length % current_len);
+
+  blocks.insert(lineage_block(std::make_pair(current_len, current_len + length),
+                              std::make_pair(0, 0)));
+
+  L(F("annotate_lineage::insert now mapping [%d, %d) -> [0, 0)\n")
+    % current_len % (current_len + length));
+
+  current_len += length;
+}
+
+std::set<lineage_block, lt_lineage_block>
+annotate_lineage::translate_block(block b)
+{
+  I(b.second <= current_len);
+
+  std::set<lineage_block, lt_lineage_block> result;
+
+  std::set<lineage_block, lt_lineage_block>::const_iterator i;
+  for (i=blocks.begin(); i != blocks.end(); i++) {
+    L(F("annotate_lineage::translate_block b [%d, %d), i [%d, %d) -> [%d, %d)\n")
+      % b.first % b.second % i->local_block.first % i->local_block.second % i->final_block.first % i->final_block.second);
+
+    if (i->local_block.second < b.first) { // b comes after i
+      L(F("b after i -- continue\n"));
+      continue;
+    }
+
+    if (i->local_block.first >= b.second) { // b comes before i
+      // local blocks are sorted, so this means no match
+      L(F("b before i -- break\n"));
+      break;
+    }
+
+    // we must have copied all earlier portions of b already
+    I(b.first >= i->local_block.first);
+
+    bool final_block_exists = i->final_block.second > i->final_block.first;
+
+    block bc, bf;
+    off_t final_delta_start;
+
+    if (b.first > i->local_block.first) {
+      bc.first = b.first;
+      final_delta_start = b.first - i->local_block.first;
+    } else {
+      bc.first = i->local_block.first;
+      final_delta_start = 0;
+    }
+
+    if (b.second < i->local_block.second) {
+      bc.second = b.second;
+      bf = i->final_block;
+      if (final_block_exists) {
+        bf.first += final_delta_start;
+        bf.second -= i->local_block.second - b.second;
+      }
+
+      result.insert(lineage_block(bc, bf));
+      break;
+    } else {
+      bc.second = i->local_block.second;
+      bf = i->final_block;
+      if (final_block_exists)
+        bf.first += final_delta_start;
+      result.insert(lineage_block(bc, bf));
+      b.first = i->local_block.second;
+    }
+  }
+
+  return result;
+}
+
+
+
+
+boost::shared_ptr<annotate_lineage>
+apply_delta_annotation (const annotate_node_work &work, file_delta d)
+{
+  delta dd;
+  //unpack(d.inner(), dd);
+  dd = d.inner();
+  std::string delta_string = dd();
+  L(F("file delta to child %s:\n%s\n") % work.node_revision % dd);
+
+  boost::shared_ptr<annotate_lineage> parent_lineage(new annotate_lineage());
+
+  // parse the delta; consists of C blocks and I blocks
+  // patterned on xdelta.cc:apply_delta()
+
+  std::istringstream del(delta_string);
+  for (char c = del.get(); c == 'I' || c == 'C'; c = del.get()) {
+    I(del.good());
+    if (c == 'I') {
+      std::string::size_type len = std::string::npos;
+      del >> len;
+      I(del.good());
+      I(len != std::string::npos);
+      //string tmp;
+      //tmp.reserve(len);
+      I(del.get(c).good());
+      I(c == '\n');
+
+      parent_lineage->insert(len);
+
+      while (len--) {
+        I(del.get(c).good());
+        //tmp += c;
+      }
+      I(del.get(c).good());
+      I(c == '\n');
+
+      // do our thing with the string tmp?
+
+    } else { // c == 'C'
+      std::string::size_type pos = std::string::npos, len = std::string::npos;
+      del >> pos >> len;
+      I(del.good());
+      I(len != std::string::npos);
+      I(del.get(c).good());
+      I(c == '\n');
+
+      parent_lineage->copy(work.annotations, work.lineage, std::make_pair(pos, pos + len));
+    }
+  }
+
+  return parent_lineage;
+}
+
+file_id
+find_file_id_in_revision(app_state &app, file_path fpath, revision_id rid)
+{
+  // find the version of the file requested
+  manifest_map mm;
+  revision_set rev;
+  app.db.get_revision(rid, rev);
+  app.db.get_manifest(rev.new_manifest, mm);
+  manifest_map::const_iterator i = mm.find(fpath);
+  I(i != mm.end());
+  file_id fid = manifest_entry_id(*i);
+  return fid;
+}
+
+void
+do_annotate_node (const annotate_node_work &work_unit,
+                  app_state &app,
+                  std::deque<annotate_node_work> &nodes_to_process,
+                  std::set<revision_id> &nodes_seen)
+{
+  L(F("do_annotate_node for node %s\n") % work_unit.node_revision);
+  nodes_seen.insert(work_unit.node_revision);
+  revision_id null_revision; // initialized to 0 by default?
+
+  // get the revision's parents
+  std::set<revision_id> parents;
+  app.db.get_revision_parents(work_unit.node_revision, parents);
+  L(F("do_annotate_node found %d parents for node %s\n") % parents.size() % work_unit.node_revision);
+
+  std::set<revision_id>::const_iterator parent;
+  for (parent = parents.begin(); parent != parents.end(); parent++) {
+    if (*parent == null_revision) {
+      // work_unit.node_revision is the root node
+      L(F("do_annotate_node setting root revision as %s\n") % work_unit.node_revision);
+      work_unit.annotations->set_root_revision(work_unit.node_revision);
+      return;
+    }
+
+    change_set cs;
+    calculate_arbitrary_change_set (*parent, work_unit.node_revision, app, cs);
+
+    file_path parent_fpath = apply_change_set_inverse(cs, work_unit.node_fpath);
+    L(F("file %s in parent revision %s is %s\n") % work_unit.node_fpath % *parent % parent_fpath);
+
+    if (parent_fpath == std::string("")) {
+      // I(work_unit.node_revision added work_unit.node_fid)
+      L(F("revision %s added file %s (file id %s), terminating annotation processing\n")
+        % work_unit.node_revision % work_unit.node_fpath % work_unit.node_fid);
+      work_unit.annotations->complete(work_unit.node_revision);
+      return;
+    }
+    file_id parent_fid = find_file_id_in_revision(app, parent_fpath, *parent);
+
+    boost::shared_ptr<annotate_lineage> parent_lineage;
+
+    if (!(work_unit.node_fid == parent_fid)) {
+      file_delta delta;
+      app.db.get_file_delta(parent_fid, work_unit.node_fid, delta);
+      parent_lineage = apply_delta_annotation(work_unit, delta);
+    } else {
+      parent_lineage = work_unit.lineage;
+    }
+
+    // if this parent has not yet been queued for processing, create the work unit for it.
+    if (nodes_seen.find(*parent) == nodes_seen.end()) {
+      nodes_seen.insert(*parent);
+      annotate_node_work newunit(work_unit.annotations, parent_lineage, *parent, parent_fid, parent_fpath);
+      nodes_to_process.push_back(newunit);
+    }
+  }
+
+  work_unit.annotations->evaluate(work_unit.node_revision);
+}
+
+
+void
+write_annotations (boost::shared_ptr<annotate_context> acp,
+                   boost::shared_ptr<annotation_formatter> frmt)
+{
+}
--- annotate.hh
+++ annotate.hh
@@ -0,0 +1,175 @@
+#ifndef __ANNOTATE_HH__
+#define __ANNOTATE_HH__
+
+// copyright (C) 2005 emile snyder
+// all rights reserved.
+// licensed to the public under the terms of the GNU GPL (>= 2)
+// see the file COPYING for details
+
+#include <deque>
+#include <set>
+
+#include <boost/shared_ptr.hpp>
+
+
+// imagine the following scenario (. is unknown, x is assigned):
+//                 A lineage map: [0,11)   (coordinates in A)
+//                                [4,15)   (coordinates of A's region in file of interest)
+//              copy to parent A
+//               /---------\ .
+// annotations: ............xxxxxx...............xxxxxxxxx   child rev X
+//              \_____/          \________/
+//           copy to p. B       copy to p. B
+// B lineage map: [0, 7)[7,17)
+//                [0, 7)[27,37)
+//
+// in this case, the region |+++++++| should get assigned to X, giving new
+//
+// annotations: ............xxxxxxXXXXXXXXX......xxxxxxxxx   at the next level
+//
+// Note that we can't know this without reference to the deltas to *each* parent.
+//
+// Then a copy from A [2,6) would really be from [6,10), similarly:
+// a copy from B [6,10) would really be from [6,7)[27,30)
+
+typedef std::pair<off_t, off_t> block;
+
+struct lt_block {
+  bool operator()(const block &lhs, const block &rhs) {
+    return (lhs.first < rhs.first);
+  }
+};
+
+
+// the file that we're annotating is a block of bytes [0, n)
+// this represents our knowledge of the annotations as a set of
+// adjacent regions, ie. [j, k) , with an associated revision id
+// or UNKNOWN indicator.
+//
+// the file must be wholly specified; ie. if we have regions a, b, c
+// then a.i == 0, a.j == b.i, b.j == c.i, and c.j == n
+//
+class annotate_context {
+public:
+  annotate_context(file_id fid, app_state &app);
+
+  /// remember that someone did a copy of this region for future
+  /// evaluate() call
+  void copy_block(off_t start, off_t end);
+
+  /// using the set of copy_block() data we've recorded to date, find
+  /// all the regions that were not copied, and assign them to the given
+  /// revision.
+  void evaluate(revision_id responsible_revision);
+
+  /// assign all remaining unknown regions to the given revision and set
+  /// our complete status.
+  void complete(revision_id initial_revision);
+
+  void set_root_revision(revision_id rrid) { root_revision = rrid; }
+  revision_id get_root_revision() { return root_revision; }
+
+  bool is_complete() const;
+
+  void dump() const;
+
+private:
+  void pack_blockset(std::set<block, lt_block> &blocks);
+
+  std::set<block, lt_block> pending_copy_blocks;
+  std::set< std::pair<block, revision_id> > assigned_blocks;
+
+  revision_id root_revision;
+  std::string fdata;
+};
+
+
+struct lineage_block {
+  lineage_block(const block &localb, const block &finalb) : local_block(localb), final_block(finalb) {}
+
+  block local_block;  // the block in the current file version
+  block final_block;  // the block in the descendent version
+                      // (use [0,0) if it doesn't exist.)
+};
+
+struct lt_lineage_block {
+  bool operator()(const lineage_block &lhs, const lineage_block &rhs) {
+    return (lhs.local_block.first < rhs.local_block.first);
+  }
+};
+
+
+/*
+ * An annotate_lineage records the set of blocks that make up the file
+ * and where they came from (if they did) from its ultimate descendent
+ * (remember, we're walking backwards in time.)
+ */
+class annotate_lineage {
+public:
+  annotate_lineage();
+  annotate_lineage(const block &initialfileblock);
+
+  //void apply_delta(annotate_node_work &work, file_delta fdelta);
+
+  /// copy and insert are used to build up a new lineage by applying
+  /// reverse deltas to a child lineage.
+  void copy(boost::shared_ptr<annotate_context> acp,
+            boost::shared_ptr<annotate_lineage> child,
+            block b);
+  void insert(off_t length);
+
+private:
+  /// given a block from our version of the file, translate this into
+  /// blocks from the ultimate descendent file.  it's a set because
+  /// a single logical block for us might be several disjoint blocks
+  /// from the original (and some blocks which don't come from the original
+  /// at all).
+  std::set<lineage_block, lt_lineage_block> translate_block(block b);
+
+  /// used as we build up a file representation with
+  /// copy and insert calls
+  off_t current_len;
+
+  std::set<lineage_block, lt_lineage_block> blocks;
+};
+
+
+// a set of data that specifies the input data needed to process
+// the annotation for a given childrev -> parentrev edge.
+class annotate_node_work {
+public:
+  annotate_node_work (boost::shared_ptr<annotate_context> annotations_,
+                      boost::shared_ptr<annotate_lineage> lineage_,
+                      revision_id node_revision_, file_id node_fid_, file_path node_fpath_)
+    : annotations(annotations_),
+      lineage(lineage_),
+      node_revision(node_revision_),
+      node_fid(node_fid_),
+      node_fpath(node_fpath_)
+  {}
+
+  boost::shared_ptr<annotate_context> annotations;
+  boost::shared_ptr<annotate_lineage> lineage;
+  revision_id node_revision;
+  file_id node_fid;
+  file_path node_fpath;
+};
+
+class annotation_formatter {
+};
+
+class annotation_text_formatter : public annotation_formatter {
+};
+
+
+class app_state;
+
+extern void do_annotate_node (const annotate_node_work &workunit,
+                              app_state &app,
+                              std::deque<annotate_node_work> &nodes_to_process,
+                              std::set<revision_id> &nodes_seen);
+
+extern void write_annotations (boost::shared_ptr<annotate_context> acp,
+                               boost::shared_ptr<annotation_formatter> frmt);
+
+#endif // defined __ANNOTATE_HH__
--- commands.cc
+++ commands.cc
@@ -43,6 +43,7 @@
 #include "automate.hh"
 #include "inodeprint.hh"
 #include "platform.hh"
+#include "annotate.hh"
 
 //
 // this file defines the task-oriented "top level" commands which can be
@@ -3765,6 +3766,83 @@
     }
 }
 
+
+void
+do_annotate (app_state &app, file_path fpath, file_id fid, revision_id rid)
+{
+  L(F("annotating file %s with id %s in revision %s\n") % fpath % fid % rid);
+
+  // get the file length...
+  file_data fdata;
+  data fdata_unpacked;
+  app.db.get_file_version(fid, fdata);
+  //unpack(fdata.inner(), fdata_unpacked);
+  fdata_unpacked = fdata.inner();
+
+  boost::shared_ptr<annotate_context> acp(new annotate_context(fid, app));
+  boost::shared_ptr<annotate_lineage> lineage(new annotate_lineage(std::make_pair(0, fdata_unpacked().size())));
+
+  // build node work unit
+  std::deque<annotate_node_work> nodes_to_process;
+  std::set<revision_id> nodes_seen;
+  annotate_node_work workunit(acp, lineage, rid, fid, fpath);
+  nodes_to_process.push_back(workunit);
+
+  while (nodes_to_process.size() && !acp->is_complete()) {
+    annotate_node_work work = nodes_to_process.front();
+    nodes_to_process.pop_front();
+    do_annotate_node(work, app, nodes_to_process, nodes_seen);
+  }
+  if (!acp->is_complete()) {
+    L(F("do_annotate acp remains incomplete after processing all nodes\n"));
+    revision_id null_revision;
+    I(!(acp->get_root_revision() == null_revision));
+    acp->complete(acp->get_root_revision());
+  }
+
+  acp->dump();
+  boost::shared_ptr<annotation_formatter> frmt(new annotation_text_formatter());
+  write_annotations(acp, frmt); // automatically write to stdout, or maybe take a stream argument?
+}
+
+CMD(annotate, "informative", "[ID] file", "print annotated copy of 'file' from revision 'ID'")
+{
+  revision_id rid;
+  file_path file;
+
+  if ((args.size() > 2) || (args.size() < 1))
+    throw usage(name);
+
+  if (args.size() == 2)
+    {
+      complete(app, idx(args, 0)(), rid);
+      file=file_path(idx(args, 1)());
+    }
+  else if (args.size() == 1)
+    {
+      app.require_working_copy(); // no id arg, must have working copy
+
+      file = file_path(idx(args, 0)());
+      get_revision_id(rid);
+    }
+
+  L(F("annotate file file_path '%s'\n") % file);
+
+  // find the version of the file requested
+  manifest_map mm;
+  revision_set rev;
+  app.db.get_revision(rid, rev);
+  app.db.get_manifest(rev.new_manifest, mm);
+  manifest_map::const_iterator i = mm.find(file);
+  //N(i != mm.end(),
+  //  L(F("No such file '%s' in revision %s\n") % file % rid));
+  I(i != mm.end());
+  file_id fid = manifest_entry_id(*i);
+  L(F("annotate for file_id %s\n") % manifest_entry_id(*i));
+
+  do_annotate(app, file, fid, rid);
+}
+
 CMD(log, "informative", "[ID] [file]", "print history in reverse order starting from 'ID' (filtering by 'file')")
 {
   revision_set rev;
--- database.cc
+++ database.cc
@@ -895,6 +895,42 @@
   unpack(del_packed, del);
 }
 
+
+void
+database::get_file_delta(file_id const & src,
+                         file_id const & dst,
+                         file_delta & del)
+{
+  results res;
+  // FIXME: testing, only ever do it the hard way
+  if (0) { //try {
+    fetch(res, one_col, one_row,
+          "SELECT delta from '%q' WHERE id = '%q' AND base = '%q'",
+          "file_deltas", (src.inner())().c_str(), (dst.inner())().c_str());
+    del = file_delta(res[0][0]);
+  } else { //} catch (...) {
+    // no delta, try to fetch each file and construct delta
+    file_data src_data, dst_data;
+
+    get_file_version(src, src_data);
+    get_file_version(dst, dst_data);
+
+    data src_unpacked, dst_unpacked;
+    //unpack(src_data.inner(), src_unpacked);
+    //unpack(dst_data.inner(), dst_unpacked);
+    src_unpacked = src_data.inner();
+    dst_unpacked = dst_data.inner();
+
+    // do the xdelta
+    string result;
+    compute_delta(src_unpacked(), dst_unpacked(), result);
+    //compute_delta(dst_unpacked(), src_unpacked(), result);
+    base64< gzip<delta> > del_inner;
+    pack(delta(result), del_inner);
+    del = file_delta(delta(result)); //del_inner);
+  }
+}
+
 void
 database::put(hexenc<id> const & ident,
               data const & dat,
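
The core bookkeeping in annotate_context above is interval arithmetic over half-open [start, end) byte ranges: pack_blockset() merges overlapping copied regions, and evaluate() hands every byte not covered by a copy to the revision currently being processed. The standalone sketch below (not part of the patch; the helper names merge_blocks and assign_gaps are invented for illustration) shows that idea in isolation.

// Standalone illustration of the interval logic used by annotate_context;
// merge_blocks mirrors pack_blockset, assign_gaps mirrors evaluate's gap pass.
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

typedef std::pair<off_t, off_t> block;   // half-open byte range [start, end)

// merge strictly overlapping blocks, keeping the set sorted by start offset
static std::set<block> merge_blocks(std::set<block> const & in)
{
  std::set<block> out;
  for (std::set<block>::const_iterator i = in.begin(); i != in.end(); ++i) {
    if (!out.empty()) {
      block last = *out.rbegin();
      if (i->first < last.second) {        // overlaps the previous block
        out.erase(last);
        out.insert(block(last.first, std::max(last.second, i->second)));
        continue;
      }
    }
    out.insert(*i);
  }
  return out;
}

// everything in [0, len) not covered by a copied block belongs to 'rev'
static std::vector<std::pair<block, std::string> >
assign_gaps(std::set<block> const & copied, off_t len, std::string const & rev)
{
  std::vector<std::pair<block, std::string> > assigned;
  off_t pos = 0;
  for (std::set<block>::const_iterator i = copied.begin(); i != copied.end(); ++i) {
    if (pos < i->first)
      assigned.push_back(std::make_pair(block(pos, i->first), rev));
    pos = i->second;
  }
  if (pos < len)
    assigned.push_back(std::make_pair(block(pos, len), rev));
  return assigned;
}

int main()
{
  std::set<block> copied;
  copied.insert(block(0, 10));
  copied.insert(block(8, 20));             // overlaps the first block
  copied.insert(block(30, 40));

  std::set<block> packed = merge_blocks(copied);   // {[0,20), [30,40)}
  std::vector<std::pair<block, std::string> > a = assign_gaps(packed, 50, "rev-X");
  for (size_t k = 0; k < a.size(); ++k)            // prints [20,30) and [40,50)
    std::cout << "[" << a[k].first.first << ", " << a[k].first.second
              << ") <- " << a[k].second << "\n";
  return 0;
}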