# # # patch "ChangeLog" # from [8625cc1978b0c07052772462b2311c520aeec071] # to [fbe72d24a83f1df8b92473882790a580942e1bb7] # # patch "database.cc" # from [a5e4bb8071e7d5980ead69694c9c0c45aa8d1541] # to [9e4ab2b6779dab5af2a3fb39ac09db1cd7d1290d] # # patch "database.hh" # from [2d93a208f09fc5827c1b04643929688384f5d586] # to [0bdf8b9a13428fa4e342c3f6cb1504eb5968e0d8] # # patch "netsync.cc" # from [9e577332850f154ed42b8a3104b57d0c1406f788] # to [12a4896a3232dc7d020a90ed3a9857179741e739] # # patch "xdelta.cc" # from [21dd536e55a0e81ccd7c781eb692141782100cbf] # to [37392c3da669a727508f0a65df42b21495b42ac2] # # patch "xdelta.hh" # from [4df1153df6a717ff6e733bc6c479a7486f9bc067] # to [a6335a827b6d8ef7d7123d2cf7bacc83c63ead47] # ============================================================ --- ChangeLog 8625cc1978b0c07052772462b2311c520aeec071 +++ ChangeLog fbe72d24a83f1df8b92473882790a580942e1bb7 @@ -1,3 +1,11 @@ +2006-04-30 Graydon Hoare + + * xdelta.{cc,hh} (invert_xdelta): New function. + * database.{cc,hh} (database::get_arbitrary_file_delta): Add. + (database::put_file_version): Use invert_xdelta. + * netsync.cc (session::note_file_delta): + Use get_arbitrary_file_delta. + 2006-04-29 Richard Levitte * po/sv.po: Updated a fuzzy. ============================================================ --- database.cc a5e4bb8071e7d5980ead69694c9c0c45aa8d1541 +++ database.cc 9e4ab2b6779dab5af2a3fb39ac09db1cd7d1290d @@ -1180,7 +1180,16 @@ get_version(old_id, old_data, data_table, delta_table); patch(old_data, del, new_data); - diff(new_data, old_data, reverse_delta); + { + string tmp; + invert_xdelta(old_data(), del(), tmp); + reverse_delta = delta(tmp); + data old_tmp; + hexenc old_tmp_id; + patch(new_data, reverse_delta, old_tmp); + calculate_ident(old_tmp, old_tmp_id); + I(old_tmp_id == old_id); + } transaction_guard guard(*this); if (exists(old_id, data_table)) @@ -1406,6 +1415,56 @@ } void +database::get_arbitrary_file_delta(file_id const & src_id, + file_id const & dst_id, + file_delta & del) +{ + delta dtmp; + // Deltas stored in the database go from base -> id. + results res; + query q1("SELECT delta FROM file_deltas " + "WHERE base = ? AND id = ?"); + fetch(res, one_col, any_rows, + q1 % text(src_id.inner()()) % text(dst_id.inner()())); + + if (!res.empty()) + { + // Exact hit: a plain delta from src -> dst. + gzip del_packed(res[0][0]); + decode_gzip(del_packed, dtmp); + del = file_delta(dtmp); + return; + } + + query q2("SELECT delta FROM file_deltas " + "WHERE id = ? AND base = ?"); + fetch(res, one_col, any_rows, + q2 % text(dst_id.inner()()) % text(src_id.inner()())); + + if (!res.empty()) + { + // We have a delta from dst -> src; we need to + // invert this to a delta from src -> dst. + gzip del_packed(res[0][0]); + decode_gzip(del_packed, dtmp); + string fwd_delta; + file_data dst; + get_file_version(dst_id, dst); + invert_xdelta(dst.inner()(), dtmp(), fwd_delta); + del = file_delta(fwd_delta); + return; + } + + // No deltas of use; just load both versions and diff. + file_data fd1, fd2; + get_file_version(src_id, fd1); + get_file_version(dst_id, fd2); + diff(fd1.inner(), fd2.inner(), dtmp); + del = file_delta(dtmp); +} + + +void database::get_revision_ancestry(std::multimap & graph) { results res; ============================================================ --- database.hh 2d93a208f09fc5827c1b04643929688384f5d586 +++ database.hh 0bdf8b9a13428fa4e342c3f6cb1504eb5968e0d8 @@ -247,6 +247,10 @@ file_id const & new_id, file_delta const & del); + void get_arbitrary_file_delta(file_id const & src_id, + file_id const & dst_id, + file_delta & del); + // get plain version if it exists, or reconstruct version // from deltas (if they exist). void get_manifest_version(manifest_id const & id, ============================================================ --- netsync.cc 9e577332850f154ed42b8a3104b57d0c1406f788 +++ netsync.cc 12a4896a3232dc7d020a90ed3a9857179741e739 @@ -614,15 +614,12 @@ { if (role == sink_role) return; - file_data fd1, fd2; - delta del; + file_delta fdel; id fid1, fid2; decode_hexenc(src.inner(), fid1); decode_hexenc(dst.inner(), fid2); - app.db.get_file_version(src, fd1); - app.db.get_file_version(dst, fd2); - diff(fd1.inner(), fd2.inner(), del); - queue_delta_cmd(file_item, fid1, fid2, del); + app.db.get_arbitrary_file_delta(src, dst, fdel); + queue_delta_cmd(file_item, fid1, fid2, fdel.inner()); file_items_sent.insert(dst); } ============================================================ --- xdelta.cc 21dd536e55a0e81ccd7c781eb692141782100cbf +++ xdelta.cc 37392c3da669a727508f0a65df42b21495b42ac2 @@ -232,6 +232,19 @@ } } +void +write_delta_insns(vector const & delta_insns, + string & delta) +{ + delta.clear(); + ostringstream oss; + for (vector::const_iterator i = delta_insns.begin(); + i != delta_insns.end(); ++i) + { + oss << *i; + } + delta = oss.str(); +} void compute_delta(string const & a, @@ -619,6 +632,117 @@ return boost::shared_ptr(new piecewise_applicator()); } + +// inversion + +struct copied_extent +{ + copied_extent(string::size_type op, + string::size_type np, + string::size_type len) + : old_pos(op), + new_pos(np), + len(len) + {} + string::size_type old_pos; + string::size_type new_pos; + string::size_type len; + bool operator<(copied_extent const & other) const + { + return old_pos < other.old_pos; + } +}; + +struct +inverse_delta_writing_applicator : + public delta_applicator +{ + string const & old; + set copied_extents; + string::size_type new_pos; + + inverse_delta_writing_applicator(string const & old) + : old(old), + new_pos(0) + {} + + virtual void begin(std::string const & base) {} + virtual void next() {} + virtual void finish(std::string & out) + { + // We are trying to write a delta instruction stream which + // produces 'old' from 'new'. We don't care what was in 'new', + // because we're only going to copy some parts forwards, and we + // already know which parts: those in the table. Our table lists + // extents which were copied in order that they appear in 'old'. + // + // When we run into a section of 'old' which isn't in the table, + // we have to emit an insert instruction for the gap. + + string::size_type old_pos = 0; + out.clear(); + vector delta_insns; + + for (set::iterator i = copied_extents.begin(); + i != copied_extents.end(); ++i) + { + // It is possible that this extent left a gap after the + // previously copied extent; in this case we wish to pad + // the intermediate space with an insert. + while (old_pos < i->old_pos) + { + I(old_pos < old.size()); + // Don't worry, adjacent inserts are merged. + insert_insn(delta_insns, old.at(old_pos++)); + } + + // It is also possible that this extent *overlapped* the + // previously copied extent; in this case we wish to subtract + // the overlap from the inverse copy. + + string::size_type overlap = 0; + if (i->old_pos < old_pos) + overlap = old_pos - i->old_pos; + + if (i->len <= overlap) + continue; + + I(i->len > overlap); + copy_insn(delta_insns, i->new_pos + overlap, i->len - overlap); + old_pos += (i->len - overlap); + } + while (old_pos < old.size()) + insert_insn(delta_insns, old.at(old_pos++)); + + write_delta_insns(delta_insns, out); + } + + virtual void copy(std::string::size_type old_pos, + std::string::size_type len) + { + I(old_pos < old.size()); + copied_extents.insert(copied_extent(old_pos, new_pos, len)); + new_pos += len; + } + + virtual void insert(std::string const & str) + { + new_pos += str.size(); + } +}; + + +void +invert_xdelta(string const & old_str, + string const & delta, + string & delta_inverse) +{ + boost::shared_ptr da(new inverse_delta_writing_applicator(old_str)); + apply_delta(da, delta); + da->finish(delta_inverse); +} + + #ifdef BUILD_UNIT_TESTS #include "unit_tests.hh" ============================================================ --- xdelta.hh 4df1153df6a717ff6e733bc6c479a7486f9bc067 +++ xdelta.hh a6335a827b6d8ef7d7123d2cf7bacc83c63ead47 @@ -38,5 +38,8 @@ std::string const & delta); u64 measure_delta_target_size(std::string const & delta); +void invert_xdelta(std::string const & old_str, + std::string const & delta, + std::string & delta_inverse); #endif // __XDELTA_HH__