# Reviewer notes (comments only; the patch text below is unchanged).
#
# This patch touches three files: database.cc, database.hh and netsync.cc.
#
# database.cc
#   * The vlog_extent struct is added near the top of the file (hunk at
#     -58) and the identical definition is removed further down (hunk at
#     -3041), i.e. the definition is moved earlier in the file.
#   * In the hunk at -2739 (the code that inserts into revision_roster),
#     the "rosters"/"roster_deltas" table names and the reverse-delta logic
#     are removed.  The new code walks the revision's parents, takes the
#     last extent of each parent's final vlog cluster as a candidate base,
#     and then either stores the new data in full with put_data() when no
#     base was found, or stores diff(old_data, new_data), gzip-encoded, via
#     put_delta(base, new_id, gzdel).
#     The assignment to base only happens while base is still null or
#     already equal to the candidate, so the first usable parent wins.
#   * New members are added: get_data() returning gzip, get_delta() in a
#     delta and a gzip flavour, and get_exact_delta().  get_exact_delta()
#     returns true only when src and dst map to the same vlog id and the
#     extent immediately before dst's extent has content equal to src;
#     both get_delta() overloads fall back to loading both versions and
#     calling diff() when it returns false.
#
# database.hh
#   * Declares the new get_data/get_delta/get_exact_delta members and moves
#     the existing get_data(ident, data) declaration below the put_data
#     declarations.
#
# netsync.cc
#   * Replaces the two get_file_version() calls plus diff() with a single
#     app.db.get_delta(src.inner(), dst.inner(), del) call.
#
# NOTE(review): in get_exact_delta() the log message reports the extent as
#   [vi.back().off, vi.back().len], but the read that follows passes
#   vi.front().off together with vi.back().len.  This looks like the offset
#   should be vi.back().off -- TODO confirm against get_vlog_extents().
# NOTE(review): vi.back() is used in both the parent loop and in
#   get_exact_delta() with no visible check that vi is non-empty; presumably
#   get_final_vlog_cluster()/get_vlog_extents() always yield at least one
#   extent -- verify.
# NOTE(review): several type names in this copy appear without template
#   arguments (for example "hexenc const &", "std::set parents",
#   "std::vector vi"), and the original line breaks and indentation are
#   collapsed.  Presumably this was lost when the patch was extracted; the
#   text is unlikely to apply or compile as-is -- confirm against the
#   original revision before using it.
#
# # # patch "database.cc" # from [59fbd4330846a03ea4fb35a4ab451e8968acbbc2] # to [f3e6f8f1f59ab14f9015c7d63938bd8d91c1b5c1] # # patch "database.hh" # from [4effa081c0dda55e543f5c30342eb76f5cf76412] # to [dfd7561eb1ebf12fdfd1c979e1691334ff94c20c] # # patch "netsync.cc" # from [ff048e5f5c432920b0ddbffbe0fb7133b8eb9f46] # to [92db9aca3e9bbdd54925ba4e8213602cda673cc7] # ============================================================ --- database.cc 59fbd4330846a03ea4fb35a4ab451e8968acbbc2 +++ database.cc f3e6f8f1f59ab14f9015c7d63938bd8d91c1b5c1 @@ -58,6 +58,19 @@ int const any_rows = -1; int const any_cols = -1; +struct +vlog_extent +{ + bool is_fulltext() + { + return base == off; + } + off_t off; + size_t len; + off_t base; + hexenc content; +}; + namespace { struct query_param @@ -2739,47 +2752,48 @@ // revisions, we just pick the first. It probably doesn't matter for // the sake of delta-encoding. - string data_table = "rosters"; - string delta_table = "roster_deltas"; - transaction_guard guard(*this); execute(query("INSERT into revision_roster VALUES (?, ?)") % text(rev_id.inner()()) % text(new_id())); - if (exists(new_id, data_table) - || delta_exists(new_id, delta_table)) + if (exists(new_id)) { guard.commit(); return; } - // Else we have a new roster the database hasn't seen yet; our task is to - // add it, and deltify all the incoming edges (if they aren't already). + // Try to find a parent to hang this off, otherwise add in full. - put(new_id, new_data, data_table); - + hexenc base; std::set parents; get_revision_parents(rev_id, parents); - // Now do what deltify would do if we bothered (we have the - // roster written now, so might as well do it here). 
for (std::set::const_iterator i = parents.begin(); i != parents.end(); ++i) { if (null_id(*i)) continue; revision_id old_rev = *i; - get_roster_id_for_revision(old_rev, old_id); - if (exists(new_id, data_table)) - { - get_version(old_id, old_data, data_table, delta_table); - diff(new_data, old_data, reverse_delta); - drop(old_id, data_table); - put_delta(old_id, new_id, reverse_delta, delta_table); - } + std::vector vi; + vlog_id vid = get_existing_vlog_id_for_ident(old_rev.inner()); + get_final_vlog_cluster(vid, vi); + if (null_id(base) || vi.back().content == base) + base = vi.back().content; } + + if (null_id(base)) + put_data(new_id, new_data); + else + { + delta del; + gzip gzdel; + get_roster(base, old_data); + diff(old_data, new_data, del); + encode_gzip(del, gzdel); + put_delta(base, new_id, gzdel); + } guard.commit(); } @@ -3041,18 +3055,6 @@ */ -struct -vlog_extent -{ - bool is_fulltext() - { - return base == off; - } - off_t off; - size_t len; - off_t base; - hexenc content; -}; bool @@ -3343,7 +3345,88 @@ guard.commit(); } +void +database::get_data(hexenc const & ident, gzip & dat) +{ + data tmp; + get_data(ident, tmp); + encode_gzip(tmp, dat); +} +void +database::get_delta(hexenc const & src, + hexenc const & dst, + delta & del) +{ + gzip gzdel; + if (get_exact_delta(src, dst, gzdel)) + decode_gzip(gzdel, del); + else + { + data sdat, ddat; + get_data(src, sdat); + get_data(dst, ddat); + diff(sdat, ddat, del); + } +} + +bool +database::get_exact_delta(hexenc const & src, + hexenc const & dst, + gzip & del) +{ + vlog_id src_vid = get_existing_vlog_id_for_ident(src); + vlog_id dst_vid = get_existing_vlog_id_for_ident(dst); + + if (src_vid == dst_vid) + { + // FIXME: you probably want to teach the applicator to do + // delta-combining xdelta synthesis here, for the general case. 
+ + system_path vlog_pth; + get_vlog_path_for_vlog_id(dst_vid, vlog_pth); + L(FL("vlog %d comes from system path '%s'") % dst_vid % vlog_pth); + + require_path_is_file(vlog_pth, + F("reading version log file, '%s' does not exist") % vlog_pth, + F("reading version log file, %s is a directory") % vlog_pth); + + std::vector vi; + get_vlog_extents(dst_vid, dst, vi); + + // Fast path if they're requesting one we have an exact delta for: + I(vi.back().content == dst); + if (vi.size() > 1 && vi.at(vi.size() - 2).content == src) + { + L(FL("loading precise delta extent at physical extent " + "[pos:%d, len:%d] in vlog file '%s'") + % vi.back().off % vi.back().len % vlog_pth); + data buf; + get_extent_from_file(vlog_pth, vi.front().off, vi.back().len, buf); + del = buf(); + return true; + } + } + return false; +} + +void +database::get_delta(hexenc const & src, + hexenc const & dst, + gzip & del) +{ + if (get_exact_delta(src, dst, del)) + return; + + data sdat, ddat; + delta deltmp; + get_data(src, sdat); + get_data(dst, ddat); + diff(sdat, ddat, deltmp); + encode_gzip(deltmp, del); +} + + void database::get_data(hexenc const & ident, data & dat) ============================================================ --- database.hh 4effa081c0dda55e543f5c30342eb76f5cf76412 +++ database.hh dfd7561eb1ebf12fdfd1c979e1691334ff94c20c @@ -446,10 +446,15 @@ hexenc const & new_id, gzip const & dat); - void get_data(hexenc const & ident, data & dat); void put_data(hexenc const & new_id, data const & dat); void put_data(hexenc const & new_id, gzip const & dat); + void get_data(hexenc const & ident, data & dat); + void get_data(hexenc const & ident, gzip & dat); + void get_delta(hexenc const & src, hexenc const & dst, delta & del); + void get_delta(hexenc const & src, hexenc const & dst, gzip & del); + bool get_exact_delta(hexenc const & src, hexenc const & dst, gzip & del); + // node and vlog id stuff. 
u64 next_sequence_number(std::string const & name); ============================================================ --- netsync.cc ff048e5f5c432920b0ddbffbe0fb7133b8eb9f46 +++ netsync.cc 92db9aca3e9bbdd54925ba4e8213602cda673cc7 @@ -596,9 +596,7 @@ id fid1, fid2; decode_hexenc(src.inner(), fid1); decode_hexenc(dst.inner(), fid2); - app.db.get_file_version(src, fd1); - app.db.get_file_version(dst, fd2); - diff(fd1.inner(), fd2.inner(), del); + app.db.get_delta(src.inner(), dst.inner(), del); queue_delta_cmd(file_item, fid1, fid2, del); file_items_sent.insert(dst); }