# # # add_file "lru_cache.h" # content [a9febaa58dd2fe049373186090c306db0717e5b2] # # patch "AUTHORS" # from [66d90072b355ef6e2b66f56c0dfa00de1565dc98] # to [dd5bd13b7de3c47615bf09d881a148033b3b79a5] # # patch "ChangeLog" # from [e740b27fde448d4ea8815b882e631795d31ace3a] # to [86a0e296ce3738249b72b37a0ce10eacd136d5d6] # # patch "Makefile.am" # from [380383c22ef6bb6b1d7203b215dbed739c249c63] # to [c9577f4e9a331ff0110753c599226cc3cc304731] # # patch "basic_io.cc" # from [f335df2f41cf065b3162f49a8192b36ae1848913] # to [f648969974baf376d08f017aeb7497eaf18f4dd5] # # patch "botan/gzip.h" # from [18beaca49947b5a5727c6396ecb6091442e4fe62] # to [0b391725625531a7f5c73d372355fb71e00c2b27] # # patch "constants.cc" # from [c7bc2142cf0e9861c9fd744f458da6e8c7a0323e] # to [f2baab50e8fa565411d429fe4a5099f0266c3384] # # patch "constants.hh" # from [528788cf84a56e076de55706b0fb989f570a6e9f] # to [b8396f4cd024f5c1fd0d1cbd1975d2359e3b7771] # # patch "database.cc" # from [28a3e88a03157ffd23eb6f9ebf3a9653969692c9] # to [8e4d19ee73104472276b9f352fe88c512cc2a056] # # patch "hmac.cc" # from [6abde45d788c47d50bb104e76e26ee903b93be7f] # to [01430adfa6339a539db4327741755a05191bab7b] # # patch "transforms.cc" # from [d530e6715819b76c4173dc4afc3d3341da60b0a0] # to [dc064013066c8b3260accf2cde5f3216cc334ed5] # ============================================================ --- lru_cache.h a9febaa58dd2fe049373186090c306db0717e5b2 +++ lru_cache.h a9febaa58dd2fe049373186090c306db0717e5b2 @@ -0,0 +1,228 @@ +/*************************************************************************** +* Copyright (C) 2004 by Patrick Audley * +* address@hidden * +* * +***************************************************************************/ +/** + * @file lru_cache.cpp Template cache with an LRU removal policy + * @author Patrick Audley + * @version 1.0 + * @date December 2004 + * @par + * This cache is thread safe if compiled with the _REENTRANT defined. It + * uses the BOOST scientific computing library to provide the thread safety + * mutexes. + */ +#include +#include +#ifdef _REENTRANT +#include +/// If we are reentrant then use a BOOST scoped mutex where neccessary. +#define SCOPED_MUTEX boost::mutex::scoped_lock lock(this->_mutex); +#else +/// If we aren't reentrant then don't do anything. +#define SCOPED_MUTEX +#endif + +template +struct Countfn { + unsigned long operator()( const T &x ) { return 1; } +}; + +/** + * @brief Template cache with an LRU removal policy. + * @class LRUCache + * + * @par + * This template creats a simple collection of key-value pairs that grows + * until the size specified at construction is reached and then begins + * discard the Least Recently Used element on each insertion. + * + */ +template > class LRUCache { + public: + /// Main cache storage typedef + typedef std::list< std::pair< Key, Data > > List; + /// Main cache iterator + typedef typename List::iterator List_Iter; + /// Index typedef + typedef std::map< Key, List_Iter > Map; + /// Index iterator + typedef typename Map::iterator Map_Iter; + + /// Deletion strategies for element removal. + enum deletion_strategies { + IGNORE, ///< Simply set erase the element from the cache and allow it leave scope. + DELETE, ///< Delete the Data elements before removal from the cache. + }; + + private: + /// Main cache storage + List _list; + /// Cache storage index + Map _index; + + /// Maximum abstract size of the cache + unsigned long _max_size; + + /// Current abstract size of the cache + unsigned long _curr_size; + + /// Current deletion strategy + deletion_strategies _deletion_strategy; + +#ifdef _REENTRANT + boost::mutex _mutex; +#endif + + public: + /** @brief Creates a cache that holds at most Size worth of elements. + * @param Size maximum size of cache + * @param DeletionStrategy how we dispose of elements when we remove them from the cache + */ + LRUCache( const unsigned long Size, const deletion_strategies DeletionStrategy = IGNORE ) : + _max_size( Size ), + _deletion_strategy( DeletionStrategy ) + {} + /// Destructor - cleans up both index and storage + ~LRUCache() { this->clear(); } + + /** @brief Gets the current abstract size of the cache. + * @return current size + */ + inline const unsigned long size( void ) const { return _curr_size; } + + /** @brief Gets the maximum sbstract size of the cache. + * @return maximum size + */ + inline const unsigned long max_size( void ) const { return _max_size; } + + /// Clears all storage and indices. + void clear( void ) { + SCOPED_MUTEX; + _list.clear(); + _index.clear(); + }; + + /** @brief Checks for the existance of a key in the cache. + * @param key to check for + * @return bool indicating whether or not the key was found. + */ + inline bool exists( const Key &key ) const { + return _index.find( key ) != _index.end(); + } + + /** @brief Removes a key-data pair from the cache. + * @param key to be removed + */ + inline void remove( const Key &key ) { + SCOPED_MUTEX; + Map_Iter miter = _index.find( key ); + if( miter == _index.end() ) return; + this->_remove( miter ); + } + + /** @brief Touches a key in the Cache and makes it the most recently used. + * @param key to be touched + */ + inline void touch( const Key &key ) { + SCOPED_MUTEX; + // Find the key in the index. + Map_Iter miter = this->_touch( key ); + } + + /** @brief Fetches a pointer to cache data. + * @param key to fetch data for + * @param touch whether or not to touch the data + * @return pointer to data or NULL on error + */ + inline Data *fetch( const Key &key, bool touch = true ) { + Map_Iter miter = _index.find( key ); + if( miter == _index.end() ) return NULL; + if (touch) + this->touch( key ); + return &(miter->second->second); + } + + /** @brief Fetches a pointer to cache data. + * @param key to fetch data for + * @param data to fetch data into + * @param touch whether or not to touch the data + * @return whether or not data was filled in + */ + inline bool fetch( const Key &key, Data &data, bool touch = true ) { + Map_Iter miter = _index.find( key ); + if( miter == _index.end() ) return false; + if (touch) + this->touch( key ); + data = miter->second->second; + return true; + } + + + /** @brief Inserts a key-data pair into the cache and removes entries if neccessary. + * @param key object key for insertion + * @param data object data for insertion + * @note This function checks key existance and touches the key if it already exists. + */ + inline void insert( const Key &key, const Data &data ) { + SCOPED_MUTEX; + // Touch the key, if it exists, then replace the content. + Map_Iter miter = this->_touch( key ); + if( miter != _index.end() ) { + this->_remove( miter ); + } + // Ok, do the actual insert at the head of the list + _list.push_front(std::make_pair(key,data)); + List_Iter liter = _list.begin(); + // Store the index + _index[ key ] = liter; + _curr_size += Sizefn()( data ); + // Check to see if we need to remove an element due to exceeding max_size + if( _curr_size > _max_size ) { + // Remove the last element. + liter = _list.end(); + --liter; + this->_remove( liter->first ); + } + } + +#ifdef DEBUG + /** @brief DEBUG only function, this returns the pointer to the internal list. + * @return evil pointer to private data for debugging only. + */ + List *debug_get_list_pointer( void ) { return &_list; } +#endif + + private: + /** @brief Internal touch function. + * @param key to be touched + * @return a Map_Iter pointing to the key that was touched. + */ + inline Map_Iter _touch( const Key &key ) { + Map_Iter miter = _index.find( key ); + if( miter == _index.end() ) return miter; + // Move the found node to the head of the list. + _list.splice( _list.begin(), _list, miter->second ); + return miter; + } + + /** @brief Interal remove function + * @param miter Map_Iter that points to the key to remove + * @warning miter is now longer usable after being passed to this function. + */ + inline void _remove( const Map_Iter &miter ) { + _curr_size -= Sizefn()(miter->second->second); + _list.erase( miter->second ); + _index.erase( miter ); + } + + /** @brief Interal remove function + * @param key to remove + */ + inline void _remove( const Key &key ) { + Map_Iter miter = _index.find( key ); + _remove( miter ); + } +}; + ============================================================ --- AUTHORS 66d90072b355ef6e2b66f56c0dfa00de1565dc98 +++ AUTHORS dd5bd13b7de3c47615bf09d881a148033b3b79a5 @@ -257,10 +257,18 @@ It's reproduced in popt/COPYING. +--- + The file contrib/colorize is a copy of a perl script written by Cédric Bouvier, which is released under the GNU GPL. +--- +The file lru_cache.h is a copy of a C++ header written by Patrick +Audley, which was released under the GNU GPL. + +--- + copyright status: ----------------- ============================================================ --- ChangeLog e740b27fde448d4ea8815b882e631795d31ace3a +++ ChangeLog 86a0e296ce3738249b72b37a0ce10eacd136d5d6 @@ -1,3 +1,16 @@ +2006-03-05 Graydon Hoare + + * lru_cache.h: New file. + * AUTHORS: Mention Patrick. + * Makefile.am: Include file. + * basic_io.cc: Change uses of isxdigit, isalnum. + * botan/gzip.h: Reduce gzip to level 1. + * constants.cc (db_roster_cache_sz): Set to 7. + * database.cc: Use lru cache for vcache; add roster cache. + * hmac.cc: Instantiate "SHA-160" not "SHA-1" + (corrects lookup miss in botan's algorithm table). + * transforms.cc ({encode,decode}_hexenc): Specialize for idlen. + 2006-03-01 Matthew Gregan * transforms.cc, transforms.hh: Add utf8_validate function and ============================================================ --- Makefile.am 380383c22ef6bb6b1d7203b215dbed739c249c63 +++ Makefile.am c9577f4e9a331ff0110753c599226cc3cc304731 @@ -50,6 +50,8 @@ merge.cc merge.hh \ legacy.cc legacy.hh \ \ + lru_cache.h \ + \ cleanup.hh unit_tests.hh interner.hh \ cycle_detector.hh randomfile.hh adler32.hh quick_alloc.hh \ netio.hh smap.hh gettext.h \ ============================================================ --- basic_io.cc f335df2f41cf065b3162f49a8192b36ae1848913 +++ basic_io.cc f648969974baf376d08f017aeb7497eaf18f4dd5 @@ -64,10 +64,10 @@ void basic_io::stanza::push_hex_pair(std::string const & k, std::string const & v) { for (std::string::const_iterator i = k.begin(); i != k.end(); ++i) - I(std::isalnum(*i) || *i == '_'); + I(is_alnum(*i) || *i == '_'); for (std::string::const_iterator i = v.begin(); i != v.end(); ++i) - I(std::isxdigit(*i)); + I(is_xdigit(*i)); entries.push_back(std::make_pair(k, "[" + v + "]")); if (k.size() > indent) @@ -79,10 +79,10 @@ std::string const & v) { for (std::string::const_iterator i = k.begin(); i != k.end(); ++i) - I(std::isalnum(*i) || *i == '_'); + I(is_alnum(*i) || *i == '_'); for (std::string::const_iterator i = v.begin(); i != v.end(); ++i) - I(std::isxdigit(*i)); + I(is_xdigit(*i)); entries.push_back(std::make_pair(k, escape(n) + " " + "[" + v + "]")); if (k.size() > indent) @@ -92,7 +92,7 @@ void basic_io::stanza::push_str_pair(std::string const & k, std::string const & v) { for (std::string::const_iterator i = k.begin(); i != k.end(); ++i) - I(std::isalnum(*i) || *i == '_'); + I(is_alnum(*i) || *i == '_'); entries.push_back(std::make_pair(k, escape(v))); if (k.size() > indent) @@ -108,7 +108,7 @@ std::vector const & v) { for (std::string::const_iterator i = k.begin(); i != k.end(); ++i) - I(std::isalnum(*i) || *i == '_'); + I(is_alnum(*i) || *i == '_'); std::string val; bool first = true; @@ -130,7 +130,7 @@ std::string const & v) { for (std::string::const_iterator i = k.begin(); i != k.end(); ++i) - I(std::isalnum(*i) || *i == '_'); + I(is_alnum(*i) || *i == '_'); entries.push_back(std::make_pair(k, escape(n) + " " + escape(v))); if (k.size() > indent) ============================================================ --- botan/gzip.h 18beaca49947b5a5727c6396ecb6091442e4fe62 +++ botan/gzip.h 0b391725625531a7f5c73d372355fb71e00c2b27 @@ -39,7 +39,7 @@ void start_msg(); void end_msg(); - Gzip_Compression(u32bit = 6); + Gzip_Compression(u32bit = 1); ~Gzip_Compression(); private: void clear(); ============================================================ --- constants.cc c7bc2142cf0e9861c9fd744f458da6e8c7a0323e +++ constants.cc f2baab50e8fa565411d429fe4a5099f0266c3384 @@ -37,6 +37,8 @@ // be tweaked further. size_t const db_version_cache_sz = 7 * (1 << 20); + size_t const db_roster_cache_sz = 7; + // size of a line of text in the log buffer, beyond which log lines will be // truncated. size_t const log_line_sz = 0x300; ============================================================ --- constants.hh 528788cf84a56e076de55706b0fb989f570a6e9f +++ constants.hh b8396f4cd024f5c1fd0d1cbd1975d2359e3b7771 @@ -48,6 +48,9 @@ // size in bytes of the database xdelta version reconstruction cache extern size_t const db_version_cache_sz; + // number of rosters in the database roster cache + extern size_t const db_roster_cache_sz; + // size of a line of text in the log buffer, beyond which log lines will be // truncated. extern size_t const log_line_sz; ============================================================ --- database.cc 28a3e88a03157ffd23eb6f9ebf3a9653969692c9 +++ database.cc 8e4d19ee73104472276b9f352fe88c512cc2a056 @@ -38,6 +38,9 @@ #include "xdelta.hh" #include "epoch.hh" +#undef _REENTRANT +#include "lru_cache.h" + // defined in schema.sql, converted to header: #include "schema.h" @@ -907,63 +910,13 @@ // static ticker cache_hits("vcache hits", "h", 1); -struct version_cache +struct datasz { - size_t capacity; - size_t use; - std::map, data> cache; - - version_cache() : capacity(constants::db_version_cache_sz), use(0) - { - srand(time(NULL)); - } - - void put(hexenc const & ident, data const & dat) - { - while (!cache.empty() - && use + dat().size() > capacity) - { - std::string key = (boost::format("%08.8x%08.8x%08.8x%08.8x%08.8x") - % rand() % rand() % rand() % rand() % rand()).str(); - std::map, data>::const_iterator i; - i = cache.lower_bound(hexenc(key)); - if (i == cache.end()) - { - // we can't find a random entry, probably there's only one - // entry and we missed it. delete first entry instead. - i = cache.begin(); - } - I(i != cache.end()); - I(use >= i->second().size()); - L(FL("version cache expiring %s\n") % i->first); - use -= i->second().size(); - cache.erase(i->first); - } - cache.insert(std::make_pair(ident, dat)); - use += dat().size(); - } - - bool exists(hexenc const & ident) - { - std::map, data>::const_iterator i; - i = cache.find(ident); - return i != cache.end(); - } - - bool get(hexenc const & ident, data & dat) - { - std::map, data>::const_iterator i; - i = cache.find(ident); - if (i == cache.end()) - return false; - // ++cache_hits; - L(FL("version cache hit on %s\n") % ident); - dat = i->second; - return true; - } + unsigned long operator()(data const & t) { return t().size(); } }; -static version_cache vcache; +static LRUCache, data, datasz> +vcache(constants::db_version_cache_sz); typedef vector< hexenc > version_path; @@ -996,8 +949,7 @@ string const & delta_table) { I(ident() != ""); - - if (vcache.get(ident, dat)) + if (vcache.fetch(ident, dat)) { return; } @@ -1112,7 +1064,7 @@ if (vcache.exists(curr)) { - I(vcache.get(curr, begin)); + I(vcache.fetch(curr, begin)); } else { @@ -1131,7 +1083,7 @@ { string tmp; app->finish(tmp); - vcache.put(curr, tmp); + vcache.insert(curr, tmp); } L(FL("following delta %s -> %s\n") % curr % nxt); @@ -1151,7 +1103,7 @@ calculate_ident(dat, final); I(final == ident); } - vcache.put(ident, dat); + vcache.insert(ident, dat); } @@ -2619,6 +2571,11 @@ get_version(ros_id, dat, data_table, delta_table); } + +static LRUCache > > +rcache(constants::db_roster_cache_sz); + void database::get_roster(revision_id const & rev_id, roster_t & roster, @@ -2631,12 +2588,23 @@ return; } + boost::shared_ptr > sp; + if (rcache.fetch(rev_id, sp)) + { + roster = sp->first; + marks = sp->second; + return; + } + data dat; hexenc ident; get_roster_id_for_revision(rev_id, ident); get_roster(ident, dat); read_roster_and_marking(dat, roster, marks); + sp = boost::shared_ptr > + (new pair(roster, marks)); + rcache.insert(rev_id, sp); } @@ -2650,6 +2618,13 @@ delta reverse_delta; hexenc old_id, new_id; + if (!rcache.exists(rev_id)) + { + boost::shared_ptr > sp + (new pair(roster, marks)); + rcache.insert(rev_id, sp); + } + write_roster_and_marking(roster, marks, new_data); calculate_ident(new_data, new_id); ============================================================ --- hmac.cc 6abde45d788c47d50bb104e76e26ee903b93be7f +++ hmac.cc 01430adfa6339a539db4327741755a05191bab7b @@ -30,7 +30,7 @@ I(pos + n <= str.size()); - Botan::Pipe p(new Botan::MAC_Filter("HMAC(SHA-1)", key, + Botan::Pipe p(new Botan::MAC_Filter("HMAC(SHA-160)", key, constants::sha1_digest_length)); p.start_msg(); p.write(chain_val); @@ -52,7 +52,7 @@ I(pos + n <= str.size()); - Botan::Pipe p(new Botan::MAC_Filter("HMAC(SHA-1)", key, + Botan::Pipe p(new Botan::MAC_Filter("HMAC(SHA-160)", key, constants::sha1_digest_length)); p.start_msg(); p.write(chain_val); ============================================================ --- transforms.cc d530e6715819b76c4173dc4afc3d3341da60b0a0 +++ transforms.cc dc064013066c8b3260accf2cde5f3216cc334ed5 @@ -80,22 +80,39 @@ // for use in hexenc encoding -string encode_hexenc(string const & in) +static inline void +encode_hexenc_inner(string::const_iterator i, + string::const_iterator end, + char *out) { - boost::scoped_array buf(new char[in.size() * 2]); static char const *tab = "0123456789abcdef"; - char *c = buf.get(); - for (string::const_iterator i = in.begin(); - i != in.end(); ++i) + for (; i != end; ++i) { - *c++ = tab[(*i >> 4) & 0xf]; - *c++ = tab[*i & 0xf]; + *out++ = tab[(*i >> 4) & 0xf]; + *out++ = tab[*i & 0xf]; } - return string(buf.get(), in.size() *2); } + -static inline char decode_hex_char(char c) +string encode_hexenc(string const & in) { + if (LIKELY(in.size() == constants::idlen / 2)) + { + char buf[constants::idlen]; + encode_hexenc_inner(in.begin(), in.end(), buf); + return string(buf, constants::idlen); + } + else + { + boost::scoped_array buf(new char[in.size() * 2]); + encode_hexenc_inner(in.begin(), in.end(), buf.get()); + return string(buf.get(), in.size() *2); + } +} + +static inline char +decode_hex_char(char c) +{ if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') @@ -103,22 +120,38 @@ I(false); } -string decode_hexenc(string const & in) +static inline void +decode_hexenc_inner(string::const_iterator i, + string::const_iterator end, + char *out) { - I(in.size() % 2 == 0); - boost::scoped_array buf(new char[in.size() / 2]); - char *c = buf.get(); - for (string::const_iterator i = in.begin(); - i != in.end(); ++i) + for (; i != end; ++i) { char t = decode_hex_char(*i++); t <<= 4; t |= decode_hex_char(*i); - *c++ = t; + *out++ = t; } - return string(buf.get(), in.size() / 2); } +string decode_hexenc(string const & in) +{ + + I(in.size() % 2 == 0); + if (LIKELY(in.size() == constants::idlen)) + { + char buf[constants::idlen / 2]; + decode_hexenc_inner(in.begin(), in.end(), buf); + return string(buf, constants::idlen / 2); + } + else + { + boost::scoped_array buf(new char[in.size() / 2]); + decode_hexenc_inner(in.begin(), in.end(), buf.get()); + return string(buf.get(), in.size() / 2); + } +} + struct lowerize { @@ -212,7 +245,7 @@ calculate_ident(data const & dat, hexenc & ident) { - Botan::Pipe p(new Botan::Hash_Filter("SHA-1")); + Botan::Pipe p(new Botan::Hash_Filter("SHA-160")); p.process_msg(dat()); id ident_decoded(p.read_all_as_string()); @@ -299,7 +332,7 @@ // no conversions necessary, use streaming form // Best to be safe and check it isn't a dir. assert_path_is_file(file); - Botan::Pipe p(new Botan::Hash_Filter("SHA-1"), new Botan::Hex_Encoder()); + Botan::Pipe p(new Botan::Hash_Filter("SHA-160"), new Botan::Hex_Encoder()); Botan::DataSource_Stream infile(file.as_external(), true); p.process_msg(infile);