# # # patch "ChangeLog" # from [1a0ea70e9c673c115a12bc000804d467b71dd570] # to [44bceb604bcb5b776129356bacdb9f793cd53403] # # patch "cset.hh" # from [7bd817258ac15fa2b6e4c3473e9b97858721969d] # to [2d06997a4c2475aa45550dbe967c09397575d908] # # patch "paths.cc" # from [31e473f534a09b58cab1b815a4aaf533115c9265] # to [b77cfcbc73e191883145492b2c7517f09365ce7f] # # patch "paths.hh" # from [efc61a747969595d633387c4e3bb0705b7cd4a10] # to [97283f2f57949e69eee43772c097e1e1707481bd] # # patch "roster.cc" # from [9d8a98859ba806ec4946bac03e274b349f149696] # to [f8dadb5487d8385b6572fdc658dd30f1c4481e3f] # # patch "vocab.cc" # from [7cf1d84a1a30eebaeb5edfbb8c3a46d1598d56a4] # to [f74bcb1c80f9c49a91923861feaef0b8693da0c6] # ============================================================ --- ChangeLog 1a0ea70e9c673c115a12bc000804d467b71dd570 +++ ChangeLog 44bceb604bcb5b776129356bacdb9f793cd53403 @@ -1,3 +1,12 @@ +2006-05-17 Matt Johnston + + * paths.cc (internal_string_to_split_path): move from cset.hh to + here, make it share the path splitting with fully_normalized_path(). + (has_bad_chars): use lookup table to check bad chars. + (bad_component): don't compare string vs char* every time. + * roster.cc, cset.hh: internal_string_to_split_path changes + * vocab.cc (verify(hexenc)): use .empty() rather than == "" + 2006-05-16 Matt Johnston * roster.cc (parse_from): don't use lexical_cast, avoid copying a ============================================================ --- cset.hh 7bd817258ac15fa2b6e4c3473e9b97858721969d +++ cset.hh 2d06997a4c2475aa45550dbe967c09397575d908 @@ -128,15 +128,4 @@ template <> void dump(cset const & cs, std::string & out); - -// Some helpers. 
- -inline split_path -internal_string_to_split_path(std::string const & str) -{ - split_path sp; - file_path_internal(str).split(sp); - return sp; -} - #endif // __CSET_HH__ ============================================================ --- paths.cc 31e473f534a09b58cab1b815a4aaf533115c9265 +++ paths.cc b77cfcbc73e191883145492b2c7517f09365ce7f @@ -113,21 +113,50 @@ static inline bool bad_component(std::string const & component) { - if (component == "") + static const std::string dot("."); + static const std::string dotdot(".."); + if (component.empty()) return true; - if (component == ".") + if (component == dot) return true; - if (component == "..") + if (component == dotdot) return true; return false; } static inline bool -fully_normalized_path(std::string const & path) +has_bad_chars(std::string const & path) { - // FIXME: probably should make this a 256-byte static lookup table - const static std::string bad_chars = std::string("\\") + constants::illegal_path_bytes + std::string(1, '\0'); - + static bool bad_chars_init(false); + static u8 bad_table[128] = {0}; + if (UNLIKELY(!bad_chars_init)) + { + std::string bad_chars = std::string("\\") + constants::illegal_path_bytes + std::string(1, '\0'); + for (std::string::const_iterator b = bad_chars.begin(); b != bad_chars.end(); b++) + { + u8 x = (u8)*b; + I((x) < sizeof(bad_table)); + bad_table[x] = 1; + } + bad_chars_init = true; + } + + for (std::string::const_iterator c = path.begin(); c != path.end(); c++) + { + u8 x = (u8)*c; + if (x < sizeof(bad_table) && bad_table[x]) + return true; + } + return false; +} + +// fully_normalized_path_split performs a very similar function to file_path::split(). +// If want_split is set, sp will be filled with the '/'-separated +// components of the path. 
+static inline bool +fully_normalized_path_split(std::string const & path, bool want_split, + split_path & sp) +{ // empty path is fine if (path.empty()) return true; @@ -136,7 +165,7 @@ if (path.size() > 1 && path[1] == ':') return false; // first scan for completely illegal bytes - if (path.find_first_of(bad_chars) != std::string::npos) + if (has_bad_chars(path)) return false; // now check each component std::string::size_type start, stop; @@ -146,17 +175,30 @@ stop = path.find('/', start); if (stop == std::string::npos) { - if (bad_component(path.substr(start))) + std::string const & s(path.substr(start)); + if (bad_component(s)) return false; + if (want_split) + sp.push_back(s); break; } - if (bad_component(path.substr(start, stop - start))) + std::string const & s(path.substr(start, stop - start)); + if (bad_component(s)) return false; + if (want_split) + sp.push_back(s); start = stop + 1; } return true; } +static inline bool +fully_normalized_path(std::string const & path) +{ + split_path sp; + return fully_normalized_path_split(path, false, sp); +} + // This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping // paths, because on case insensitive filesystems, files put in any of them // may end up in _MTN instead. This allows arbitrary code execution. 
A @@ -189,6 +231,19 @@ && !in_bookkeeping_dir(path)); } +// equivalent to file_path_internal(path).split(sp), but +// avoids splitting the string twice +void +internal_string_to_split_path(std::string const & path, split_path & sp) +{ + I(utf8_validate(path)); + I(!in_bookkeeping_dir(path)); + sp.clear(); + sp.reserve(8); + sp.push_back(the_null_component); + I(fully_normalized_path_split(path, true, sp)); +} + file_path::file_path(file_path::source_type type, std::string const & path) { MM(path); ============================================================ --- paths.hh efc61a747969595d633387c4e3bb0705b7cd4a10 +++ paths.hh 97283f2f57949e69eee43772c097e1e1707481bd @@ -254,4 +254,7 @@ typedef std::set path_set; +// equivalent to file_path_internal(path).split(sp) but more efficient. +void internal_string_to_split_path(std::string const & path, split_path & sp); + #endif ============================================================ --- roster.cc 9d8a98859ba806ec4946bac03e274b349f149696 +++ roster.cc f8dadb5487d8385b6572fdc658dd30f1c4481e3f @@ -2516,7 +2516,9 @@ else { I(!pth.empty()); - attach_node(n->self, internal_string_to_split_path(pth)); + split_path sp; + internal_string_to_split_path(pth, sp); + attach_node(n->self, sp); } // Non-dormant attrs ============================================================ --- vocab.cc 7cf1d84a1a30eebaeb5edfbb8c3a46d1598d56a4 +++ vocab.cc f74bcb1c80f9c49a91923861feaef0b8693da0c6 @@ -61,7 +61,7 @@ if (val.ok) return; - if (val() == "") + if (val().empty()) return; N(val().size() == constants::idlen,