#
#
# patch "ChangeLog"
# from [1a0ea70e9c673c115a12bc000804d467b71dd570]
# to [44bceb604bcb5b776129356bacdb9f793cd53403]
#
# patch "cset.hh"
# from [7bd817258ac15fa2b6e4c3473e9b97858721969d]
# to [2d06997a4c2475aa45550dbe967c09397575d908]
#
# patch "paths.cc"
# from [31e473f534a09b58cab1b815a4aaf533115c9265]
# to [b77cfcbc73e191883145492b2c7517f09365ce7f]
#
# patch "paths.hh"
# from [efc61a747969595d633387c4e3bb0705b7cd4a10]
# to [97283f2f57949e69eee43772c097e1e1707481bd]
#
# patch "roster.cc"
# from [9d8a98859ba806ec4946bac03e274b349f149696]
# to [f8dadb5487d8385b6572fdc658dd30f1c4481e3f]
#
# patch "vocab.cc"
# from [7cf1d84a1a30eebaeb5edfbb8c3a46d1598d56a4]
# to [f74bcb1c80f9c49a91923861feaef0b8693da0c6]
#
============================================================
--- ChangeLog 1a0ea70e9c673c115a12bc000804d467b71dd570
+++ ChangeLog 44bceb604bcb5b776129356bacdb9f793cd53403
@@ -1,3 +1,12 @@
+2006-05-17 Matt Johnston
+
+ * paths.cc (internal_string_to_split_path): move from cset.hh to
+ here, make it share the path splitting with fully_normalized_path().
+ (has_bad_chars): use lookup table to check bad chars.
+ (bad_component): don't compare string vs char* every time.
+ * roster.cc, cset.hh: adapt to internal_string_to_split_path moving to paths.cc and taking an out-parameter.
+ * vocab.cc (verify(hexenc)): use .empty() rather than == ""
+
2006-05-16 Matt Johnston
* roster.cc (parse_from): don't use lexical_cast, avoid copying a
============================================================
--- cset.hh 7bd817258ac15fa2b6e4c3473e9b97858721969d
+++ cset.hh 2d06997a4c2475aa45550dbe967c09397575d908
@@ -128,15 +128,4 @@
template <> void
dump(cset const & cs, std::string & out);
-
-// Some helpers.
-
-inline split_path
-internal_string_to_split_path(std::string const & str)
-{
- split_path sp;
- file_path_internal(str).split(sp);
- return sp;
-}
-
#endif // __CSET_HH__
============================================================
--- paths.cc 31e473f534a09b58cab1b815a4aaf533115c9265
+++ paths.cc b77cfcbc73e191883145492b2c7517f09365ce7f
@@ -113,21 +113,50 @@
static inline bool
bad_component(std::string const & component)
{
- if (component == "")
+ static const std::string dot(".");
+ static const std::string dotdot("..");
+ if (component.empty())
return true;
- if (component == ".")
+ if (component == dot)
return true;
- if (component == "..")
+ if (component == dotdot)
return true;
return false;
}
static inline bool
-fully_normalized_path(std::string const & path)
+has_bad_chars(std::string const & path)
{
- // FIXME: probably should make this a 256-byte static lookup table
- const static std::string bad_chars = std::string("\\") + constants::illegal_path_bytes + std::string(1, '\0');
-
+ static bool bad_chars_init(false);
+ static u8 bad_table[128] = {0};
+ if (UNLIKELY(!bad_chars_init))
+ {
+ std::string bad_chars = std::string("\\") + constants::illegal_path_bytes + std::string(1, '\0');
+ for (std::string::const_iterator b = bad_chars.begin(); b != bad_chars.end(); b++)
+ {
+ u8 x = (u8)*b;
+ I((x) < sizeof(bad_table));
+ bad_table[x] = 1;
+ }
+ bad_chars_init = true;
+ }
+
+ for (std::string::const_iterator c = path.begin(); c != path.end(); c++)
+ {
+ u8 x = (u8)*c;
+ if (x < sizeof(bad_table) && bad_table[x])
+ return true;
+ }
+ return false;
+}
+
+// fully_normalized_path_split performs a very similar function to file_path.split().
+// If want_split is set, the '/'-separated components of the path are
+// appended to sp.
+static inline bool
+fully_normalized_path_split(std::string const & path, bool want_split,
+ split_path & sp)
+{
// empty path is fine
if (path.empty())
return true;
@@ -136,7 +165,7 @@
if (path.size() > 1 && path[1] == ':')
return false;
// first scan for completely illegal bytes
- if (path.find_first_of(bad_chars) != std::string::npos)
+ if (has_bad_chars(path))
return false;
// now check each component
std::string::size_type start, stop;
@@ -146,17 +175,30 @@
stop = path.find('/', start);
if (stop == std::string::npos)
{
- if (bad_component(path.substr(start)))
+ std::string const & s(path.substr(start));
+ if (bad_component(s))
return false;
+ if (want_split)
+ sp.push_back(s);
break;
}
- if (bad_component(path.substr(start, stop - start)))
+ std::string const & s(path.substr(start, stop - start));
+ if (bad_component(s))
return false;
+ if (want_split)
+ sp.push_back(s);
start = stop + 1;
}
return true;
}
+static inline bool
+fully_normalized_path(std::string const & path)
+{
+ split_path sp;
+ return fully_normalized_path_split(path, false, sp);
+}
+
// This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
// paths, because on case insensitive filesystems, files put in any of them
// may end up in _MTN instead. This allows arbitrary code execution. A
@@ -189,6 +231,19 @@
&& !in_bookkeeping_dir(path));
}
+// equivalent to file_path_internal(path).split(sp), but
+// avoids splitting the string twice
+void
+internal_string_to_split_path(std::string const & path, split_path & sp)
+{
+ I(utf8_validate(path));
+ I(!in_bookkeeping_dir(path));
+ sp.clear();
+ sp.reserve(8);
+ sp.push_back(the_null_component);
+ I(fully_normalized_path_split(path, true, sp));
+}
+
file_path::file_path(file_path::source_type type, std::string const & path)
{
MM(path);
============================================================
--- paths.hh efc61a747969595d633387c4e3bb0705b7cd4a10
+++ paths.hh 97283f2f57949e69eee43772c097e1e1707481bd
@@ -254,4 +254,7 @@
typedef std::set path_set;
+// equivalent to file_path_internal(path).split(sp) but more efficient.
+void internal_string_to_split_path(std::string const & path, split_path & sp);
+
#endif
============================================================
--- roster.cc 9d8a98859ba806ec4946bac03e274b349f149696
+++ roster.cc f8dadb5487d8385b6572fdc658dd30f1c4481e3f
@@ -2516,7 +2516,9 @@
else
{
I(!pth.empty());
- attach_node(n->self, internal_string_to_split_path(pth));
+ split_path sp;
+ internal_string_to_split_path(pth, sp);
+ attach_node(n->self, sp);
}
// Non-dormant attrs
============================================================
--- vocab.cc 7cf1d84a1a30eebaeb5edfbb8c3a46d1598d56a4
+++ vocab.cc f74bcb1c80f9c49a91923861feaef0b8693da0c6
@@ -61,7 +61,7 @@
if (val.ok)
return;
- if (val() == "")
+ if (val().empty())
return;
N(val().size() == constants::idlen,