# # # add_file "paths.cc" # content [dde6659e387c8890c96278ea29e3196bdf588139] # ============================================================ --- paths.cc dde6659e387c8890c96278ea29e3196bdf588139 +++ paths.cc dde6659e387c8890c96278ea29e3196bdf588139 @@ -0,0 +1,1948 @@ +// Copyright (C) 2005 Nathaniel Smith +// +// This program is made available under the GNU GPL version 2.0 or +// greater. See the accompanying file COPYING for details. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the +// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. + +#include "base.hh" +#include + +#include "paths.hh" +#include "file_io.hh" +#include "charset.hh" +#include "lua.hh" + +using std::exception; +using std::ostream; +using std::ostringstream; +using std::string; +using std::vector; + +// some structure to ensure we aren't doing anything broken when resolving +// filenames. the idea is to make sure +// -- we don't depend on the existence of something before it has been set +// -- we don't re-set something that has already been used +// -- sometimes, we use the _non_-existence of something, so we shouldn't +// set anything whose un-setted-ness has already been used +template +struct access_tracker +{ + void set(T const & val, bool may_be_initialized) + { + I(may_be_initialized || !initialized); + I(!very_uninitialized); + I(!used); + initialized = true; + value = val; + } + T const & get() + { + I(initialized); + used = true; + return value; + } + T const & get_but_unused() + { + I(initialized); + return value; + } + void may_not_initialize() + { + I(!initialized); + very_uninitialized = true; + } + // for unit tests + void unset() + { + used = initialized = very_uninitialized = false; + } + T value; + bool initialized, used, very_uninitialized; + access_tracker() : initialized(false), used(false), very_uninitialized(false) {}; +}; + +// paths to use in interpreting paths from various sources, +// conceptually: +// working_root / 
initial_rel_path == initial_abs_path + +// initial_abs_path is for interpreting relative system_path's +static access_tracker initial_abs_path; +// initial_rel_path is for interpreting external file_path's +// we used to make it a file_path, but then you can't run monotone from +// inside the _MTN/ dir (even when referring to files outside the _MTN/ +// dir). use of a bare string requires some caution but does work. +static access_tracker initial_rel_path; +// working_root is for converting file_path's and bookkeeping_path's to +// system_path's. +static access_tracker working_root; + +void +save_initial_path() +{ + // FIXME: BUG: this only works if the current working dir is in utf8 + initial_abs_path.set(system_path(get_current_working_dir()), false); + L(FL("initial abs path is: %s") % initial_abs_path.get_but_unused()); +} + +/////////////////////////////////////////////////////////////////////////// +// verifying that internal paths are indeed normalized. +// this code must be superfast +/////////////////////////////////////////////////////////////////////////// + +// normalized means: +// -- / as path separator +// -- not an absolute path (on either posix or win32) +// operationally, this means: first character != '/', first character != '\', +// second character != ':' +// -- no illegal characters +// -- 0x00 -- 0x1f, 0x7f, \ are the illegal characters. \ is illegal +// unconditionally to prevent people checking in files on posix that +// have a different interpretation on win32 +// -- (may want to allow 0x0a and 0x0d (LF and CR) in the future, but this +// is blocked on manifest format changing) +// (also requires changes to 'automate inventory', possibly others, to +// handle quoting) +// -- no doubled /'s +// -- no trailing / +// -- no "." or ".." 
path components + +static inline bool +bad_component(string const & component) +{ + if (component.empty()) + return true; + if (component == ".") + return true; + if (component == "..") + return true; + return false; +} + +static inline bool +has_bad_chars(string const & path) +{ + for (string::const_iterator c = path.begin(); LIKELY(c != path.end()); c++) + { + // char is often a signed type; convert to unsigned to ensure that + // bytes 0x80-0xff are considered > 0x1f. + u8 x = (u8)*c; + // 0x5c is '\\'; we use the hex constant to make the dependency on + // ASCII encoding explicit. + if (UNLIKELY(x <= 0x1f || x == 0x5c || x == 0x7f)) + return true; + } + return false; +} + +// as above, but disallows / as well. +static inline bool +has_bad_component_chars(string const & pc) +{ + for (string::const_iterator c = pc.begin(); LIKELY(c != pc.end()); c++) + { + // char is often a signed type; convert to unsigned to ensure that + // bytes 0x80-0xff are considered > 0x1f. + u8 x = (u8)*c; + // 0x2f is '/' and 0x5c is '\\'; we use hex constants to make the + // dependency on ASCII encoding explicit. + if (UNLIKELY(x <= 0x1f || x == 0x2f || x == 0x5c || x == 0x7f)) + return true; + } + return false; + +} + +static bool +is_absolute_here(string const & path) +{ + if (path.empty()) + return false; + if (path[0] == '/') + return true; +#ifdef WIN32 + if (path[0] == '\\') + return true; + if (path.size() > 1 && path[1] == ':') + return true; +#endif + return false; +} + +static inline bool +is_absolute_somewhere(string const & path) +{ + if (path.empty()) + return false; + if (path[0] == '/') + return true; + if (path[0] == '\\') + return true; + if (path.size() > 1 && path[1] == ':') + return true; + return false; +} + +// fully_normalized_path verifies a complete pathname for validity and +// having been properly normalized (as if by normalize_path, below). 
+static inline bool
+fully_normalized_path(string const & path)
+{
+  // empty path is fine
+  if (path.empty())
+    return true;
+  // could use is_absolute_somewhere, but this is the only part of it that
+  // wouldn't be redundant
+  if (path.size() > 1 && path[1] == ':')
+    return false;
+  // first scan for completely illegal bytes
+  if (has_bad_chars(path))
+    return false;
+  // now check each component.  a leading, doubled or trailing '/' shows up
+  // here as an empty component, which bad_component rejects.
+  string::size_type start = 0, stop;
+  while (1)
+    {
+      stop = path.find('/', start);
+      if (stop == string::npos)
+        break;
+      string const & s(path.substr(start, stop - start));
+      if (bad_component(s))
+        return false;
+      start = stop + 1;
+    }
+
+  // whatever follows the last '/' (or the whole path) is the last component
+  string const & s(path.substr(start));
+  return !bad_component(s);
+}
+
+// This function considers _MTN, _MTn, _MtN, _mtn etc. to all be bookkeeping
+// paths, because on case insensitive filesystems, files put in any of them
+// may end up in _MTN instead.  This allows arbitrary code execution.  A
+// better solution would be to fix this in the working directory writing
+// code -- this prevents all-unix projects from naming things "_mtn", which
+// is less rude than when the bookkeeping root was "MT", but still rude --
+// but as a temporary security kluge it works.
+static inline bool
+in_bookkeeping_dir(string const & path)
+{
+  if (path.size() == 0 || (path[0] != '_'))
+    return false;
+  if (path.size() == 1 || (path[1] != 'M' && path[1] != 'm'))
+    return false;
+  if (path.size() == 2 || (path[2] != 'T' && path[2] != 't'))
+    return false;
+  if (path.size() == 3 || (path[3] != 'N' && path[3] != 'n'))
+    return false;
+  // if we've gotten here, the first four characters are _, M, T, and N, in
+  // either upper or lower case.  So if that is the whole path, or else if it
+  // continues but the next character is /, then this is a bookkeeping path.
+  if (path.size() == 4 || (path[4] == '/'))
+    return true;
+  return false;
+}
+
+// a valid internal path is fully normalized and not inside the
+// bookkeeping directory.
+static inline bool
+is_valid_internal(string const & path)
+{
+  return (fully_normalized_path(path)
+          && !in_bookkeeping_dir(path));
+}
+
+// collapse a path textually: split off an absolute-path leader ("/", "//",
+// or on WIN32 "X:" / "X:/"), drop "." elements and runs of '/', cancel
+// "foo/.." pairs, and rejoin the remaining elements with single slashes.
+// leading ".." elements are kept.  on WIN32 backslashes are first turned
+// into forward slashes.
+static string
+normalize_path(string const & in)
+{
+  string inT = in;
+  string leader;
+  MM(inT);
+
+#ifdef WIN32
+  // the first thing we do is kill all the backslashes
+  for (string::iterator i = inT.begin(); i != inT.end(); ++i)
+    if (*i == '\\')
+      *i = '/';
+#endif
+
+  if (is_absolute_here (inT))
+    {
+      if (inT[0] == '/')
+        {
+          leader = "/";
+          inT = inT.substr(1);
+
+          if (inT.size() > 0 && inT[0] == '/')
+            {
+              // if there are exactly two slashes at the beginning they
+              // are both preserved.  three or more are the same as one.
+              string::size_type f = inT.find_first_not_of("/");
+              if (f == string::npos)
+                f = inT.size();
+              if (f == 1)
+                leader = "//";
+              inT = inT.substr(f);
+            }
+        }
+#ifdef WIN32
+      else
+        {
+          I(inT[1] == ':');
+          if (inT.size() > 2 && inT[2] == '/')
+            {
+              leader = inT.substr(0, 3);
+              inT = inT.substr(3);
+            }
+          else
+            {
+              leader = inT.substr(0, 2);
+              inT = inT.substr(2);
+            }
+        }
+#endif
+
+      I(!is_absolute_here(inT));
+      if (inT.size() == 0)
+        return leader;
+    }
+
+  // elements kept so far, in order
+  vector<string> stack;
+  string::const_iterator head, tail;
+  string::size_type size_estimate = leader.size();
+  for (head = inT.begin(); head != inT.end(); head = tail)
+    {
+      // [head, tail) is the next element ...
+      tail = head;
+      while (tail != inT.end() && *tail != '/')
+        ++tail;
+
+      string elt(head, tail);
+      // ... and then tail skips every slash that follows it
+      while (tail != inT.end() && *tail == '/')
+        ++tail;
+
+      if (elt == ".")
+        continue;
+      // remove foo/.. element pairs; leave leading .. components alone
+      if (elt == ".." && !stack.empty() && stack.back() != "..")
+        {
+          stack.pop_back();
+          continue;
+        }
+
+      size_estimate += elt.size() + 1;
+      stack.push_back(elt);
+    }
+
+  leader.reserve(size_estimate);
+  for (vector<string>::const_iterator i = stack.begin(); i != stack.end(); ++i)
+    {
+      if (i != stack.begin())
+        leader += "/";
+      leader += *i;
+    }
+  return leader;
+}
+
+// lua binding: takes the string at stack index -1 and pushes the result of
+// normalize_path on it; returns one value.
+LUAEXT(normalize_path, )
+{
+  const char *pathstr = luaL_checkstring(L, -1);
+  N(pathstr, F("%s called with an invalid parameter") % "normalize_path");
+
+  lua_pushstring(L, normalize_path(string(pathstr)).c_str());
+  return 1;
+}
+
+// turn a user-supplied path into an internal one, stored in normalized.
+// fails with N() (a user-level error) when the path is unacceptable.
+static void
+normalize_external_path(string const & path, string & normalized)
+{
+  if (!initial_rel_path.initialized)
+    {
+      // we are not in a workspace; treat this as an internal
+      // path, and set the access_tracker() into a very uninitialised
+      // state so that we will hit an exception if we do eventually
+      // enter a workspace
+      initial_rel_path.may_not_initialize();
+      normalized = path;
+      N(is_valid_internal(path),
+        F("path '%s' is invalid") % path);
+    }
+  else
+    {
+      N(!path.empty(), F("empty path '%s' is invalid") % path);
+      N(!is_absolute_here(path), F("absolute path '%s' is invalid") % path);
+      string base;
+      try
+        {
+          // interpret the path relative to where the user started
+          base = initial_rel_path.get();
+          if (base == "")
+            normalized = normalize_path(path);
+          else
+            normalized = normalize_path(base + "/" + path);
+        }
+      catch (exception &)
+        {
+          N(false, F("path '%s' is invalid") % path);
+        }
+      if (normalized == ".")
+        normalized = string("");
+      // anything that still has "..", or bad bytes, is rejected here
+      N(fully_normalized_path(normalized),
+        F("path '%s' is invalid") % normalized);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////
+// single path component handling.
+///////////////////////////////////////////////////////////////////////////
+
+// these constructors confirm that what they are passed is a legitimate
+// component.
note that the empty string is a legitimate component, +// but is not acceptable to bad_component (above) and therefore we have +// to open-code most of those checks. +path_component::path_component(utf8 const & d) + : data(d()) +{ + MM(data); + I(!has_bad_component_chars(data) && data != "." && data != ".."); +} + +path_component::path_component(string const & d) + : data(d) +{ + MM(data); + I(utf8_validate(utf8(data)) + && !has_bad_component_chars(data) + && data != "." && data != ".."); +} + +path_component::path_component(char const * d) + : data(d) +{ + MM(data); + I(utf8_validate(utf8(data)) + && !has_bad_component_chars(data) + && data != "." && data != ".."); +} + +std::ostream & operator<<(std::ostream & s, path_component const & pc) +{ + return s << pc(); +} + +template <> void dump(path_component const & pc, std::string & to) +{ + to = pc(); +} + +/////////////////////////////////////////////////////////////////////////// +// complete paths to files within a working directory +/////////////////////////////////////////////////////////////////////////// + +file_path::file_path(file_path::source_type type, string const & path) +{ + MM(path); + I(utf8_validate(utf8(path))); + if (type == external) + { + string normalized; + normalize_external_path(path, normalized); + N(!in_bookkeeping_dir(normalized), + F("path '%s' is in bookkeeping dir") % normalized); + data = normalized; + } + else + data = path; + MM(data); + I(is_valid_internal(data)); +} + +file_path::file_path(file_path::source_type type, utf8 const & path) +{ + MM(path); + I(utf8_validate(path)); + if (type == external) + { + string normalized; + normalize_external_path(path(), normalized); + N(!in_bookkeeping_dir(normalized), + F("path '%s' is in bookkeeping dir") % normalized); + data = normalized; + } + else + data = path(); + MM(data); + I(is_valid_internal(data)); +} + +bookkeeping_path::bookkeeping_path(string const & path) +{ + I(fully_normalized_path(path)); + I(in_bookkeeping_dir(path)); + 
data = path; +} + +bool +bookkeeping_path::external_string_is_bookkeeping_path(utf8 const & path) +{ + // FIXME: this charset casting everywhere is ridiculous + string normalized; + normalize_external_path(path(), normalized); + return internal_string_is_bookkeeping_path(utf8(normalized)); +} +bool bookkeeping_path::internal_string_is_bookkeeping_path(utf8 const & path) +{ + return in_bookkeeping_dir(path()); +} + +/////////////////////////////////////////////////////////////////////////// +// splitting/joining +// this code must be superfast +// it depends very much on knowing that it can only be applied to fully +// normalized, relative, paths. +/////////////////////////////////////////////////////////////////////////// + +// this peels off the last component of any path and returns it. +// the last component of a path with no slashes in it is the complete path. +// the last component of a path referring to the root directory is an +// empty string. +path_component +any_path::basename() const +{ + string const & s = data; + string::size_type sep = s.rfind('/'); +#ifdef WIN32 + if (sep == string::npos && s.size()>= 2 && s[1] == ':') + sep = 1; +#endif + if (sep == string::npos) + return path_component(s, 0); // force use of short circuit + if (sep == s.size()) + return path_component(); + return path_component(s, sep + 1); +} + +// this returns all but the last component of any path. It has to take +// care at the root. 
+any_path
+any_path::dirname() const
+{
+  string const & s = data;
+  string::size_type sep = s.rfind('/');
+#ifdef WIN32
+  // a drive prefix without a slash ("c:foo") separates at the colon
+  if (sep == string::npos && s.size()>= 2 && s[1] == ':')
+    sep = 1;
+#endif
+  // no separator at all: the directory part is the empty path
+  if (sep == string::npos)
+    return any_path();
+
+  // dirname() of the root directory is itself
+  if (sep == s.size() - 1)
+    return *this;
+
+  // dirname() of a direct child of the root is the root.  the roots are
+  // "/" (sep == 0), "//" (sep == 1 with a leading slash) and, on WIN32,
+  // "X:" or "X:/" (sep == 1 or 2 with a colon in second position).  the
+  // separator itself is kept in these cases.  a relative path such as
+  // "a/b" also has sep == 1 but must not be treated as a root child, hence
+  // the test on s[0] and the explicit grouping of the WIN32 condition.
+  if (sep == 0 || (sep == 1 && s[0] == '/')
+#ifdef WIN32
+      || ((sep == 1 || sep == 2) && s[1] == ':')
+#endif
+      )
+    return any_path(s, 0, sep+1);
+
+  return any_path(s, 0, sep);
+}
+
+// these variations exist to get the return type right.  also,
+// file_path dirname() can be a little simpler.
+file_path
+file_path::dirname() const
+{
+  string const & s = data;
+  string::size_type sep = s.rfind('/');
+  if (sep == string::npos)
+    return file_path();
+  return file_path(s, 0, sep);
+}
+
+system_path
+system_path::dirname() const
+{
+  string const & s = data;
+  string::size_type sep = s.rfind('/');
+#ifdef WIN32
+  // a drive prefix without a slash ("c:foo") separates at the colon
+  if (sep == string::npos && s.size()>= 2 && s[1] == ':')
+    sep = 1;
+#endif
+  // a system_path is expected to contain a separator
+  I(sep != string::npos);
+
+  // dirname() of the root directory is itself
+  if (sep == s.size() - 1)
+    return *this;
+
+  // dirname() of a direct child of the root is the root; the roots are
+  // "/", "//" and, on WIN32, "X:" or "X:/".  the separator is kept.
+  if (sep == 0 || (sep == 1 && s[0] == '/')
+#ifdef WIN32
+      || ((sep == 1 || sep == 2) && s[1] == ':')
+#endif
+      )
+    return system_path(s, 0, sep+1);
+
+  return system_path(s, 0, sep);
+}
+
+
+// produce dirname and basename at the same time
+void
+file_path::dirname_basename(file_path & dir, path_component & base) const
+{
+  string const & s = data;
+  string::size_type sep = s.rfind('/');
+  if (sep == string::npos)
+    {
+      // single component: empty directory, whole path as basename
+      dir = file_path();
+      base = path_component(s, 0);
+    }
+  else
+    {
+      I(sep < s.size() - 1); // last component must have at least one char
+      dir = file_path(s, 0, sep);
+      base = path_component(s, sep + 1);
+    }
+}
+
+// count the number of /-separated components of the path.
+unsigned int +file_path::depth() const +{ + if (data.empty()) + return 0; + + unsigned int components = 1; + for (string::const_iterator p = data.begin(); p != data.end(); p++) + if (*p == '/') + components++; + + return components; +} + +/////////////////////////////////////////////////////////////////////////// +// localizing file names (externalizing them) +// this code must be superfast when there is no conversion needed +/////////////////////////////////////////////////////////////////////////// + +string +any_path::as_external() const +{ +#ifdef __APPLE__ + // on OS X paths for the filesystem/kernel are UTF-8 encoded, regardless of + // locale. + return data; +#else + // on normal systems we actually have some work to do, alas. + // not much, though, because utf8_to_system_string does all the hard work. + // it is carefully optimized. do not screw it up. + external out; + utf8_to_system_strict(utf8(data), out); + return out(); +#endif +} + +/////////////////////////////////////////////////////////////////////////// +// writing out paths +/////////////////////////////////////////////////////////////////////////// + +ostream & +operator <<(ostream & o, any_path const & a) +{ + o << a.as_internal(); + return o; +} + +template <> +void dump(file_path const & p, string & out) +{ + ostringstream oss; + oss << p << '\n'; + out = oss.str(); +} + +template <> +void dump(system_path const & p, string & out) +{ + ostringstream oss; + oss << p << '\n'; + out = oss.str(); +} + +template <> +void dump(bookkeeping_path const & p, string & out) +{ + ostringstream oss; + oss << p << '\n'; + out = oss.str(); +} + +/////////////////////////////////////////////////////////////////////////// +// path manipulation +// this code's speed does not matter much +/////////////////////////////////////////////////////////////////////////// + +// relies on its arguments already being validated, except that you may not +// append the empty path component, and if you are appending to the 
empty +// path, you may not create an absolute path or a path into the bookkeeping +// directory. +file_path +file_path::operator /(path_component const & to_append) const +{ + I(!to_append.empty()); + if (empty()) + { + string const & s = to_append(); + I(!is_absolute_somewhere(s) && !in_bookkeeping_dir(s)); + return file_path(s, 0, string::npos); + } + else + return file_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append(), 0, string::npos); +} + +// similarly, but even less checking is needed. +file_path +file_path::operator /(file_path const & to_append) const +{ + I(!to_append.empty()); + if (empty()) + return to_append; + return file_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append.as_internal(), 0, string::npos); +} + +bookkeeping_path +bookkeeping_path::operator /(path_component const & to_append) const +{ + I(!to_append.empty()); + I(!empty()); + return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append(), 0, string::npos); +} + +system_path +system_path::operator /(path_component const & to_append) const +{ + I(!to_append.empty()); + I(!empty()); + return system_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append(), 0, string::npos); +} + +any_path +any_path::operator /(path_component const & to_append) const +{ + I(!to_append.empty()); + I(!empty()); + return any_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append(), 0, string::npos); +} + +// these take strings and validate +bookkeeping_path +bookkeeping_path::operator /(char const * to_append) const +{ + I(!is_absolute_somewhere(to_append)); + I(!empty()); + return bookkeeping_path(((*(data.end() - 1) == '/') ? data : data + "/") + + to_append); +} + +system_path +system_path::operator /(char const * to_append) const +{ + I(!empty()); + I(!is_absolute_here(to_append)); + return system_path(((*(data.end() - 1) == '/') ? 
data : data + "/") + + to_append); +} + +/////////////////////////////////////////////////////////////////////////// +// system_path +/////////////////////////////////////////////////////////////////////////// + +system_path::system_path(any_path const & other, bool in_true_workspace) +{ + if (is_absolute_here(other.as_internal())) + // another system_path. the normalizing isn't really necessary, but it + // makes me feel warm and fuzzy. + data = normalize_path(other.as_internal()); + else + { + system_path wr; + if (in_true_workspace) + wr = working_root.get(); + else + wr = working_root.get_but_unused(); + data = normalize_path(wr.as_internal() + "/" + other.as_internal()); + } +} + +static inline string const_system_path(utf8 const & path) +{ + N(!path().empty(), F("invalid path ''")); + string expanded = tilde_expand(path()); + if (is_absolute_here(expanded)) + return normalize_path(expanded); + else + return normalize_path(initial_abs_path.get().as_internal() + + "/" + path()); +} + +system_path::system_path(string const & path) +{ + data = const_system_path(utf8(path)); +} + +system_path::system_path(utf8 const & path) +{ + data = const_system_path(utf8(path)); +} + +/////////////////////////////////////////////////////////////////////////// +// workspace (and path root) handling +/////////////////////////////////////////////////////////////////////////// + +static bool +find_bookdir(system_path const & root, path_component const & bookdir, + system_path & current, string & removed) +{ + current = initial_abs_path.get(); + removed.clear(); + + // check that the current directory is below the specified search root + if (current.as_internal().find(root.as_internal()) != 0) + { + W(F("current directory '%s' is not below root '%s'") % current % root); + return false; + } + + L(FL("searching for '%s' directory with root '%s'") % bookdir % root); + + system_path check; + while (!(current == root)) + { + check = current / bookdir; + switch (get_path_status(check)) + 
{ + case path::nonexistent: + L(FL("'%s' not found in '%s' with '%s' removed") + % bookdir % current % removed); + if (removed.empty()) + removed = current.basename()(); + else + removed = current.basename()() + "/" + removed; + current = current.dirname(); + continue; + + case path::file: + L(FL("'%s' is not a directory") % check); + return false; + + case path::directory: + goto found; + } + } + + // if we get here, we have hit the root; try once more + check = current / bookdir; + switch (get_path_status(check)) + { + case path::nonexistent: + L(FL("'%s' not found in '%s' with '%s' removed") + % bookdir % current % removed); + return false; + + case path::file: + L(FL("'%s' is not a directory") % check); + return false; + + case path::directory: + goto found; + } + return false; + + found: + // check for _MTN/. and _MTN/.. to see if mt dir is readable + try + { + if (!path_exists(check / ".") || !path_exists(check / "..")) + { + L(FL("problems with '%s' (missing '.' or '..')") % check); + return false; + } + } + catch(exception &) + { + L(FL("problems with '%s' (cannot check for '.' 
or '..')") % check); + return false; + } + return true; +} + + +bool +find_and_go_to_workspace(string const & search_root) +{ + system_path root, current; + string removed; + + if (search_root.empty()) + { +#ifdef WIN32 + std::string cur_str = get_current_working_dir(); + current = cur_str; + if (cur_str[0] == '/' || cur_str[0] == '\\') + { + if (cur_str.size() > 1 && (cur_str[1] == '/' || cur_str[1] == '\\')) + { + // UNC name + string::size_type uncend = cur_str.find_first_of("\\/", 2); + if (uncend == string::npos) + root = system_path(cur_str + "/"); + else + root = system_path(cur_str.substr(0, uncend)); + } + else + root = system_path("/"); + } + else if (cur_str.size() > 1 && cur_str[1] == ':') + { + root = system_path(cur_str.substr(0,2) + "/"); + } + else I(false); +#else + root = system_path("/"); +#endif + } + else + { + root = system_path(search_root); + L(FL("limiting search for workspace to %s") % root); + + require_path_is_directory(root, + F("search root '%s' does not exist") % root, + F("search root '%s' is not a directory") % root); + } + + // first look for the current name of the bookkeeping directory. + // if we don't find it, look for it under the old name, so that + // migration has a chance to work. 
+ if (!find_bookdir(root, bookkeeping_root_component, current, removed)) + if (!find_bookdir(root, old_bookkeeping_root_component, current, removed)) + return false; + + working_root.set(current, true); + initial_rel_path.set(removed, true); + + L(FL("working root is '%s'") % working_root.get_but_unused()); + L(FL("initial relative path is '%s'") % initial_rel_path.get_but_unused()); + + change_current_working_dir(working_root.get_but_unused()); + + return true; +} + +void +go_to_workspace(system_path const & new_workspace) +{ + working_root.set(new_workspace, true); + initial_rel_path.set(string(), true); + change_current_working_dir(new_workspace); +} + +void +mark_std_paths_used(void) +{ + working_root.get(); + initial_rel_path.get(); +} + +/////////////////////////////////////////////////////////////////////////// +// tests +/////////////////////////////////////////////////////////////////////////// + +#ifdef BUILD_UNIT_TESTS +#include "unit_tests.hh" +#include "randomizer.hh" + +using std::logic_error; + +UNIT_TEST(paths, path_component) +{ + char const * const baddies[] = {".", + "..", + "/foo", + "\\foo", + "foo/bar", + "foo\\bar", + 0 }; + + // these would not be okay in a full file_path, but are okay here. + char const * const goodies[] = {"c:foo", + "_mtn", + "_mtN", + "_mTn", + "_Mtn", + "_MTn", + "_MtN", + "_MTN", + 0 }; + + + for (char const * const * c = baddies; *c; ++c) + { + // the comparison prevents the compiler from eliminating the + // expression. 
+ UNIT_TEST_CHECK_THROW((path_component(*c)()) == *c, logic_error); + } + for (char const * const *c = goodies; *c; ++c) + { + path_component p(*c); + UNIT_TEST_CHECK_THROW(file_path() / p, logic_error); + } + + UNIT_TEST_CHECK_THROW(file_path_internal("foo") / path_component(), + logic_error); +} + + +UNIT_TEST(paths, file_path_internal) +{ + char const * const baddies[] = {"/foo", + "foo//bar", + "foo/../bar", + "../bar", + "_MTN", + "_MTN/blah", + "foo/bar/", + "foo/bar/.", + "foo/bar/./", + "foo/./bar", + "./foo", + ".", + "..", + "c:\\foo", + "c:foo", + "c:/foo", + // some baddies made bad by a security kluge -- + // see the comment in in_bookkeeping_dir + "_mtn", + "_mtN", + "_mTn", + "_Mtn", + "_MTn", + "_MtN", + "_mTN", + "_mtn/foo", + "_mtN/foo", + "_mTn/foo", + "_Mtn/foo", + "_MTn/foo", + "_MtN/foo", + "_mTN/foo", + 0 }; + initial_rel_path.unset(); + initial_rel_path.set(string(), true); + for (char const * const * c = baddies; *c; ++c) + { + UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error); + } + initial_rel_path.unset(); + initial_rel_path.set("blah/blah/blah", true); + for (char const * const * c = baddies; *c; ++c) + { + UNIT_TEST_CHECK_THROW(file_path_internal(*c), logic_error); + } + + UNIT_TEST_CHECK(file_path().empty()); + UNIT_TEST_CHECK(file_path_internal("").empty()); + + char const * const goodies[] = {"", + "a", + "foo", + "foo/bar/baz", + "foo/bar.baz", + "foo/with-hyphen/bar", + "foo/with_underscore/bar", + "foo/with,address@hidden/bar", + ".foo/bar", + "..foo/bar", + "_MTNfoo/bar", + "foo:bar", + 0 }; + + for (int i = 0; i < 2; ++i) + { + initial_rel_path.unset(); + initial_rel_path.set(i ? 
string() + : string("blah/blah/blah"), + true); + for (char const * const * c = goodies; *c; ++c) + { + file_path fp = file_path_internal(*c); + UNIT_TEST_CHECK(fp.as_internal() == *c); + UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp); + } + } + + initial_rel_path.unset(); +} + +static void check_fp_normalizes_to(char const * before, char const * after) +{ + L(FL("check_fp_normalizes_to: '%s' -> '%s'") % before % after); + file_path fp = file_path_external(utf8(before)); + L(FL(" (got: %s)") % fp); + UNIT_TEST_CHECK(fp.as_internal() == after); + UNIT_TEST_CHECK(file_path_internal(fp.as_internal()) == fp); + // we compare after to the external form too, since as far as we know + // relative normalized posix paths are always good win32 paths too + UNIT_TEST_CHECK(fp.as_external() == after); +} + +UNIT_TEST(paths, file_path_external_null_prefix) +{ + initial_rel_path.unset(); + initial_rel_path.set(string(), true); + + char const * const baddies[] = {"/foo", + "../bar", + "_MTN/blah", + "_MTN", + "//blah", + "\\foo", + "..", + "c:\\foo", + "c:foo", + "c:/foo", + "", + // some baddies made bad by a security kluge -- + // see the comment in in_bookkeeping_dir + "_mtn", + "_mtN", + "_mTn", + "_Mtn", + "_MTn", + "_MtN", + "_mTN", + "_mtn/foo", + "_mtN/foo", + "_mTn/foo", + "_Mtn/foo", + "_MTn/foo", + "_MtN/foo", + "_mTN/foo", + 0 }; + for (char const * const * c = baddies; *c; ++c) + { + L(FL("test_file_path_external_null_prefix: trying baddie: %s") % *c); + UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure); + } + + check_fp_normalizes_to("a", "a"); + check_fp_normalizes_to("foo", "foo"); + check_fp_normalizes_to("foo/bar", "foo/bar"); + check_fp_normalizes_to("foo/bar/baz", "foo/bar/baz"); + check_fp_normalizes_to("foo/bar.baz", "foo/bar.baz"); + check_fp_normalizes_to("foo/with-hyphen/bar", "foo/with-hyphen/bar"); + check_fp_normalizes_to("foo/with_underscore/bar", "foo/with_underscore/bar"); + check_fp_normalizes_to(".foo/bar", 
".foo/bar"); + check_fp_normalizes_to("..foo/bar", "..foo/bar"); + check_fp_normalizes_to(".", ""); +#ifndef WIN32 + check_fp_normalizes_to("foo:bar", "foo:bar"); +#endif + check_fp_normalizes_to("foo/with,address@hidden/bar", + "foo/with,address@hidden/bar"); + + // Why are these tests with // in them commented out? because boost::fs + // sucks and can't normalize them. FIXME. + //check_fp_normalizes_to("foo//bar", "foo/bar"); + check_fp_normalizes_to("foo/../bar", "bar"); + check_fp_normalizes_to("foo/bar/", "foo/bar"); + check_fp_normalizes_to("foo/bar/.", "foo/bar"); + check_fp_normalizes_to("foo/bar/./", "foo/bar"); + check_fp_normalizes_to("foo/./bar/", "foo/bar"); + check_fp_normalizes_to("./foo", "foo"); + //check_fp_normalizes_to("foo///.//", "foo"); + + initial_rel_path.unset(); +} + +UNIT_TEST(paths, file_path_external_prefix__MTN) +{ + initial_rel_path.unset(); + initial_rel_path.set(string("_MTN"), true); + + UNIT_TEST_CHECK_THROW(file_path_external(utf8("foo")), informative_failure); + UNIT_TEST_CHECK_THROW(file_path_external(utf8(".")), informative_failure); + UNIT_TEST_CHECK_THROW(file_path_external(utf8("./blah")), informative_failure); + check_fp_normalizes_to("..", ""); + check_fp_normalizes_to("../foo", "foo"); +} + +UNIT_TEST(paths, file_path_external_prefix_a_b) +{ + initial_rel_path.unset(); + initial_rel_path.set(string("a/b"), true); + + char const * const baddies[] = {"/foo", + "../../../bar", + "../../..", + "../../_MTN", + "../../_MTN/foo", + "//blah", + "\\foo", + "c:\\foo", +#ifdef WIN32 + "c:foo", + "c:/foo", +#endif + "", + // some baddies made bad by a security kluge -- + // see the comment in in_bookkeeping_dir + "../../_mtn", + "../../_mtN", + "../../_mTn", + "../../_Mtn", + "../../_MTn", + "../../_MtN", + "../../_mTN", + "../../_mtn/foo", + "../../_mtN/foo", + "../../_mTn/foo", + "../../_Mtn/foo", + "../../_MTn/foo", + "../../_MtN/foo", + "../../_mTN/foo", + 0 }; + for (char const * const * c = baddies; *c; ++c) + { + 
L(FL("test_file_path_external_prefix_a_b: trying baddie: %s") % *c); + UNIT_TEST_CHECK_THROW(file_path_external(utf8(*c)), informative_failure); + } + + check_fp_normalizes_to("foo", "a/b/foo"); + check_fp_normalizes_to("a", "a/b/a"); + check_fp_normalizes_to("foo/bar", "a/b/foo/bar"); + check_fp_normalizes_to("foo/bar/baz", "a/b/foo/bar/baz"); + check_fp_normalizes_to("foo/bar.baz", "a/b/foo/bar.baz"); + check_fp_normalizes_to("foo/with-hyphen/bar", "a/b/foo/with-hyphen/bar"); + check_fp_normalizes_to("foo/with_underscore/bar", "a/b/foo/with_underscore/bar"); + check_fp_normalizes_to(".foo/bar", "a/b/.foo/bar"); + check_fp_normalizes_to("..foo/bar", "a/b/..foo/bar"); + check_fp_normalizes_to(".", "a/b"); +#ifndef WIN32 + check_fp_normalizes_to("foo:bar", "a/b/foo:bar"); +#endif + check_fp_normalizes_to("foo/with,address@hidden/bar", + "a/b/foo/with,address@hidden/bar"); + // why are the tests with // in them commented out? because boost::fs sucks + // and can't normalize them. FIXME. + //check_fp_normalizes_to("foo//bar", "a/b/foo/bar"); + check_fp_normalizes_to("foo/../bar", "a/b/bar"); + check_fp_normalizes_to("foo/bar/", "a/b/foo/bar"); + check_fp_normalizes_to("foo/bar/.", "a/b/foo/bar"); + check_fp_normalizes_to("foo/bar/./", "a/b/foo/bar"); + check_fp_normalizes_to("foo/./bar/", "a/b/foo/bar"); + check_fp_normalizes_to("./foo", "a/b/foo"); + //check_fp_normalizes_to("foo///.//", "a/b/foo"); + // things that would have been bad without the initial_rel_path: + check_fp_normalizes_to("../foo", "a/foo"); + check_fp_normalizes_to("..", "a"); + check_fp_normalizes_to("../..", ""); + check_fp_normalizes_to("_MTN/foo", "a/b/_MTN/foo"); + check_fp_normalizes_to("_MTN", "a/b/_MTN"); +#ifndef WIN32 + check_fp_normalizes_to("c:foo", "a/b/c:foo"); + check_fp_normalizes_to("c:/foo", "a/b/c:/foo"); +#endif + + initial_rel_path.unset(); +} + +UNIT_TEST(paths, basename) +{ + struct t + { + char const * in; + char const * out; + }; + // file_paths cannot be absolute, but may 
be the empty string. + struct t const fp_cases[] = { + { "", "" }, + { "foo", "foo" }, + { "foo/bar", "bar" }, + { "foo/bar/baz", "baz" }, + { 0, 0 } + }; + // bookkeeping_paths cannot be absolute and must start with the + // bookkeeping_root_component. + struct t const bp_cases[] = { + { "_MTN", "_MTN" }, + { "_MTN/foo", "foo" }, + { "_MTN/foo/bar", "bar" }, + { 0, 0 } + }; + + // system_paths must be absolute. this relies on the setting of + // initial_abs_path below. note that most of the cases whose full paths + // vary between Unix and Windows will still have the same basenames. + struct t const sp_cases[] = { + { "/", "" }, + { "//", "" }, + { "foo", "foo" }, + { "/foo", "foo" }, + { "//foo", "foo" }, + { "~/foo", "foo" }, + { "c:/foo", "foo" }, + { "foo/bar", "bar" }, + { "/foo/bar", "bar" }, + { "//foo/bar", "bar" }, + { "~/foo/bar", "bar" }, + { "c:/foo/bar", "bar" }, +#ifdef WIN32 + { "c:/", "" }, + { "c:foo", "foo" }, +#else + { "c:/", "c:" }, + { "c:foo", "c:foo" }, +#endif + { 0, 0 } + }; + + UNIT_TEST_CHECKPOINT("file_path basenames"); + for (struct t const *p = fp_cases; p->in; p++) + { + file_path fp = file_path_internal(p->in); + path_component pc(fp.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + } + + UNIT_TEST_CHECKPOINT("bookkeeping_path basenames"); + for (struct t const *p = bp_cases; p->in; p++) + { + bookkeeping_path fp(p->in); + path_component pc(fp.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + } + + + UNIT_TEST_CHECKPOINT("system_path basenames"); + + initial_abs_path.unset(); + initial_abs_path.set(system_path("/a/b"), true); + + for (struct t const *p = sp_cases; p->in; p++) + { + system_path fp(p->in); + path_component pc(fp.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + 
} + + // any_path::basename() should return exactly the same thing that + // the corresponding specialized basename() does, but with type any_path. + UNIT_TEST_CHECKPOINT("any_path basenames"); + for (struct t const *p = fp_cases; p->in; p++) + { + any_path ap(file_path_internal(p->in)); + path_component pc(ap.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + } + for (struct t const *p = bp_cases; p->in; p++) + { + any_path ap(bookkeeping_path(p->in)); + path_component pc(ap.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + } + for (struct t const *p = sp_cases; p->in; p++) + { + any_path ap(system_path(p->in)); + path_component pc(ap.basename()); + UNIT_TEST_CHECK_MSG(pc == path_component(p->out), + FL("basename('%s') = '%s' (expect '%s')") + % p->in % pc % p->out); + } + + initial_abs_path.unset(); +} + +UNIT_TEST(paths, dirname) +{ + struct t + { + char const * in; + char const * out; + }; + // file_paths cannot be absolute, but may be the empty string. + struct t const fp_cases[] = { + { "", "" }, + { "foo", "" }, + { "foo/bar", "foo" }, + { "foo/bar/baz", "foo/bar" }, + { 0, 0 } + }; + + // system_paths must be absolute. this relies on the setting of + // initial_abs_path below. 
+ struct t const sp_cases[] = { + { "/", "/" }, + { "//", "//" }, + { "foo", "/a/b" }, + { "/foo", "/" }, + { "//foo", "//" }, + { "~/foo", "~" }, + { "foo/bar", "/a/b/foo" }, + { "/foo/bar", "/foo" }, + { "//foo/bar", "//foo" }, + { "~/foo/bar", "~/foo" }, +#ifdef WIN32 + { "c:", "c:" }, + { "c:foo", "c:" }, + { "c:/", "c:/" }, + { "c:/foo", "c:/" }, + { "c:/foo/bar", "c:/foo" }, +#else + { "c:", "/a/b" }, + { "c:foo", "/a/b" }, + { "c:/", "/a/b" }, + { "c:/foo", "/a/b/c:" }, + { "c:/foo/bar", "/a/b/c:/foo" }, +#endif + { 0, 0 } + }; + + initial_abs_path.unset(); + + UNIT_TEST_CHECKPOINT("file_path dirnames"); + for (struct t const *p = fp_cases; p->in; p++) + { + file_path fp = file_path_internal(p->in); + file_path dn = fp.dirname(); + UNIT_TEST_CHECK_MSG(dn == file_path_internal(p->out), + FL("dirname('%s') = '%s' (expect '%s')") + % p->in % dn % p->out); + } + + + initial_abs_path.set(system_path("/a/b"), true); + UNIT_TEST_CHECKPOINT("system_path dirnames"); + for (struct t const *p = sp_cases; p->in; p++) + { + system_path fp(p->in); + system_path dn(fp.dirname()); + + UNIT_TEST_CHECK_MSG(dn == system_path(p->out), + FL("dirname('%s') = '%s' (expect '%s')") + % p->in % dn % p->out); + } + + // any_path::dirname() should return exactly the same thing that + // the corresponding specialized dirname() does, but with type any_path. 
+ UNIT_TEST_CHECKPOINT("any_path dirnames"); + for (struct t const *p = fp_cases; p->in; p++) + { + any_path ap(file_path_internal(p->in)); + any_path dn(ap.dirname()); + any_path rf(file_path_internal(p->out)); + UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(), + FL("dirname('%s') = '%s' (expect '%s')") + % p->in % dn % rf); + } + for (struct t const *p = sp_cases; p->in; p++) + { + any_path ap(system_path(p->in)); + any_path dn(ap.dirname()); + any_path rf(system_path(p->out)); + UNIT_TEST_CHECK_MSG(dn.as_internal() == rf.as_internal(), + FL("dirname('%s') = '%s' (expect '%s')") + % p->in % dn % rf); + } + + initial_abs_path.unset(); +} + +UNIT_TEST(paths, depth) +{ + char const * const cases[] = {"", "foo", "foo/bar", "foo/bar/baz", 0}; + for (unsigned int i = 0; cases[i]; i++) + { + file_path fp = file_path_internal(cases[i]); + unsigned int d = fp.depth(); + UNIT_TEST_CHECK_MSG(d == i, + FL("depth('%s') = %d (expect %d)") % fp % d % i); + } +} + +static void check_bk_normalizes_to(char const * before, char const * after) +{ + bookkeeping_path bp(bookkeeping_root / before); + L(FL("normalizing %s to %s (got %s)") % before % after % bp); + UNIT_TEST_CHECK(bp.as_external() == after); + UNIT_TEST_CHECK(bookkeeping_path(bp.as_internal()).as_internal() == bp.as_internal()); +} + +UNIT_TEST(paths, bookkeeping) +{ + char const * const baddies[] = {"/foo", + "foo//bar", + "foo/../bar", + "../bar", + "foo/bar/", + "foo/bar/.", + "foo/bar/./", + "foo/./bar", + "./foo", + ".", + "..", + "c:\\foo", + "c:foo", + "c:/foo", + "", + "a:b", + 0 }; + string tmp_path_string; + + for (char const * const * c = baddies; *c; ++c) + { + L(FL("test_bookkeeping_path baddie: trying '%s'") % *c); + UNIT_TEST_CHECK_THROW(bookkeeping_path(tmp_path_string.assign(*c)), + logic_error); + UNIT_TEST_CHECK_THROW(bookkeeping_root / *c, logic_error); + } + + // these are legitimate as things to append to bookkeeping_root, but + // not as bookkeeping_paths in themselves. 
+ UNIT_TEST_CHECK_THROW(bookkeeping_path("a"), logic_error); + UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "a", logic_error); + UNIT_TEST_CHECK_THROW(bookkeeping_path("foo/bar"), logic_error); + UNIT_TEST_CHECK_NOT_THROW(bookkeeping_root / "foo/bar", logic_error); + + check_bk_normalizes_to("a", "_MTN/a"); + check_bk_normalizes_to("foo", "_MTN/foo"); + check_bk_normalizes_to("foo/bar", "_MTN/foo/bar"); + check_bk_normalizes_to("foo/bar/baz", "_MTN/foo/bar/baz"); +} + +static void check_system_normalizes_to(char const * before, char const * after) +{ + system_path sp(before); + L(FL("normalizing '%s' to '%s' (got '%s')") % before % after % sp); + UNIT_TEST_CHECK(sp.as_external() == after); + UNIT_TEST_CHECK(system_path(sp.as_internal()).as_internal() == sp.as_internal()); +} + +UNIT_TEST(paths, system) +{ + initial_abs_path.unset(); + initial_abs_path.set(system_path("/a/b"), true); + + UNIT_TEST_CHECK_THROW(system_path(""), informative_failure); + + check_system_normalizes_to("foo", "/a/b/foo"); + check_system_normalizes_to("foo/bar", "/a/b/foo/bar"); + check_system_normalizes_to("/foo/bar", "/foo/bar"); + check_system_normalizes_to("//foo/bar", "//foo/bar"); +#ifdef WIN32 + check_system_normalizes_to("c:foo", "c:foo"); + check_system_normalizes_to("c:/foo", "c:/foo"); + check_system_normalizes_to("c:\\foo", "c:/foo"); +#else + check_system_normalizes_to("c:foo", "/a/b/c:foo"); + check_system_normalizes_to("c:/foo", "/a/b/c:/foo"); + check_system_normalizes_to("c:\\foo", "/a/b/c:\\foo"); + check_system_normalizes_to("foo:bar", "/a/b/foo:bar"); +#endif + // we require that system_path normalize out ..'s, because of the following + // case: + // /work mkdir newdir + // /work$ cd newdir + // /work/newdir$ monotone setup --db=../foo.db + // Now they have either "/work/foo.db" or "/work/newdir/../foo.db" in + // _MTN/options + // /work/newdir$ cd .. 
+ // /work$ mv newdir newerdir # better name + // Oops, now, if we stored the version with ..'s in, this workspace + // is broken. + check_system_normalizes_to("../foo", "/a/foo"); + check_system_normalizes_to("foo/..", "/a/b"); + check_system_normalizes_to("/foo/bar/..", "/foo"); + check_system_normalizes_to("/foo/..", "/"); + // can't do particularly interesting checking of tilde expansion, but at + // least we can check that it's doing _something_... + string tilde_expanded = system_path("~/foo").as_external(); +#ifdef WIN32 + UNIT_TEST_CHECK(tilde_expanded[1] == ':'); +#else + UNIT_TEST_CHECK(tilde_expanded[0] == '/'); +#endif + UNIT_TEST_CHECK(tilde_expanded.find('~') == string::npos); + // on Windows, ~name is not expanded +#ifdef WIN32 + UNIT_TEST_CHECK(system_path("~this_user_does_not_exist_anywhere") + .as_external() + == "/a/b/~this_user_does_not_exist_anywhere"); +#else + UNIT_TEST_CHECK_THROW(system_path("~this_user_does_not_exist_anywhere"), + informative_failure); +#endif + + // finally, make sure that the copy-from-any_path constructor works right + // in particular, it should interpret the paths it gets as being relative to + // the project root, not the initial path + working_root.unset(); + working_root.set(system_path("/working/root"), true); + initial_rel_path.unset(); + initial_rel_path.set(string("rel/initial"), true); + + UNIT_TEST_CHECK(system_path(system_path("foo/bar")).as_internal() == "/a/b/foo/bar"); + UNIT_TEST_CHECK(!working_root.used); + UNIT_TEST_CHECK(system_path(system_path("/foo/bar")).as_internal() == "/foo/bar"); + UNIT_TEST_CHECK(!working_root.used); + UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar"), false).as_internal() + == "/working/root/foo/bar"); + UNIT_TEST_CHECK(!working_root.used); + UNIT_TEST_CHECK(system_path(file_path_internal("foo/bar")).as_internal() + == "/working/root/foo/bar"); + UNIT_TEST_CHECK(working_root.used); + UNIT_TEST_CHECK(system_path(file_path_external(utf8("foo/bar"))).as_external() + == 
"/working/root/rel/initial/foo/bar"); + file_path a_file_path; + UNIT_TEST_CHECK(system_path(a_file_path).as_external() + == "/working/root"); + UNIT_TEST_CHECK(system_path(bookkeeping_path("_MTN/foo/bar")).as_internal() + == "/working/root/_MTN/foo/bar"); + UNIT_TEST_CHECK(system_path(bookkeeping_root).as_internal() + == "/working/root/_MTN"); + initial_abs_path.unset(); + working_root.unset(); + initial_rel_path.unset(); +} + +UNIT_TEST(paths, access_tracker) +{ + access_tracker a; + UNIT_TEST_CHECK_THROW(a.get(), logic_error); + a.set(1, false); + UNIT_TEST_CHECK_THROW(a.set(2, false), logic_error); + a.set(2, true); + UNIT_TEST_CHECK_THROW(a.set(3, false), logic_error); + UNIT_TEST_CHECK(a.get() == 2); + UNIT_TEST_CHECK_THROW(a.set(3, true), logic_error); + a.unset(); + a.may_not_initialize(); + UNIT_TEST_CHECK_THROW(a.set(1, false), logic_error); + UNIT_TEST_CHECK_THROW(a.set(2, true), logic_error); + a.unset(); + a.set(1, false); + UNIT_TEST_CHECK_THROW(a.may_not_initialize(), logic_error); +} + +static void test_path_less_than(string const & left, string const & right) +{ + MM(left); + MM(right); + file_path left_fp = file_path_internal(left); + file_path right_fp = file_path_internal(right); + I(left_fp < right_fp); +} + +static void test_path_equal(string const & left, string const & right) +{ + MM(left); + MM(right); + file_path left_fp = file_path_internal(left); + file_path right_fp = file_path_internal(right); + I(left_fp == right_fp); +} + +UNIT_TEST(paths, ordering) +{ + // this ordering is very important: + // -- it is used to determine the textual form of csets and manifests + // (in particular, it cannot be changed) + // -- it is used to determine in what order cset operations can be applied + // (in particular, foo must sort before foo/bar, so that we can use it + // to do top-down and bottom-up traversals of a set of paths). 
+ test_path_less_than("a", "b"); + test_path_less_than("a", "c"); + test_path_less_than("ab", "ac"); + test_path_less_than("a", "ab"); + test_path_less_than("", "a"); + test_path_less_than("", ".foo"); + test_path_less_than("foo", "foo/bar"); + // . is before / asciibetically, so sorting by strings will give the wrong + // answer on this: + test_path_less_than("foo/bar", "foo.bar"); + + // path_components used to be interned strings, and we used the default sort + // order, which meant that in practice path components would sort in the + // _order they were first used in the program_. So let's put in a test that + // would catch this sort of brokenness. + test_path_less_than("fallanopic_not_otherwise_mentioned", "xyzzy"); + test_path_less_than("fallanoooo_not_otherwise_mentioned_and_smaller", + "fallanopic_not_otherwise_mentioned"); +} + +UNIT_TEST(paths, ordering_random) +{ + char x[4] = {0,0,0,0}; + char y[4] = {0,0,0,0}; + u8 a, b, c, d; + const int ntrials = 1000; + int i; + randomizer rng; + + // use of numbers is intentional; these strings are defined to be UTF-8. + + UNIT_TEST_CHECKPOINT("a and b"); + for (i = 0; i < ntrials; i++) + { + do a = rng.uniform(0x7f - 0x20) + 0x20; + while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.' + + do b = rng.uniform(0x7f - 0x20) + 0x20; + while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.' + + x[0] = a; + y[0] = b; + if (a < b) + test_path_less_than(x, y); + else if (a > b) + test_path_less_than(y, x); + else + test_path_equal(x, y); + } + + UNIT_TEST_CHECKPOINT("ab and cd"); + for (i = 0; i < ntrials; i++) + { + do + { + do a = rng.uniform(0x7f - 0x20) + 0x20; + while (a == 0x5c || a == 0x2f); // '\\', '/' + + do b = rng.uniform(0x7f - 0x20) + 0x20; + while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':' + } + while (a == 0x2e && b == 0x2e); // ".." 
+ + do + { + do c = rng.uniform(0x7f - 0x20) + 0x20; + while (c == 0x5c || c == 0x2f); // '\\', '/' + + do d = rng.uniform(0x7f - 0x20) + 0x20; + while (d == 0x5c || d == 0x2f || d == 0x3a); // '\\', '/', ':' + } + while (c == 0x2e && d == 0x2e); // ".." + + x[0] = a; + x[1] = b; + y[0] = c; + y[1] = d; + + if (a < c || (a == c && b < d)) + test_path_less_than(x, y); + else if (a > c || (a == c && b > d)) + test_path_less_than(y, x); + else + test_path_equal(x, y); + } + + UNIT_TEST_CHECKPOINT("a and b/c"); + x[1] = 0; + y[1] = '/'; + for (i = 0; i < ntrials; i++) + { + do a = rng.uniform(0x7f - 0x20) + 0x20; + while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.' + + do b = rng.uniform(0x7f - 0x20) + 0x20; + while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.' + + do c = rng.uniform(0x7f - 0x20) + 0x20; + while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.' + + x[0] = a; + y[0] = b; + y[2] = c; + + // only the order of a and b matters. 1 sorts before 1/2. + if (a <= b) + test_path_less_than(x, y); + else + test_path_less_than(y, x); + } + + UNIT_TEST_CHECKPOINT("ab and c/d"); + for (i = 0; i < ntrials; i++) + { + do + { + do a = rng.uniform(0x7f - 0x20) + 0x20; + while (a == 0x5c || a == 0x2f); // '\\', '/' + + do b = rng.uniform(0x7f - 0x20) + 0x20; + while (b == 0x5c || b == 0x2f || b == 0x3a); // '\\', '/', ':' + } + while (a == 0x2e && b == 0x2e); // ".." + + do c = rng.uniform(0x7f - 0x20) + 0x20; + while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.' + + do d = rng.uniform(0x7f - 0x20) + 0x20; + while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.' + + + x[0] = a; + x[1] = b; + y[0] = c; + y[2] = d; + + // only the order of a and c matters, + // but this time, 12 sorts after 1/2. 
+ if (a < c) + test_path_less_than(x, y); + else + test_path_less_than(y, x); + } + + + UNIT_TEST_CHECKPOINT("a/b and c/d"); + x[1] = '/'; + for (i = 0; i < ntrials; i++) + { + do a = rng.uniform(0x7f - 0x20) + 0x20; + while (a == 0x5c || a == 0x2f || a == 0x2e); // '\\', '/', '.' + + do b = rng.uniform(0x7f - 0x20) + 0x20; + while (b == 0x5c || b == 0x2f || b == 0x2e); // '\\', '/', '.' + + do c = rng.uniform(0x7f - 0x20) + 0x20; + while (c == 0x5c || c == 0x2f || c == 0x2e); // '\\', '/', '.' + + do d = rng.uniform(0x7f - 0x20) + 0x20; + while (d == 0x5c || d == 0x2f || d == 0x2e); // '\\', '/', '.' + + x[0] = a; + x[2] = b; + y[0] = c; + y[2] = d; + + if (a < c || (a == c && b < d)) + test_path_less_than(x, y); + else if (a > c || (a == c && b > d)) + test_path_less_than(y, x); + else + test_path_equal(x, y); + } +} + +UNIT_TEST(paths, test_internal_string_is_bookkeeping_path) +{ + char const * const yes[] = {"_MTN", + "_MTN/foo", + "_mtn/Foo", + 0 }; + char const * const no[] = {"foo/_MTN", + "foo/bar", + 0 }; + for (char const * const * c = yes; *c; ++c) + UNIT_TEST_CHECK(bookkeeping_path + ::internal_string_is_bookkeeping_path(utf8(std::string(*c)))); + for (char const * const * c = no; *c; ++c) + UNIT_TEST_CHECK(!bookkeeping_path + ::internal_string_is_bookkeeping_path(utf8(std::string(*c)))); +} + +UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix_none) +{ + initial_rel_path.unset(); + initial_rel_path.set(string(), true); + + char const * const yes[] = {"_MTN", + "_MTN/foo", + "_mtn/Foo", + "_MTN/foo/..", + 0 }; + char const * const no[] = {"foo/_MTN", + "foo/bar", + "_MTN/..", + 0 }; + for (char const * const * c = yes; *c; ++c) + UNIT_TEST_CHECK(bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); + for (char const * const * c = no; *c; ++c) + UNIT_TEST_CHECK(!bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); +} + +UNIT_TEST(paths, 
test_external_string_is_bookkeeping_path_prefix_a_b) +{ + initial_rel_path.unset(); + initial_rel_path.set(string("a/b"), true); + + char const * const yes[] = {"../../_MTN", + "../../_MTN/foo", + "../../_mtn/Foo", + "../../_MTN/foo/..", + "../../foo/../_MTN/foo", + 0 }; + char const * const no[] = {"foo/_MTN", + "foo/bar", + "_MTN", + "../../foo/_MTN", + 0 }; + for (char const * const * c = yes; *c; ++c) + UNIT_TEST_CHECK(bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); + for (char const * const * c = no; *c; ++c) + UNIT_TEST_CHECK(!bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); +} + +UNIT_TEST(paths, test_external_string_is_bookkeeping_path_prefix__MTN) +{ + initial_rel_path.unset(); + initial_rel_path.set(string("_MTN"), true); + + char const * const yes[] = {".", + "foo", + "../_MTN/foo/..", + "../_mtn/foo", + "../foo/../_MTN/foo", + 0 }; + char const * const no[] = {"../foo", + "../foo/bar", + "../foo/_MTN", + 0 }; + for (char const * const * c = yes; *c; ++c) + UNIT_TEST_CHECK(bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); + for (char const * const * c = no; *c; ++c) + UNIT_TEST_CHECK(!bookkeeping_path + ::external_string_is_bookkeeping_path(utf8(std::string(*c)))); +} + +#endif // BUILD_UNIT_TESTS + +// Local Variables: +// mode: C++ +// fill-column: 76 +// c-file-style: "gnu" +// indent-tabs-mode: nil +// End: +// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: