# # patch "ChangeLog" # from [501f140e9335c16d7459bfa98e179e902ec232a5] # to [fe6f3b4ee6fb1b43cfb90fd5a83d260af7b9c08b] # # patch "file_io.cc" # from [2358b3a2e4ca8b85c46ebd0290102511b4798e13] # to [ea97ee017c1ce25bec7d6ca2fa7e3117da36de3b] # # patch "file_io.hh" # from [305745304e709b57093043582937e0709de0981d] # to [7438900771d9c148c4c929639705e679409561fa] # # patch "transforms.cc" # from [cc6ef8813aeaa3e25bc2a029085c84b6b0c66690] # to [7e3fd7a145e67cbbaa04487eeb734064059fb5af] # # patch "transforms.hh" # from [19f4be0e47d83830cc4a4d7279a2a24a54ce2e3d] # to [91e70d85705e114c5c3e8cb3f5fac19f42219927] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,9 @@ +2005-04-23 Nathaniel Smith + + * file_io.{cc,hh} (localized): Move from here... + * transforms.{cc,hh} (localized): ...to here. Add lots of gunk to + avoid calling iconv whenever possible. + 2005-04-22 Nathaniel Smith * manifest.cc (build_restricted_manifest_map): Fixup after merge --- file_io.cc +++ file_io.cc @@ -163,38 +163,6 @@ #endif } - -inline static fs::path -localized_impl(string const & utf) -{ - fs::path tmp = mkpath(utf), ret; - for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) - { - external ext; - utf8_to_system(utf8(*i), ext); - ret /= mkpath(ext()); - } - return ret; -} - -fs::path -localized(file_path const & fp) -{ - return localized_impl(fp()); -} - -fs::path -localized(local_path const & lp) -{ - return localized_impl(lp()); -} - -static fs::path -localized(utf8 const & utf) -{ - return localized_impl(utf()); -} - string absolutify(string const & path) { --- file_io.hh +++ file_io.hh @@ -45,8 +45,6 @@ fs::path mkpath(std::string const & s); std::string get_homedir(); -fs::path localized(file_path const & fp); -fs::path localized(local_path const & fp); std::string absolutify(std::string const & path); std::string tilde_expand(std::string const & path); --- transforms.cc +++ transforms.cc @@ -547,7 +547,94 @@ free(out); } +// Lots of gunk to avoid charset conversion as much as possible. 
Running +// iconv over every element of every path in a 30,000 file manifest takes +// multiple seconds, which then is a minimum bound on pretty much any +// operation we do... +static inline bool +filesystem_is_utf8_impl() +{ + std::string lc_encoding = lowercase(system_charset()); + return (lc_encoding == "utf-8" + || lc_encoding == "utf_8" + || lc_encoding == "utf8"); +} +static inline bool +filesystem_is_utf8() +{ + static bool it_is = filesystem_is_utf8_impl(); + return it_is; +} + +static inline bool +filesystem_is_ascii_extension_impl() +{ + if (filesystem_is_utf8()) + return true; + std::string lc_encoding = lowercase(system_charset()); + // if your character set is identical to ascii in the lower 7 bits, then add + // it here for a speed boost. + return (lc_encoding.find("ascii") != std::string::npos + || lc_encoding.find("8859") != std::string::npos + || lc_encoding.find("ansi_x3.4") != std::string::npos); +} + +static inline bool +filesystem_is_ascii_extension() +{ + static bool it_is = filesystem_is_ascii_extension_impl(); + return it_is; +} + +inline static fs::path +localized_impl(string const & utf) +{ + if (filesystem_is_utf8()) + return mkpath(utf); + if (filesystem_is_ascii_extension()) + { + bool is_all_ascii = true; + // could speed this up by vectorization -- mask against 0x80808080, + // process a whole word at a time...
+ for (std::string::const_iterator i = utf.begin(); i != utf.end(); ++i) + if (0x80 & *i) + { + is_all_ascii = false; + break; + } + if (is_all_ascii) + return mkpath(utf); + } + fs::path tmp = mkpath(utf), ret; + for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) + { + external ext; + utf8_to_system(utf8(*i), ext); + ret /= mkpath(ext()); + } + return ret; +} + +fs::path +localized(file_path const & fp) +{ + return localized_impl(fp()); +} + +fs::path +localized(local_path const & lp) +{ + return localized_impl(lp()); +} + +fs::path +localized(utf8 const & utf) +{ + return localized_impl(utf()); +} + + void internalize_cert_name(utf8 const & utf, cert_name & c) { --- transforms.hh +++ transforms.hh @@ -170,6 +170,10 @@ void ace_to_utf8(ace const & ac, utf8 & utf); void utf8_to_ace(utf8 const & utf, ace & a); +fs::path localized(file_path const & path); +fs::path localized(local_path const & path); +fs::path localized(utf8 const & path); + // specific internal / external conversions for various vocab terms void internalize_cert_name(utf8 const & utf, cert_name & c); void internalize_cert_name(external const & ext, cert_name & c);