# # add_file "tests/t_normalized_filenames.at" # # patch "ChangeLog" # from [1ca0c140585eb24f3192496cb9194d1cde27c84b] # to [b9f8dc67eb052de09b945006babfa6821b7ab659] # # patch "file_io.cc" # from [5554c12f8dc3935afe2f99fd83e57b515fb3d0cb] # to [2358b3a2e4ca8b85c46ebd0290102511b4798e13] # # patch "file_io.hh" # from [7438900771d9c148c4c929639705e679409561fa] # to [305745304e709b57093043582937e0709de0981d] # # patch "manifest.cc" # from [aefd8d086360ea7349a541e2dfc0f4f7c28d9913] # to [463177663506d774001b5864819b9f6563b05793] # # patch "tests/t_i18n_file.at" # from [424bbaaa4ad6d3cb1d0b52194f61e74a27165530] # to [24f53ae44968a9520711fd7874995a8c4b0a65e0] # # patch "tests/t_normalized_filenames.at" # from [] # to [50cb5355b10548ca00c2bd8c5c95439ca9b61dd5] # # patch "testsuite.at" # from [24d7159f4abea9ab26794f48237ae19d14a397c4] # to [114eaff7efcf80f206d58b4b9657782dc1ea9d74] # # patch "transforms.cc" # from [7c75b6c118e849c06ac04466a1546ac2b152c1a4] # to [cc6ef8813aeaa3e25bc2a029085c84b6b0c66690] # # patch "unix/inodeprint.cc" # from [ba3f21edca6b9c5461bdc844d9c519b70fc4ad1a] # to [01ee59f9f98edf2aee545ad583dd6edf83e4149d] # # patch "vocab.cc" # from [cbb860ad105161ddfcf1e3e1bb4e20da5f5950ea] # to [fb4266f80d864d01b7e2449060df50c29d55b2c5] # # patch "win32/inodeprint.cc" # from [d5935da66c0603bca05f8a4d4778e46fe5f21bb1] # to [a142bc0822aad4a65f09f3ba30729e3c1b7a1671] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,31 @@ +2005-04-22 Nathaniel Smith + + * file_io.{cc,hh} (localized): Take file_path/local_path instead + of string; expose in public interface. Adjust rest of file to + match. + (walk_tree): Don't convert the (OS-supplied) current directory + from UTF-8 to current locale. + + * transforms.{cc,hh} (charset_convert): Be more informative on + error. + (calculate_ident): Localize the filename, even on the fast-path. + Also assert file exists and is not a directory, since Crypto++ + will happily hash directories. (They are like empty files, + apparently.) + + * manifest.cc (build_restricted_manifest_map): Use file_exists + instead of fs::exists, to handle localized paths. + * {win32,unix}/inodeprint.cc (inodeprint_file): Use localized + filenames to stat. + + * tests/t_i18n_file.at: Rewrite to work right. + + * tests/t_normalized_filenames.at: New test. + * testsuite.at: Add it. + * vocab.cc (test_file_path_verification): MT/path is not a valid + file_path either. + (test_file_path_normalization): New unit-test. + 2005-04-21 Jeremy Cowgar * tests/t_multiple_heads_msg.at: Now checks to ensure 'multiple head' --- file_io.cc +++ file_io.cc @@ -164,8 +164,8 @@ } -static fs::path -localized(string const & utf) +inline static fs::path +localized_impl(string const & utf) { fs::path tmp = mkpath(utf), ret; for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i) @@ -177,6 +177,24 @@ return ret; } +fs::path +localized(file_path const & fp) +{ + return localized_impl(fp()); +} + +fs::path +localized(local_path const & lp) +{ + return localized_impl(lp()); +} + +static fs::path +localized(utf8 const & utf) +{ + return localized_impl(utf()); +} + string absolutify(string const & path) { @@ -240,27 +258,27 @@ bool directory_exists(local_path const & p) { - return fs::exists(localized(p())) && - fs::is_directory(localized(p())); + return fs::exists(localized(p)) && + fs::is_directory(localized(p)); } bool directory_exists(file_path const & p) { - return fs::exists(localized(p())) && - fs::is_directory(localized(p())); + return fs::exists(localized(p)) && + fs::is_directory(localized(p)); } bool file_exists(file_path const & p) { - return fs::exists(localized(p())); + return fs::exists(localized(p)); } bool file_exists(local_path const & p) { - return fs::exists(localized(p())); + return fs::exists(localized(p)); } void @@ -268,7 +286,7 @@ { N(file_exists(p), F("file to delete '%s' does not exist") % p); - fs::remove(localized(p())); + fs::remove(localized(p)); } void @@ -276,7 +294,7 @@ { N(file_exists(p), F("file to delete '%s' does not exist") % p); - fs::remove(localized(p())); + fs::remove(localized(p)); } void @@ -284,7 +302,7 @@ { N(directory_exists(p), F("directory to delete '%s' does not exist") % p); - fs::remove_all(localized(p())); + fs::remove_all(localized(p)); } void @@ -292,7 +310,7 @@ { N(directory_exists(p), F("directory to delete '%s' does not exist") % p); - fs::remove_all(localized(p())); + fs::remove_all(localized(p)); } void @@ -303,8 +321,8 @@ F("rename source file '%s' does not exist") % old_path); N(! file_exists(new_path), F("rename target file '%s' already exists") % new_path); - fs::rename(localized(old_path()), - localized(new_path())); + fs::rename(localized(old_path), + localized(new_path)); } void @@ -315,8 +333,8 @@ F("rename source file '%s' does not exist") % old_path); N(! file_exists(new_path), F("rename target file '%s' already exists") % new_path); - fs::rename(localized(old_path()), - localized(new_path())); + fs::rename(localized(old_path), + localized(new_path)); } void @@ -327,8 +345,8 @@ F("rename source dir '%s' does not exist") % old_path); N(!directory_exists(new_path), F("rename target dir '%s' already exists") % new_path); - fs::rename(localized(old_path()), - localized(new_path())); + fs::rename(localized(old_path), + localized(new_path)); } void @@ -339,26 +357,26 @@ F("rename source dir '%s' does not exist") % old_path); N(!directory_exists(new_path), F("rename target dir '%s' already exists") % new_path); - fs::rename(localized(old_path()), - localized(new_path())); + fs::rename(localized(old_path), + localized(new_path)); } void mkdir_p(local_path const & p) { - fs::create_directories(localized(p())); + fs::create_directories(localized(p)); } void mkdir_p(file_path const & p) { - fs::create_directories(localized(p())); + fs::create_directories(localized(p)); } void make_dir_for(file_path const & p) { - fs::path tmp = localized(p()); + fs::path tmp = localized(p); if (tmp.has_branch_path()) { fs::create_directories(tmp.branch_path()); @@ -399,13 +417,13 @@ void read_data(local_path const & path, data & dat) { - read_data_impl(localized(path()), dat); + read_data_impl(localized(path), dat); } void read_data(file_path const & path, data & dat) { - read_data_impl(localized(path()), dat); + read_data_impl(localized(path), dat); } void @@ -413,7 +431,7 @@ base64< gzip > & dat) { data data_plain; - read_data_impl(localized(path()), data_plain); + read_data_impl(localized(path), data_plain); gzip data_compressed; base64< gzip > data_encoded; encode_gzip(data_plain, data_compressed); @@ -477,9 +495,10 @@ if (path() == "-") read_data_stdin(dat); else - read_data_impl(localized(path()), dat); + read_data_impl(localized(path), dat); } + // FIXME: this is probably not enough brains to actually manage "atomic // filesystem writes". at some point you have to draw the line with even // trying, and I'm not sure it's really a strict requirement of this tool, @@ -523,13 +542,13 @@ void write_data(local_path const & path, data const & dat) { - write_data_impl(localized(path()), dat); + write_data_impl(localized(path), dat); } void write_data(file_path const & path, data const & dat) { - write_data_impl(localized(path()), dat); + write_data_impl(localized(path), dat); } void @@ -580,7 +599,7 @@ data data_decompressed; decode_base64(dat, data_decoded); decode_gzip(data_decoded, data_decompressed); - write_data_impl(localized(path()), data_decompressed); + write_data_impl(localized(path), data_decompressed); } void @@ -641,14 +660,15 @@ tree_walker & walker, bool require_existing_path) { - if (fs::exists(localized(path()))) + if (fs::exists(localized(path))) { - if (! fs::is_directory(localized(path()))) + if (! fs::is_directory(localized(path))) walker.visit_file(path); else { - fs::path root(localized(fs::current_path().string())); - fs::path rel(localized(path())); + // the current path does not need localization + fs::path root(fs::current_path()); + fs::path rel(localized(path)); walk_tree_recursive(root / rel, rel, walker); } } --- file_io.hh +++ file_io.hh @@ -45,6 +45,8 @@ fs::path mkpath(std::string const & s); std::string get_homedir(); +fs::path localized(file_path const & fp); +fs::path localized(local_path const & fp); std::string absolutify(std::string const & path); std::string tilde_expand(std::string const & path); --- manifest.cc +++ manifest.cc @@ -94,7 +94,7 @@ } } // ...ah, well, no good fingerprint, just check directly. - N(fs::exists(mkpath((*i)())), + N(file_exists(*i), F("file disappeared but exists in new manifest: %s") % (*i)()); hexenc ident; calculate_ident(*i, ident, app.lua); --- tests/t_i18n_file.at +++ tests/t_i18n_file.at @@ -16,27 +16,33 @@ FUNNY_FILENAME="address@hidden:" fi -AT_CHECK(mkdir tmp) -AT_CHECK(touch "tmp/file name with spaces", [], [ignore], [ignore]) -AT_CHECK(touch "tmp/$FUNNY_FILENAME", [], [ignore], [ignore]) -AT_CHECK(touch "tmp/$EUROPEAN_UTF8", [], [ignore], [ignore]) -AT_CHECK(touch "tmp/$JAPANESE_UTF8", [], [ignore], [ignore]) -AT_CHECK(touch "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore]) -AT_CHECK(touch "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore]) +AT_CHECK(mkdir weird utf8 8859-1 euc) +AT_CHECK(touch "weird/file name with spaces", [], [ignore], [ignore]) +AT_CHECK(touch "weird/$FUNNY_FILENAME", [], [ignore], [ignore]) +AT_CHECK(touch "utf8/$EUROPEAN_UTF8", [], [ignore], [ignore]) +AT_CHECK(touch "utf8/$JAPANESE_UTF8", [], [ignore], [ignore]) +AT_CHECK(touch "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore]) +AT_CHECK(touch "euc/$JAPANESE_EUC_JP", [], [ignore], [ignore]) -AT_CHECK(MONOTONE add "tmp/file name with spaces", [], [ignore], [ignore]) -AT_CHECK(MONOTONE add "tmp/$FUNNY_FILENAME", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "weird/file name with spaces", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "weird/$FUNNY_FILENAME", [], [ignore], [ignore]) +export LANG=en_US.UTF-8 +export CHARSET=utf-8 + # add some files with UTF8 names export LANG=en_US.utf-8 export CHARSET=utf-8 -AT_CHECK(MONOTONE add "tmp/$EUROPEAN_UTF8", [], [ignore], [ignore]) -AT_CHECK(MONOTONE add "tmp/$JAPANESE_UTF8", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "utf8/$EUROPEAN_UTF8", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "utf8/$JAPANESE_UTF8", [], [ignore], [ignore]) AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding european and japanese names in UTF-8', [], [ignore], [ignore]) # check the names showed up in our manifest +export LANG=en_US.UTF-8 +export CHARSET=utf-8 + AT_CHECK(MONOTONE cat manifest, [], [stdout]) AT_CHECK(mv stdout manifest) AT_CHECK(grep funny manifest, [], [ignore], [ignore]) @@ -44,19 +50,43 @@ AT_CHECK(grep $JAPANESE_UTF8 manifest, [], [ignore], [ignore]) AT_CHECK(grep $EUROPEAN_UTF8 manifest, [], [ignore], [ignore]) -# remove the UTF8 files, change locales, and add them again. both should work. +# okay, now we try in two different locales. monotone is happy to +# have arbirary utf8 filenames in it, but these locales don't support +# arbitrary utf8 -- you have to use a utf8 locale if you want to put +# filenames on your disk in utf8. if we keep all the utf8 files in +# the tree, then, monotone will attempt to convert them to the current +# locale, and fail miserably. so get rid of them first. +AT_CHECK(MONOTONE drop "utf8/$EUROPEAN_UTF8" "utf8/$JAPANESE_UTF8", [], [ignore], [ignore]) +AT_CHECK(MONOTONE --branch=testbranch commit --message 'cleaning up utf8 files', [], [ignore], [ignore]) + +# now try iso-8859-1 + export LANG=de_DE.iso-8859-1 export CHARSET=iso-8859-1 -AT_CHECK(MONOTONE drop "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore]) -AT_CHECK(MONOTONE add "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore]) AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding european name in ISO 8859-1', [], [ignore], [ignore]) +# check the names showed up in our manifest + +AT_CHECK(MONOTONE cat manifest, [], [stdout]) +AT_CHECK(mv stdout manifest) +AT_CHECK(grep funny manifest, [], [ignore], [ignore]) +AT_CHECK(grep spaces manifest, [], [ignore], [ignore]) +AT_CHECK(grep "8859-1/$EUROPEAN_UTF8" manifest, [], [ignore], [ignore]) + +# okay, clean up again + +AT_CHECK(MONOTONE drop "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore]) +AT_CHECK(MONOTONE --branch=testbranch commit --message 'cleaning up 8859-1 files', [], [ignore], [ignore]) + + +# now try euc + export LANG=ja_JP.euc-jp export CHARSET=euc-jp -AT_CHECK(MONOTONE drop "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore]) -AT_CHECK(MONOTONE add "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore]) +AT_CHECK(MONOTONE add "euc/$JAPANESE_EUC_JP", [], [ignore], [ignore]) AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding japanese name in EUC-JP', [], [ignore], [ignore]) @@ -66,9 +96,6 @@ AT_CHECK(mv stdout manifest) AT_CHECK(grep funny manifest, [], [ignore], [ignore]) AT_CHECK(grep spaces manifest, [], [ignore], [ignore]) -AT_CHECK(grep $JAPANESE_UTF8 manifest, [], [ignore], [ignore]) -AT_CHECK(grep $EUROPEAN_UTF8 manifest, [], [ignore], [ignore]) +AT_CHECK(grep "euc/$JAPANESE_UTF8" manifest, [], [ignore], [ignore]) -AT_CHECK(rm -Rf tmp) - AT_CLEANUP --- tests/t_normalized_filenames.at +++ tests/t_normalized_filenames.at @@ -0,0 +1,20 @@ +AT_SETUP([normalized filenames]) +MONOTONE_SETUP + +AT_DATA(foo, [blah blah +]) +# The UI used to fix these, while later code did not, so let's check +# the inner code directly. +AT_DATA(MT/work, [add_file "." +]) +AT_CHECK(MONOTONE cat manifest, [3], [ignore], [ignore]) + +AT_DATA(MT/work, [add_file "./bar" +]) +AT_DATA(bar, [blah blah +]) + +AT_CHECK(MONOTONE cat manifest, [3], [ignore], [ignore]) +AT_CHECK(MONOTONE commit --message=foo --branch=foo, [3], [ignore], [ignore]) + +AT_CLEANUP --- testsuite.at +++ testsuite.at @@ -576,3 +576,4 @@ m4_include(tests/t_inodeprints_hook.at) m4_include(tests/t_bad_packets.at) m4_include(tests/t_multiple_heads_msg.at) +m4_include(tests/t_normalized_filenames.at) --- transforms.cc +++ transforms.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include "cryptopp/filters.h" @@ -265,7 +266,7 @@ ident = tid; } -// this might reasonably go in file_io.cc too.. +// this might reasonably go in file_io.cc too... void calculate_ident(file_path const & file, hexenc & ident, @@ -289,10 +290,16 @@ else { // no conversions necessary, use streaming form + // still have to localize the filename + fs::path localized_file = localized(file); + // crypto++'s FileSource will simply treat directories as empty files, + // so we'd better check ourselves. + I(fs::exists(localized_file) && !fs::is_directory(localized_file)); CryptoPP::SHA hash; unsigned int const sz = 2 * CryptoPP::SHA::DIGESTSIZE; char buffer[sz]; - CryptoPP::FileSource f(file().c_str(), true, new CryptoPP::HashFilter + CryptoPP::FileSource f(localized_file.native_file_string().c_str(), + true, new CryptoPP::HashFilter (hash, new CryptoPP::HexEncoder (new CryptoPP::ArraySink(reinterpret_cast(buffer), sz)))); ident = lowercase(string(buffer, sz)); @@ -467,7 +474,9 @@ char * converted = stringprep_convert(src.c_str(), dst_charset.c_str(), src_charset.c_str()); - I(converted != NULL); + E(converted != NULL, + F("failed to convert string from %s to %s: '%s'") + % src_charset % dst_charset % src); dst = string(converted); free(converted); } @@ -700,8 +709,8 @@ // - ] directly following an unescaped [ is escaped. string glob_to_regexp(const string & glob) { - int in_braces = 0; // counter for levels if {} - bool in_brackets = false; // flags if we're inside a [], which + int in_braces = 0; // counter for levels if {} + bool in_brackets = false; // flags if we're inside a [], which // has higher precedence than {}. // Also, [ is accepted inside [] unescaped. bool this_was_opening_bracket = false; @@ -1099,7 +1108,7 @@ BOOST_CHECK(glob_to_regexp("foo[12m,]") == "foo[12m\\,]"); // A full fledged, use all damn features test... BOOST_CHECK(glob_to_regexp("foo.{bar*,cookie?{haha,hehe[^\\123!,]}}[!]a^b]") - == "foo\\.(bar.*|cookie.(haha|hehe[^\\123\\!\\,]))[^\\]a\\^b]"); + == "foo\\.(bar.*|cookie.(haha|hehe[^\\123\\!\\,]))[^\\]a\\^b]"); } void --- unix/inodeprint.cc +++ unix/inodeprint.cc @@ -9,6 +9,7 @@ #include "platform.hh" #include "transforms.hh" +#include "file_io.hh" namespace { @@ -29,7 +30,7 @@ bool inodeprint_file(file_path const & file, hexenc & ip) { struct stat st; - if (stat(file().c_str(), &st) < 0) + if (stat(localized(file).native_file_string().c_str(), &st) < 0) return false; CryptoPP::SHA hash; --- vocab.cc +++ vocab.cc @@ -247,6 +247,7 @@ "foo/../../escape", "/rooted", "foo//nonsense", + "MT/foo", #ifdef _WIN32 "c:\\windows\\rooted", "c:/windows/rooted", @@ -280,10 +281,18 @@ BOOST_CHECK_NOT_THROW(file_path p(*c), informative_failure); } +static void test_file_path_normalization() +{ + BOOST_CHECK(file_path("./foo") == file_path("foo")); + BOOST_CHECK(file_path("foo/bar/./baz") == file_path("foo/bar/baz")); + BOOST_CHECK(file_path("foo/bar/../baz") == file_path("foo/baz")); +} + void add_vocab_tests(test_suite * suite) { I(suite); suite->add(BOOST_TEST_CASE(&test_file_path_verification)); + suite->add(BOOST_TEST_CASE(&test_file_path_normalization)); } #endif // BUILD_UNIT_TESTS --- win32/inodeprint.cc +++ win32/inodeprint.cc @@ -10,6 +10,7 @@ #include "platform.hh" #include "transforms.hh" +#include "file_io.hh" namespace { @@ -27,7 +28,7 @@ bool inodeprint_file(file_path const & file, hexenc & ip) { struct _stati64 st; - if (_stati64(file().c_str(), &st) < 0) + if (_stati64(localized(file).native_file_string().c_str(), &st) < 0) return false; CryptoPP::SHA hash;