#
# add_file "tests/t_normalized_filenames.at"
#
# patch "ChangeLog"
# from [1ca0c140585eb24f3192496cb9194d1cde27c84b]
# to [b9f8dc67eb052de09b945006babfa6821b7ab659]
#
# patch "file_io.cc"
# from [5554c12f8dc3935afe2f99fd83e57b515fb3d0cb]
# to [2358b3a2e4ca8b85c46ebd0290102511b4798e13]
#
# patch "file_io.hh"
# from [7438900771d9c148c4c929639705e679409561fa]
# to [305745304e709b57093043582937e0709de0981d]
#
# patch "manifest.cc"
# from [aefd8d086360ea7349a541e2dfc0f4f7c28d9913]
# to [463177663506d774001b5864819b9f6563b05793]
#
# patch "tests/t_i18n_file.at"
# from [424bbaaa4ad6d3cb1d0b52194f61e74a27165530]
# to [24f53ae44968a9520711fd7874995a8c4b0a65e0]
#
# patch "tests/t_normalized_filenames.at"
# from []
# to [50cb5355b10548ca00c2bd8c5c95439ca9b61dd5]
#
# patch "testsuite.at"
# from [24d7159f4abea9ab26794f48237ae19d14a397c4]
# to [114eaff7efcf80f206d58b4b9657782dc1ea9d74]
#
# patch "transforms.cc"
# from [7c75b6c118e849c06ac04466a1546ac2b152c1a4]
# to [cc6ef8813aeaa3e25bc2a029085c84b6b0c66690]
#
# patch "unix/inodeprint.cc"
# from [ba3f21edca6b9c5461bdc844d9c519b70fc4ad1a]
# to [01ee59f9f98edf2aee545ad583dd6edf83e4149d]
#
# patch "vocab.cc"
# from [cbb860ad105161ddfcf1e3e1bb4e20da5f5950ea]
# to [fb4266f80d864d01b7e2449060df50c29d55b2c5]
#
# patch "win32/inodeprint.cc"
# from [d5935da66c0603bca05f8a4d4778e46fe5f21bb1]
# to [a142bc0822aad4a65f09f3ba30729e3c1b7a1671]
#
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,31 @@
+2005-04-22 Nathaniel Smith
+
+ * file_io.{cc,hh} (localized): Take file_path/local_path instead
+ of string; expose in public interface. Adjust rest of file to
+ match.
+ (walk_tree): Don't convert the (OS-supplied) current directory
+ from UTF-8 to current locale.
+
+ * transforms.{cc,hh} (charset_convert): Be more informative on
+ error.
+ (calculate_ident): Localize the filename, even on the fast-path.
+ Also assert file exists and is not a directory, since Crypto++
+ will happily hash directories. (They are like empty files,
+ apparently.)
+
+ * manifest.cc (build_restricted_manifest_map): Use file_exists
+ instead of fs::exists, to handle localized paths.
+ * {win32,unix}/inodeprint.cc (inodeprint_file): Use localized
+ filenames to stat.
+
+ * tests/t_i18n_file.at: Rewrite to work right.
+
+ * tests/t_normalized_filenames.at: New test.
+ * testsuite.at: Add it.
+ * vocab.cc (test_file_path_verification): MT/path is not a valid
+ file_path either.
+ (test_file_path_normalization): New unit-test.
+
2005-04-21 Jeremy Cowgar
* tests/t_multiple_heads_msg.at: Now checks to ensure 'multiple head'
--- file_io.cc
+++ file_io.cc
@@ -164,8 +164,8 @@
}
-static fs::path
-localized(string const & utf)
+inline static fs::path
+localized_impl(string const & utf)
{
fs::path tmp = mkpath(utf), ret;
for (fs::path::iterator i = tmp.begin(); i != tmp.end(); ++i)
@@ -177,6 +177,24 @@
return ret;
}
+fs::path
+localized(file_path const & fp)
+{
+ return localized_impl(fp());
+}
+
+fs::path
+localized(local_path const & lp)
+{
+ return localized_impl(lp());
+}
+
+static fs::path
+localized(utf8 const & utf)
+{
+ return localized_impl(utf());
+}
+
string
absolutify(string const & path)
{
@@ -240,27 +258,27 @@
bool
directory_exists(local_path const & p)
{
- return fs::exists(localized(p())) &&
- fs::is_directory(localized(p()));
+ return fs::exists(localized(p)) &&
+ fs::is_directory(localized(p));
}
bool
directory_exists(file_path const & p)
{
- return fs::exists(localized(p())) &&
- fs::is_directory(localized(p()));
+ return fs::exists(localized(p)) &&
+ fs::is_directory(localized(p));
}
bool
file_exists(file_path const & p)
{
- return fs::exists(localized(p()));
+ return fs::exists(localized(p));
}
bool
file_exists(local_path const & p)
{
- return fs::exists(localized(p()));
+ return fs::exists(localized(p));
}
void
@@ -268,7 +286,7 @@
{
N(file_exists(p),
F("file to delete '%s' does not exist") % p);
- fs::remove(localized(p()));
+ fs::remove(localized(p));
}
void
@@ -276,7 +294,7 @@
{
N(file_exists(p),
F("file to delete '%s' does not exist") % p);
- fs::remove(localized(p()));
+ fs::remove(localized(p));
}
void
@@ -284,7 +302,7 @@
{
N(directory_exists(p),
F("directory to delete '%s' does not exist") % p);
- fs::remove_all(localized(p()));
+ fs::remove_all(localized(p));
}
void
@@ -292,7 +310,7 @@
{
N(directory_exists(p),
F("directory to delete '%s' does not exist") % p);
- fs::remove_all(localized(p()));
+ fs::remove_all(localized(p));
}
void
@@ -303,8 +321,8 @@
F("rename source file '%s' does not exist") % old_path);
N(! file_exists(new_path),
F("rename target file '%s' already exists") % new_path);
- fs::rename(localized(old_path()),
- localized(new_path()));
+ fs::rename(localized(old_path),
+ localized(new_path));
}
void
@@ -315,8 +333,8 @@
F("rename source file '%s' does not exist") % old_path);
N(! file_exists(new_path),
F("rename target file '%s' already exists") % new_path);
- fs::rename(localized(old_path()),
- localized(new_path()));
+ fs::rename(localized(old_path),
+ localized(new_path));
}
void
@@ -327,8 +345,8 @@
F("rename source dir '%s' does not exist") % old_path);
N(!directory_exists(new_path),
F("rename target dir '%s' already exists") % new_path);
- fs::rename(localized(old_path()),
- localized(new_path()));
+ fs::rename(localized(old_path),
+ localized(new_path));
}
void
@@ -339,26 +357,26 @@
F("rename source dir '%s' does not exist") % old_path);
N(!directory_exists(new_path),
F("rename target dir '%s' already exists") % new_path);
- fs::rename(localized(old_path()),
- localized(new_path()));
+ fs::rename(localized(old_path),
+ localized(new_path));
}
void
mkdir_p(local_path const & p)
{
- fs::create_directories(localized(p()));
+ fs::create_directories(localized(p));
}
void
mkdir_p(file_path const & p)
{
- fs::create_directories(localized(p()));
+ fs::create_directories(localized(p));
}
void
make_dir_for(file_path const & p)
{
- fs::path tmp = localized(p());
+ fs::path tmp = localized(p);
if (tmp.has_branch_path())
{
fs::create_directories(tmp.branch_path());
@@ -399,13 +417,13 @@
void
read_data(local_path const & path, data & dat)
{
- read_data_impl(localized(path()), dat);
+ read_data_impl(localized(path), dat);
}
void
read_data(file_path const & path, data & dat)
{
- read_data_impl(localized(path()), dat);
+ read_data_impl(localized(path), dat);
}
void
@@ -413,7 +431,7 @@
base64< gzip > & dat)
{
data data_plain;
- read_data_impl(localized(path()), data_plain);
+ read_data_impl(localized(path), data_plain);
gzip data_compressed;
base64< gzip > data_encoded;
encode_gzip(data_plain, data_compressed);
@@ -477,9 +495,10 @@
if (path() == "-")
read_data_stdin(dat);
else
- read_data_impl(localized(path()), dat);
+ read_data_impl(localized(path), dat);
}
+
// FIXME: this is probably not enough brains to actually manage "atomic
// filesystem writes". at some point you have to draw the line with even
// trying, and I'm not sure it's really a strict requirement of this tool,
@@ -523,13 +542,13 @@
void
write_data(local_path const & path, data const & dat)
{
- write_data_impl(localized(path()), dat);
+ write_data_impl(localized(path), dat);
}
void
write_data(file_path const & path, data const & dat)
{
- write_data_impl(localized(path()), dat);
+ write_data_impl(localized(path), dat);
}
void
@@ -580,7 +599,7 @@
data data_decompressed;
decode_base64(dat, data_decoded);
decode_gzip(data_decoded, data_decompressed);
- write_data_impl(localized(path()), data_decompressed);
+ write_data_impl(localized(path), data_decompressed);
}
void
@@ -641,14 +660,15 @@
tree_walker & walker,
bool require_existing_path)
{
- if (fs::exists(localized(path())))
+ if (fs::exists(localized(path)))
{
- if (! fs::is_directory(localized(path())))
+ if (! fs::is_directory(localized(path)))
walker.visit_file(path);
else
{
- fs::path root(localized(fs::current_path().string()));
- fs::path rel(localized(path()));
+ // the current path does not need localization
+ fs::path root(fs::current_path());
+ fs::path rel(localized(path));
walk_tree_recursive(root / rel, rel, walker);
}
}
--- file_io.hh
+++ file_io.hh
@@ -45,6 +45,8 @@
fs::path mkpath(std::string const & s);
std::string get_homedir();
+fs::path localized(file_path const & fp);
+fs::path localized(local_path const & fp);
std::string absolutify(std::string const & path);
std::string tilde_expand(std::string const & path);
--- manifest.cc
+++ manifest.cc
@@ -94,7 +94,7 @@
}
}
// ...ah, well, no good fingerprint, just check directly.
- N(fs::exists(mkpath((*i)())),
+ N(file_exists(*i),
F("file disappeared but exists in new manifest: %s") % (*i)());
hexenc ident;
calculate_ident(*i, ident, app.lua);
--- tests/t_i18n_file.at
+++ tests/t_i18n_file.at
@@ -16,27 +16,33 @@
FUNNY_FILENAME="address@hidden:"
fi
-AT_CHECK(mkdir tmp)
-AT_CHECK(touch "tmp/file name with spaces", [], [ignore], [ignore])
-AT_CHECK(touch "tmp/$FUNNY_FILENAME", [], [ignore], [ignore])
-AT_CHECK(touch "tmp/$EUROPEAN_UTF8", [], [ignore], [ignore])
-AT_CHECK(touch "tmp/$JAPANESE_UTF8", [], [ignore], [ignore])
-AT_CHECK(touch "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore])
-AT_CHECK(touch "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore])
+AT_CHECK(mkdir weird utf8 8859-1 euc)
+AT_CHECK(touch "weird/file name with spaces", [], [ignore], [ignore])
+AT_CHECK(touch "weird/$FUNNY_FILENAME", [], [ignore], [ignore])
+AT_CHECK(touch "utf8/$EUROPEAN_UTF8", [], [ignore], [ignore])
+AT_CHECK(touch "utf8/$JAPANESE_UTF8", [], [ignore], [ignore])
+AT_CHECK(touch "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore])
+AT_CHECK(touch "euc/$JAPANESE_EUC_JP", [], [ignore], [ignore])
-AT_CHECK(MONOTONE add "tmp/file name with spaces", [], [ignore], [ignore])
-AT_CHECK(MONOTONE add "tmp/$FUNNY_FILENAME", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "weird/file name with spaces", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "weird/$FUNNY_FILENAME", [], [ignore], [ignore])
+export LANG=en_US.UTF-8
+export CHARSET=utf-8
+
# add some files with UTF8 names
export LANG=en_US.utf-8
export CHARSET=utf-8
-AT_CHECK(MONOTONE add "tmp/$EUROPEAN_UTF8", [], [ignore], [ignore])
-AT_CHECK(MONOTONE add "tmp/$JAPANESE_UTF8", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "utf8/$EUROPEAN_UTF8", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "utf8/$JAPANESE_UTF8", [], [ignore], [ignore])
AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding european and japanese names in UTF-8', [], [ignore], [ignore])
# check the names showed up in our manifest
+export LANG=en_US.UTF-8
+export CHARSET=utf-8
+
AT_CHECK(MONOTONE cat manifest, [], [stdout])
AT_CHECK(mv stdout manifest)
AT_CHECK(grep funny manifest, [], [ignore], [ignore])
@@ -44,19 +50,43 @@
AT_CHECK(grep $JAPANESE_UTF8 manifest, [], [ignore], [ignore])
AT_CHECK(grep $EUROPEAN_UTF8 manifest, [], [ignore], [ignore])
-# remove the UTF8 files, change locales, and add them again. both should work.
+# okay, now we try in two different locales. monotone is happy to
+# have arbirary utf8 filenames in it, but these locales don't support
+# arbitrary utf8 -- you have to use a utf8 locale if you want to put
+# filenames on your disk in utf8. if we keep all the utf8 files in
+# the tree, then, monotone will attempt to convert them to the current
+# locale, and fail miserably. so get rid of them first.
+AT_CHECK(MONOTONE drop "utf8/$EUROPEAN_UTF8" "utf8/$JAPANESE_UTF8", [], [ignore], [ignore])
+AT_CHECK(MONOTONE --branch=testbranch commit --message 'cleaning up utf8 files', [], [ignore], [ignore])
+
+# now try iso-8859-1
+
export LANG=de_DE.iso-8859-1
export CHARSET=iso-8859-1
-AT_CHECK(MONOTONE drop "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore])
-AT_CHECK(MONOTONE add "tmp/$EUROPEAN_8859_1", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore])
AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding european name in ISO 8859-1', [], [ignore], [ignore])
+# check the names showed up in our manifest
+
+AT_CHECK(MONOTONE cat manifest, [], [stdout])
+AT_CHECK(mv stdout manifest)
+AT_CHECK(grep funny manifest, [], [ignore], [ignore])
+AT_CHECK(grep spaces manifest, [], [ignore], [ignore])
+AT_CHECK(grep "8859-1/$EUROPEAN_UTF8" manifest, [], [ignore], [ignore])
+
+# okay, clean up again
+
+AT_CHECK(MONOTONE drop "8859-1/$EUROPEAN_8859_1", [], [ignore], [ignore])
+AT_CHECK(MONOTONE --branch=testbranch commit --message 'cleaning up 8859-1 files', [], [ignore], [ignore])
+
+
+# now try euc
+
export LANG=ja_JP.euc-jp
export CHARSET=euc-jp
-AT_CHECK(MONOTONE drop "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore])
-AT_CHECK(MONOTONE add "tmp/$JAPANESE_EUC_JP", [], [ignore], [ignore])
+AT_CHECK(MONOTONE add "euc/$JAPANESE_EUC_JP", [], [ignore], [ignore])
AT_CHECK(MONOTONE --branch=testbranch commit --message 'adding japanese name in EUC-JP', [], [ignore], [ignore])
@@ -66,9 +96,6 @@
AT_CHECK(mv stdout manifest)
AT_CHECK(grep funny manifest, [], [ignore], [ignore])
AT_CHECK(grep spaces manifest, [], [ignore], [ignore])
-AT_CHECK(grep $JAPANESE_UTF8 manifest, [], [ignore], [ignore])
-AT_CHECK(grep $EUROPEAN_UTF8 manifest, [], [ignore], [ignore])
+AT_CHECK(grep "euc/$JAPANESE_UTF8" manifest, [], [ignore], [ignore])
-AT_CHECK(rm -Rf tmp)
-
AT_CLEANUP
--- tests/t_normalized_filenames.at
+++ tests/t_normalized_filenames.at
@@ -0,0 +1,20 @@
+AT_SETUP([normalized filenames])
+MONOTONE_SETUP
+
+AT_DATA(foo, [blah blah
+])
+# The UI used to fix these, while later code did not, so let's check
+# the inner code directly.
+AT_DATA(MT/work, [add_file "."
+])
+AT_CHECK(MONOTONE cat manifest, [3], [ignore], [ignore])
+
+AT_DATA(MT/work, [add_file "./bar"
+])
+AT_DATA(bar, [blah blah
+])
+
+AT_CHECK(MONOTONE cat manifest, [3], [ignore], [ignore])
+AT_CHECK(MONOTONE commit --message=foo --branch=foo, [3], [ignore], [ignore])
+
+AT_CLEANUP
--- testsuite.at
+++ testsuite.at
@@ -576,3 +576,4 @@
m4_include(tests/t_inodeprints_hook.at)
m4_include(tests/t_bad_packets.at)
m4_include(tests/t_multiple_heads_msg.at)
+m4_include(tests/t_normalized_filenames.at)
--- transforms.cc
+++ transforms.cc
@@ -13,6 +13,7 @@
#include
#include
+#include
#include
#include "cryptopp/filters.h"
@@ -265,7 +266,7 @@
ident = tid;
}
-// this might reasonably go in file_io.cc too..
+// this might reasonably go in file_io.cc too...
void
calculate_ident(file_path const & file,
hexenc & ident,
@@ -289,10 +290,16 @@
else
{
// no conversions necessary, use streaming form
+ // still have to localize the filename
+ fs::path localized_file = localized(file);
+ // crypto++'s FileSource will simply treat directories as empty files,
+ // so we'd better check ourselves.
+ I(fs::exists(localized_file) && !fs::is_directory(localized_file));
CryptoPP::SHA hash;
unsigned int const sz = 2 * CryptoPP::SHA::DIGESTSIZE;
char buffer[sz];
- CryptoPP::FileSource f(file().c_str(), true, new CryptoPP::HashFilter
+ CryptoPP::FileSource f(localized_file.native_file_string().c_str(),
+ true, new CryptoPP::HashFilter
(hash, new CryptoPP::HexEncoder
(new CryptoPP::ArraySink(reinterpret_cast(buffer), sz))));
ident = lowercase(string(buffer, sz));
@@ -467,7 +474,9 @@
char * converted = stringprep_convert(src.c_str(),
dst_charset.c_str(),
src_charset.c_str());
- I(converted != NULL);
+ E(converted != NULL,
+ F("failed to convert string from %s to %s: '%s'")
+ % src_charset % dst_charset % src);
dst = string(converted);
free(converted);
}
@@ -700,8 +709,8 @@
// - ] directly following an unescaped [ is escaped.
string glob_to_regexp(const string & glob)
{
- int in_braces = 0; // counter for levels if {}
- bool in_brackets = false; // flags if we're inside a [], which
+ int in_braces = 0; // counter for levels if {}
+ bool in_brackets = false; // flags if we're inside a [], which
// has higher precedence than {}.
// Also, [ is accepted inside [] unescaped.
bool this_was_opening_bracket = false;
@@ -1099,7 +1108,7 @@
BOOST_CHECK(glob_to_regexp("foo[12m,]") == "foo[12m\\,]");
// A full fledged, use all damn features test...
BOOST_CHECK(glob_to_regexp("foo.{bar*,cookie?{haha,hehe[^\\123!,]}}[!]a^b]")
- == "foo\\.(bar.*|cookie.(haha|hehe[^\\123\\!\\,]))[^\\]a\\^b]");
+ == "foo\\.(bar.*|cookie.(haha|hehe[^\\123\\!\\,]))[^\\]a\\^b]");
}
void
--- unix/inodeprint.cc
+++ unix/inodeprint.cc
@@ -9,6 +9,7 @@
#include "platform.hh"
#include "transforms.hh"
+#include "file_io.hh"
namespace
{
@@ -29,7 +30,7 @@
bool inodeprint_file(file_path const & file, hexenc & ip)
{
struct stat st;
- if (stat(file().c_str(), &st) < 0)
+ if (stat(localized(file).native_file_string().c_str(), &st) < 0)
return false;
CryptoPP::SHA hash;
--- vocab.cc
+++ vocab.cc
@@ -247,6 +247,7 @@
"foo/../../escape",
"/rooted",
"foo//nonsense",
+ "MT/foo",
#ifdef _WIN32
"c:\\windows\\rooted",
"c:/windows/rooted",
@@ -280,10 +281,18 @@
BOOST_CHECK_NOT_THROW(file_path p(*c), informative_failure);
}
+static void test_file_path_normalization()
+{
+ BOOST_CHECK(file_path("./foo") == file_path("foo"));
+ BOOST_CHECK(file_path("foo/bar/./baz") == file_path("foo/bar/baz"));
+ BOOST_CHECK(file_path("foo/bar/../baz") == file_path("foo/baz"));
+}
+
void add_vocab_tests(test_suite * suite)
{
I(suite);
suite->add(BOOST_TEST_CASE(&test_file_path_verification));
+ suite->add(BOOST_TEST_CASE(&test_file_path_normalization));
}
#endif // BUILD_UNIT_TESTS
--- win32/inodeprint.cc
+++ win32/inodeprint.cc
@@ -10,6 +10,7 @@
#include "platform.hh"
#include "transforms.hh"
+#include "file_io.hh"
namespace
{
@@ -27,7 +28,7 @@
bool inodeprint_file(file_path const & file, hexenc & ip)
{
struct _stati64 st;
- if (_stati64(file().c_str(), &st) < 0)
+ if (_stati64(localized(file).native_file_string().c_str(), &st) < 0)
return false;
CryptoPP::SHA hash;