[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [bug-diffutils] Bug#704182: diffutils: Diff -r will confusion between asian characters in filenames, when locale are non asian - UTF-8. (fwd)
From: Paul Eggert
Subject: Re: [bug-diffutils] Bug#704182: diffutils: Diff -r will confusion between asian characters in filenames, when locale are non asian - UTF-8. (fwd)
Date: Wed, 03 Apr 2013 08:26:34 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130308 Thunderbird/17.0.4
Thanks for reminding me about that problem. I've pushed the following patches.
The first two are merely tuning and reorganization; the third one does the
real work.
>From 37bffc430560df85029b2cacda65893542f0d455 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 07:48:22 -0700
Subject: [PATCH 1/3] diff: tune compare_names_for_qsort
* src/dir.c (compare_collated): New function.
(compare_names): Use it.
(compare_names_for_qsort): Use it. This is a bit more efficient
as it can avoid a double invocation of file_name_cmp when
file_name_cmp returns zero.
---
src/dir.c | 49 +++++++++++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 18 deletions(-)
diff --git a/src/dir.c b/src/dir.c
index 7f647b0..fc42f62 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -140,28 +140,34 @@ dir_read (struct file_data const *dir, struct dirdata *dirdata)
return true;
}
-/* Compare file names, returning a value compatible with strcmp. */
+/* Compare strings in a locale-specific way, returning a value
+ compatible with strcmp. */
static int
-compare_names (char const *name1, char const *name2)
+compare_collated (char const *name1, char const *name2)
{
- if (locale_specific_sorting)
+ int r;
+ errno = 0;
+ if (ignore_file_name_case)
+ r = strcasecoll (name1, name2);
+ else
+ r = strcoll (name1, name2);
+ if (errno)
{
- int r;
- errno = 0;
- if (ignore_file_name_case)
- r = strcasecoll (name1, name2);
- else
- r = strcoll (name1, name2);
- if (errno)
- {
- error (0, errno, _("cannot compare file names '%s' and '%s'"),
- name1, name2);
- longjmp (failed_locale_specific_sorting, 1);
- }
- return r;
+ error (0, errno, _("cannot compare file names '%s' and '%s'"),
+ name1, name2);
+ longjmp (failed_locale_specific_sorting, 1);
}
+ return r;
+}
+
+/* Compare file names, returning a value compatible with strcmp. */
+static int
+compare_names (char const *name1, char const *name2)
+{
+ if (locale_specific_sorting)
+ return compare_collated (name1, name2);
return file_name_cmp (name1, name2);
}
@@ -173,8 +179,15 @@ compare_names_for_qsort (void const *file1, void const *file2)
{
char const *const *f1 = file1;
char const *const *f2 = file2;
- int diff = compare_names (*f1, *f2);
- return diff ? diff : file_name_cmp (*f1, *f2);
+ char const *name1 = *f1;
+ char const *name2 = *f2;
+ if (locale_specific_sorting)
+ {
+ int diff = compare_collated (name1, name2);
+ if (diff)
+ return diff;
+ }
+ return file_name_cmp (name1, name2);
}
/* Compare the contents of two directories named in CMP.
--
1.7.11.7
>From 73482f40100760b276d383ed0a588ce13a3d52b4 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 07:51:33 -0700
Subject: [PATCH 2/3] diff: remove unnecessary decl
* src/dir.c (compare_names_for_qsort): Remove declaration.
Not needed now that we assume C89.
---
src/dir.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/dir.c b/src/dir.c
index fc42f62..21b1935 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -45,7 +45,6 @@ static bool locale_specific_sorting;
static jmp_buf failed_locale_specific_sorting;
static bool dir_loop (struct comparison const *, int);
-static int compare_names_for_qsort (void const *, void const *);
/* Read a directory and get its vector of names. */
--
1.7.11.7
>From e82f540d1134ba3d30434024e6fc9aea8ec71cf1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 3 Apr 2013 08:20:31 -0700
Subject: [PATCH 3/3] diff: fix bug with Asian file names
Problem reported by Errembault Philippe in:
http://lists.gnu.org/archive/html/bug-diffutils/2013-03/msg00012.html
* NEWS: Document this.
* src/dir.c (compare_names): Fall back on file_name_cmp if
compare_collated returns 0, unless ignoring file name case.
(diff_dirs): Don't bother with the O(N**2) stuff unless ignoring
file name case.
* tests/Makefile.am (TESTS): Add strcoll-0-names.
* tests/strcoll-0-names: New file.
---
NEWS | 7 +++++++
src/dir.c | 8 ++++++--
tests/Makefile.am | 1 +
tests/strcoll-0-names | 25 +++++++++++++++++++++++++
4 files changed, 39 insertions(+), 2 deletions(-)
create mode 100755 tests/strcoll-0-names
diff --git a/NEWS b/NEWS
index ac7a75e..79517f2 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,13 @@ GNU diffutils NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** Bug fixes
+
+ Unless the --ignore-file-name-case option is used, diff now
+ considers file names to be equal only if they are byte-for-byte
+ equivalent. This fixes a bug where diff in an English locale might
+ consider two Asian file names to be the same merely because they
+ contain no English characters.
* Noteworthy changes in release 3.3 (2013-03-24) [stable]
diff --git a/src/dir.c b/src/dir.c
index 21b1935..d3b0a2d 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -166,7 +166,11 @@ static int
compare_names (char const *name1, char const *name2)
{
if (locale_specific_sorting)
- return compare_collated (name1, name2);
+ {
+ int diff = compare_collated (name1, name2);
+ if (diff || ignore_file_name_case)
+ return diff;
+ }
return file_name_cmp (name1, name2);
}
@@ -271,7 +275,7 @@ diff_dirs (struct comparison const *cmp,
O(N**2), where N is the number of names in a directory
that compare_names says are all equal, but in practice N
is so small it's not worth tuning. */
- if (nameorder == 0)
+ if (nameorder == 0 && ignore_file_name_case)
{
int raw_order = file_name_cmp (*names[0], *names[1]);
if (raw_order != 0)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5cbcfb4..dd2d514 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -12,6 +12,7 @@ TESTS = \
no-dereference \
no-newline-at-eof \
stdin \
+ strcoll-0-names \
filename-quoting
EXTRA_DIST = \
diff --git a/tests/strcoll-0-names b/tests/strcoll-0-names
new file mode 100755
index 0000000..33c4a3c
--- /dev/null
+++ b/tests/strcoll-0-names
@@ -0,0 +1,25 @@
+#!/bin/sh
+# Check that diff responds well with two different file names
+# that compare equal with strcoll. See:
+# http://lists.gnu.org/archive/html/bug-diffutils/2013-03/msg00012.html
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+# These two names compare equal in the en_US.UTF-8 locale
+# in current (2013) versions of glibc.
+# On systems where the names do not compare equal,
+# this diff test should still do the right thing.
+LC_ALL=en_US.UTF-8
+export LC_ALL
+name1='エンドカード1'
+name2='ブックレット1'
+
+mkdir d1 d2 || fail=1
+echo x >d1/"$name1" || fail=1
+echo x >d2/"$name2" || fail=1
+
+# This should report a difference, but on the affected systems
+# diffutils 3.3 does not.
+diff d1 d2 && fail=1
+
+Exit $fail
--
1.7.11.7