bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] md5: accepts a new --threads option


From: Pádraig Brady
Subject: Re: [PATCH] md5: accepts a new --threads option
Date: Wed, 21 Oct 2009 13:04:23 +0100
User-agent: Thunderbird 2.0.0.6 (X11/20071008)

Jim Meyering wrote:
> Pádraig Brady wrote:
>> Jim Meyering wrote:
>>> Pádraig Brady wrote:
>>>> Pádraig Brady wrote:
>>>>> You wouldn't want multiple threads/processes fighting over
>>>>> the disk head so you would do something like:
>>>>>
>>>>>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>>>>>
>>>>> Note if we're piping/redirecting the output of the above
>>>>> then we must be careful to line buffer the output from md5sum
>>>>> so that it's not interspersed. Hmm I wonder should
>>>>> we linebuffer the output from *sum by default.
>>>> In the attached patch, I've changed the default buffering
>>>> to line buffered to address the above issue. For standard
>>>> size files there is a 2% performance drop.
>>> Good catch.
>>> It sounds like this fixes a real (albeit obscure) bug, so this
>>> might deserve a NEWS item, though I admit it is borderline.
>> Well it would easily be hit when one tries to parallelize the processes.
>> So I'll add a NEWS item and a test along the lines of:
> 
> Thanks!
> 
>> (mkdir t && cd t && seq 100 | xargs touch)
>> (find t t t t -type f | xargs -n100 -P4 md5sum) \
>> | sed -n '/[0-9a-f]\{32\}  /!p' | grep . >/dev/null && fail=1
> 
> Odd... that doesn't fail on any of the systems where I tried it:
> rawhide, fedora 11, debian unstable.

Yep I noticed it triggered on my linux-2.6.22 glibc-2.6-1 box
but not on my F11 laptop. I've setup the test in the attached
to output more than 16KiB per process which triggers on F11 at least.

cheers,
Pádraig.
>From 83085a61c1251f95bd324cd50f57c5154645a1b3 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <address@hidden>
Date: Tue, 20 Oct 2009 19:19:58 +0100
Subject: [PATCH] md5sum, sha*sum, sum: line-buffer the printed checksums

* src/md5sum.c (main): Set stdout to line buffered mode
to ensure parallel running instances don't intersperse
their output.  This adds 5% to the run time in the worst case
of many zero length files, or 2% with standard file sizes.
* src/sum.c (main): Likewise.
* tests/misc/md5sum-parallel: New test for atomic output.
* tests/Makefile.am: Reference it.
* NEWS: Mention the fix
---
 NEWS                       |    5 +++++
 src/md5sum.c               |    6 ++++--
 src/sum.c                  |    4 ++++
 tests/Makefile.am          |    1 +
 tests/misc/md5sum-parallel |   37 +++++++++++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100755 tests/misc/md5sum-parallel

diff --git a/NEWS b/NEWS
index 1bf87cb..29bedac 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,11 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   btrfs, cgroupfs, cramfs-wend, debugfs, futexfs, hfs, inotifyfs, minux3,
   nilfs, securityfs, selinux, xenfs
 
+  md5sum now prints checksums atomically so that concurrent
+  processes will not intersperse their output.
+  This also affected sum, sha1sum, sha224sum, sha384sum and sha512sum.
+  [the bug dates back to the initial implementation]
+
 ** New features
 
   md5sum --check now also accepts openssl-style checksums.
diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..b7db03e 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -513,7 +513,6 @@ digest_check (const char *checkfile_name)
               if (!status_only)
                 {
                   printf (_("%s: FAILED open or read\n"), filename);
-                  fflush (stdout);
                 }
             }
           else
@@ -539,7 +538,6 @@ digest_check (const char *checkfile_name)
                     printf ("%s: %s\n", filename, _("FAILED"));
                   else if (!quiet)
                     printf ("%s: %s\n", filename, _("OK"));
-                  fflush (stdout);
                 }
             }
         }
@@ -619,6 +617,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
     switch (opt)
       {
diff --git a/src/sum.c b/src/sum.c
index 91d7f34..f0e0cc0 100644
--- a/src/sum.c
+++ b/src/sum.c
@@ -233,6 +233,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   have_read_stdin = false;
 
   while ((optc = getopt_long (argc, argv, "rs", longopts, NULL)) != -1)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 751db1c..4d8415f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -178,6 +178,7 @@ TESTS =                                             \
   misc/id-groups                               \
   misc/md5sum                                  \
   misc/md5sum-newline                          \
+  misc/md5sum-parallel                         \
   misc/mknod                                   \
   misc/nice                                    \
   misc/nl                                      \
diff --git a/tests/misc/md5sum-parallel b/tests/misc/md5sum-parallel
new file mode 100755
index 0000000..763ee79
--- /dev/null
+++ b/tests/misc/md5sum-parallel
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Ensure that md5sum prints each checksum atomically
+# so that concurrent md5sums don't intersperse their output
+
+# Copyright (C) 2009 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-lib.sh
+
+if test "$VERBOSE" = yes; then
+  set -x
+  md5sum --version
+fi
+
+fail=0
+
+(mkdir tmp && cd tmp && seq 500 | xargs touch)
+
+# This will output at least 16KiB per process
+# which is enough to trigger on Fedora 11 at least
+(find tmp tmp -type f | xargs -n500 -P2 md5sum) |
+sed -n '/[0-9a-f]\{32\}  /!p' |
+grep . > /dev/null && fail=1
+
+Exit $fail
-- 
1.6.2.5


reply via email to

[Prev in Thread] Current Thread [Next in Thread]