bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] tests: tail-2/pid: use a 3-second timeout, not 1


From: Pádraig Brady
Subject: Re: [PATCH] tests: tail-2/pid: use a 3-second timeout, not 1
Date: Fri, 02 Oct 2009 10:08:51 +0100
User-agent: Thunderbird 2.0.0.6 (X11/20071008)

Jim Meyering wrote:
> Pádraig Brady wrote:
>> Jim Meyering wrote:
>>> FYI, I ran make -j9 check on a fast quad-core system earlier
>>> today and saw this failure:
>>>
>>>     FAIL: tail-2/pid (exit: 1)
>>>     ==========================
>>>     ...
>>>     + timeout 1 tail -s.1 -f /dev/null --pid=2147483647
>>>     + test 124 = 124
>>>     + fail=1
>>>
>>> To me, that means it took more than 1 second for the child to start,
>>> and so the parent's 1-second timeout expired, causing the failure.
>> Fair enough. Though considering it's a fast quad-core system,
>> latencies of over 900ms are surprising.
> 
> When you punish the I/O subsystem as much as a few of the tests
> do, it happens more often than I'd like.  (tests/cp/link-heap is one
> of the biggest culprits)
> 
>> Note we seem to be sleeping for this case which isn't helping.
>> I'll look at whether we can check the pid before doing a sleep
>> for the next release.
>>
>> I've nothing outstanding that needs to go into 7.7

Actually taking a closer look this morning shows
a race in tail_forever_inotify().
Hopefully the attached addresses both that and the redundant delay.

cheers,
Pádraig.
>From d0a8f4b88196e8808300ec5b4f4dd424fc0d2073 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <address@hidden>
Date: Thu, 1 Oct 2009 08:36:25 +0100
Subject: [PATCH] tail: avoid a race where we could miss new data with --pid

* src/tail.c (tail_forever, tail_forever_inotify): Close a race in
tail_forever_inotify where new data written after the file check by
a now dead process, but before the pid check is not output.  We use
the POSIX guarantee that read() and write() are serialized wrt each
other even in separate processes, to assume full file consistency
after exit() and so poll for new data _after_ the writer has exited.
This also allows us to not redundantly _wait_ for new data if the
process is dead.
* tests/tail-2/pid: Remove the now partially invalid sub second sleep
check as we now don't unconditionally wait, and replace it with a check
for the redundant sleep.  Also clarify some of the existing comments.
* NEWS: Mention the fix.
---
 NEWS             |    6 ++++++
 gnulib           |    2 +-
 src/tail.c       |   46 ++++++++++++++++++++++++----------------------
 tests/tail-2/pid |   15 ++++++++-------
 4 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/NEWS b/NEWS
index e7e4ddc..af5222c 100644
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,12 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   from a failed stat/lstat.  For example ls -Lis now prints "?", not "0",
   for the inode number and allocated size of a dereferenced dangling symlink.
 
+  tail --follow --pid now avoids a race condition where data written by the
+  specified pid right before exiting might not have been output by tail.
+  Also we now don't delay if the specified pid is not present.
+  [The race was introduced in coreutils-7.5,
+   and the redundant delay was present since textutils-1.22o]
+
 ** Portability
 
   On Solaris 9, many commands would mistakenly treat file/ the same as
diff --git a/gnulib b/gnulib
index e21985a..772a85a 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit e21985ad14508137d75b0dccf064adfc4e5888c6
+Subproject commit 772a85a912a2bd5a25406d9f24a587269ea3b709
diff --git a/src/tail.c b/src/tail.c
index f9f8c39..f3fe6a3 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -1135,9 +1135,6 @@ tail_forever (struct File_spec *f, size_t n_files, double 
sleep_interval)
           if (writer_is_dead)
             break;
 
-          if (xnanosleep (sleep_interval))
-            error (EXIT_FAILURE, errno, _("cannot read realtime clock"));
-
           /* Once the writer is dead, read the files once more to
              avoid a race condition.  */
           writer_is_dead = (pid != 0
@@ -1146,6 +1143,10 @@ tail_forever (struct File_spec *f, size_t n_files, 
double sleep_interval)
                                signal to the writer, so kill fails and sets
                                errno to EPERM.  */
                             && errno != EPERM);
+
+          if (!writer_is_dead && xnanosleep (sleep_interval))
+            error (EXIT_FAILURE, errno, _("cannot read realtime clock"));
+
         }
     }
 }
@@ -1179,6 +1180,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t 
n_files,
   Hash_table *wd_table;
 
   bool found_watchable = false;
+  bool writer_is_dead = false;
   int prev_wd;
   size_t evlen = 0;
   char *evbuf;
@@ -1266,30 +1268,30 @@ tail_forever_inotify (int wd, struct File_spec *f, 
size_t n_files,
          indefinetely.  */
       if (pid)
         {
-          fd_set rfd;
-          struct timeval select_timeout;
-          int n_descriptors;
-
-          FD_ZERO (&rfd);
-          FD_SET (wd, &rfd);
+          if (writer_is_dead)
+            exit (EXIT_SUCCESS);
 
-          select_timeout.tv_sec = (time_t) sleep_interval;
-          select_timeout.tv_usec = 1000000 * (sleep_interval
-                                              - select_timeout.tv_sec);
+          writer_is_dead = (kill (pid, 0) != 0 && errno != EPERM);
 
-          n_descriptors = select (wd + 1, &rfd, NULL, NULL, &select_timeout);
+          struct timeval delay; /* how long to wait for file changes.  */
+          if (writer_is_dead)
+            delay.tv_sec = delay.tv_usec = 0;
+          else
+            {
+              delay.tv_sec = (time_t) sleep_interval;
+              delay.tv_usec = 1000000 * (sleep_interval - delay.tv_sec);
+            }
 
-          if (n_descriptors == -1)
-            error (EXIT_FAILURE, errno, _("error monitoring inotify event"));
+           fd_set rfd;
+           FD_ZERO (&rfd);
+           FD_SET (wd, &rfd);
 
-          if (n_descriptors == 0)
-            {
-              /* See if the process we are monitoring is still alive.  */
-              if (kill (pid, 0) != 0 && errno != EPERM)
-                exit (EXIT_SUCCESS);
+           int file_change = select (wd + 1, &rfd, NULL, NULL, &delay);
 
-              continue;
-            }
+           if (file_change == 0)
+             continue;
+           else if (file_change == -1)
+             error (EXIT_FAILURE, errno, _("error monitoring inotify event"));
         }
 
       if (len <= evbuf_off)
diff --git a/tests/tail-2/pid b/tests/tail-2/pid
index 90f1684..760e289 100755
--- a/tests/tail-2/pid
+++ b/tests/tail-2/pid
@@ -29,26 +29,27 @@ touch here || framework_failure
 fail=0
 
 for inotify in ---disable-inotify ''; do
-  # Use tail itself to create a background process to monitor.
+  # Use tail itself to create a background process to monitor,
+  # which will auto exit when "here" is removed.
   tail -f $inotify here &
   bg_pid=$!
 
   # Ensure that tail --pid=PID does not exit when PID is alive.
-  timeout 1 tail -s.1 -f $inotify here --pid=$bg_pid
+  timeout 1 tail -f -s.1 --pid=$bg_pid $inotify here
   test $? = 124 || fail=1
 
   # Cleanup background process
   kill $bg_pid
 
-  # Ensure that tail --pid=PID exits successfully when PID is dead.
+  # Ensure that tail --pid=PID exits with success status when PID is dead.
   # Use an unlikely-to-be-live PID
-  timeout 3 tail -s.1 --pid=$PID_T_MAX -f $inotify /dev/null
+  timeout 3 tail -f -s.1 --pid=$PID_T_MAX $inotify /dev/null
   ret=$?
-  test $ret = 124 && skip_test_ "pid $PID_T_MAX present"
+  test $ret = 124 && skip_test_ "pid $PID_T_MAX present or tail too slow"
   test $ret = 0 || fail=1
 
-  # Ensure fractional sleep parameter is honored with --pid
-  timeout 3 tail -s.1 -f $inotify /dev/null --pid=$PID_T_MAX
+  # Ensure tail doesn't wait for data when PID is dead
+  timeout 3 tail -f -s10 --pid=$PID_T_MAX $inotify /dev/null
   test $? = 124 && fail=1
 done
 
-- 
1.6.2.5


reply via email to

[Prev in Thread] Current Thread [Next in Thread]