bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: dd interface to posix_fadvise


From: Pádraig Brady
Subject: Re: dd interface to posix_fadvise
Date: Thu, 08 Mar 2007 10:00:37 +0000
User-agent: Thunderbird 1.5.0.8 (X11/20061116)

Paul Eggert wrote:
> Pádraig Brady <address@hidden> writes:
> 
>> http://www.die.net/doc/linux/man/man2/posix_fadvise.2.html
> 
> Wow.  That makes it seem like POSIX_FADV_SEQUENTIAL isn't nearly as
> useful as it should be.

Well linux uses POSIX_FADV_SEQUENTIAL to double the readahead currently,
which should help with sequential access performance.
It's fair enough I think (albeit not intuitive) that it doesn't affect caching.

> What I think you're saying is that applications must invoke
> POSIX_FADV_DONTNEED by hand after reading or writing every sequential page.

yep.

> But this is what POSIX_FADV_SEQUENTIAL is designed for, no?
> That is, POSIX_FADV_SEQUENTIAL should be tuned for usage patterns like
> dd's, right?  Why isn't it?

There used to be an O_STREAM flag that did all this.
Pity they removed it in preference for the posix interface.

> (Now I'm starting to understand why almost nobody uses posix_fadvise.  :-)
> 
> 
>>> +  posix_fadvise (STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
>>> +  posix_fadvise (STDOUT_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
>> That's mad. That trival patch works fine here on
>> coreutils-6.2 + glibc-2.3.5-10 + 2.6.16-1.2111_FC4 at least.
> 
> It sounds like we'll have to have a fairly-careful runtime test then,
> with the default being not to use posix_fadvise unless we're sure it
> won't dump core.  Debian stable is not a platform I'd like to abandon
> quite just yet....

I'll test on debian stable also to see what the issue is.
Note I noticed another issue with glibc where it always
returns 0, even if you do posix_fadvise() on a pipe or invalid file 
descriptor...
The errno is set appropriately, but I don't think you can
generally query that if the call returns success?
I'm loath to interact with glibc about this since they
generally ignore or disagree with my suggestions. Maybe 74th time lucky?

> One option I toyed with is giving users direct access to the
> posix_fadvise parameters, so they can set the options themselves,
> and get core dumps on their own.  E.g., 
> 
> dd iadvice=sequential oadvice=sequential

I don't like that personally. I don't think that granularity
will be required in a tool like dd. I'm going with the
more abstract [io]flag=nocache interface for the moment,
a first draft of which is in the attached patch.

cheers,
Pádraig.
--- ../../coreutils-6.2/src/dd.c        2006-08-16 19:53:51.000000000 +0000
+++ dd.c        2007-03-08 08:56:46.000000000 +0000
@@ -68,6 +68,10 @@
 # define fdatasync(fd) (errno = ENOSYS, -1)
 #endif
 
+#ifndef POSIX_FADV_DONTNEED
+# define posix_fadvise(fd, offset, len, advice) (errno = ENOSYS, -1)
+#endif
+
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define output_char(c)                         \
   do                                           \
@@ -120,6 +124,16 @@
     C_FSYNC = 0100000
   };
 
+/* private flags bit masks. */
+enum
+  {
+#ifdef POSIX_FADV_DONTNEED
+    O_NOCACHE = 01
+#else
+    O_NOCACHE = 0
+#endif
+  };
+
 /* Status bit masks.  */
 enum
   {
@@ -163,6 +177,10 @@
 static int input_flags = 0;
 static int output_flags = 0;
 
+/* private flags for the input and output files.  */
+static int input_private_flags = 0;
+static int output_private_flags = 0;
+
 /* Status flags for what is printed to stderr.  */
 static int status_flags = 0;
 
@@ -227,7 +245,7 @@
 static sig_atomic_t volatile info_signal_count;
 
 /* A longest symbol in the struct symbol_values tables below.  */
-#define LONGEST_SYMBOL "fdatasync"
+#define LONGEST_SYMBOL "nocache_private"
 
 /* A symbol and the corresponding integer value.  */
 struct symbol_value
@@ -266,6 +284,7 @@
   {"directory",        O_DIRECTORY},
   {"dsync",    O_DSYNC},
   {"noatime",  O_NOATIME},
+  {"nocache_private",  O_NOCACHE},
   {"noctty",   O_NOCTTY},
   {"nofollow", O_NOFOLLOW},
   {"nolinks",  O_NOLINKS},
@@ -413,6 +432,7 @@
   cbs=BYTES       convert BYTES bytes at a time\n\
   conv=CONVS      convert the file as per the comma separated symbol list\n\
   count=BLOCKS    copy only BLOCKS input blocks\n\
+  flag=FLAGS      set iflag=FLAGS and oflag=FLAGS\n\
   ibs=BYTES       read BYTES bytes at a time\n\
 "), stdout);
       fputs (_("\
@@ -472,6 +492,9 @@
        fputs (_("  nonblock  use non-blocking I/O\n"), stdout);
       if (O_NOATIME)
        fputs (_("  noatime   do not update access time\n"), stdout);
+      if (O_NOCACHE)
+       fputs (_("  nocache   instruct the system not to cache the data\n"),
+              stdout);
       if (O_NOCTTY)
        fputs (_("  noctty    do not assign controlling terminal from file\n"),
               stdout);
@@ -725,6 +748,21 @@
     }
 }
 
+static int
+invalidate_cache(int fd, size_t size)
+{
+  /* Note be careful to invalidate only what we've read
+  so that we don't dump any readahead cache. */
+  off_t offset = lseek(fd, 0, SEEK_CUR);
+  if (offset && offset!=(off_t)-1)
+    {
+      /* TODO: coalesce into at least page_size requests.
+        Currently there are 4 extra syscalls per block. */
+      return posix_fadvise(fd, 0, offset, POSIX_FADV_DONTNEED);
+    }
+  return 0;
+}
+
 /* Read from FD into the buffer BUF of size SIZE, processing any
    signals that arrive before bytes are read.  Return the number of
    bytes read if successful, -1 (setting errno) on failure.  */
@@ -738,7 +776,11 @@
       process_signals ();
       nread = read (fd, buf, size);
       if (! (nread < 0 && errno == EINTR))
-       return nread;
+       {
+         if (input_private_flags & O_NOCACHE)
+           (void) invalidate_cache(fd, nread);
+         return nread;
+       }
     }
 }
 
@@ -774,6 +816,9 @@
        total_written += nwritten;
     }
 
+  if (output_private_flags & O_NOCACHE)
+    (void) invalidate_cache(fd, total_written);
+
   return total_written;
 }
 
@@ -803,7 +848,7 @@
 
 static int
 parse_symbols (char *str, struct symbol_value const *table,
-              char const *error_msgid)
+              char const *error_msgid, const int private)
 {
   int value = 0;
 
@@ -813,6 +858,15 @@
       char *new = strchr (str, ',');
       if (new != NULL)
        *new++ = '\0';
+      char priv_str[32];
+      if (strstr(str,"_private"))
+       {
+          error (0, 0, _(error_msgid), quote (str));
+          usage (EXIT_FAILURE);
+       }
+      else
+       snprintf(priv_str, sizeof(priv_str), "%s_private", str);
+
       for (entry = table; ; entry++)
        {
          if (! entry->symbol[0])
@@ -824,7 +878,16 @@
            {
              if (! entry->value)
                error (EXIT_FAILURE, 0, _(error_msgid), quote (str));
-             value |= entry->value;
+             if (!private)
+               value |= entry->value;
+             break;
+           }
+         if (STREQ (entry->symbol, priv_str))
+           {
+             if (! entry->value)
+               error (EXIT_FAILURE, 0, _(error_msgid), quote (str));
+             if (private)
+               value |= entry->value;
              break;
            }
        }
@@ -891,16 +954,35 @@
        output_file = val;
       else if (STREQ (name, "conv"))
        conversions_mask |= parse_symbols (val, conversions,
-                                          N_("invalid conversion: %s"));
-      else if (STREQ (name, "iflag"))
-       input_flags |= parse_symbols (val, flags,
-                                     N_("invalid input flag: %s"));
-      else if (STREQ (name, "oflag"))
-       output_flags |= parse_symbols (val, flags,
-                                      N_("invalid output flag: %s"));
+                                          N_("invalid conversion: %s"),0);
+      else if (STREQ (name, "flag") ||
+              STREQ (name, "iflag") || STREQ (name, "oflag"))
+       {
+         char* val_copy = xstrdup(val);
+         if (STREQ (name, "flag") || STREQ (name, "iflag"))
+           {
+               input_flags |= parse_symbols (val_copy, flags,
+                                             N_("invalid input flag: %s"),0);
+               input_private_flags |= parse_symbols (val, flags,
+                                             N_("invalid input flag: %s"),1);
+           }
+         else if (STREQ (name, "oflag"))
+           {
+               output_flags |= parse_symbols (val_copy, flags,
+                                              N_("invalid output flag: %s"),0);
+               output_private_flags |= parse_symbols (val, flags,
+                                              N_("invalid output flag: %s"),1);
+           }
+         if (STREQ (name, "flag"))
+           {
+               output_flags |= input_flags;
+               output_private_flags |= input_private_flags;
+           }
+         free(val_copy);
+        }
       else if (STREQ (name, "status"))
        status_flags |= parse_symbols (val, statuses,
-                                      N_("invalid status flag: %s"));
+                                      N_("invalid status flag: %s"),0);
       else
        {
          bool invalid = false;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]