bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

'rm -r dir' performance improvement when removing symlinks, or root


From: Paul Eggert
Subject: 'rm -r dir' performance improvement when removing symlinks, or root
Date: Fri, 23 Mar 2007 10:56:59 -0700
User-agent: Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux)

Currently 'rm -r dir' invokes three system calls for each directory
entry removed, e.g. (on Debian stable):

   lstat64("/proc/self/fd/5/output.0", ...)
   access("tmp/autom4te.cache/output.0", W_OK)
   unlink("/proc/self/fd/5/output.0")

If we are root, the lstat and access calls aren't needed.  Also, if the
directory entry is a symlink we needn't worry about access permissions.

This can be worked around with 'rm -rf' but I kind of like using 'rm
-r' when I'm root, since it's a bit more cautious and prints more
diagnostics when there are trouble.  So I'd like 'rm -r' to be fast
too.

Here's a proposed patch.

(Come to think of it, there may be some issues when accessing files via
NFS, where root doesn't have privileges to remove them.  But I think
the current patch will still be OK -- obviously no files will be
removed since root can't remove them over NFS.)

2007-03-23  Paul Eggert  <address@hidden>

        Avoid the need for euidaccess and/or lstat on every directory entry
        with 'rm -r dir' (without -f), if we are root, or if we are removing
        a directory tree that is full of symbolic links.
        * bootstrap.conf (gnulib_modules): Add write-any-file.
        * src/copy.c: Include write-any-file.h.
        (UNWRITABLE): Remove macro, replacing with....
        (writable_destination): New function, which uses can_write_any_file
        to avoid the need for euidaccess when we are privileged.
        (overwrite_prompt, abandon_move): Use it.
        * src/remove.c: Include write-any-file.h.
        (D_TYPE): New macro.
        (DT_UNKNOWN, DT_DIR, DT_LNK) [!HAVE_STRUCT_DIRENT_D_TYPE]: New macros.
        (write_protected_non_symlink): Don't bother to stat if we can write
        any file.
        (prompt): New arg PDIRENT_TYPE.  All callers changed.
        Use readdir dirent type to avoid the need for 'lstat' on each directory
        entry in cases like 'rm -r dir', if we are root, or if the tree is
        full of symbolic links.
        (DT_IS_KNOWN, DT_MUST_BE): Remove.
        (remove_entry): New arg DIRENT_TYPE_ARG.  All callers changed.

diff --git a/bootstrap.conf b/bootstrap.conf
index 3043321..4ec43fa 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -65,7 +65,8 @@ gnulib_modules="
        strpbrk strtoimax strtoumax strverscmp sys_stat timespec tzset
        unicodeio unistd-safer unlink-busy unlinkdir unlocked-io
        uptime userspec utimecmp utimens vasprintf verify version-etc-fsf
-       wcwidth winsz-ioctl winsz-termios xalloc xgetcwd xgethostname
+       wcwidth winsz-ioctl winsz-termios write-any-file
+       xalloc xgetcwd xgethostname
        xmemcoll xnanosleep xreadlink xreadlink-with-size xstrtod xstrtoimax
        xstrtol xstrtold xstrtoumax yesno
 "
diff --git a/src/copy.c b/src/copy.c
index 4bdb75c..786de2f 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -51,6 +51,7 @@
 #include "stat-time.h"
 #include "utimecmp.h"
 #include "utimens.h"
+#include "write-any-file.h"
 #include "xreadlink.h"
 #include "yesno.h"

@@ -63,11 +64,6 @@
 #define SAME_GROUP(A, B) ((A).st_gid == (B).st_gid)
 #define SAME_OWNER_AND_GROUP(A, B) (SAME_OWNER (A, B) && SAME_GROUP (A, B))

-#define UNWRITABLE(File_name, File_mode)               \
-  ( /* euidaccess is not meaningful for symlinks */    \
-    ! S_ISLNK (File_mode)                              \
-    && euidaccess (File_name, W_OK) != 0)
-
 struct dir_list
 {
   struct dir_list *parent;
@@ -793,10 +789,20 @@ same_file_ok (char const *src_name, struct stat const 
*src_sb,
   return false;
 }

+/* Return true if FILE, with mode MODE, is writable in the sense of 'mv'.
+   Always consider a symbolic link to be writable.  */
+static bool
+writable_destination (char const *file, mode_t mode)
+{
+  return (S_ISLNK (mode)
+         || can_write_any_file ()
+         || euidaccess (file, W_OK) == 0);
+}
+
 static void
 overwrite_prompt (char const *dst_name, struct stat const *dst_sb)
 {
-  if (euidaccess (dst_name, W_OK) != 0)
+  if (! writable_destination (dst_name, dst_sb->st_mode))
     {
       char perms[12];          /* "-rwxrwxrwx " ls-style modes. */
       strmode (dst_sb->st_mode, perms);
@@ -978,7 +984,7 @@ abandon_move (const struct cp_options *x,
           || ((x->interactive == I_ASK_USER
                || (x->interactive == I_UNSPECIFIED
                    && x->stdin_tty
-                   && UNWRITABLE (dst_name, dst_sb->st_mode)))
+                   && ! writable_destination (dst_name, dst_sb->st_mode)))
               && (overwrite_prompt (dst_name, dst_sb), 1)
               && ! yesno ()));
 }
diff --git a/src/remove.c b/src/remove.c
index 59ee9e5..804557d 100644
--- a/src/remove.c
+++ b/src/remove.c
@@ -39,6 +39,7 @@
 #include "remove.h"
 #include "root-dev-ino.h"
 #include "unlinkdir.h"
+#include "write-any-file.h"
 #include "yesno.h"

 /* Avoid shadowing warnings because these are functions declared
@@ -112,6 +113,23 @@ struct AD_ent
   struct dev_ino dev_ino;
 };

+/* D_TYPE(D) is the type of directory entry D if known, DT_UNKNOWN
+   otherwise.  */
+#if HAVE_STRUCT_DIRENT_D_TYPE
+# define D_TYPE(d) ((d)->d_type)
+#else
+# define D_TYPE(d) DT_UNKNOWN
+
+/* Any int values will do here, so long as they're distinct.
+   Undef any existing macros out of the way.  */
+# undef DT_UNKNOWN
+# undef DT_DIR
+# undef DT_LNK
+# define DT_UNKNOWN 0
+# define DT_DIR 1
+# define DT_LNK 2
+#endif
+
 extern char *program_name;

 struct dirstack_state
@@ -715,6 +733,8 @@ write_protected_non_symlink (int fd_cwd,
                             Dirstack_state const *ds,
                             struct stat *buf)
 {
+  if (can_write_any_file ())
+    return 0;
   if (cache_fstatat (fd_cwd, file, buf, AT_SYMLINK_NOFOLLOW) != 0)
     return errno;
   if (S_ISLNK (buf->st_mode))
@@ -791,6 +811,9 @@ write_protected_non_symlink (int fd_cwd,
    return RM_USER_DECLINED.  If not ignoring missing files and we
    cannot lstat FILENAME, then return RM_ERROR.

+   *PDIRENT_TYPE is the type of the directory entry; update it to DT_DIR
+   or DT_LNK as needed.  *SBUF is the file's status.
+
    Depending on MODE, ask whether to `descend into' or to `remove' the
    directory FILENAME.  MODE is ignored when FILENAME is not a directory.
    Set *IS_EMPTY to T_YES if FILENAME is an empty directory, and it is
@@ -798,11 +821,12 @@ write_protected_non_symlink (int fd_cwd,
    Don't even try to set *IS_EMPTY when MODE == PA_REMOVE_DIR.  */
 static enum RM_status
 prompt (int fd_cwd, Dirstack_state const *ds, char const *filename,
-       struct stat *sbuf,
+       int *pdirent_type, struct stat *sbuf,
        struct rm_options const *x, enum Prompt_action mode,
        Ternary *is_empty)
 {
   int write_protected = 0;
+  int dirent_type = *pdirent_type;

   *is_empty = T_UNKNOWN;

@@ -810,27 +834,44 @@ prompt (int fd_cwd, Dirstack_state const *ds, char const 
*filename,
     return RM_OK;

   if (!x->ignore_missing_files
-      & ((x->interactive == RMI_ALWAYS) | x->stdin_tty))
+      && ((x->interactive == RMI_ALWAYS) || x->stdin_tty)
+      && dirent_type != DT_LNK)
     write_protected = write_protected_non_symlink (fd_cwd, filename, ds, sbuf);

   if (write_protected || x->interactive == RMI_ALWAYS)
     {
-      if (write_protected <= 0
-         && cache_fstatat (fd_cwd, filename, sbuf, AT_SYMLINK_NOFOLLOW) != 0)
+      if (write_protected <= 0 && dirent_type == DT_UNKNOWN)
        {
-         /* This happens, e.g., with `rm '''.  */
-         write_protected = errno;
+         if (cache_fstatat (fd_cwd, filename, sbuf, AT_SYMLINK_NOFOLLOW) == 0)
+           {
+             if (S_ISLNK (sbuf->st_mode))
+               dirent_type = DT_LNK;
+             else if (S_ISDIR (sbuf->st_mode))
+               dirent_type = DT_DIR;
+             /* Otherwise it doesn't matter, so leave it DT_UNKNOWN.  */
+             *pdirent_type = dirent_type;
+           }
+         else
+           {
+             /* This happens, e.g., with `rm '''.  */
+             write_protected = errno;
+           }
        }

       if (write_protected <= 0)
-       {
-         /* Using permissions doesn't make sense for symlinks.  */
-         if (S_ISLNK (sbuf->st_mode) && x->interactive != RMI_ALWAYS)
-           return RM_OK;
+       switch (dirent_type)
+         {
+         case DT_LNK:
+           /* Using permissions doesn't make sense for symlinks.  */
+           if (x->interactive != RMI_ALWAYS)
+             return RM_OK;
+           break;

-         if (S_ISDIR (sbuf->st_mode) && !x->recursive)
-           write_protected = EISDIR;
-       }
+         case DT_DIR:
+           if (!x->recursive)
+             write_protected = EISDIR;
+           break;
+         }

       char const *quoted_name = quote (full_filename (filename));

@@ -844,8 +885,7 @@ prompt (int fd_cwd, Dirstack_state const *ds, char const 
*filename,
       /* FIXME: use a variant of error (instead of fprintf) that doesn't
         append a newline.  Then we won't have to declare program_name in
         this file.  */
-      if (S_ISDIR (sbuf->st_mode)
-         && x->recursive
+      if (dirent_type == DT_DIR
          && mode == PA_DESCEND_INTO_DIR
          && ((*is_empty = (is_empty_dir (fd_cwd, filename) ? T_YES : T_NO))
              == T_NO))
@@ -856,6 +896,12 @@ prompt (int fd_cwd, Dirstack_state const *ds, char const 
*filename,
                 program_name, quoted_name);
       else
        {
+         if (cache_fstatat (fd_cwd, filename, sbuf, AT_SYMLINK_NOFOLLOW) != 0)
+           {
+             error (0, errno, _("cannot remove %s"), quoted_name);
+             return RM_ERROR;
+           }
+
          /* TRANSLATORS: You may find it more convenient to translate
             the equivalent of _("%s: remove %s (write-protected) %s? ").
             It should avoid grammatical problems with the output
@@ -888,19 +934,6 @@ is_dir_lstat (char const *filename, struct stat *st)
   return is_dir;
 }

-#if HAVE_STRUCT_DIRENT_D_TYPE
-
-/* True if the type of the directory entry D is known.  */
-# define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN)
-
-/* True if the type of the directory entry D must be T.  */
-# define DT_MUST_BE(d, t) ((d)->d_type == (t))
-
-#else
-# define DT_IS_KNOWN(d) false
-# define DT_MUST_BE(d, t) false
-#endif
-
 #define DO_UNLINK(Fd_cwd, Filename, X)                                 \
   do                                                                   \
     {                                                                  \
@@ -970,14 +1003,14 @@ ignorable_missing (struct rm_options const *x, int 
errnum)

 static enum RM_status
 remove_entry (int fd_cwd, Dirstack_state const *ds, char const *filename,
-             struct stat *st,
-             struct rm_options const *x, struct dirent const *dp)
+             int dirent_type_arg, struct stat *st,
+             struct rm_options const *x)
 {
   Ternary is_empty_directory;
-  enum RM_status s = prompt (fd_cwd, ds, filename, st, x, PA_DESCEND_INTO_DIR,
+  enum RM_status s = prompt (fd_cwd, ds, filename, &dirent_type_arg, st, x,
+                            PA_DESCEND_INTO_DIR,
                             &is_empty_directory);
-  bool known_to_be_dir = (cache_stat_ok (st) && S_ISDIR (st->st_mode));
-
+  int dirent_type = dirent_type_arg;
   if (s != RM_OK)
     return s;

@@ -997,7 +1030,7 @@ remove_entry (int fd_cwd, Dirstack_state const *ds, char 
const *filename,

   if (cannot_unlink_dir ())
     {
-      if (known_to_be_dir && ! x->recursive)
+      if (dirent_type == DT_DIR && ! x->recursive)
        {
          error (0, EISDIR, _("cannot remove %s"),
                 quote (full_filename (filename)));
@@ -1013,10 +1046,11 @@ remove_entry (int fd_cwd, Dirstack_state const *ds, 
char const *filename,

       /* If we happen to know that FILENAME is a directory, return now
         and let the caller remove it -- this saves the overhead of a failed
-        unlink call.  If FILENAME is a command-line argument, then dp is NULL,
-        so we'll first try to unlink it.  Using unlink here is ok, because it
-        cannot remove a directory.  */
-      if ((dp && DT_MUST_BE (dp, DT_DIR)) || known_to_be_dir)
+        unlink call.  If FILENAME is a command-line argument, then
+        DIRENT_TYPE is DT_UNKNOWN so we'll first try to unlink it.
+        Using unlink here is ok, because it cannot remove a
+        directory.  */
+      if (dirent_type == DT_DIR)
        return RM_NONEMPTY_DIR;

       DO_UNLINK (fd_cwd, filename, x);
@@ -1046,31 +1080,24 @@ remove_entry (int fd_cwd, Dirstack_state const *ds, 
char const *filename,
       /* If we don't already know whether FILENAME is a directory,
         find out now.  Then, if it's a non-directory, we can use
         unlink on it.  */
-      bool is_dir;

-      if (cache_statted (st))
-       is_dir = known_to_be_dir;
-      else
+      if (dirent_type == DT_UNKNOWN)
        {
-         if (dp && DT_IS_KNOWN (dp))
-           is_dir = DT_MUST_BE (dp, DT_DIR);
-         else
+         if (fstatat (fd_cwd, filename, st, AT_SYMLINK_NOFOLLOW))
            {
-             if (fstatat (fd_cwd, filename, st, AT_SYMLINK_NOFOLLOW))
-               {
-                 if (ignorable_missing (x, errno))
-                   return RM_OK;
+             if (ignorable_missing (x, errno))
+               return RM_OK;

-                 error (0, errno, _("cannot remove %s"),
-                        quote (full_filename (filename)));
-                 return RM_ERROR;
-               }
-
-             is_dir = !! S_ISDIR (st->st_mode);
+             error (0, errno, _("cannot remove %s"),
+                    quote (full_filename (filename)));
+             return RM_ERROR;
            }
+
+         if (S_ISDIR (st->st_mode))
+           dirent_type = DT_DIR;
        }

-      if (! is_dir)
+      if (dirent_type == DT_DIR)
        {
          /* At this point, barring race conditions, FILENAME is known
             to be a non-directory, so it's ok to try to unlink it.  */
@@ -1208,7 +1235,8 @@ remove_cwd_entries (DIR **dirp,
         Systems without the d_type member will have to endure
         the performance hit of first calling lstat F. */
       cache_stat_init (subdir_sb);
-      tmp_status = remove_entry (dirfd (*dirp), ds, f, subdir_sb, x, dp);
+      tmp_status = remove_entry (dirfd (*dirp), ds, f,
+                                D_TYPE (dp), subdir_sb, x);
       switch (tmp_status)
        {
        case RM_OK:
@@ -1404,13 +1432,10 @@ remove_dir (int fd_cwd, Dirstack_state *ds, char const 
*dir,
        /* Try to remove EMPTY_DIR only if remove_cwd_entries succeeded.  */
        if (tmp_status == RM_OK)
          {
-           /* This does a little more work than necessary when it actually
-              prompts the user.  E.g., we already know that D is a directory
-              and that it's almost certainly empty, yet we lstat it.
-              But that's no big deal since we're interactive.  */
            struct stat empty_st;
            Ternary is_empty;
-           enum RM_status s = prompt (fd, ds, empty_dir,
+           int dirent_type = DT_DIR;
+           enum RM_status s = prompt (fd, ds, empty_dir, &dirent_type,
                                       cache_stat_init (&empty_st), x,
                                       PA_REMOVE_DIR, &is_empty);

@@ -1509,7 +1534,8 @@ rm_1 (Dirstack_state *ds, char const *filename,
   AD_push_initial (ds);
   AD_INIT_OTHER_MEMBERS ();

-  enum RM_status status = remove_entry (AT_FDCWD, ds, filename, &st, x, NULL);
+  enum RM_status status = remove_entry (AT_FDCWD, ds, filename,
+                                       DT_UNKNOWN, &st, x);
   if (status == RM_NONEMPTY_DIR)
     {
       /* In the event that remove_dir->remove_cwd_entries detects
M ChangeLog
M bootstrap.conf
M src/copy.c
M src/remove.c
Committed as c77c6e3fab66ac2900dcc70a99754157a9fe297b





reply via email to

[Prev in Thread] Current Thread [Next in Thread]