findutils-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Findutils-patches] [PATCH] Optimise away calls to stat if all we need i


From: James Youngman
Subject: [Findutils-patches] [PATCH] Optimise away calls to stat if all we need is the inode number (bug #24342).
Date: Sat, 7 Mar 2009 21:13:29 +0000

* find/util.c (get_info): call get_statinfo if need_inum and we
don't already have the inode number.
(apply_predicate): Call get_info if need_inum.
* find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
(struct predicate): Add need_inum.
* find/tree.c (get_new_pred): Default need_inum to false.
(get_new_pred_chk_op): Likewise.
* find/ftsfind.c (consider_visiting): Copy the value of st_ino
into statbuf.st_ino.
* find/parser.c (parse_inum): Set need_inum=true.
(parse_samefile): Set need_inum=false, but add a comment
indicating that there is an optimisation opportunity.
(make_segment): For -printf %i, set NeedsInodeNumber rather than
NeedsStat.
* find/pred.c (pred_inum): assert that the inode number is known
(i.e. non-zero).
(print_optlist): Print "[need inum]" when need_inum is  true.
* find/tree.c (opt_expr): Also reorder expressions if the right
hand expression is -inum (that is, consider that test to be no
more expensive than -type).
(set_new_parent): Default need_inum to false.
(costlookup): Set pred_inum=NeedsInodeNumber.
(get_pred_cost): need_inum implies a data cost of
NeedsInodeNumber.
(cost_table): Add NeedsInodeNumber.
(print_tree): Handle need_inum.

Signed-off-by: James Youngman <address@hidden>
---
 ChangeLog      |   31 +++++++++++++++++++++++++++++++
 NEWS           |    9 +++++++++
 find/defs.h    |    4 ++++
 find/ftsfind.c |    1 +
 find/parser.c  |   12 +++++++++++-
 find/pred.c    |    7 +++++--
 find/tree.c    |   34 ++++++++++++++++++++++++----------
 find/util.c    |   53 +++++++++++++++++++++++++++++++++++++++++++++--------
 8 files changed, 130 insertions(+), 21 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 540b23b..3f21cc8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,34 @@
+2009-03-07  James Youngman  <address@hidden>
+
+       Optimise away calls to stat if all we need is the inode number.
+       This fixes Savannah bug #24342.
+       * find/util.c (get_info): call get_statinfo if need_inum and we
+       don't already have the inode number.
+       (apply_predicate): Call get_info if need_inum.
+       * find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
+       (struct predicate): Add need_inum.
+       * find/tree.c (get_new_pred): Default need_inum to false.
+       (get_new_pred_chk_op): Likewise.
+       * find/ftsfind.c (consider_visiting): Copy the value of st_ino
+       into statbuf.st_ino.
+       * find/parser.c (parse_inum): Set need_inum=true.
+       (parse_samefile): Set need_inum=false, but add a comment
+       indicating that there is an optimisation opportunity.
+       (make_segment): For -printf %i, set NeedsInodeNumber rather than
+       NeedsStat.
+       * find/pred.c (pred_inum): assert that the inode number is known
+       (i.e. non-zero).
+       (print_optlist): Print "[need inum]" when need_inum is  true.
+       * find/tree.c (opt_expr): Also reorder expressions if the right
+       hand expression is -inum (that is, consider that test to be no
+       more expensive than -type).
+       (set_new_parent): Default need_inum to false.
+       (costlookup): Set pred_inum=NeedsInodeNumber.
+       (get_pred_cost): need_inum implies a data cost of
+       NeedsInodeNumber.
+       (cost_table): Add NeedsInodeNumber.
+       (print_tree): Handle need_inum.
+
 2009-03-06  James Youngman  <address@hidden>
 
        Updated translation po files from translationproject.org.
diff --git a/NEWS b/NEWS
index 1fc82ef..749c7e2 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,12 @@ element of struct dirent, for example reiserfs.  Anecdotal 
evidence
 suggests this can speed updatedb up from about 30 minutes to 3-4
 minutes.
 
+The ftsfind executable also now avoids calling stat() functions to
+discover the inode number of a file, if we already read this
+information from the directory.  This does provide a speed-up, but
+only for a restricted set of commands such as "find . -inum 4001".
+This fix is listed below as bug #24342.
+
 ** Bug Fixes
 
 #25764: remove duplicate entry for 'proc' in updatedb's $PRUNEFS.
@@ -26,6 +32,9 @@ unknown user or is missing.
 #25154: Allow compilation with C compilers that don't allow
 declarations to follow statements.
 
+#24342: -inum predicate shoud use dirent.d_ino instead of stat.st_ino
+(this is a performance bug).
+
 ** Translations
 
 Updated translations for Bulgarian, German, Irish, Hungarian,
diff --git a/find/defs.h b/find/defs.h
index 973c4c1..c6fcc0f 100644
--- a/find/defs.h
+++ b/find/defs.h
@@ -242,6 +242,7 @@ struct predicate_performance_info
 enum EvaluationCost
 {
   NeedsNothing,
+  NeedsInodeNumber,
   NeedsType,
   NeedsStatInfo,
   NeedsLinkName,
@@ -285,6 +286,9 @@ struct predicate
   /* True if this predicate node requires knowledge of the file type. */
   boolean need_type;
 
+  /* True if this predicate node requires knowledge of the inode number. */
+  boolean need_inum;
+
   enum EvaluationCost p_cost;
 
   /* est_success_rate is a number between 0.0 and 1.0 */
diff --git a/find/ftsfind.c b/find/ftsfind.c
index bd09051..74018fa 100644
--- a/find/ftsfind.c
+++ b/find/ftsfind.c
@@ -409,6 +409,7 @@ consider_visiting(FTS *p, FTSENT *ent)
   inside_dir(p->fts_cwd_fd);
   prev_depth = ent->fts_level;
 
+  statbuf.st_ino = ent->fts_statp->st_ino;
 
   /* Cope with various error conditions. */
   if (ent->fts_info == FTS_ERR
diff --git a/find/parser.c b/find/parser.c
index 06c1686..0f8d77e 100644
--- a/find/parser.c
+++ b/find/parser.c
@@ -1247,6 +1247,9 @@ parse_inum (const struct parser_table* entry, char 
**argv, int *arg_ptr)
        * files match
        */
       p->est_success_rate = 1e-6;
+      p->need_inum = true;
+      p->need_stat = false;
+      p->need_type = false;
       return true;
     }
   else
@@ -2293,6 +2296,8 @@ parse_samefile (const struct parser_table* entry, char 
**argv, int *arg_ptr)
   our_pred->args.samefileid.dev = st.st_dev;
   our_pred->args.samefileid.fd  = fd;
   our_pred->need_type = false;
+  /* smarter way: compare type and inode number first. */
+  /* TODO: maybe optimise this away by being optimistic */
   our_pred->need_stat = true;
   our_pred->est_success_rate = 0.01f;
   return true;
@@ -2895,6 +2900,12 @@ make_segment (struct segment **segment,
       *fmt++ = 's';
       break;
 
+    case 'i':                  /* inode number */
+      pred->need_inum = true;
+      mycost = NeedsInodeNumber;
+      *fmt++ = 's';
+      break;
+
     case 'a':                  /* atime in `ctime' format */
     case 'A':                  /* atime in user-specified strftime format */
     case 'B':                  /* birth time in user-specified strftime format 
*/
@@ -2902,7 +2913,6 @@ make_segment (struct segment **segment,
     case 'C':                  /* ctime in user-specified strftime format */
     case 'F':                  /* file system type */
     case 'g':                  /* group name */
-    case 'i':                  /* inode number */
     case 'M':                  /* mode in `ls -l' format (eg., "drwxr-xr-x") */
     case 's':                  /* size in bytes */
     case 't':                  /* mtime in `ctime' format */
diff --git a/find/pred.c b/find/pred.c
index d7253e4..c51b4e7 100644
--- a/find/pred.c
+++ b/find/pred.c
@@ -1216,6 +1216,8 @@ pred_inum (const char *pathname, struct stat *stat_buf, 
struct predicate *pred_p
 {
   (void) pathname;
 
+  assert (stat_buf->st_ino != 0);
+
   switch (pred_ptr->args.numinfo.kind)
     {
     case COMP_GT:
@@ -2434,9 +2436,10 @@ print_optlist (FILE *fp, const struct predicate *p)
     {
       print_parenthesised(fp, p->pred_left);
       fprintf (fp,
-              "%s%s",
+              "%s%s%s",
               p->need_stat ? "[call stat] " : "",
-              p->need_type ? "[need type] " : "");
+              p->need_type ? "[need type] " : "",
+              p->need_inum ? "[need inum] " : "");
       print_predicate(fp, p);
       fprintf(fp, " [%g] ", p->est_success_rate);
       if (options.debug_options & DebugSuccessRates)
diff --git a/find/tree.c b/find/tree.c
index cb147af..929c5f6 100644
--- a/find/tree.c
+++ b/find/tree.c
@@ -748,8 +748,9 @@ opt_expr (struct predicate **eval_treep)
                }
 
              reorder = ((options.optimisation_level > 1)
-                         && (NeedsType == curr->pred_right->p_cost)
-                         && !curr->pred_right->need_stat) ||
+                        && (NeedsType == curr->pred_right->p_cost
+                            || NeedsInodeNumber == curr->pred_right->p_cost)
+                        && !curr->pred_right->need_stat) ||
                (options.optimisation_level > 2);
 
              if (reorder)
@@ -831,6 +832,7 @@ set_new_parent (struct predicate *curr, enum 
predicate_precedence high_prec, str
   new_parent->p_prec = high_prec;
   new_parent->need_stat = false;
   new_parent->need_type = false;
+  new_parent->need_inum = false;
   new_parent->p_cost = NeedsNothing;
 
   switch (high_prec)
@@ -919,7 +921,7 @@ static struct pred_cost_lookup costlookup[] =
     { pred_group     ,  NeedsStatInfo        },
     { pred_ilname    ,  NeedsLinkName        },
     { pred_iname     ,  NeedsNothing         },
-    { pred_inum      ,  NeedsStatInfo        },
+    { pred_inum      ,  NeedsInodeNumber     },
     { pred_ipath     ,  NeedsNothing         },
     { pred_links     ,  NeedsStatInfo        },
     { pred_lname     ,  NeedsLinkName        },
@@ -1004,6 +1006,10 @@ get_pred_cost(const struct predicate *p)
     {
       data_requirement_cost = NeedsStatInfo;
     }
+  else if (p->need_inum)
+    {
+      data_requirement_cost = NeedsInodeNumber;
+    }
   else if (p->need_type)
     {
       data_requirement_cost = NeedsType;
@@ -1433,6 +1439,7 @@ get_new_pred (const struct parser_table *entry)
   last_pred->no_default_print = false;
   last_pred->need_stat = true;
   last_pred->need_type = true;
+  last_pred->need_inum = false;
   last_pred->args.str = NULL;
   last_pred->pred_next = NULL;
   last_pred->pred_left = NULL;
@@ -1478,6 +1485,7 @@ get_new_pred_chk_op (const struct parser_table *entry)
        new_pred->p_prec = AND_PREC;
        new_pred->need_stat = false;
        new_pred->need_type = false;
+       new_pred->need_inum = false;
        new_pred->args.str = NULL;
        new_pred->side_effects = false;
        new_pred->no_default_print = false;
@@ -1500,15 +1508,16 @@ struct cost_assoc
 struct cost_assoc cost_table[] =
   {
     { NeedsNothing,         "Nothing" },
-    { NeedsType,           "Type" },
-    { NeedsStatInfo,       "StatInfo" },
-    { NeedsLinkName,       "LinkName" },
-    { NeedsAccessInfo,     "AccessInfo" },
-    { NeedsSyncDiskHit,            "SyncDiskHit" },
+    { NeedsInodeNumber,     "InodeNumber" },
+    { NeedsType,            "Type" },
+    { NeedsStatInfo,        "StatInfo" },
+    { NeedsLinkName,        "LinkName" },
+    { NeedsAccessInfo,      "AccessInfo" },
+    { NeedsSyncDiskHit,     "SyncDiskHit" },
     { NeedsEventualExec,    "EventualExec" },
     { NeedsImmediateExec,   "ImmediateExec" },
     { NeedsUserInteraction, "UserInteraction" },
-    { NeedsUnknown,        "Unknown" }
+    { NeedsUnknown,         "Unknown" }
   };
 
 struct prec_assoc
@@ -1604,7 +1613,7 @@ print_tree (FILE *fp, struct predicate *node, int indent)
           node->est_success_rate,
           (node->side_effects ? "" : "no "));
 
-  if (node->need_stat || node->need_type)
+  if (node->need_stat || node->need_type || node->need_inum)
     {
       int comma = 0;
 
@@ -1614,6 +1623,11 @@ print_tree (FILE *fp, struct predicate *node, int indent)
          fprintf (fp, "stat");
          comma = 1;
        }
+      if (node->need_inum)
+       {
+         fprintf (fp, "%sinode", comma ? "," : "");
+         comma = 1;
+       }
       if (node->need_type)
        {
          fprintf (fp, "%stype", comma ? "," : "");
diff --git a/find/util.c b/find/util.c
index 2570682..bbc8436 100644
--- a/find/util.c
+++ b/find/util.c
@@ -221,8 +221,8 @@ get_statinfo (const char *pathname, const char *name, 
struct stat *p)
 }
 
 
-/* Get the stat/type information for a file, if it is
- * not already known.
+/* Get the stat/type/inode information for a file, if it is not
+ * already known.
  */
 int
 get_info (const char *pathname,
@@ -235,14 +235,51 @@ get_info (const char *pathname,
    * already have it, stat the file now.
    */
   if (pred_ptr->need_stat)
-    todo = true;
+    {
+      todo = true;             /* need full stat info */
+    }
   else if ((pred_ptr->need_type && (0 == state.have_type)))
-    todo = true;
-
+    {
+      todo = true;             /* need to stat to get the type */
+    }
+  if (!todo && pred_ptr->need_inum)
+    {
+      if (state.have_type)
+       {
+         /* For now we decide not to trust struct dirent.d_ino for
+          * directry entries that are subdirectories, in case this
+          * subdirectory is a mount point.  We also need to call a
+          * stat function if we don't have st_ino (i.e. it is zero).
+          */
+         if (S_ISDIR(p->st_mode) || (!p->st_ino))
+           todo = true;
+       }
+      else
+       {
+         todo = true;
+       }
+    }
   if (todo)
-    return get_statinfo(pathname, state.rel_pathname, p);
+    {
+      int result = get_statinfo(pathname, state.rel_pathname, p);
+      if (result != 0)
+       {
+         /* Verify some postconditions.  We can't check st_mode for
+            non-zero-ness because of Savannah bug #16378. */
+         if (pred_ptr->need_type)
+           {
+             assert (state.have_type);
+           }
+         if (pred_ptr->need_inum)
+           {
+             assert (p->st_ino);
+           }
+       }
+    }
   else
-    return 0;
+    {
+      return 0;
+    }
 }
 
 /* Determine if we can use O_NOFOLLOW.
@@ -979,7 +1016,7 @@ apply_predicate(const char *pathname, struct stat 
*stat_buf, struct predicate *p
 {
   ++p->perf.visits;
 
-  if (p->need_stat || p->need_type)
+  if (p->need_stat || p->need_type || p->need_inum)
     {
       /* We may need a stat here. */
       if (get_info(pathname, stat_buf, p) != 0)
-- 
1.5.6.5





reply via email to

[Prev in Thread] Current Thread [Next in Thread]