[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Optimise away calls to stat if all we need is the inode number (
From: |
James Youngman |
Subject: |
[PATCH] Optimise away calls to stat if all we need is the inode number (bug #24342). |
Date: |
Sat, 7 Mar 2009 21:13:29 +0000 |
* find/util.c (get_info): call get_statinfo if need_inum and we
don't already have the inode number.
(apply_predicate): Call get_info if need_inum.
* find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
(struct predicate): Add need_inum.
* find/tree.c (get_new_pred): Default need_inum to false.
(get_new_pred_chk_op): Likewise.
* find/ftsfind.c (consider_visiting): Copy the value of st_ino
into statbuf.st_ino.
* find/parser.c (parse_inum): Set need_inum=true.
(parse_samefile): Set need_inum=false, but add a comment
indicating that there is an optimisation opportunity.
(make_segment): For -printf %i, set NeedsInodeNumber rather than
NeedsStat.
* find/pred.c (pred_inum): assert that the inode number is known
(i.e. non-zero).
(print_optlist): Print "[need inum]" when need_inum is true.
* find/tree.c (opt_expr): Also reorder expressions if the right
hand expression is -inum (that is, consider that test to be no
more expensive than -type).
(set_new_parent): Default need_inum to false.
(costlookup): Set pred_inum=NeedsInodeNumber.
(get_pred_cost): need_inum implies a data cost of
NeedsInodeNumber.
(cost_table): Add NeedsInodeNumber.
(print_tree): Handle need_inum.
Signed-off-by: James Youngman <address@hidden>
---
ChangeLog | 31 +++++++++++++++++++++++++++++++
NEWS | 9 +++++++++
find/defs.h | 4 ++++
find/ftsfind.c | 1 +
find/parser.c | 12 +++++++++++-
find/pred.c | 7 +++++--
find/tree.c | 34 ++++++++++++++++++++++++----------
find/util.c | 53 +++++++++++++++++++++++++++++++++++++++++++++--------
8 files changed, 130 insertions(+), 21 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 540b23b..3f21cc8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,34 @@
+2009-03-07 James Youngman <address@hidden>
+
+ Optimise away calls to stat if all we need is the inode number.
+ This fixes Savannah bug #24342.
+ * find/util.c (get_info): call get_statinfo if need_inum and we
+ don't already have the inode number.
+ (apply_predicate): Call get_info if need_inum.
+ * find/defs.h (enum EvaluationCost): Add NeedsInodeNumber.
+ (struct predicate): Add need_inum.
+ * find/tree.c (get_new_pred): Default need_inum to false.
+ (get_new_pred_chk_op): Likewise.
+ * find/ftsfind.c (consider_visiting): Copy the value of st_ino
+ into statbuf.st_ino.
+ * find/parser.c (parse_inum): Set need_inum=true.
+ (parse_samefile): Set need_inum=false, but add a comment
+ indicating that there is an optimisation opportunity.
+ (make_segment): For -printf %i, set NeedsInodeNumber rather than
+ NeedsStat.
+ * find/pred.c (pred_inum): assert that the inode number is known
+ (i.e. non-zero).
+ (print_optlist): Print "[need inum]" when need_inum is true.
+ * find/tree.c (opt_expr): Also reorder expressions if the right
+ hand expression is -inum (that is, consider that test to be no
+ more expensive than -type).
+ (set_new_parent): Default need_inum to false.
+ (costlookup): Set pred_inum=NeedsInodeNumber.
+ (get_pred_cost): need_inum implies a data cost of
+ NeedsInodeNumber.
+ (cost_table): Add NeedsInodeNumber.
+ (print_tree): Handle need_inum.
+
2009-03-06 James Youngman <address@hidden>
Updated translation po files from translationproject.org.
diff --git a/NEWS b/NEWS
index 1fc82ef..749c7e2 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,12 @@ element of struct dirent, for example reiserfs. Anecdotal
evidence
suggests this can speed updatedb up from about 30 minutes to 3-4
minutes.
+The ftsfind executable also now avoids calling stat() functions to
+discover the inode number of a file, if we already read this
+information from the directory. This does provide a speed-up, but
+only for a restricted set of commands such as "find . -inum 4001".
+This fix is listed below as bug #24342.
+
** Bug Fixes
#25764: remove duplicate entry for 'proc' in updatedb's $PRUNEFS.
@@ -26,6 +32,9 @@ unknown user or is missing.
#25154: Allow compilation with C compilers that don't allow
declarations to follow statements.
+#24342: -inum predicate shoud use dirent.d_ino instead of stat.st_ino
+(this is a performance bug).
+
** Translations
Updated translations for Bulgarian, German, Irish, Hungarian,
diff --git a/find/defs.h b/find/defs.h
index 973c4c1..c6fcc0f 100644
--- a/find/defs.h
+++ b/find/defs.h
@@ -242,6 +242,7 @@ struct predicate_performance_info
enum EvaluationCost
{
NeedsNothing,
+ NeedsInodeNumber,
NeedsType,
NeedsStatInfo,
NeedsLinkName,
@@ -285,6 +286,9 @@ struct predicate
/* True if this predicate node requires knowledge of the file type. */
boolean need_type;
+ /* True if this predicate node requires knowledge of the inode number. */
+ boolean need_inum;
+
enum EvaluationCost p_cost;
/* est_success_rate is a number between 0.0 and 1.0 */
diff --git a/find/ftsfind.c b/find/ftsfind.c
index bd09051..74018fa 100644
--- a/find/ftsfind.c
+++ b/find/ftsfind.c
@@ -409,6 +409,7 @@ consider_visiting(FTS *p, FTSENT *ent)
inside_dir(p->fts_cwd_fd);
prev_depth = ent->fts_level;
+ statbuf.st_ino = ent->fts_statp->st_ino;
/* Cope with various error conditions. */
if (ent->fts_info == FTS_ERR
diff --git a/find/parser.c b/find/parser.c
index 06c1686..0f8d77e 100644
--- a/find/parser.c
+++ b/find/parser.c
@@ -1247,6 +1247,9 @@ parse_inum (const struct parser_table* entry, char
**argv, int *arg_ptr)
* files match
*/
p->est_success_rate = 1e-6;
+ p->need_inum = true;
+ p->need_stat = false;
+ p->need_type = false;
return true;
}
else
@@ -2293,6 +2296,8 @@ parse_samefile (const struct parser_table* entry, char
**argv, int *arg_ptr)
our_pred->args.samefileid.dev = st.st_dev;
our_pred->args.samefileid.fd = fd;
our_pred->need_type = false;
+ /* smarter way: compare type and inode number first. */
+ /* TODO: maybe optimise this away by being optimistic */
our_pred->need_stat = true;
our_pred->est_success_rate = 0.01f;
return true;
@@ -2895,6 +2900,12 @@ make_segment (struct segment **segment,
*fmt++ = 's';
break;
+ case 'i': /* inode number */
+ pred->need_inum = true;
+ mycost = NeedsInodeNumber;
+ *fmt++ = 's';
+ break;
+
case 'a': /* atime in `ctime' format */
case 'A': /* atime in user-specified strftime format */
case 'B': /* birth time in user-specified strftime format
*/
@@ -2902,7 +2913,6 @@ make_segment (struct segment **segment,
case 'C': /* ctime in user-specified strftime format */
case 'F': /* file system type */
case 'g': /* group name */
- case 'i': /* inode number */
case 'M': /* mode in `ls -l' format (eg., "drwxr-xr-x") */
case 's': /* size in bytes */
case 't': /* mtime in `ctime' format */
diff --git a/find/pred.c b/find/pred.c
index d7253e4..c51b4e7 100644
--- a/find/pred.c
+++ b/find/pred.c
@@ -1216,6 +1216,8 @@ pred_inum (const char *pathname, struct stat *stat_buf,
struct predicate *pred_p
{
(void) pathname;
+ assert (stat_buf->st_ino != 0);
+
switch (pred_ptr->args.numinfo.kind)
{
case COMP_GT:
@@ -2434,9 +2436,10 @@ print_optlist (FILE *fp, const struct predicate *p)
{
print_parenthesised(fp, p->pred_left);
fprintf (fp,
- "%s%s",
+ "%s%s%s",
p->need_stat ? "[call stat] " : "",
- p->need_type ? "[need type] " : "");
+ p->need_type ? "[need type] " : "",
+ p->need_inum ? "[need inum] " : "");
print_predicate(fp, p);
fprintf(fp, " [%g] ", p->est_success_rate);
if (options.debug_options & DebugSuccessRates)
diff --git a/find/tree.c b/find/tree.c
index cb147af..929c5f6 100644
--- a/find/tree.c
+++ b/find/tree.c
@@ -748,8 +748,9 @@ opt_expr (struct predicate **eval_treep)
}
reorder = ((options.optimisation_level > 1)
- && (NeedsType == curr->pred_right->p_cost)
- && !curr->pred_right->need_stat) ||
+ && (NeedsType == curr->pred_right->p_cost
+ || NeedsInodeNumber == curr->pred_right->p_cost)
+ && !curr->pred_right->need_stat) ||
(options.optimisation_level > 2);
if (reorder)
@@ -831,6 +832,7 @@ set_new_parent (struct predicate *curr, enum
predicate_precedence high_prec, str
new_parent->p_prec = high_prec;
new_parent->need_stat = false;
new_parent->need_type = false;
+ new_parent->need_inum = false;
new_parent->p_cost = NeedsNothing;
switch (high_prec)
@@ -919,7 +921,7 @@ static struct pred_cost_lookup costlookup[] =
{ pred_group , NeedsStatInfo },
{ pred_ilname , NeedsLinkName },
{ pred_iname , NeedsNothing },
- { pred_inum , NeedsStatInfo },
+ { pred_inum , NeedsInodeNumber },
{ pred_ipath , NeedsNothing },
{ pred_links , NeedsStatInfo },
{ pred_lname , NeedsLinkName },
@@ -1004,6 +1006,10 @@ get_pred_cost(const struct predicate *p)
{
data_requirement_cost = NeedsStatInfo;
}
+ else if (p->need_inum)
+ {
+ data_requirement_cost = NeedsInodeNumber;
+ }
else if (p->need_type)
{
data_requirement_cost = NeedsType;
@@ -1433,6 +1439,7 @@ get_new_pred (const struct parser_table *entry)
last_pred->no_default_print = false;
last_pred->need_stat = true;
last_pred->need_type = true;
+ last_pred->need_inum = false;
last_pred->args.str = NULL;
last_pred->pred_next = NULL;
last_pred->pred_left = NULL;
@@ -1478,6 +1485,7 @@ get_new_pred_chk_op (const struct parser_table *entry)
new_pred->p_prec = AND_PREC;
new_pred->need_stat = false;
new_pred->need_type = false;
+ new_pred->need_inum = false;
new_pred->args.str = NULL;
new_pred->side_effects = false;
new_pred->no_default_print = false;
@@ -1500,15 +1508,16 @@ struct cost_assoc
struct cost_assoc cost_table[] =
{
{ NeedsNothing, "Nothing" },
- { NeedsType, "Type" },
- { NeedsStatInfo, "StatInfo" },
- { NeedsLinkName, "LinkName" },
- { NeedsAccessInfo, "AccessInfo" },
- { NeedsSyncDiskHit, "SyncDiskHit" },
+ { NeedsInodeNumber, "InodeNumber" },
+ { NeedsType, "Type" },
+ { NeedsStatInfo, "StatInfo" },
+ { NeedsLinkName, "LinkName" },
+ { NeedsAccessInfo, "AccessInfo" },
+ { NeedsSyncDiskHit, "SyncDiskHit" },
{ NeedsEventualExec, "EventualExec" },
{ NeedsImmediateExec, "ImmediateExec" },
{ NeedsUserInteraction, "UserInteraction" },
- { NeedsUnknown, "Unknown" }
+ { NeedsUnknown, "Unknown" }
};
struct prec_assoc
@@ -1604,7 +1613,7 @@ print_tree (FILE *fp, struct predicate *node, int indent)
node->est_success_rate,
(node->side_effects ? "" : "no "));
- if (node->need_stat || node->need_type)
+ if (node->need_stat || node->need_type || node->need_inum)
{
int comma = 0;
@@ -1614,6 +1623,11 @@ print_tree (FILE *fp, struct predicate *node, int indent)
fprintf (fp, "stat");
comma = 1;
}
+ if (node->need_inum)
+ {
+ fprintf (fp, "%sinode", comma ? "," : "");
+ comma = 1;
+ }
if (node->need_type)
{
fprintf (fp, "%stype", comma ? "," : "");
diff --git a/find/util.c b/find/util.c
index 2570682..bbc8436 100644
--- a/find/util.c
+++ b/find/util.c
@@ -221,8 +221,8 @@ get_statinfo (const char *pathname, const char *name,
struct stat *p)
}
-/* Get the stat/type information for a file, if it is
- * not already known.
+/* Get the stat/type/inode information for a file, if it is not
+ * already known.
*/
int
get_info (const char *pathname,
@@ -235,14 +235,51 @@ get_info (const char *pathname,
* already have it, stat the file now.
*/
if (pred_ptr->need_stat)
- todo = true;
+ {
+ todo = true; /* need full stat info */
+ }
else if ((pred_ptr->need_type && (0 == state.have_type)))
- todo = true;
-
+ {
+ todo = true; /* need to stat to get the type */
+ }
+ if (!todo && pred_ptr->need_inum)
+ {
+ if (state.have_type)
+ {
+ /* For now we decide not to trust struct dirent.d_ino for
+ * directry entries that are subdirectories, in case this
+ * subdirectory is a mount point. We also need to call a
+ * stat function if we don't have st_ino (i.e. it is zero).
+ */
+ if (S_ISDIR(p->st_mode) || (!p->st_ino))
+ todo = true;
+ }
+ else
+ {
+ todo = true;
+ }
+ }
if (todo)
- return get_statinfo(pathname, state.rel_pathname, p);
+ {
+ int result = get_statinfo(pathname, state.rel_pathname, p);
+ if (result != 0)
+ {
+ /* Verify some postconditions. We can't check st_mode for
+ non-zero-ness because of Savannah bug #16378. */
+ if (pred_ptr->need_type)
+ {
+ assert (state.have_type);
+ }
+ if (pred_ptr->need_inum)
+ {
+ assert (p->st_ino);
+ }
+ }
+ }
else
- return 0;
+ {
+ return 0;
+ }
}
/* Determine if we can use O_NOFOLLOW.
@@ -979,7 +1016,7 @@ apply_predicate(const char *pathname, struct stat
*stat_buf, struct predicate *p
{
++p->perf.visits;
- if (p->need_stat || p->need_type)
+ if (p->need_stat || p->need_type || p->need_inum)
{
/* We may need a stat here. */
if (get_info(pathname, stat_buf, p) != 0)
--
1.5.6.5
- [PATCH] Optimise away calls to stat if all we need is the inode number (bug #24342).,
James Youngman <=