From 224328e4bda44aa25cd5c98b1c13751ecea865c7 Mon Sep 17 00:00:00 2001 From: Bo Borgerson Date: Wed, 16 Apr 2008 13:44:36 -0400 Subject: [PATCH] Use a hash rather than a linked-list for cycle check in cp * NEWS: List the change of cycle check behavior. * src/copy.c (struct dir_info): These go in the hash. (static Hash_table *ancestry_hash): This is the hash. (static size_t dir_info_hasher): Simple hasher based on INO. (static bool dir_info_comparator): Checks equivalence of INO and DEV. (static bool ancestry_insert): Insert a dir_info entry for the current dir. (static bool ancestry_delete): Delete the hash entry for the current dir. (copy_internal): Now calls ancestry_insert and ancestry_delete instead of managing the linked-list inline and calling is_ancestor for the cycle check. Signed-off-by: Bo Borgerson --- NEWS | 2 + src/copy.c | 106 +++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 72 insertions(+), 36 deletions(-) diff --git a/NEWS b/NEWS index 3a584e9..111e0c1 100644 --- a/NEWS +++ b/NEWS @@ -69,6 +69,8 @@ GNU coreutils NEWS -*- outline -*- seq gives better diagnostics for invalid formats. + cp now uses a hash for cycle checks rather than a linked-list of ancestry. + ** Portability rm now works properly even on systems like BeOS and Haiku, diff --git a/src/copy.c b/src/copy.c index c2f21a3..8e673f0 100644 --- a/src/copy.c +++ b/src/copy.c @@ -83,19 +83,74 @@ rpl_mkfifo (char const *file, mode_t mode) #define SAME_GROUP(A, B) ((A).st_gid == (B).st_gid) #define SAME_OWNER_AND_GROUP(A, B) (SAME_OWNER (A, B) && SAME_GROUP (A, B)) -struct dir_list + +/* This is for looking up directories by inode/device to ensure we don't + have any cycles. */ +static Hash_table *ancestors_hash; + +enum { INIT_ANCESTRY_SIZE = 47 }; + +struct dir_info { - struct dir_list *parent; ino_t ino; dev_t dev; }; +static size_t +dir_info_hasher (const void *entry, size_t tabsize) +{ + const struct dir_info *node = entry; + return node->ino % tabsize; +} + +static bool +dir_info_comparator (const void *e1, const void *e2) +{ + const struct dir_info *n1 = e1, *n2 = e2; + return n1->ino == n2->ino && n1->dev == n2->dev; +} + +/* First check whether this directory has been seen. If it has then + return false because we've encountered a cycle in the graph. If it + hasn't, then insert it into the ancestry hash and return true. */ + +static bool +ancestry_insert (struct dir_info *dir, struct stat *src_sb) +{ + dir->ino = src_sb->st_ino; + dir->dev = src_sb->st_dev; + + if (! ancestors_hash) + { + ancestors_hash = hash_initialize (INIT_ANCESTRY_SIZE, NULL, + dir_info_hasher, + dir_info_comparator, NULL); + if (! ancestors_hash) + xalloc_die (); + } + + if (hash_lookup (ancestors_hash, dir)) + return false; + + hash_insert (ancestors_hash, dir); + + return true; +} + +/* This is called when recursion through DIR is complete. Note that + `*dir' is actually a pointer into the stack of the caller, so there's + no free here. */ +static inline void +ancestry_delete (const struct dir_info *dir) +{ + hash_delete (ancestors_hash, dir); +} + /* Initial size of the cp.dest_info hash table. */ #define DEST_INFO_INITIAL_CAPACITY 61 static bool copy_internal (char const *src_name, char const *dst_name, bool new_dst, dev_t device, - struct dir_list *ancestors, const struct cp_options *x, bool command_line_arg, bool *copy_into_self, @@ -110,23 +165,6 @@ static char const *top_level_dst_name; /* The invocation name of this program. */ extern char *program_name; -/* FIXME: describe */ -/* FIXME: rewrite this to use a hash table so we avoid the quadratic - performance hit that's probably noticeable only on trees deeper - than a few hundred levels. See use of active_dir_map in remove.c */ - -static bool -is_ancestor (const struct stat *sb, const struct dir_list *ancestors) -{ - while (ancestors != 0) - { - if (ancestors->ino == sb->st_ino && ancestors->dev == sb->st_dev) - return true; - ancestors = ancestors->parent; - } - return false; -} - /* Read the contents of the directory SRC_NAME_IN, and recursively copy the contents to DST_NAME_IN. NEW_DST is true if DST_NAME_IN is a directory that was created previously in the @@ -137,8 +175,8 @@ is_ancestor (const struct stat *sb, const struct dir_list *ancestors) static bool copy_dir (char const *src_name_in, char const *dst_name_in, bool new_dst, - const struct stat *src_sb, struct dir_list *ancestors, - const struct cp_options *x, bool *copy_into_self) + const struct stat *src_sb, const struct cp_options *x, + bool *copy_into_self) { char *name_space; char *namep; @@ -167,7 +205,7 @@ copy_dir (char const *src_name_in, char const *dst_name_in, bool new_dst, char *dst_name = file_name_concat (dst_name_in, namep, NULL); ok &= copy_internal (src_name, dst_name, new_dst, src_sb->st_dev, - ancestors, &non_command_line_options, false, + &non_command_line_options, false, &local_copy_into_self, NULL); *copy_into_self |= local_copy_into_self; @@ -1056,7 +1094,6 @@ static bool copy_internal (char const *src_name, char const *dst_name, bool new_dst, dev_t device, - struct dir_list *ancestors, const struct cp_options *x, bool command_line_arg, bool *copy_into_self, @@ -1673,27 +1710,21 @@ copy_internal (char const *src_name, char const *dst_name, if (S_ISDIR (src_mode)) { - struct dir_list *dir; + struct dir_info dir; - /* If this directory has been copied before during the + /* Insert the current directory in the list of parents. + If this directory has been copied before during the recursion, there is a symbolic link to an ancestor directory of the symbolic link. It is impossible to continue to copy this, unless we've got an infinite disk. */ - if (is_ancestor (&src_sb, ancestors)) + if (! ancestry_insert (&dir, &src_sb)) { error (0, 0, _("cannot copy cyclic symbolic link %s"), quote (src_name)); goto un_backup; } - /* Insert the current directory in the list of parents. */ - - dir = alloca (sizeof *dir); - dir->parent = ancestors; - dir->ino = src_sb.st_ino; - dir->dev = src_sb.st_dev; - if (new_dst || !S_ISDIR (dst_sb.st_mode)) { /* POSIX says mkdir's behavior is implementation-defined when @@ -1753,9 +1784,12 @@ copy_internal (char const *src_name, char const *dst_name, this fails -- otherwise, the failure to read a single file in a source directory would cause the containing destination directory not to have owner/perms set properly. */ - delayed_ok = copy_dir (src_name, dst_name, new_dst, &src_sb, dir, x, + delayed_ok = copy_dir (src_name, dst_name, new_dst, &src_sb, x, copy_into_self); } + + /* This whole branch has been copied now. */ + ancestry_delete (&dir); } else if (x->symbolic_link) { @@ -2097,7 +2131,7 @@ copy (char const *src_name, char const *dst_name, top_level_src_name = src_name; top_level_dst_name = dst_name; - return copy_internal (src_name, dst_name, nonexistent_dst, 0, NULL, + return copy_internal (src_name, dst_name, nonexistent_dst, 0, options, true, copy_into_self, rename_succeeded); } -- 1.5.2.5