bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [patch #3596] Sort directories before files in "ls"


From: Francesco Montorsi
Subject: Re: [patch #3596] Sort directories before files in "ls"
Date: Tue, 27 Dec 2005 19:42:34 +0100
User-agent: Mozilla Thunderbird 1.0.6 (X11/20050715)

Hi,

Eric Blake wrote:
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

According to Francesco Montorsi on 12/22/2005 1:41 AM:
EVIL.  You are calling qsort twice for every element (once to put
directories first, then twice again on the subsets).  This is twice as
slow as just writing a proper sort function that does everything all in
one comparison, so that you only call qsort once.
however if we want to make a single qsort call, then we'll need to
declare a lot of new functions which merge previous tests with the
groupdir test:

static int groupdir_compare_name (V a, V b)
static int groupdir_compstr_name (V a, V b)
static int groupdir_rev_cmp_name (V a, V b)
static int groupdir_rev_str_name (V a, V b)

static int groupdir_compare_extension (V a, V b)
static int groupdir_compstr_extension (V a, V b)
static int groupdir_rev_cmp_extension (V a, V b)
static int groupdir_rev_str_extension (V a, V b)

...


is it right ?

Yes, that was the approach I was thinking of.  Notice that some #define
magic may make the job easier, but I still think that reducing the number
of calls to qsort is more efficient than calling it multiple times.
ok, I've taken the 'magic' defines approach ;)


Something else to consider.  Maybe it is worth refactoring sort_files() a
bit.  Currently, it consists of nested switch statements that break into
lots of ?: conditionals.  Perhaps you can find a slick way to make an
array of function pointers, and then just index into the array based on
strcoll/strcmp, normal/reverse, and mixed/dirs-first to get the right
function pointer rather than going through lots of ?: conditionals.  I
don't know how that would look or if it would be any more or less
efficient, but I thought I would throw the idea out.
the array of function pointers is a good idea !
I've implemented it and it 'looks' (I mean looking at the more elegant code it 
derives
from this approach ;)) faster even if I haven't done any benchmarks.


I attach the updated patch to this mail.
It should address all previous points you told me...
I've also created a separed changelog patch attached at:

http://savannah.gnu.org/patch/?func=detailitem&item_id=3596


The FSF staff have contacted me saying that they will soon email me all the paperwork required for copyright assignment.


Probably there are some GNU coding conventions I haven't respected and maybe other small touches to do but I think that now the patch really looks good ;)


Francesco Montorsi



PS: I've recently installed SuSE and I've found that the configure script of 
coreutils
doesn't check for 'bison' presence: on that distro, it was missing and it gave 
me problems
later with "make". I think adding a check for 'bison' would be nice...

Index: NEWS
===================================================================
RCS file: /sources/coreutils/coreutils/NEWS,v
retrieving revision 1.354
diff -b -u -2 -r1.354 NEWS
--- NEWS    15 Dec 2005 12:24:54 -0000  1.354
+++ NEWS    27 Dec 2005 18:38:25 -0000
@@ -24,4 +24,7 @@
   attempts to have the default be the best of both worlds.

+  ls now supports the '--directories-first' option to group directories
+  before files.
+
   sort now reports incompatible options (e.g., -i and -n) rather than
   silently ignoring one of them.
Index: THANKS
===================================================================
RCS file: /sources/coreutils/coreutils/THANKS,v
retrieving revision 1.413
diff -b -u -2 -r1.413 THANKS
--- THANKS  9 Dec 2005 16:27:26 -0000   1.413
+++ THANKS  27 Dec 2005 18:38:26 -0000
@@ -152,4 +152,5 @@
 Fletcher Mattox                     address@hidden
 Florin Iucha                        address@hidden
+Francesco Montorsi                  address@hidden
 François Pinard                     address@hidden
 Frank Adler                         address@hidden
Index: src/ls.c
===================================================================
RCS file: /sources/coreutils/coreutils/src/ls.c,v
retrieving revision 1.404
diff -b -u -2 -r1.404 ls.c
--- src/ls.c    17 Dec 2005 10:33:08 -0000  1.404
+++ src/ls.c    27 Dec 2005 18:38:32 -0000
@@ -399,9 +399,11 @@
 ARGMATCH_VERIFY (time_style_args, time_style_types);

-/* Type of time to print or sort by.  Controlled by -c and -u.  */
+/* Type of time to print or sort by.  Controlled by -c and -u.
+   NB: the values of each item of this enum are important since they are
+       used as indexes in the sort functions array (see sort_files()).  */

 enum time_type
   {
-    time_mtime,            /* default */
+    time_mtime = 0,        /* default */
     time_ctime,            /* -c */
     time_atime         /* -u */
@@ -410,14 +412,16 @@
 static enum time_type time_type;

-/* The file characteristic to sort by.  Controlled by -t, -S, -U, -X, -v.  */
+/* The file characteristic to sort by.  Controlled by -t, -S, -U, -X, -v.
+   NB: the values of each item of this enum are important since they are
+       used as indexes in the sort functions array (see sort_files()).  */

 enum sort_type
   {
-    sort_none,         /* -U */
+    sort_none = -1,    /* -U */
     sort_name,         /* default */
     sort_extension,        /* -X */
-    sort_time,         /* -t */
     sort_size,         /* -S */
-    sort_version       /* -v */
+    sort_version,      /* -v */
+    sort_time,         /* -t */
   };

@@ -592,4 +596,9 @@
 static bool immediate_dirs;

+/* True means that directories are grouped before files.
+   --directories-first  */
+
+static bool directories_first;
+
 /* Which files to ignore.  */

@@ -741,4 +750,5 @@
   SI_OPTION,
   SORT_OPTION,
+  DIRECTORIES_FIRST_OPTION,
   TIME_OPTION,
   TIME_STYLE_OPTION
@@ -750,4 +760,5 @@
   {"escape", no_argument, NULL, 'b'},
   {"directory", no_argument, NULL, 'd'},
+  {"directories-first", no_argument, NULL, DIRECTORIES_FIRST_OPTION},
   {"dired", no_argument, NULL, 'D'},
   {"full-time", no_argument, NULL, FULL_TIME_OPTION},
@@ -1224,5 +1235,6 @@
     || format == long_format
     || dereference == DEREF_ALWAYS
-    || print_block_size || print_inode;
+    || print_block_size || print_inode
+    || directories_first;
   format_needs_type = (!format_needs_stat
               && (recursive || print_with_color
@@ -1713,4 +1725,8 @@
      break;

+    case DIRECTORIES_FIRST_OPTION:
+      directories_first = true;
+      break;
+
    case TIME_OPTION:
      time_type = XARGMATCH ("--time", optarg, time_args, time_types);
@@ -2728,4 +2744,12 @@
 }

+
+/* Returns true if the given fileinfo refers to a directory */
+static bool is_directory (const struct fileinfo *f)
+{
+  return f->filetype == directory || f->filetype == arg_directory;
+}
+
+
 #ifdef S_ISLNK

@@ -2810,6 +2834,5 @@
      order.  */
   for (i = files_index; i-- != 0; )
-    if ((files[i].filetype == directory || files[i].filetype == arg_directory)
-   && (!ignore_dot_and_dot_dot
+    if (is_directory(&files[i]) && (!ignore_dot_and_dot_dot
        || !basename_is_dot_or_dotdot (files[i].name)))
       {
@@ -2868,4 +2891,47 @@

 typedef void const *V;
+typedef int (*qsortFunc)(V a, V b);
+
+#define FILEINFO_CAST(x)                ((struct fileinfo const *)x)
+
+/* Used below in DEFINE_SORT_FUNCTIONS for _df_ sort function variants */
+#define DIRFIRST_CHECK(a,b)                                         \
+    bool a_is_dir = is_directory(FILEINFO_CAST(a)),                 \
+        b_is_dir = is_directory(FILEINFO_CAST(b));                  \
+    if (a_is_dir && !b_is_dir)                                      \
+        return -1;         /* a goes before b */                    \
+    if (!a_is_dir && b_is_dir)                                      \
+        return 1;          /* b goes before a */
+
+/*
+   Defines the 8 different sort function variants required for each sortkey.
+   The 'basefunc' should be an inline function so that compiler will be able
+   to optimize all these functions. The 'basename' argument will be prefixed
+   with the '[rev_][x]str{cmp|coll}[_df]_' string to create the function name.
+*/
+#define DEFINE_SORT_FUNCTIONS(basename, basefunc)                   \
+    /* direct, non-dirfirst versions */                             \
+    static int xstrcoll_##basename (V a, V b)                       \
+    { return basefunc (a, b, xstrcoll); }                           \
+    static int strcmp_##basename (V a, V b)                         \
+    { return basefunc (a, b, strcmp); }                             \
+                                                                    \
+    /* reverse, non-dirfirst versions */                            \
+    static int rev_xstrcoll_##basename (V a, V b)                   \
+    { return basefunc (b, a, xstrcoll); }                           \
+    static int rev_strcmp_##basename (V a, V b)                     \
+    { return basefunc (b, a, strcmp); }                             \
+                                                                    \
+    /* direct, dirfirst versions */                                 \
+    static int xstrcoll_df_##basename (V a, V b)                    \
+    { DIRFIRST_CHECK(a,b); return basefunc (a, b, xstrcoll); }      \
+    static int strcmp_df_##basename (V a, V b)                      \
+    { DIRFIRST_CHECK(a,b); return basefunc (a, b, strcmp); }        \
+                                                                    \
+    /* reverse, dirfirst versions */                                \
+    static int rev_xstrcoll_df_##basename (V a, V b)                \
+    { DIRFIRST_CHECK(a,b); return basefunc (b, a, xstrcoll); }      \
+    static int rev_strcmp_df_##basename (V a, V b)                  \
+    { DIRFIRST_CHECK(a,b); return basefunc (b, a, strcmp); }

 static inline int
@@ -2877,8 +2943,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_ctime (V a, V b) { return cmp_ctime (a, b, xstrcoll); }
-static int compstr_ctime (V a, V b) { return cmp_ctime (a, b, strcmp); }
-static int rev_cmp_ctime (V a, V b) { return compare_ctime (b, a); }
-static int rev_str_ctime (V a, V b) { return compstr_ctime (b, a); }

 static inline int
@@ -2890,8 +2952,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_mtime (V a, V b) { return cmp_mtime (a, b, xstrcoll); }
-static int compstr_mtime (V a, V b) { return cmp_mtime (a, b, strcmp); }
-static int rev_cmp_mtime (V a, V b) { return compare_mtime (b, a); }
-static int rev_str_mtime (V a, V b) { return compstr_mtime (b, a); }

 static inline int
@@ -2903,8 +2961,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_atime (V a, V b) { return cmp_atime (a, b, xstrcoll); }
-static int compstr_atime (V a, V b) { return cmp_atime (a, b, strcmp); }
-static int rev_cmp_atime (V a, V b) { return compare_atime (b, a); }
-static int rev_str_atime (V a, V b) { return compstr_atime (b, a); }

 static inline int
@@ -2915,16 +2969,4 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_size (V a, V b) { return cmp_size (a, b, xstrcoll); }
-static int compstr_size (V a, V b) { return cmp_size (a, b, strcmp); }
-static int rev_cmp_size (V a, V b) { return compare_size (b, a); }
-static int rev_str_size (V a, V b) { return compstr_size (b, a); }
-
-static inline int
-cmp_version (struct fileinfo const *a, struct fileinfo const *b)
-{
-  return strverscmp (a->name, b->name);
-}
-static int compare_version (V a, V b) { return cmp_version (a, b); }
-static int rev_cmp_version (V a, V b) { return compare_version (b, a); }

 static inline int
@@ -2934,8 +2976,4 @@
   return cmp (a->name, b->name);
 }
-static int compare_name (V a, V b) { return cmp_name (a, b, xstrcoll); }
-static int compstr_name (V a, V b) { return cmp_name (a, b, strcmp); }
-static int rev_cmp_name (V a, V b) { return compare_name (b, a); }
-static int rev_str_name (V a, V b) { return compstr_name (b, a); }

 /* Compare file extensions.  Files with no extension are `smallest'.
@@ -2951,8 +2989,87 @@
   return diff ? diff : cmp (a->name, b->name);
 }
-static int compare_extension (V a, V b) { return cmp_extension (a, b, 
xstrcoll); }
-static int compstr_extension (V a, V b) { return cmp_extension (a, b, strcmp); 
}
-static int rev_cmp_extension (V a, V b) { return compare_extension (b, a); }
-static int rev_str_extension (V a, V b) { return compstr_extension (b, a); }
+
+
+DEFINE_SORT_FUNCTIONS(ctime, cmp_ctime)
+DEFINE_SORT_FUNCTIONS(mtime, cmp_mtime)
+DEFINE_SORT_FUNCTIONS(atime, cmp_atime)
+DEFINE_SORT_FUNCTIONS(size, cmp_size)
+DEFINE_SORT_FUNCTIONS(name, cmp_name)
+DEFINE_SORT_FUNCTIONS(extension, cmp_extension)
+
+
+
+/* Compares file versions. Unlike all other compare functions above,
+   this function does not have any 'cmp' argument since it does rely
+   directly on the strverscmp() GNU extension and thus it is available
+   only when xstrcoll is available. */
+static inline int
+cmp_version (struct fileinfo const *a, struct fileinfo const *b)
+{
+  return strverscmp (a->name, b->name);
+}
+
+static int xstrcoll_version (V a, V b)
+{ return cmp_version (a, b); }
+static int rev_xstrcoll_version (V a, V b)
+{ return cmp_version (b, a); }
+static int xstrcoll_df_version (V a, V b)
+{ DIRFIRST_CHECK(a,b); return cmp_version (a, b); }
+static int rev_xstrcoll_df_version (V a, V b)
+{ DIRFIRST_CHECK(a,b); return cmp_version (b, a); }
+
+
+
+
+/*
+   We have 8 different variants for each sortkey function, and we have 7
+   different sortkeys.
+   The function pointers stored in this array are organized as:
+
+    | idx | with...  | sort order | directories first ? |
+    +-----+----------+------------+---------------------+
+    |  0  | xstrcoll | direct     |          no         |
+    |  1  | strcmp   | direct     |          no         |
+    |  2  | xstrcoll | reverse    |          no         |
+    |  3  | strcmp   | reverse    |          no         |
+    |  4  | xstrcoll | direct     |          yes        |
+    |  5  | strcmp   | direct     |          yes        |
+    |  6  | xstrcoll | reverse    |          yes        |
+    |  7  | strcmp   | reverse    |          yes        |
+    +-----+----------+------------+---------------------+
+
+   for each sortkey.
+
+   NB: the order defined in the table above is arbitrary but
+       the order in which sortkeys are listed in the function pointer
+       array is defined by the time_type and sort_type enums !
+*/
+
+
+#define LIST_SORTFUNCTION_VARIANTS(basename)                \
+    xstrcoll_##basename, strcmp_##basename,                 \
+    rev_xstrcoll_##basename, rev_strcmp_##basename,         \
+    xstrcoll_df_##basename, strcmp_df_##basename,           \
+    rev_xstrcoll_df_##basename, rev_strcmp_df_##basename    \
+
+qsortFunc sort_functions[8*7] = {
+    LIST_SORTFUNCTION_VARIANTS(name),
+    LIST_SORTFUNCTION_VARIANTS(extension),
+    LIST_SORTFUNCTION_VARIANTS(size),
+
+    /* for version comparisons, we don't have
+      xstrcoll/strcmp differences */
+    xstrcoll_version, xstrcoll_version,
+    rev_xstrcoll_version, rev_xstrcoll_version,
+    xstrcoll_df_version, xstrcoll_df_version,
+    rev_xstrcoll_df_version, rev_xstrcoll_df_version,
+
+    /* last are time sort functions */
+    LIST_SORTFUNCTION_VARIANTS(mtime),
+    LIST_SORTFUNCTION_VARIANTS(ctime),
+    LIST_SORTFUNCTION_VARIANTS(atime)
+  };
+
+

 /* Sort the files now in the table.  */
@@ -2961,5 +3078,8 @@
 sort_files (void)
 {
-  int (*func) (V, V);
+  bool use_strcmp;
+
+  if (sort_type == sort_none)
+      return;

   /* Try strcoll.  If it fails, fall back on strcmp.  We can't safely
@@ -2969,76 +3089,24 @@

   if (! setjmp (failed_strcoll))
-    {
-      switch (sort_type)
-   {
-   case sort_none:
-     return;
-   case sort_time:
-     switch (time_type)
-       {
-       case time_ctime:
-         func = sort_reverse ? rev_cmp_ctime : compare_ctime;
-         break;
-       case time_mtime:
-         func = sort_reverse ? rev_cmp_mtime : compare_mtime;
-         break;
-       case time_atime:
-         func = sort_reverse ? rev_cmp_atime : compare_atime;
-         break;
-       default:
-         abort ();
-       }
-     break;
-   case sort_name:
-     func = sort_reverse ? rev_cmp_name : compare_name;
-     break;
-   case sort_extension:
-     func = sort_reverse ? rev_cmp_extension : compare_extension;
-     break;
-   case sort_size:
-     func = sort_reverse ? rev_cmp_size : compare_size;
-     break;
-   case sort_version:
-     func = sort_reverse ? rev_cmp_version : compare_version;
-     break;
-   default:
-     abort ();
-   }
-    }
+        use_strcmp = false;      /* [x]strcoll() available ! */
   else
     {
-      switch (sort_type)
-   {
-   case sort_time:
-     switch (time_type)
-       {
-       case time_ctime:
-         func = sort_reverse ? rev_str_ctime : compstr_ctime;
-         break;
-       case time_mtime:
-         func = sort_reverse ? rev_str_mtime : compstr_mtime;
-         break;
-       case time_atime:
-         func = sort_reverse ? rev_str_atime : compstr_atime;
-         break;
-       default:
-         abort ();
-       }
-     break;
-   case sort_name:
-     func = sort_reverse ? rev_str_name : compstr_name;
-     break;
-   case sort_extension:
-     func = sort_reverse ? rev_str_extension : compstr_extension;
-     break;
-   case sort_size:
-     func = sort_reverse ? rev_str_size : compstr_size;
-     break;
-   default:
-     abort ();
-   }
+        use_strcmp = true;
+        if (sort_type == sort_version)
+            abort(); /* cannot do version comparisons without xstrcoll() */
     }

-  qsort (files, files_index, sizeof *files, func);
+  /* when sort_type == sort_time we need to use time_type as subindex. */
+  int timeoffset = 0;
+  if (sort_type == sort_time)
+     timeoffset = time_type;
+
+  /* compute the index in the sort function pointers array */
+  int idx = (sort_type+timeoffset)*8 +     /* select sortkey block */
+            ((int)directories_first)*4 +   /* select (non-)dirfirst mode */
+            ((int)sort_reverse)*2 +        /* select sort order */
+            (int)use_strcmp;               /* select xstrcoll/strcmp version */
+
+  qsort (files, files_index, sizeof *files, sort_functions[ idx ]);
 }

@@ -4137,4 +4205,7 @@
   -D, --dired                generate output designed for Emacs' dired mode\n\
 "), stdout);
+      fputs(_("\
+      --directories-first    group directories before files\n\
+"), stdout);
       fputs (_("\
   -f                         do not sort, enable -aU, disable -lst\n\

reply via email to

[Prev in Thread] Current Thread [Next in Thread]