bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils int cleanup for "unexpand"


From: Paul Eggert
Subject: coreutils int cleanup for "unexpand"
Date: Tue, 03 Aug 2004 16:29:37 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

This fixes several minor integer-overflow problems with "unexpand"
which could cause undefined behavior, e.g. with input lines longer
than INT_MAX bytes.  There are some issues remaining (note the FIXME)
but I thought it best to check in what I had.

2004-08-03  Paul Eggert  <address@hidden>

        * src/unexpand.c: Int cleanup and minor reorganization to be more
        like src/expand.c.
        Include quote.h, xstrndup.h.
        (TAB_STOP_SENTINEL): Increase from INT_MAX to INTMAX_MAX.
        (convert_entire_line, have_read_stdin, parse_tabstops, next_file,
        unexpand, main):
        Use bool for booleans.
        (tab_size, tab_list, add_tabstop, validate_tabstops, unexpand):
        Use uintmax_t for column counts.
        (first_free_tab, validate_tabstops, unexpand): Use size_t for sizes.
        (add_tabstop, parse_tabstops, main): Don't reserve UINTMAX_MAX
        as a tab stop.
        (parse_tabstops): Don't use ISBLANK on possibly-signed char.
        Detect overflow in tab stop string.
        (next_file, main): Use EXIT_FAILURE/EXIT_SUCCESS instead of 1/0.
        (unexpand): Concatenate input files the same way expand does.

Index: unexpand.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/unexpand.c,v
retrieving revision 1.80
retrieving revision 1.81
diff -p -u -r1.80 -r1.81
--- unexpand.c  21 Jan 2004 23:47:17 -0000      1.80
+++ unexpand.c  3 Aug 2004 23:27:20 -0000       1.81
@@ -43,6 +43,8 @@
 #include "system.h"
 #include "error.h"
 #include "posixver.h"
+#include "quote.h"
+#include "xstrndup.h"
 
 /* The official name of this program (e.g., no `g' prefix).  */
 #define PROGRAM_NAME "unexpand"
@@ -50,51 +52,51 @@
 #define AUTHORS "David MacKenzie"
 
 /* The number of bytes added at a time to the amount of memory
-   allocated for the output line. */
+   allocated for the output line.  */
 #define OUTPUT_BLOCK 256
 
-/* The number of bytes added at a time to the amount of memory
-   allocated for the list of tabstops. */
-#define TABLIST_BLOCK 256
-
 /* A sentinel value that's placed at the end of the list of tab stops.
    This value must be a large number, but not so large that adding the
-   length of a line to it would cause the column variable to overflow.  */
-#define TAB_STOP_SENTINEL INT_MAX
+   length of a line to it would cause the column variable to overflow.
+   FIXME: The algorithm isn't correct once the numbers get large;
+   also, no error is reported if overflow occurs.  */
+#define TAB_STOP_SENTINEL INTMAX_MAX
 
-/* The name this program was run with. */
+/* The name this program was run with.  */
 char *program_name;
 
-/* If nonzero, convert blanks even after nonblank characters have been
-   read on the line. */
-static int convert_entire_line;
+/* If true, convert blanks even after nonblank characters have been
+   read on the line.  */
+static bool convert_entire_line;
 
-/* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead. */
-static int tab_size;
+/* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead.  */
+static uintmax_t tab_size;
 
 /* Array of the explicit column numbers of the tab stops;
    after `tab_list' is exhausted, the rest of the line is printed
-   unchanged.  The first column is column 0. */
-static int *tab_list;
+   unchanged.  The first column is column 0.  */
+static uintmax_t *tab_list;
+
+/* The number of allocated entries in `tab_list'.  */
 static size_t n_tabs_allocated;
 
 /* The index of the first invalid element of `tab_list',
-   where the next element can be added. */
-static int first_free_tab;
+   where the next element can be added.  */
+static size_t first_free_tab;
 
-/* Null-terminated array of input filenames. */
+/* Null-terminated array of input filenames.  */
 static char **file_list;
 
-/* Default for `file_list' if no files are given on the command line. */
+/* Default for `file_list' if no files are given on the command line.  */
 static char *stdin_argv[] =
 {
   "-", NULL
 };
 
-/* Nonzero if we have ever read standard input. */
-static int have_read_stdin;
+/* True if we have ever read standard input.  */
+static bool have_read_stdin;
 
-/* Status to return to the system. */
+/* The desired exit status.  */
 static int exit_status;
 
 /* For long options that have no equivalent short option, use a
@@ -114,55 +116,115 @@ static struct option const longopts[] =
   {NULL, 0, NULL, 0}
 };
 
-/* Add tab stop TABVAL to the end of `tab_list', except
-   if TABVAL is -1, do nothing. */
+void
+usage (int status)
+{
+  if (status != EXIT_SUCCESS)
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+            program_name);
+  else
+    {
+      printf (_("\
+Usage: %s [OPTION]... [FILE]...\n\
+"),
+             program_name);
+      fputs (_("\
+Convert spaces in each FILE to tabs, writing to standard output.\n\
+With no FILE, or when FILE is -, read standard input.\n\
+\n\
+"), stdout);
+      fputs (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"), stdout);
+      fputs (_("\
+  -a, --all        convert all whitespace, instead of just initial 
whitespace\n\
+      --first-only convert only leading sequences of whitespace (overrides 
-a)\n\
+  -t, --tabs=N     have tabs N characters apart instead of 8 (enables -a)\n\
+  -t, --tabs=LIST  use comma separated LIST of tab positions (enables -a)\n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+    }
+  exit (status);
+}
+
+/* Add tab stop TABVAL to the end of `tab_list'.  */
 
 static void
-add_tabstop (int tabval)
+add_tabstop (uintmax_t tabval)
 {
-  if (tabval == -1)
-    return;
   if (first_free_tab == n_tabs_allocated)
     tab_list = x2nrealloc (tab_list, &n_tabs_allocated, sizeof *tab_list);
   tab_list[first_free_tab++] = tabval;
 }
 
 /* Add the comma or blank separated list of tabstops STOPS
-   to the list of tabstops. */
+   to the list of tabstops.  */
 
 static void
-parse_tabstops (const char *stops)
+parse_tabstops (char const *stops)
 {
-  int tabval = -1;
+  bool have_tabval = false;
+  uintmax_t tabval IF_LINT (= 0);
+  char const *num_start IF_LINT (= NULL);
+  bool ok = true;
 
   for (; *stops; stops++)
     {
-      if (*stops == ',' || ISBLANK (*stops))
+      if (*stops == ',' || ISBLANK (to_uchar (*stops)))
        {
-         add_tabstop (tabval);
-         tabval = -1;
+         if (have_tabval)
+           add_tabstop (tabval);
+         have_tabval = false;
        }
       else if (ISDIGIT (*stops))
        {
-         if (tabval == -1)
-           tabval = 0;
-         tabval = tabval * 10 + *stops - '0';
+         if (!have_tabval)
+           {
+             tabval = 0;
+             have_tabval = true;
+             num_start = stops;
+           }
+         {
+           /* Detect overflow.  */
+           uintmax_t new_t = 10 * tabval + *stops - '0';
+           if (UINTMAX_MAX / 10 < tabval || new_t < tabval * 10)
+             {
+               size_t len = strspn (num_start, "0123456789");
+               char *bad_num = xstrndup (num_start, len);
+               error (0, 0, _("tab stop is too large %s"), quote (bad_num));
+               free (bad_num);
+               ok = false;
+               stops = num_start + len - 1;
+             }
+           tabval = new_t;
+         }
        }
       else
-       error (EXIT_FAILURE, 0, _("tab size contains an invalid character"));
+       {
+         error (0, 0, _("tab size contains invalid character(s): %s"),
+                quote (stops));
+         ok = false;
+         break;
+       }
     }
 
-  add_tabstop (tabval);
+  if (!ok)
+    exit (EXIT_FAILURE);
+
+  if (have_tabval)
+    add_tabstop (tabval);
 }
 
 /* Check that the list of tabstops TABS, with ENTRIES entries,
-   contains only nonzero, ascending values. */
+   contains only nonzero, ascending values.  */
 
 static void
-validate_tabstops (const int *tabs, int entries)
+validate_tabstops (uintmax_t const *tabs, size_t entries)
 {
-  int prev_tab = 0;
-  int i;
+  uintmax_t prev_tab = 0;
+  size_t i;
 
   for (i = 0; i < entries; i++)
     {
@@ -190,14 +252,14 @@ next_file (FILE *fp)
       if (ferror (fp))
        {
          error (0, errno, "%s", prev_file);
-         exit_status = 1;
+         exit_status = EXIT_FAILURE;
        }
       if (fp == stdin)
-       clearerr (fp);          /* Also clear EOF. */
+       clearerr (fp);          /* Also clear EOF.  */
       else if (fclose (fp) == EOF)
        {
          error (0, errno, "%s", prev_file);
-         exit_status = 1;
+         exit_status = EXIT_FAILURE;
        }
     }
 
@@ -205,7 +267,7 @@ next_file (FILE *fp)
     {
       if (file[0] == '-' && file[1] == '\0')
        {
-         have_read_stdin = 1;
+         have_read_stdin = true;
          prev_file = file;
          return stdin;
        }
@@ -216,27 +278,25 @@ next_file (FILE *fp)
          return fp;
        }
       error (0, errno, "%s", file);
-      exit_status = 1;
+      exit_status = EXIT_FAILURE;
     }
   return NULL;
 }
 
 /* Change spaces to tabs, writing to stdout.
-   Read each file in `file_list', in order. */
+   Read each file in `file_list', in order.  */
 
 static void
 unexpand (void)
 {
-  FILE *fp;                    /* Input stream. */
-  int c;                       /* Each input character. */
-  /* Index in `tab_list' of next tabstop: */
-  int tab_index = 0;           /* For calculating width of pending tabs. */
-  int print_tab_index = 0;     /* For printing as many tabs as possible. */
-  unsigned int column = 0;     /* Column on screen of next char. */
-  int next_tab_column;         /* Column the next tab stop is on. */
-  int convert = 1;             /* If nonzero, perform translations. */
-  unsigned int pending = 0;    /* Pending columns of blanks. */
-  int saved_errno;
+  FILE *fp;                    /* Input stream.  */
+  size_t tab_index = 0;                /* Index in `tab_list' of next tabstop. 
 */
+  size_t print_tab_index = 0;  /* For printing as many tabs as possible.  */
+  uintmax_t column = 0;                /* Column of next char.  */
+  uintmax_t next_tab_column;   /* Column the next tab stop is on.  */
+  bool convert = true;         /* If true, perform translations.  */
+  uintmax_t pending = 0;       /* Pending columns of blanks.  */
+  int saved_errno IF_LINT (= 0);
 
   fp = next_file ((FILE *) NULL);
   if (fp == NULL)
@@ -247,8 +307,17 @@ unexpand (void)
 
   for (;;)
     {
-      c = getc (fp);
-      saved_errno = errno;
+      int c = getc (fp);
+      if (c == EOF)
+       {
+         fp = next_file (fp);
+         if (fp)
+           {
+             SET_BINARY2 (fileno (fp), STDOUT_FILENO);
+             continue;
+           }
+         saved_errno = errno;
+       }
 
       if (c == ' ' && convert && column < TAB_STOP_SENTINEL)
        {
@@ -260,7 +329,7 @@ unexpand (void)
          if (tab_size == 0)
            {
              /* Do not let tab_index == first_free_tab;
-                stop when it is 1 less. */
+                stop when it is 1 less.  */
              while (tab_index < first_free_tab - 1
                     && column >= tab_list[tab_index])
                tab_index++;
@@ -269,7 +338,7 @@ unexpand (void)
                tab_index++;
              if (column >= next_tab_column)
                {
-                 convert = 0;  /* Ran out of tab stops. */
+                 convert = false;      /* Ran out of tab stops.  */
                  goto flush_pend;
                }
            }
@@ -284,7 +353,7 @@ unexpand (void)
        {
        flush_pend:
          /* Flush pending spaces.  Print as many tabs as possible,
-            then print the rest as spaces. */
+            then print the rest as spaces.  */
          if (pending == 1)
            {
              putchar (' ');
@@ -296,7 +365,7 @@ unexpand (void)
              if (tab_size == 0)
                {
                  /* Do not let print_tab_index == first_free_tab;
-                    stop when it is 1 less. */
+                    stop when it is 1 less.  */
                  while (print_tab_index < first_free_tab - 1
                         && column >= tab_list[print_tab_index])
                    print_tab_index++;
@@ -329,14 +398,7 @@ unexpand (void)
          if (c == EOF)
            {
              errno = saved_errno;
-             fp = next_file (fp);
-             if (fp == NULL)
-               break;          /* No more files. */
-             else
-               {
-                 SET_BINARY2 (fileno (fp), STDOUT_FILENO);
-                 continue;
-               }
+             break;
            }
 
          if (convert)
@@ -349,8 +411,7 @@ unexpand (void)
              else
                {
                  ++column;
-                 if (convert_entire_line == 0)
-                   convert = 0;
+                 convert &= convert_entire_line;
                }
            }
 
@@ -360,54 +421,22 @@ unexpand (void)
            {
              tab_index = print_tab_index = 0;
              column = pending = 0;
-             convert = 1;
+             convert = true;
            }
        }
     }
 }
 
-void
-usage (int status)
-{
-  if (status != EXIT_SUCCESS)
-    fprintf (stderr, _("Try `%s --help' for more information.\n"),
-            program_name);
-  else
-    {
-      printf (_("\
-Usage: %s [OPTION]... [FILE]...\n\
-"),
-             program_name);
-      fputs (_("\
-Convert spaces in each FILE to tabs, writing to standard output.\n\
-With no FILE, or when FILE is -, read standard input.\n\
-\n\
-"), stdout);
-      fputs (_("\
-Mandatory arguments to long options are mandatory for short options too.\n\
-"), stdout);
-      fputs (_("\
-  -a, --all        convert all whitespace, instead of just initial 
whitespace\n\
-      --first-only convert only leading sequences of whitespace (overrides 
-a)\n\
-  -t, --tabs=N     have tabs N characters apart instead of 8 (enables -a)\n\
-  -t, --tabs=LIST  use comma separated LIST of tab positions (enables -a)\n\
-"), stdout);
-      fputs (HELP_OPTION_DESCRIPTION, stdout);
-      fputs (VERSION_OPTION_DESCRIPTION, stdout);
-      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
-    }
-  exit (status);
-}
-
 int
 main (int argc, char **argv)
 {
-  int tabval = -1;             /* Value of tabstop being read, or -1. */
-  int c;                       /* Option character. */
+  bool have_tabval = false;
+  uintmax_t tabval IF_LINT (= 0);
+  int c;
 
-  /* If nonzero, cancel the effect of any -a (explicit or implicit in -t),
+  /* If true, cancel the effect of any -a (explicit or implicit in -t),
      so that only leading white space will be considered.  */
-  int convert_first_only = 0;
+  bool convert_first_only = false;
 
   bool obsolete_tablist = false;
 
@@ -419,9 +448,9 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
-  have_read_stdin = 0;
-  exit_status = 0;
-  convert_entire_line = 0;
+  have_read_stdin = false;
+  exit_status = EXIT_SUCCESS;
+  convert_entire_line = false;
   tab_list = NULL;
   first_free_tab = 0;
 
@@ -436,25 +465,29 @@ main (int argc, char **argv)
        case '?':
          usage (EXIT_FAILURE);
        case 'a':
-         convert_entire_line = 1;
+         convert_entire_line = true;
          break;
        case 't':
-         convert_entire_line = 1;
+         convert_entire_line = true;
          parse_tabstops (optarg);
          break;
        case CONVERT_FIRST_ONLY_OPTION:
-         convert_first_only = 1;
+         convert_first_only = true;
          break;
        case ',':
-         add_tabstop (tabval);
-         tabval = -1;
+         if (have_tabval)
+           add_tabstop (tabval);
+         have_tabval = false;
          obsolete_tablist = true;
          break;
        case_GETOPT_HELP_CHAR;
        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
        default:
-         if (tabval == -1)
-           tabval = 0;
+         if (!have_tabval)
+           {
+             tabval = 0;
+             have_tabval = true;
+           }
          tabval = tabval * 10 + c - '0';
          obsolete_tablist = true;
          break;
@@ -469,9 +502,10 @@ main (int argc, char **argv)
     }
 
   if (convert_first_only)
-    convert_entire_line = 0;
+    convert_entire_line = false;
 
-  add_tabstop (tabval);
+  if (have_tabval)
+    add_tabstop (tabval);
 
   validate_tabstops (tab_list, first_free_tab);
 
@@ -492,5 +526,6 @@ main (int argc, char **argv)
 
   if (have_read_stdin && fclose (stdin) == EOF)
     error (EXIT_FAILURE, errno, "-");
-  exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+
+  exit (exit_status);
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]