[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
coreutils "tac" int cleanup fixes
From: |
Paul Eggert |
Subject: |
coreutils "tac" int cleanup fixes |
Date: |
Tue, 03 Aug 2004 15:43:11 -0700 |
User-agent: |
Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux) |
The rarely-used "tac" command has a number of botches if the input is
larger than 2**31. I installed the following patch to fix the problems
that I found. I also fixed tac-pipe.c even though it's not used now.
Because of limitations in the regular-expression API, "tac" still
mishandles some large inputs, but it now diagnoses the problem and
exits rather than silently continuing with the wrong answers.
2004-08-03 Paul Eggert <address@hidden>
* src/tac.c (separator_ends_record, tac_seekable, tac_file,
tac_stdin, tac_stdin_to_mem, main): Use bool for booleans.
(match_length, G_buffer_size, tac_seekable, main): Use size_t for sizes.
(tac_seekable): Use ptrdiff_t for pointer subtraction.
Report an error if the result is out of range.
(tac_seekable, main): Check for integer overflow in buffer size
calculations.
(main): Remove unnecessary casts.
* src/tac-pipe.c (buf_init_from_stdin, find_bol, tac_mem):
Use bool for booleans.
(buf_init_from_stdin, buf_free, find_bol, print_line):
Use size_t for sizes.
Index: src/tac.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/tac.c,v
retrieving revision 1.109
diff -p -u -r1.109 tac.c
--- src/tac.c 21 Jan 2004 23:46:07 -0000 1.109
+++ src/tac.c 24 Jul 2004 22:57:10 -0000
@@ -77,9 +77,9 @@ char *program_name;
/* The string that separates the records of the file. */
static char *separator;
-/* If nonzero, print `separator' along with the record preceding it
+/* If true, print `separator' along with the record preceding it
in the file; otherwise with the record following it. */
-static int separator_ends_record;
+static bool separator_ends_record;
/* 0 if `separator' is to be matched as a regular expression;
otherwise, the length of `separator', used as a sentinel to
@@ -89,7 +89,7 @@ static size_t sentinel_length;
/* The length of a match with `separator'. If `sentinel_length' is 0,
`match_length' is computed every time a match succeeds;
otherwise, it is simply the length of `separator'. */
-static int match_length;
+static size_t match_length;
/* The input buffer. */
static char *G_buffer;
@@ -100,7 +100,7 @@ static size_t read_size;
/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
The extra 2 bytes allow `past_end' to have a value beyond the
end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
-static unsigned G_buffer_size;
+static size_t G_buffer_size;
/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
@@ -181,9 +181,9 @@ output (const char *start, const char *p
}
/* Print in reverse the file open on descriptor FD for reading FILE.
- Return 0 if ok, 1 if an error occurs. */
+ Return true if successful. */
-static int
+static bool
tac_seekable (int input_fd, const char *file)
{
/* Pointer to the location in `G_buffer' where the search for
@@ -200,18 +200,18 @@ tac_seekable (int input_fd, const char *
/* Offset in the file of the next read. */
off_t file_pos;
- /* Nonzero if `output' has not been called yet for any file.
+ /* True if `output' has not been called yet for any file.
Only used when the separator is attached to the preceding record. */
- int first_time = 1;
+ bool first_time = true;
char first_char = *separator; /* Speed optimization, non-regexp. */
char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
- int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
+ size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp.
*/
struct re_registers regs;
/* Find the size of the input file. */
file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
if (file_pos < 1)
- return 0; /* It's an empty file. */
+ return true; /* It's an empty file. */
/* Arrange for the first read to lop off enough to leave the rest of the
file a multiple of `read_size'. Since `read_size' can change, this may
@@ -231,7 +231,7 @@ tac_seekable (int input_fd, const char *
if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
match_start = past_end = G_buffer + saved_record_size;
@@ -249,9 +249,11 @@ tac_seekable (int input_fd, const char *
Otherwise, make `match_start' < `G_buffer'. */
if (sentinel_length == 0)
{
- int i = match_start - G_buffer;
+ ptrdiff_t i = match_start - G_buffer;
int ret;
+ if (! (INT_MIN < i && i <= INT_MAX))
+ abort ();
ret = re_search (&compiled_separator, G_buffer, i, i - 1, -i, ®s);
if (ret == -1)
match_start = G_buffer - 1;
@@ -283,7 +285,7 @@ tac_seekable (int input_fd, const char *
{
/* Hit the beginning of the file; print the remaining record. */
output (G_buffer, past_end);
- return 0;
+ return true;
}
saved_record_size = past_end - G_buffer;
@@ -294,15 +296,20 @@ tac_seekable (int input_fd, const char *
the data already in `G_buffer', we need to increase
`G_buffer_size'. */
char *newbuffer;
- int offset = sentinel_length ? sentinel_length : 1;
+ size_t offset = sentinel_length ? sentinel_length : 1;
+ ptrdiff_t match_start_offset = match_start - G_buffer;
+ ptrdiff_t past_end_offset = past_end - G_buffer;
+ size_t old_G_buffer_size = G_buffer_size;
read_size *= 2;
G_buffer_size = read_size * 2 + sentinel_length + 2;
+ if (G_buffer_size < old_G_buffer_size)
+ xalloc_die ();
newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
newbuffer += offset;
/* Adjust the pointers for the new buffer location. */
- match_start += newbuffer - G_buffer;
- past_end += newbuffer - G_buffer;
+ match_start = newbuffer + match_start_offset;
+ past_end = newbuffer + past_end_offset;
G_buffer = newbuffer;
}
@@ -330,7 +337,7 @@ tac_seekable (int input_fd, const char *
if (safe_read (input_fd, G_buffer, read_size) != read_size)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
}
else
@@ -342,10 +349,10 @@ tac_seekable (int input_fd, const char *
/* If this match of `separator' isn't at the end of the
file, print the record. */
- if (first_time == 0 || match_end != past_end)
+ if (!first_time || match_end != past_end)
output (match_end, past_end);
past_end = match_end;
- first_time = 0;
+ first_time = false;
}
else
{
@@ -361,28 +368,28 @@ tac_seekable (int input_fd, const char *
}
/* Print FILE in reverse.
- Return 0 if ok, 1 if an error occurs. */
+ Return true if successful. */
-static int
+static bool
tac_file (const char *file)
{
- int errors;
+ bool ok;
FILE *in;
in = fopen (file, "r");
if (in == NULL)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
SET_BINARY (fileno (in));
- errors = tac_seekable (fileno (in), file);
+ ok = tac_seekable (fileno (in), file);
if (fclose (in) != 0)
{
error (0, errno, "%s", file);
- return 1;
+ return false;
}
- return errors;
+ return ok;
}
#if DONT_UNLINK_WHILE_OPEN
@@ -466,12 +473,11 @@ save_stdin (FILE **g_tmp, char **g_tempf
/* Print the standard input in reverse, saving it to temporary
file first if it is a pipe.
- Return 0 if ok, 1 if an error occurs. */
+ Return true if successful. */
-static int
+static bool
tac_stdin (void)
{
- int errors;
struct stat stats;
/* No tempfile is needed for "tac < file".
@@ -481,22 +487,20 @@ tac_stdin (void)
if (fstat (STDIN_FILENO, &stats))
{
error (0, errno, _("standard input"));
- return 1;
+ return false;
}
if (S_ISREG (stats.st_mode))
{
- errors = tac_seekable (fileno (stdin), _("standard input"));
+ return tac_seekable (STDIN_FILENO, _("standard input"));
}
else
{
FILE *tmp_stream;
char *tmp_file;
save_stdin (&tmp_stream, &tmp_file);
- errors = tac_seekable (fileno (tmp_stream), tmp_file);
+ return tac_seekable (fileno (tmp_stream), tmp_file);
}
-
- return errors;
}
#if 0
@@ -548,7 +552,7 @@ tac_mem (const char *buf, size_t n_bytes
/* FIXME: describe */
-static int
+static bool
tac_stdin_to_mem (void)
{
char *buf = NULL;
@@ -586,7 +590,7 @@ tac_stdin_to_mem (void)
tac_mem (buf, n_bytes, stdout);
- return 0;
+ return true;
}
#endif
@@ -594,8 +598,10 @@ int
main (int argc, char **argv)
{
const char *error_message; /* Return value from re_compile_pattern. */
- int optc, errors;
- int have_read_stdin = 0;
+ int optc;
+ bool ok;
+ bool have_read_stdin = false;
+ size_t half_buffer_size;
initialize_main (&argc, &argv);
program_name = argv[0];
@@ -605,10 +611,9 @@ main (int argc, char **argv)
atexit (close_stdout);
- errors = 0;
separator = "\n";
sentinel_length = 1;
- separator_ends_record = 1;
+ separator_ends_record = true;
while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
{
@@ -617,7 +622,7 @@ main (int argc, char **argv)
case 0:
break;
case 'b':
- separator_ends_record = 0;
+ separator_ends_record = false;
break;
case 'r':
sentinel_length = 0;
@@ -637,8 +642,7 @@ main (int argc, char **argv)
if (sentinel_length == 0)
{
compiled_separator.allocated = 100;
- compiled_separator.buffer = (unsigned char *)
- xmalloc (compiled_separator.allocated);
+ compiled_separator.buffer = xmalloc (compiled_separator.allocated);
compiled_separator.fastmap = xmalloc (256);
compiled_separator.translate = 0;
error_message = re_compile_pattern (separator, strlen (separator),
@@ -650,10 +654,16 @@ main (int argc, char **argv)
match_length = sentinel_length = strlen (separator);
read_size = INITIAL_READSIZE;
- /* A precaution that will probably never be needed. */
- while (sentinel_length * 2 >= read_size)
- read_size *= 2;
- G_buffer_size = read_size * 2 + sentinel_length + 2;
+ while (sentinel_length >= read_size / 2)
+ {
+ if (SIZE_MAX / 2 < read_size)
+ xalloc_die ();
+ read_size *= 2;
+ }
+ half_buffer_size = read_size + sentinel_length + 1;
+ G_buffer_size = 2 * half_buffer_size;
+ if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
+ xalloc_die ();
G_buffer = xmalloc (G_buffer_size);
if (sentinel_length)
{
@@ -667,21 +677,22 @@ main (int argc, char **argv)
if (optind == argc)
{
- have_read_stdin = 1;
+ have_read_stdin = true;
/* We need binary I/O, since `tac' relies
on `lseek' and byte counts. */
SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
- errors = tac_stdin ();
+ ok = tac_stdin ();
}
else
{
+ ok = true;
for (; optind < argc; ++optind)
{
if (STREQ (argv[optind], "-"))
{
- have_read_stdin = 1;
+ have_read_stdin = true;
SET_BINARY2 (STDIN_FILENO, STDOUT_FILENO);
- errors |= tac_stdin ();
+ ok &= tac_stdin ();
}
else
{
@@ -691,7 +702,7 @@ main (int argc, char **argv)
text mode has no visible effect on console output,
since two CRs in a row are just like one CR. */
SET_BINARY (STDOUT_FILENO);
- errors |= tac_file (argv[optind]);
+ ok &= tac_file (argv[optind]);
}
}
}
@@ -701,5 +712,5 @@ main (int argc, char **argv)
if (have_read_stdin && close (STDIN_FILENO) < 0)
error (EXIT_FAILURE, errno, "-");
- exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+ exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
Index: src/tac-pipe.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/tac-pipe.c,v
retrieving revision 1.4
diff -p -u -r1.4 tac-pipe.c
--- src/tac-pipe.c 30 Aug 2002 23:04:54 -0000 1.4
+++ src/tac-pipe.c 19 Jul 2004 18:33:25 -0000
@@ -30,11 +30,11 @@ struct Buf
};
typedef struct Buf Buf;
-static int
-buf_init_from_stdin (Buf *x, int eol_byte)
+static bool
+buf_init_from_stdin (Buf *x, char eol_byte)
{
- int last_byte_is_eol_byte = 1;
- int fail = 0;
+ bool last_byte_is_eol_byte = true;
+ bool ok = true;
#define OBS (&(x->obs))
obstack_init (OBS);
@@ -42,18 +42,18 @@ buf_init_from_stdin (Buf *x, int eol_byt
while (1)
{
char *buf = (char *) malloc (BUFFER_SIZE);
- int bytes_read;
+ size_t bytes_read;
if (buf == NULL)
{
/* Fall back on the code that relies on a temporary file.
Write all buffers to that file and free them. */
/* FIXME */
- fail = 1;
+ ok = false;
break;
}
bytes_read = full_read (STDIN_FILENO, buf, BUFFER_SIZE);
- if (bytes_read < 0)
+ if (bytes_read != buffer_size && errno != 0)
error (EXIT_FAILURE, errno, _("read error"));
{
@@ -63,14 +63,14 @@ buf_init_from_stdin (Buf *x, int eol_byt
obstack_grow (OBS, &bp, sizeof (bp));
}
- if (bytes_read > 0)
+ if (bytes_read != 0)
last_byte_is_eol_byte = (buf[bytes_read - 1] == eol_byte);
if (bytes_read < BUFFER_SIZE)
break;
}
- if (!fail)
+ if (ok)
{
/* If the file was non-empty and lacked an EOL_BYTE at its end,
then add a buffer containing just that one byte. */
@@ -80,7 +80,7 @@ buf_init_from_stdin (Buf *x, int eol_byt
if (buf == NULL)
{
/* FIXME: just like above */
- fail = 1;
+ ok = false;
}
else
{
@@ -102,13 +102,13 @@ buf_init_from_stdin (Buf *x, int eol_byt
&& x->p[x->n_bufs - 1].start == x->p[x->n_bufs - 1].one_past_end)
free (x->p[--(x->n_bufs)].start);
- return fail;
+ return ok;
}
static void
buf_free (Buf *x)
{
- int i;
+ size_t i;
for (i = 0; i < x->n_bufs; i++)
free (x->p[i].start);
obstack_free (OBS, NULL);
@@ -153,16 +153,16 @@ line_ptr_increment (const Buf *x, const
return lp_new;
}
-static int
+static bool
find_bol (const Buf *x,
const Line_ptr *last_bol, Line_ptr *new_bol, char eol_byte)
{
- int i;
+ size_t i;
Line_ptr tmp;
char *last_bol_ptr;
if (last_bol->ptr == x->p[0].start)
- return 0;
+ return false;
tmp = line_ptr_decrement (x, last_bol);
last_bol_ptr = tmp.ptr;
@@ -176,10 +176,10 @@ find_bol (const Buf *x,
nl_pos.i = i;
nl_pos.ptr = nl;
*new_bol = line_ptr_increment (x, &nl_pos);
- return 1;
+ return true;
}
- if (i <= 0)
+ if (i == 0)
break;
--i;
@@ -193,17 +193,17 @@ find_bol (const Buf *x,
{
new_bol->i = 0;
new_bol->ptr = x->p[0].start;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static void
print_line (FILE *out_stream, const Buf *x,
const Line_ptr *bol, const Line_ptr *bol_next)
{
- int i;
+ size_t i;
for (i = bol->i; i <= bol_next->i; i++)
{
char *a = (i == bol->i ? bol->ptr : x->p[i].start);
@@ -212,24 +212,22 @@ print_line (FILE *out_stream, const Buf
}
}
-static int
+static bool
tac_mem ()
{
Buf x;
Line_ptr bol;
char eol_byte = '\n';
- int fail;
- fail = buf_init_from_stdin (&x, eol_byte);
- if (fail)
+ if (! buf_init_from_stdin (&x, eol_byte))
{
buf_free (&x);
- return 1;
+ return false;
}
/* Special case the empty file. */
if (EMPTY (&x))
- return 0;
+ return true;
/* Initially, point at one past the last byte of the file. */
bol.i = x.n_bufs - 1;
@@ -238,11 +236,10 @@ tac_mem ()
while (1)
{
Line_ptr new_bol;
- int found = find_bol (&x, &bol, &new_bol, eol_byte);
- if (!found)
+ if (! find_bol (&x, &bol, &new_bol, eol_byte))
break;
print_line (stdout, &x, &new_bol, &bol);
bol = new_bol;
}
- return 0;
+ return true;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- coreutils "tac" int cleanup fixes,
Paul Eggert <=