grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v3.7-15-ge3694e9


From: Paul Eggert
Subject: grep branch, master, updated. v3.7-15-ge3694e9
Date: Wed, 25 Aug 2021 15:11:32 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  e3694e90b4789ccafaf022a29d9ce08ff11375c2 (commit)
      from  b7d83f46d81a304e188c82877430765c29a75610 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=e3694e90b4789ccafaf022a29d9ce08ff11375c2


commit e3694e90b4789ccafaf022a29d9ce08ff11375c2
Author: Paul Eggert <eggert@cs.ucla.edu>
Date:   Tue Aug 24 17:19:22 2021 -0700

    grep: prefer signed to unsigned integers
    
    This improves runtime checking for integer overflow when compiling
    with gcc -fsanitize=undefined and the like.  It also avoids
    the need for some integer casts, which can be error-prone.
    * bootstrap.conf (gnulib_modules): Add idx.
    * src/dfasearch.c (struct dfa_comp, kwsmusts):
    (possible_backrefs_in_pattern, regex_compile, GEAcompile)
    (EGexecute):
    * src/grep.c (struct patloc, patlocs_allocated, patlocs_used)
    (n_patterns, update_patterns, pattern_file_name, poison_len)
    (asan_poison, fwrite_errno, compile_fp_t, execute_fp_t)
    (buf_has_encoding_errors, buf_has_nulls, file_must_have_nulls)
    (bufalloc, pagesize, all_zeros, fillbuf, nlscan)
    (print_line_head, print_line_middle, print_line_tail, grepbuf)
    (grep, contains_encoding_error, fgrep_icase_available)
    (fgrep_icase_charlen, fgrep_to_grep_pattern, try_fgrep_pattern)
    (main):
    * src/kwsearch.c (struct kwsearch, Fcompile, Fexecute):
    * src/kwset.c (struct trie, struct kwset, kwsalloc, kwsincr)
    (kwswords, treefails, memchr_kwset, acexec_trans, kwsexec)
    (treedelta, kwsprep, bm_delta2_search, bmexec_trans, bmexec)
    (acexec):
    * src/kwset.h (struct kwsmatch):
    * src/pcresearch.c (Pcompile, Pexecute):
    * src/search.h (mb_clen):
    * src/searchutils.c (kwsinit, mb_goback, wordchars_count)
    (wordchars_size, wordchar_next, wordchar_prev):
    Prefer idx_t to size_t or ptrdiff_t for nonnegative sizes,
    and prefer ptrdiff_t to size_t for sizes plus error values.
    * src/grep.c (uword_size): New constant, used for signed
    size calculations.
    (totalnl, add_count, totalcc, print_offset, print_line_head, grep):
    Prefer intmax_t to uintmax_t for wide integer calculations.
    (fgrep_icase_charlen): Prefer ptrdiff_t to int for size offsets.
    * src/grep.h: Include idx.h.
    * src/search.h (imbrlen): New function, like mbrlen except
    with idx_t and ptrdiff_t.

diff --git a/bootstrap.conf b/bootstrap.conf
index 8e46000..7e4f24c 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -50,6 +50,7 @@ gitlog-to-changelog
 gnu-web-doc-update
 gnupload
 hash
+idx
 ignore-value
 intprops
 inttypes
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d6afa8d..1675865 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -36,13 +36,13 @@ struct dfa_comp
 
   /* Regex compiled regexps. */
   struct re_pattern_buffer *patterns;
-  size_t pcount;
+  idx_t pcount;
   struct re_registers regs;
 
   /* Number of compiled fixed strings known to exactly match the regexp.
      If kwsexec returns < kwset_exact_matches, then we don't need to
      call the regexp matcher at all. */
-  ptrdiff_t kwset_exact_matches;
+  idx_t kwset_exact_matches;
 
   bool begline;
 };
@@ -80,9 +80,9 @@ kwsmusts (struct dfa_comp *dc)
          The kwset matcher will return the index of the matching
          string that it chooses. */
       ++dc->kwset_exact_matches;
-      ptrdiff_t old_len = strlen (dm->must);
-      ptrdiff_t new_len = old_len + dm->begline + dm->endline;
-      char *must = xmalloc (new_len);
+      idx_t old_len = strlen (dm->must);
+      idx_t new_len = old_len + dm->begline + dm->endline;
+      char *must = ximalloc (new_len);
       char *mp = must;
       *mp = eolbyte;
       mp += dm->begline;
@@ -108,7 +108,7 @@ kwsmusts (struct dfa_comp *dc)
    BS_SAFE is true of encodings where a backslash cannot appear as the
    last byte of a multibyte character.  */
 static bool _GL_ATTRIBUTE_PURE
-possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
+possible_backrefs_in_pattern (char const *keys, idx_t len, bool bs_safe)
 {
   /* Normally a backslash, but in an unsafe encoding this is a non-char
      value so that the comparison below always fails, because if there
@@ -144,8 +144,8 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
 }
 
 static bool
-regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
-               ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
+regex_compile (struct dfa_comp *dc, char const *p, idx_t len,
+               idx_t pcount, idx_t lineno, reg_syntax_t syntax_bits,
                bool syntax_only)
 {
   struct re_pattern_buffer pat0;
@@ -154,7 +154,9 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
   pat->allocated = 0;
 
   /* Do not use a fastmap with -i, to work around glibc Bug#20381.  */
-  pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
+  verify (UCHAR_MAX < IDX_MAX);
+  idx_t uchar_max = UCHAR_MAX;
+  pat->fastmap = (syntax_only | match_icase) ? NULL : ximalloc (uchar_max + 1);
 
   pat->translate = NULL;
 
@@ -168,14 +170,17 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
     return true;
 
   /* Emit a filename:lineno: prefix for patterns taken from files.  */
-  size_t pat_lineno;
+  idx_t pat_lineno;
   char const *pat_filename
     = lineno < 0 ? "" : pattern_file_name (lineno, &pat_lineno);
 
   if (*pat_filename == '\0')
     error (0, 0, "%s", err);
   else
-    error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err);
+    {
+      ptrdiff_t n = pat_lineno;
+      error (0, 0, "%s:%td: %s", pat_filename, n, err);
+    }
 
   return false;
 }
@@ -185,7 +190,7 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
    Return a description of the compiled pattern.  */
 
 void *
-GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
+GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
             bool exact)
 {
   char *motif;
@@ -210,29 +215,30 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
   dc->patterns = xmalloc (sizeof *dc->patterns);
   dc->patterns++;
   dc->pcount = 0;
-  size_t palloc = 1;
+  idx_t palloc = 1;
 
   char const *prev = pattern;
 
   /* Buffer containing back-reference-free patterns.  */
   char *buf = NULL;
-  ptrdiff_t buflen = 0;
-  size_t bufalloc = 0;
+  idx_t buflen = 0;
+  idx_t bufalloc = 0;
 
-  ptrdiff_t lineno = 0;
+  idx_t lineno = 0;
 
   do
     {
       char const *sep = rawmemchr (p, '\n');
-      ptrdiff_t len = sep - p;
+      idx_t len = sep - p;
 
       bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
 
       if (backref && prev < p)
         {
-          ptrdiff_t prevlen = p - prev;
-          while (bufalloc < buflen + prevlen)
-            buf = x2realloc (buf, &bufalloc);
+          idx_t prevlen = p - prev;
+          ptrdiff_t bufshortage = buflen - bufalloc + prevlen;
+          if (0 < bufshortage)
+            buf = xpalloc (buf, &bufalloc, bufshortage, -1, 1);
           memcpy (buf + buflen, prev, prevlen);
           buflen += prevlen;
         }
@@ -240,10 +246,11 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
       /* Ensure room for at least two more patterns.  The extra one is
          for the regex_compile that may be executed after this loop
          exits, and its (unused) slot is patterns[-1] until then.  */
-      while (palloc <= dc->pcount + 1)
+      ptrdiff_t shortage = dc->pcount - palloc + 2;
+      if (0 < shortage)
         {
-          dc->patterns = x2nrealloc (dc->patterns - 1, &palloc,
-                                     sizeof *dc->patterns);
+          dc->patterns = xpalloc (dc->patterns - 1, &palloc, shortage, -1,
+                                  sizeof *dc->patterns);
           dc->patterns++;
         }
 
@@ -271,8 +278,8 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
     {
       if (pattern < prev)
         {
-          ptrdiff_t prevlen = patlim - prev;
-          buf = xrealloc (buf, buflen + prevlen);
+          idx_t prevlen = patlim - prev;
+          buf = xirealloc (buf, buflen + prevlen);
           memcpy (buf + buflen, prev, prevlen);
           buflen += prevlen;
         }
@@ -298,11 +305,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
       static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
       static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
       int bk = !(syntax_bits & RE_NO_BK_PARENS);
-      char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
+      idx_t bracket_bytes = sizeof word_beg_bk - 1 + sizeof word_end_bk;
+      char *n = ximalloc (size + bracket_bytes);
 
       strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
                              : (bk ? word_beg_bk : word_beg_no_bk));
-      size_t total = strlen (n);
+      idx_t total = strlen (n);
       memcpy (n + total, pattern, size);
       total += size;
       strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
@@ -338,16 +346,16 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
   return dc;
 }
 
-size_t
-EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
            char const *start_ptr)
 {
   char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
   char eol = eolbyte;
   regoff_t start;
-  size_t len, best_len;
+  idx_t len, best_len;
   struct kwsmatch kwsm;
-  size_t i;
+  idx_t i;
   struct dfa_comp *dc = vdc;
   struct dfa *superset = dfasuperset (dc->dfa);
   bool dfafast = dfaisfast (dc->dfa);
@@ -362,7 +370,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
       if (!start_ptr)
         {
           char const *next_beg, *dfa_beg = beg;
-          ptrdiff_t count = 0;
+          idx_t count = 0;
           bool exact_kwset_match = false;
           bool backref = false;
 
@@ -584,7 +592,6 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
  success:
   len = end - beg;
  success_in_len:;
-  size_t off = beg - buf;
   *match_size = len;
-  return off;
+  return beg - buf;
 }
diff --git a/src/grep.c b/src/grep.c
index 3569375..a55194c 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -88,13 +88,13 @@ struct patloc
   {
     /* Line number of the pattern in PATTERN_ARRAY.  Line numbers
        start at 0, and each pattern is terminated by '\n'.  */
-    ptrdiff_t lineno;
+    idx_t lineno;
 
     /* Input location of the pattern.  The FILENAME "-" represents
        standard input, and "" represents the command line.  FILELINE is
        origin-1 for files and is irrelevant for the command line.  */
     char const *filename;
-    ptrdiff_t fileline;
+    idx_t fileline;
   };
 
 /* The array of pattern locations.  The concatenation of all patterns
@@ -108,13 +108,13 @@ struct patloc
    removed patterns not at a file start or end requires another
    PATLOC entry for the first non-removed pattern.  */
 static struct patloc *patloc;
-static size_t patlocs_allocated, patlocs_used;
+static idx_t patlocs_allocated, patlocs_used;
 
 /* Pointer to the array of patterns, each terminated by newline.  */
 static char *pattern_array;
 
 /* The number of unique patterns seen so far.  */
-static size_t n_patterns;
+static idx_t n_patterns;
 
 /* Hash table of patterns seen so far.  */
 static Hash_table *pattern_table;
@@ -160,16 +160,16 @@ compare_patterns (void const *a, void const *b)
    sequence of patterns with no duplicates; SIZE is the total number
    of bytes in KEYS.  If some patterns past the first DUPFREE_SIZE
    bytes are not duplicates, update PATLOCS accordingly.  */
-static ptrdiff_t
-update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
+static idx_t
+update_patterns (char *keys, idx_t dupfree_size, idx_t size,
                  char const *filename)
 {
   char *dst = keys + dupfree_size;
-  ptrdiff_t fileline = 1;
+  idx_t fileline = 1;
   int prev_inserted = 0;
 
   char const *srclim = keys + size;
-  ptrdiff_t patsize;
+  idx_t patsize;
   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
     {
       char const *patend = rawmemchr (src, '\n');
@@ -190,8 +190,8 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
           if (!prev_inserted)
             {
               if (patlocs_used == patlocs_allocated)
-                patloc = x2nrealloc (patloc, &patlocs_allocated,
-                                     sizeof *patloc);
+                patloc = xpalloc (patloc, &patlocs_allocated, 1, -1,
+                                  sizeof *patloc);
               patloc[patlocs_used++]
                 = (struct patloc) { .lineno = n_patterns,
                                     .filename = filename,
@@ -213,9 +213,9 @@ update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
    Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
    or to an unspecified value if PATTERN came from the command line.  */
 char const * _GL_ATTRIBUTE_PURE
-pattern_file_name (size_t lineno, size_t *new_lineno)
+pattern_file_name (idx_t lineno, idx_t *new_lineno)
 {
-  ptrdiff_t i;
+  idx_t i;
   for (i = 1; i < patlocs_used; i++)
     if (lineno < patloc[i].lineno)
       break;
@@ -227,7 +227,7 @@ pattern_file_name (size_t lineno, size_t *new_lineno)
 /* Record the starting address and length of the sole poisoned region,
    so that we can unpoison it later, just before each following read.  */
 static void const *poison_buf;
-static size_t poison_len;
+static idx_t poison_len;
 
 static void
 clear_asan_poison (void)
@@ -237,7 +237,7 @@ clear_asan_poison (void)
 }
 
 static void
-asan_poison (void const *addr, size_t size)
+asan_poison (void const *addr, idx_t size)
 {
   poison_buf = addr;
   poison_len = size;
@@ -246,7 +246,7 @@ asan_poison (void const *addr, size_t size)
 }
 #else
 static void clear_asan_poison (void) { }
-static void asan_poison (void const volatile *addr, size_t size) { }
+static void asan_poison (void const volatile *addr, idx_t size) { }
 #endif
 
 /* The group separator used when context is requested. */
@@ -467,7 +467,7 @@ printf_errno (char const *format, ...)
 }
 
 static void
-fwrite_errno (void const *ptr, size_t size, size_t nmemb)
+fwrite_errno (void const *ptr, idx_t size, idx_t nmemb)
 {
   if (fwrite (ptr, size, nmemb, stdout) != nmemb)
     stdout_errno = errno;
@@ -644,9 +644,9 @@ static bool seek_failed;
 static bool seek_data_failed;
 
 /* Functions we'll use to search. */
-typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
-typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
-                                char const *);
+typedef void *(*compile_fp_t) (char *, idx_t, reg_syntax_t, bool);
+typedef ptrdiff_t (*execute_fp_t) (void *, char const *, idx_t, idx_t *,
+                                   char const *);
 static execute_fp_t execute;
 static void *compiled_pattern;
 
@@ -694,6 +694,7 @@ clean_up_stdout (void)
 /* An unsigned type suitable for fast matching.  */
 typedef uintmax_t uword;
 static uword const uword_max = UINTMAX_MAX;
+enum { uword_size = sizeof (uword) }; /* For when a signed size is wanted.  */
 
 struct localeinfo localeinfo;
 
@@ -742,7 +743,7 @@ skip_easy_bytes (char const *buf)
      the buffer end, but that's benign.  */
   char const *p;
   uword const *s;
-  for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
+  for (p = buf; (uintptr_t) p % uword_size != 0; p++)
     if (to_uchar (*p) & unibyte_mask)
       return p;
   for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
@@ -753,22 +754,22 @@ skip_easy_bytes (char const *buf)
 }
 
 /* Return true if BUF, of size SIZE, has an encoding error.
-   BUF must be followed by at least sizeof (uword) bytes,
+   BUF must be followed by at least uword_size bytes,
    the first of which may be modified.  */
 static bool
-buf_has_encoding_errors (char *buf, size_t size)
+buf_has_encoding_errors (char *buf, idx_t size)
 {
   if (! unibyte_mask)
     return false;
 
   mbstate_t mbs = { 0 };
-  size_t clen;
+  ptrdiff_t clen;
 
   buf[size] = -1;
   for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
     {
-      clen = mbrlen (p, buf + size - p, &mbs);
-      if (MB_LEN_MAX < clen)
+      clen = imbrlen (p, buf + size - p, &mbs);
+      if (clen < 0)
         return true;
     }
 
@@ -780,7 +781,7 @@ buf_has_encoding_errors (char *buf, size_t size)
    BUF must be followed by at least one byte,
    which may be arbitrarily written to or read from.  */
 static bool
-buf_has_nulls (char *buf, size_t size)
+buf_has_nulls (char *buf, idx_t size)
 {
   buf[size] = 0;
   return strlen (buf) != size;
@@ -790,7 +791,7 @@ buf_has_nulls (char *buf, size_t size)
    SIZE bytes have already been read from the file
    with descriptor FD and status ST.  */
 static bool
-file_must_have_nulls (size_t size, int fd, struct stat const *st)
+file_must_have_nulls (idx_t size, int fd, struct stat const *st)
 {
   /* If the file has holes, it must contain a null byte somewhere.  */
   if (SEEK_HOLE != SEEK_SET && !seek_failed
@@ -869,18 +870,18 @@ skipped_file (char const *name, bool command_line, bool is_dir)
    page size, unless a read yields a partial page.  */
 
 static char *buffer;           /* Base of buffer. */
-static size_t bufalloc;                /* Allocated buffer size, counting slop. */
+static idx_t bufalloc;         /* Allocated buffer size, counting slop. */
 static int bufdesc;            /* File descriptor. */
 static char *bufbeg;           /* Beginning of user-visible stuff. */
 static char *buflim;           /* Limit of user-visible stuff. */
-static size_t pagesize;                /* alignment of memory pages */
+static idx_t pagesize;         /* alignment of memory pages */
 static off_t bufoffset;                /* Read offset.  */
 static off_t after_last_match; /* Pointer after last matching line that
                                    would have been output if we were
                                    outputting characters. */
 static bool skip_nuls;         /* Skip '\0' in data.  */
 static bool skip_empty_lines;  /* Skip empty lines in data.  */
-static uintmax_t totalnl;      /* Total newline count before lastnl. */
+static intmax_t totalnl;       /* Total newline count before lastnl. */
 
 /* Initial buffer size, not counting slop. */
 enum { INITIAL_BUFSIZE = 96 * 1024 };
@@ -894,18 +895,18 @@ enum { INITIAL_BUFSIZE = 96 * 1024 };
 
 /* Add two numbers that count input bytes or lines, and report an
    error if the addition overflows.  */
-static uintmax_t
-add_count (uintmax_t a, uintmax_t b)
+static intmax_t
+add_count (intmax_t a, idx_t b)
 {
-  uintmax_t sum = a + b;
-  if (sum < a)
+  intmax_t sum;
+  if (!INT_ADD_OK (a, b, &sum))
     die (EXIT_TROUBLE, 0, _("input is too large to count"));
   return sum;
 }
 
 /* Return true if BUF (of size SIZE) is all zeros.  */
 static bool
-all_zeros (char const *buf, size_t size)
+all_zeros (char const *buf, idx_t size)
 {
   for (char const *p = buf; p < buf + size; p++)
     if (*p)
@@ -944,55 +945,55 @@ reset (int fd, struct stat const *st)
    to the beginning of the buffer contents, and 'buflim'
    points just after the end.  Return false if there's an error.  */
 static bool
-fillbuf (size_t save, struct stat const *st)
+fillbuf (idx_t save, struct stat const *st)
 {
-  size_t fillsize;
-  bool cc = true;
   char *readbuf;
-  size_t readsize;
 
-  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
+  /* After BUFLIM, we need room for at least a page of data plus a
+     trailing uword.  */
+  idx_t min_after_buflim = pagesize + uword_size;
+
+  if (min_after_buflim <= buffer + bufalloc - buflim)
     readbuf = buflim;
   else
     {
-      size_t minsize = save + pagesize;
-      size_t newsize;
-      size_t newalloc;
       char *newbuf;
 
-      /* Grow newsize until it is at least as great as minsize.  */
-      for (newsize = bufalloc - pagesize - sizeof (uword);
-           newsize < minsize;
-           newsize *= 2)
-        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
-          xalloc_die ();
-
-      /* Try not to allocate more memory than the file size indicates,
-         as that might cause unnecessary memory exhaustion if the file
-         is large.  However, do not use the original file size as a
-         heuristic if we've already read past the file end, as most
-         likely the file is growing.  */
-      if (usable_st_size (st))
-        {
-          off_t to_be_read = st->st_size - bufoffset;
-          off_t maxsize_off = save + to_be_read;
-          if (0 <= to_be_read && to_be_read <= maxsize_off
-              && maxsize_off == (size_t) maxsize_off
-              && minsize <= (size_t) maxsize_off
-              && (size_t) maxsize_off < newsize)
-            newsize = maxsize_off;
-        }
+      /* For data to be searched we need room for the saved bytes,
+         plus at least a page of data to read.  */
+      idx_t minsize = save + pagesize;
 
       /* Add enough room so that the buffer is aligned and has room
          for byte sentinels fore and aft, and so that a uword can
          be read aft.  */
-      newalloc = newsize + pagesize + sizeof (uword);
+      ptrdiff_t incr_min = minsize - bufalloc + min_after_buflim;
+
+      if (incr_min <= 0)
+        newbuf = buffer;
+      else
+        {
+          /* Try not to allocate more memory than the file size indicates,
+             as that might cause unnecessary memory exhaustion if the file
+             is large.  However, do not use the original file size as a
+             heuristic if we've already read past the file end, as most
+             likely the file is growing.  */
+          ptrdiff_t alloc_max = -1;
+          if (usable_st_size (st))
+            {
+              off_t to_be_read = st->st_size - bufoffset;
+              ptrdiff_t a;
+              if (0 <= to_be_read
+                  && INT_ADD_OK (to_be_read, save + min_after_buflim, &a))
+                alloc_max = a;
+            }
+
+          newbuf = xpalloc (NULL, &bufalloc, incr_min, alloc_max, 1);
+        }
 
-      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
       readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
-      size_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
+      idx_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
       memmove (readbuf - moved, buflim - moved, moved);
-      if (newbuf != buffer)
+      if (0 < incr_min)
         {
           free (buffer);
           buffer = newbuf;
@@ -1003,9 +1004,12 @@ fillbuf (size_t save, struct stat const *st)
 
   clear_asan_poison ();
 
-  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
+  idx_t readsize = buffer + bufalloc - uword_size - readbuf;
   readsize -= readsize % pagesize;
 
+  idx_t fillsize;
+  bool cc = true;
+
   while (true)
     {
       fillsize = safe_read (bufdesc, readbuf, readsize);
@@ -1043,12 +1047,11 @@ fillbuf (size_t save, struct stat const *st)
   /* Initialize the following word, because skip_easy_bytes and some
      matchers read (but do not use) those bytes.  This avoids false
      positive reports of these bytes being used uninitialized.  */
-  memset (buflim, 0, sizeof (uword));
+  memset (buflim, 0, uword_size);
 
   /* Mark the part of the buffer not filled by the read or set by
      the above memset call as ASAN-poisoned.  */
-  asan_poison (buflim + sizeof (uword),
-               bufalloc - (buflim - buffer) - sizeof (uword));
+  asan_poison (buflim + uword_size, bufalloc - (buflim - buffer) - uword_size);
 
   return cc;
 }
@@ -1089,7 +1092,7 @@ static char *label = NULL;      /* Fake filename for stdin */
 
 
 /* Internal variables to keep track of byte count, context, etc. */
-static uintmax_t totalcc;      /* Total character count before bufbeg. */
+static intmax_t totalcc;       /* Total character count before bufbeg. */
 static char const *lastnl;     /* Pointer after last newline counted. */
 static char *lastout;          /* Pointer after last character output;
                                    NULL if no character has been output
@@ -1105,7 +1108,7 @@ static bool binary;               /* Use binary rather than text I/O.  */
 static void
 nlscan (char const *lim)
 {
-  size_t newlines = 0;
+  idx_t newlines = 0;
   for (char const *beg = lastnl; beg < lim; beg++)
     {
       beg = memchr (beg, eolbyte, lim - beg);
@@ -1137,16 +1140,16 @@ print_sep (char sep)
 
 /* Print a line number or a byte offset.  */
 static void
-print_offset (uintmax_t pos, const char *color)
+print_offset (intmax_t pos, const char *color)
 {
   pr_sgr_start_if (color);
-  printf_errno ("%*"PRIuMAX, offset_width, pos);
+  printf_errno ("%*"PRIdMAX, offset_width, pos);
   pr_sgr_end_if (color);
 }
 
 /* Print a whole line head (filename, line, byte).  The output data
    starts at BEG and contains LEN bytes; it is followed by at least
-   sizeof (uword) bytes, the first of which may be temporarily modified.
+   uword_size bytes, the first of which may be temporarily modified.
    The output data comes from what is perhaps a larger input line that
    goes until LIM, where LIM[-1] is an end-of-line byte.  Use SEP as
    the separator on output.
@@ -1154,7 +1157,7 @@ print_offset (uintmax_t pos, const char *color)
    Return true unless the line was suppressed due to an encoding error.  */
 
 static bool
-print_line_head (char *beg, size_t len, char const *lim, char sep)
+print_line_head (char *beg, idx_t len, char const *lim, char sep)
 {
   if (binary_files != TEXT_BINARY_FILES)
     {
@@ -1191,7 +1194,7 @@ print_line_head (char *beg, size_t len, char const *lim, char sep)
 
   if (out_byte)
     {
-      uintmax_t pos = add_count (totalcc, beg - bufbeg);
+      intmax_t pos = add_count (totalcc, beg - bufbeg);
       print_offset (pos, byte_num_color);
       print_sep (sep);
     }
@@ -1206,16 +1209,16 @@ static char *
 print_line_middle (char *beg, char *lim,
                    const char *line_color, const char *match_color)
 {
-  size_t match_size;
-  size_t match_offset;
+  idx_t match_size;
+  ptrdiff_t match_offset;
   char *cur;
   char *mid = NULL;
   char *b;
 
   for (cur = beg;
        (cur < lim
-        && ((match_offset = execute (compiled_pattern, beg, lim - beg,
-                                     &match_size, cur)) != (size_t) -1));
+        && 0 <= (match_offset = execute (compiled_pattern, beg, lim - beg,
+                                         &match_size, cur)));
        cur = b + match_size)
     {
       b = beg + match_offset;
@@ -1273,8 +1276,8 @@ print_line_middle (char *beg, char *lim,
 static char *
 print_line_tail (char *beg, const char *lim, const char *line_color)
 {
-  size_t eol_size;
-  size_t tail_size;
+  idx_t eol_size;
+  idx_t tail_size;
 
   eol_size   = (lim > beg && lim[-1] == eolbyte);
   eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
@@ -1462,10 +1465,10 @@ grepbuf (char *beg, char const *lim)
 
   for (char *p = beg; p < lim; p = endp)
     {
-      size_t match_size;
-      size_t match_offset = execute (compiled_pattern, p, lim - p,
-                                     &match_size, NULL);
-      if (match_offset == (size_t) -1)
+      idx_t match_size;
+      ptrdiff_t match_offset = execute (compiled_pattern, p, lim - p,
+                                        &match_size, NULL);
+      if (match_offset < 0)
         {
           if (!out_invert)
             break;
@@ -1500,7 +1503,7 @@ static intmax_t
 grep (int fd, struct stat const *st, bool *ineof)
 {
   intmax_t nlines, i;
-  size_t residue, save;
+  idx_t residue, save;
   char oldc;
   char *beg;
   char *lim;
@@ -1540,8 +1543,8 @@ grep (int fd, struct stat const *st, bool *ineof)
   if (align_tabs)
     {
       /* Width is log of maximum number.  Line numbers are origin-1.  */
-      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
-      num += out_line && num < UINTMAX_MAX;
+      intmax_t num = usable_st_size (st) ? st->st_size : INTMAX_MAX;
+      num += out_line && num < INTMAX_MAX;
       do
         offset_width++;
       while ((num /= 10) != 0);
@@ -2231,15 +2234,15 @@ parse_grep_colors (void)
 
 /* Return true if PAT (of length PATLEN) contains an encoding error.  */
 static bool
-contains_encoding_error (char const *pat, size_t patlen)
+contains_encoding_error (char const *pat, idx_t patlen)
 {
   mbstate_t mbs = { 0 };
-  size_t charlen;
+  ptrdiff_t charlen;
 
-  for (size_t i = 0; i < patlen; i += charlen)
+  for (idx_t i = 0; i < patlen; i += charlen)
     {
       charlen = mb_clen (pat + i, patlen - i, &mbs);
-      if (MB_LEN_MAX < charlen)
+      if (charlen < 0)
         return true;
     }
   return false;
@@ -2279,8 +2282,8 @@ setup_ok_fold (void)
    Fcompile cannot handle it.  MBS is the multibyte conversion state.
    PATLEN must be nonzero.  */
 
-static int
-fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
+static ptrdiff_t
+fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
 {
   unsigned char pat0 = pat[0];
 
@@ -2302,7 +2305,7 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
   wchar_t folded[CASE_FOLDED_BUFSIZE];
   if (case_folded_counterparts (wc, folded))
     return -1;
-  for (int i = wn; 0 < --i; )
+  for (idx_t i = wn; 0 < --i; )
     {
       unsigned char c = pat[i];
       if (toupper (c) != c)
@@ -2317,11 +2320,11 @@ fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
    and so can be processed by Fcompile.  */
 
 static bool
-fgrep_icase_available (char const *pat, size_t patlen)
+fgrep_icase_available (char const *pat, idx_t patlen)
 {
   mbstate_t mbs = {0,};
 
-  for (size_t i = 0; i < patlen; )
+  for (idx_t i = 0; i < patlen; )
     {
       int n = fgrep_icase_charlen (pat + i, patlen - i, &mbs);
       if (n < 0)
@@ -2335,28 +2338,27 @@ fgrep_icase_available (char const *pat, size_t patlen)
 /* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style.  */
 
 void
-fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
+fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
 {
-  size_t len = *len_p;
+  idx_t len = *len_p;
   char *keys = *keys_p;
   mbstate_t mb_state = { 0 };
   char *new_keys = xnmalloc (len + 1, 2);
   char *p = new_keys;
-  size_t n;
 
-  for (; len; keys += n, len -= n)
+  for (ptrdiff_t n; len; keys += n, len -= n)
     {
       n = mb_clen (keys, len, &mb_state);
       switch (n)
         {
-        case (size_t) -2:
+        case -2:
           n = len;
           FALLTHROUGH;
         default:
           p = mempcpy (p, keys, n);
           break;
 
-        case (size_t) -1:
+        case -1:
           memset (&mb_state, 0, sizeof mb_state);
           n = 1;
           FALLTHROUGH;
@@ -2385,11 +2387,11 @@ fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
    to the -F pattern "a".  */
 
 static int
-try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
+try_fgrep_pattern (int matcher, char *keys, idx_t *len_p)
 {
   int result = matcher;
-  size_t len = *len_p;
-  char *new_keys = xmalloc (len + 1);
+  idx_t len = *len_p;
+  char *new_keys = ximalloc (len + 1);
   char *p = new_keys;
   char const *q = keys;
   mbstate_t mb_state = { 0 };
@@ -2434,26 +2436,14 @@ try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
           break;
         }
 
-      {
-        size_t n;
-        if (match_icase)
-          {
-            int ni = fgrep_icase_charlen (q, len, &mb_state);
-            if (ni < 0)
-              goto fail;
-            n = ni;
-          }
-        else
-          {
-            n = mb_clen (q, len, &mb_state);
-            if (MB_LEN_MAX < n)
-              goto fail;
-          }
-
-        p = mempcpy (p, q, n);
-        q += n;
-        len -= n;
-      }
+      ptrdiff_t clen = (match_icase
+                        ? fgrep_icase_charlen (q, len, &mb_state)
+                        : mb_clen (q, len, &mb_state));
+      if (clen < 0)
+        goto fail;
+      p = mempcpy (p, q, clen);
+      q += clen;
+      len -= clen;
     }
 
   if (*len_p != p - new_keys)
@@ -2473,7 +2463,7 @@ int
 main (int argc, char **argv)
 {
   char *keys = NULL;
-  size_t keycc = 0, keyalloc = 0;
+  idx_t keycc = 0, keyalloc = 0;
   int matcher = -1;
   int opt;
   int prev_optind, last_recursive;
@@ -2612,12 +2602,10 @@ main (int argc, char **argv)
 
       case 'e':
         {
-          ptrdiff_t cc = strlen (optarg);
-          if (keyalloc < keycc + cc + 1)
-            {
-              keyalloc = keycc + cc + 1;
-              pattern_array = keys = x2realloc (keys, &keyalloc);
-            }
+          idx_t cc = strlen (optarg);
+          ptrdiff_t shortage = keycc - keyalloc + cc + 1;
+          if (0 < shortage)
+            pattern_array = keys = xpalloc (keys, &keyalloc, shortage, -1, 1);
           char *keyend = mempcpy (keys + keycc, optarg, cc);
           *keyend = '\n';
           keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
@@ -2638,11 +2626,13 @@ main (int argc, char **argv)
               if (!fp)
                 die (EXIT_TROUBLE, errno, "%s", optarg);
             }
-          ptrdiff_t newkeycc = keycc, cc;
+          idx_t newkeycc = keycc, cc;
           for (;; newkeycc += cc)
             {
-              if (keyalloc <= newkeycc + 1)
-                pattern_array = keys = x2realloc (keys, &keyalloc);
+              ptrdiff_t shortage = newkeycc - keyalloc + 2;
+              if (0 < shortage)
+                pattern_array = keys = xpalloc (keys, &keyalloc,
+                                                shortage, -1, 1);
               cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
               if (cc == 0)
                 break;
@@ -2861,7 +2851,7 @@ main (int argc, char **argv)
     {
       /* Make a copy so that it can be reallocated or freed later.  */
       pattern_array = keys = xstrdup (argv[optind++]);
-      ptrdiff_t patlen = strlen (keys);
+      idx_t patlen = strlen (keys);
       keys[patlen] = '\n';
       keycc = update_patterns (keys, 0, patlen + 1, "");
     }
@@ -2968,7 +2958,7 @@ main (int argc, char **argv)
                                only_matching | color_option);
   /* We need one byte prior and one after.  */
   char eolbytes[3] = { 0, eolbyte, 0 };
-  size_t match_size;
+  idx_t match_size;
   skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
                                 &match_size, NULL) == 0)
                       == out_invert);
@@ -2987,11 +2977,11 @@ main (int argc, char **argv)
 #else
   long psize = getpagesize ();
 #endif
-  if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
+  if (! (0 < psize && psize <= (IDX_MAX - uword_size) / 2))
     abort ();
   pagesize = psize;
-  bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
-  buffer = xmalloc (bufalloc);
+  bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + uword_size;
+  buffer = ximalloc (bufalloc);
 
   if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
     devices = READ_DEVICES;
diff --git a/src/grep.h b/src/grep.h
index a3cd73e..04c15dd 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -21,6 +21,7 @@
 #define GREP_GREP_H 1
 
 #include <stdbool.h>
+#include <idx.h>
 
 /* The following flags are exported from grep for the matchers
    to look at. */
@@ -29,6 +30,6 @@ extern bool match_words;      /* -w */
 extern bool match_lines;       /* -x */
 extern char eolbyte;           /* -z */
 
-extern char const *pattern_file_name (size_t, size_t *);
+extern char const *pattern_file_name (idx_t, idx_t *);
 
 #endif
diff --git a/src/kwsearch.c b/src/kwsearch.c
index ea18ce1..171db9a 100644
--- a/src/kwsearch.c
+++ b/src/kwsearch.c
@@ -32,11 +32,11 @@ struct kwsearch
      'kwswords (kwset)' when some extra one-character words have been
      appended, one for each troublesome character that will require a
      DFA search.  */
-  ptrdiff_t words;
+  idx_t words;
 
   /* The user's pattern and its size in bytes.  */
   char *pattern;
-  size_t size;
+  idx_t size;
 
   /* The user's pattern compiled as a regular expression,
      or null if it has not been compiled.  */
@@ -47,11 +47,11 @@ struct kwsearch
    followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
-Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
 {
   kwset_t kwset;
   char *buf = NULL;
-  size_t bufalloc = 0;
+  idx_t bufalloc = 0;
 
   kwset = kwsinit (true);
 
@@ -59,7 +59,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
   do
     {
       char const *sep = rawmemchr (p, '\n');
-      ptrdiff_t len = sep - p;
+      idx_t len = sep - p;
 
       if (match_lines)
         {
@@ -70,8 +70,8 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
               if (bufalloc < len + 2)
                 {
                   free (buf);
-                  bufalloc = len + 2;
-                  buf = x2realloc (NULL, &bufalloc);
+                  bufalloc = len;
+                  buf = xpalloc (NULL, &bufalloc, 2, -1, 1);
                   buf[0] = eolbyte;
                 }
               memcpy (buf + 1, p, len);
@@ -88,7 +88,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 
   free (buf);
 
-  ptrdiff_t words = kwswords (kwset);
+  idx_t words = kwswords (kwset);
   kwsprep (kwset);
 
   struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch);
@@ -102,14 +102,14 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
 
 /* Use the compiled pattern VCP to search the buffer BUF of size SIZE.
    If found, return the offset of the first match and store its
-   size into *MATCH_SIZE.  If not found, return SIZE_MAX.
+   size into *MATCH_SIZE.  If not found, return -1.
    If START_PTR is nonnull, start searching there.  */
-size_t
-Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
           char const *start_ptr)
 {
   char const *beg, *end, *mb_start;
-  ptrdiff_t len;
+  idx_t len;
   char eol = eolbyte;
   struct kwsearch *kwsearch = vcp;
   kwset_t kwset = kwsearch->kwset;
@@ -126,7 +126,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
         break;
       len = kwsmatch.size - 2 * match_lines;
 
-      size_t mbclen = 0;
+      idx_t mbclen = 0;
       if (mb_check
           && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
         {
@@ -198,8 +198,8 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
                 else
                   end = buf + size;
 
-                if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
-                    != (size_t) -1)
+                if (0 <= EGexecute (kwsearch->re, beg, end - beg,
+                                    match_size, NULL))
                   goto success_match_words;
                 beg = end - 1;
                 break;
diff --git a/src/kwset.c b/src/kwset.c
index e5ac1a9..329b802 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -59,31 +59,31 @@ struct tree
 struct trie
 {
   /* If an accepting node, this is either 2*W + 1 where W is the word
-     index, or is SIZE_MAX if Aho-Corasick is in use and FAIL
+     index, or is -1 if Aho-Corasick is in use and FAIL
      specifies where to look for more info.  If not an accepting node,
      this is zero.  */
-  size_t accepting;
+  ptrdiff_t accepting;
 
   struct tree *links;          /* Tree of edges leaving this node.  */
   struct trie *parent;         /* Parent of this node.  */
   struct trie *next;           /* List of all trie nodes in level order.  */
   struct trie *fail;           /* Aho-Corasick failure function.  */
-  ptrdiff_t depth;             /* Depth of this node from the root.  */
-  ptrdiff_t shift;             /* Shift function for search failures.  */
-  ptrdiff_t maxshift;          /* Max shift of self and descendants.  */
+  idx_t depth;                 /* Depth of this node from the root.  */
+  idx_t shift;                 /* Shift function for search failures.  */
+  idx_t maxshift;              /* Max shift of self and descendants.  */
 };
 
 /* Structure returned opaquely to the caller, containing everything.  */
 struct kwset
 {
   struct obstack obstack;      /* Obstack for node allocation.  */
-  ptrdiff_t words;             /* Number of words in the trie.  */
+  idx_t words;                 /* Number of words in the trie.  */
   struct trie *trie;           /* The trie itself.  */
-  ptrdiff_t mind;              /* Minimum depth of an accepting node.  */
+  idx_t mind;                  /* Minimum depth of an accepting node.  */
   unsigned char delta[NCHAR];  /* Delta table for rapid search.  */
   struct trie *next[NCHAR];    /* Table of children of the root.  */
   char *target;                        /* Target string if there's only one.  */
-  ptrdiff_t *shift;            /* Used in Boyer-Moore search for one
+  idx_t *shift;                        /* Used in Boyer-Moore search for one
                                    string.  */
   char const *trans;           /* Character translation table.  */
 
@@ -108,8 +108,7 @@ struct kwset
   char gc2;
 
   /* kwsexec implementation.  */
-  ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t,
-                        struct kwsmatch *, bool);
+  ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool);
 };
 
 /* Use TRANS to transliterate C.  A null TRANS does no transliteration.  */
@@ -119,9 +118,9 @@ tr (char const *trans, char c)
   return trans ? trans[U(c)] : c;
 }
 
-static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t acexec (kwset_t, char const *, idx_t,
                          struct kwsmatch *, bool);
-static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t,
+static ptrdiff_t bmexec (kwset_t, char const *, idx_t,
                          struct kwsmatch *, bool);
 
 /* Return a newly allocated keyword set.  A nonnull TRANS specifies a
@@ -142,7 +141,7 @@ kwsalloc (char const *trans)
   kwset->trie->fail = NULL;
   kwset->trie->depth = 0;
   kwset->trie->shift = 0;
-  kwset->mind = PTRDIFF_MAX;
+  kwset->mind = IDX_MAX;
   kwset->target = NULL;
   kwset->trans = trans;
   kwset->kwsexec = acexec;
@@ -156,7 +155,7 @@ enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
 
 /* Add the given string to the contents of the keyword set.  */
 void
-kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
+kwsincr (kwset_t kwset, char const *text, idx_t len)
 {
   assume (0 <= len);
   struct trie *trie = kwset->trie;
@@ -181,7 +180,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
       enum { L, R } dirs[DEPTH_SIZE];
       links[0] = (struct tree *) &trie->links;
       dirs[0] = L;
-      ptrdiff_t depth = 1;
+      idx_t depth = 1;
 
       while (cur && label != cur->label)
         {
@@ -292,10 +291,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
   /* Mark the node finally reached as accepting, encoding the
      index number of this word in the keyword set so far.  */
   if (!trie->accepting)
-    {
-      size_t words = kwset->words;
-      trie->accepting = 2 * words + 1;
-    }
+    trie->accepting = 2 * kwset->words + 1;
   ++kwset->words;
 
   /* Keep track of the longest and shortest string of the keyword set.  */
@@ -303,7 +299,7 @@ kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
     kwset->mind = trie->depth;
 }
 
-ptrdiff_t
+idx_t
 kwswords (kwset_t kwset)
 {
   return kwset->words;
@@ -350,7 +346,7 @@ treefails (struct tree const *tree, struct trie const *fail,
         {
           tree->trie->fail = cur->trie;
           if (!reverse && cur->trie->accepting && !tree->trie->accepting)
-            tree->trie->accepting = SIZE_MAX;
+            tree->trie->accepting = -1;
           return;
         }
       fail = fail->fail;
@@ -362,7 +358,7 @@ treefails (struct tree const *tree, struct trie const *fail,
 /* Set delta entries for the links of the given tree such that
    the preexisting delta value is larger than the current depth.  */
 static void
-treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[])
+treedelta (struct tree const *tree, idx_t depth, unsigned char delta[])
 {
   if (!tree)
     return;
@@ -407,7 +403,6 @@ void
 kwsprep (kwset_t kwset)
 {
   char const *trans = kwset->trans;
-  ptrdiff_t i;
   unsigned char deltabuf[NCHAR];
   unsigned char *delta = trans ? deltabuf : kwset->delta;
   struct trie *curr, *last;
@@ -425,7 +420,8 @@ kwsprep (kwset_t kwset)
 
       /* Looking for just one string.  Extract it from the trie.  */
       kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
-      for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
+      curr = kwset->trie;
+      for (idx_t i = 0; i < kwset->mind; i++)
         {
           kwset->target[i] = curr->links->label;
           curr = curr->next;
@@ -504,7 +500,7 @@ kwsprep (kwset_t kwset)
   treenext (kwset->trie->links, next);
   int gc1 = -2;
   int gc1help = -1;
-  for (i = 0; i < NCHAR; i++)
+  for (int i = 0; i < NCHAR; i++)
     {
       int ti = i;
       if (trans)
@@ -534,9 +530,10 @@ kwsprep (kwset_t kwset)
     {
       /* Looking for just one string.  Extract it from the trie.  */
       kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
-      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+      curr = kwset->trie;
+      for (idx_t i = kwset->mind; 0 < i; i--)
         {
-          kwset->target[i] = curr->links->label;
+          kwset->target[i - 1] = curr->links->label;
           curr = curr->next;
         }
 
@@ -547,7 +544,8 @@ kwsprep (kwset_t kwset)
           kwset->shift
             = obstack_alloc (&kwset->obstack,
                              sizeof *kwset->shift * (kwset->mind - 1));
-          for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i)
+          curr = kwset->trie->next;
+          for (idx_t i = 0; i < kwset->mind - 1; i++)
             {
               kwset->shift[i] = curr->shift;
               curr = curr->next;
@@ -560,7 +558,7 @@ kwsprep (kwset_t kwset)
 
   /* Fix things up for any translation table.  */
   if (trans)
-    for (i = 0; i < NCHAR; ++i)
+    for (int i = 0; i < NCHAR; ++i)
       kwset->delta[i] = delta[U(trans[i])];
 }
 
@@ -574,16 +572,16 @@ kwsprep (kwset_t kwset)
    when failing.  KWSET->shift says how much to shift.  */
 static inline bool
 bm_delta2_search (char const **tpp, char const *ep, char const *sp,
-                  ptrdiff_t len,
+                  idx_t len,
                   char const *trans, char gc1, char gc2,
                   unsigned char const *d1, kwset_t kwset)
 {
   char const *tp = *tpp;
-  ptrdiff_t d = len, skip = 0;
+  idx_t d = len, skip = 0;
 
   while (true)
     {
-      ptrdiff_t i = 2;
+      idx_t i = 2;
       if (tr (trans, tp[-2]) == gc2)
         {
           while (++i <= d)
@@ -622,7 +620,7 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp,
    that matches the terminal byte specified by KWSET, or NULL if there
    is no match.  KWSET->gc1 should be nonnegative.  */
 static char const *
-memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
+memchr_kwset (char const *s, idx_t n, kwset_t kwset)
 {
   char const *slim = s + n;
   if (kwset->gc1help < 0)
@@ -634,7 +632,7 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
   else
     {
       int small_heuristic = 2;
-      size_t small_bytes = small_heuristic * sizeof (unsigned long int);
+      idx_t small_bytes = small_heuristic * sizeof (unsigned long int);
       while (s < slim)
         {
           if (kwset->next[U(*s)])
@@ -649,13 +647,13 @@ memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
 
 /* Fast Boyer-Moore search (inlinable version).  */
 static inline ptrdiff_t _GL_ATTRIBUTE_PURE
-bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
+bmexec_trans (kwset_t kwset, char const *text, idx_t size)
 {
   assume (0 <= size);
   unsigned char const *d1;
   char const *ep, *sp, *tp;
   int d;
-  ptrdiff_t len = kwset->mind;
+  idx_t len = kwset->mind;
   char const *trans = kwset->trans;
 
   if (len == 0)
@@ -675,8 +673,8 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
   char gc2 = kwset->gc2;
 
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2).  */
-  ptrdiff_t len12;
-  if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size)
+  idx_t len12;
+  if (INT_MULTIPLY_OK (len, 12, &len12) && len12 < size)
     /* 11 is not a bug, the initial offset happens only once.  */
     for (ep = text + size - 11 * len; tp <= ep; )
       {
@@ -735,7 +733,7 @@ bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
 
 /* Fast Boyer-Moore search.  */
 static ptrdiff_t
-bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
+bmexec (kwset_t kwset, char const *text, idx_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
   /* Help the compiler inline in two ways, depending on whether
@@ -753,7 +751,7 @@ bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
 /* Hairy multiple string search with the Aho-Corasick algorithm.
    (inlinable version)  */
 static inline ptrdiff_t
-acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
+acexec_trans (kwset_t kwset, char const *text, idx_t len,
               struct kwsmatch *kwsmatch, bool longest)
 {
   struct trie const *trie, *accept;
@@ -831,7 +829,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
 
  match:
   accept = trie;
-  while (accept->accepting == SIZE_MAX)
+  while (accept->accepting < 0)
     accept = accept->fail;
   left = tp - accept->depth;
 
@@ -858,7 +856,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
           if (trie->accepting)
             {
               accept1 = trie;
-              while (accept1->accepting == SIZE_MAX)
+              while (accept1->accepting < 0)
                 accept1 = accept1->fail;
               left1 = tp - accept1->depth;
               if (left1 <= left)
@@ -870,7 +868,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
         }
     }
 
-  kwsmatch->index = accept->accepting / 2;
+  kwsmatch->index = accept->accepting >> 1;
   kwsmatch->offset = left - text;
   kwsmatch->size = accept->depth;
 
@@ -879,7 +877,7 @@ acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
 
 /* Hairy multiple string search with Aho-Corasick algorithm.  */
 static ptrdiff_t
-acexec (kwset_t kwset, char const *text, ptrdiff_t size,
+acexec (kwset_t kwset, char const *text, idx_t size,
         struct kwsmatch *kwsmatch, bool longest)
 {
   assume (0 <= size);
@@ -898,7 +896,7 @@ acexec (kwset_t kwset, char const *text, ptrdiff_t size,
    value), and length.  If LONGEST, find the longest match; otherwise
    any match will do.  */
 ptrdiff_t
-kwsexec (kwset_t kwset, char const *text, ptrdiff_t size,
+kwsexec (kwset_t kwset, char const *text, idx_t size,
          struct kwsmatch *kwsmatch, bool longest)
 {
   return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
diff --git a/src/kwset.h b/src/kwset.h
index 24e13e2..cb94cf4 100644
--- a/src/kwset.h
+++ b/src/kwset.h
@@ -22,23 +22,26 @@
 #include <stddef.h>
 #include <stdbool.h>
 
+#include <idx.h>
+
 struct kwsmatch
 {
-  ptrdiff_t index;     /* Index number of matching keyword.  */
-  ptrdiff_t offset;    /* Offset of match.  */
-  ptrdiff_t size;      /* Length of match.  */
+  idx_t index; /* Index number of matching keyword.  */
+  idx_t offset;        /* Offset of match.  */
+  idx_t size;  /* Length of match.  */
 };
 
-#include "arg-nonnull.h"
+#include <arg-nonnull.h>
+#include <idx.h>
 
 struct kwset;
 typedef struct kwset *kwset_t;
 
 extern kwset_t kwsalloc (char const *);
-extern void kwsincr (kwset_t, char const *, ptrdiff_t);
-extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
+extern void kwsincr (kwset_t, char const *, idx_t);
+extern idx_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
 extern void kwsprep (kwset_t);
-extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t,
+extern ptrdiff_t kwsexec (kwset_t, char const *, idx_t,
                           struct kwsmatch *, bool)
   _GL_ARG_NONNULL ((4));
 extern void kwsfree (kwset_t);
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 37f7e40..3bdaee9 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -113,7 +113,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
    followed by '\n'.  Return a description of the compiled pattern.  */
 
 void *
-Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
+Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
 {
   int e;
   char const *ep;
@@ -202,8 +202,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
   return pc;
 }
 
-size_t
-Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
+ptrdiff_t
+Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
           char const *start_ptr)
 {
   int sub[NSUB];
diff --git a/src/search.h b/src/search.h
index 6a5814a..acc282c 100644
--- a/src/search.h
+++ b/src/search.h
@@ -48,38 +48,55 @@ typedef signed char mb_len_map_t;
 /* searchutils.c */
 extern void wordinit (void);
 extern kwset_t kwsinit (bool);
-extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
-extern size_t wordchar_prev (char const *, char const *, char const *)
+extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
+extern idx_t wordchar_prev (char const *, char const *, char const *)
   _GL_ATTRIBUTE_PURE;
-extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
-                            char const *);
+extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *);
 
 /* dfasearch.c */
-extern void *GEAcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* kwsearch.c */
-extern void *Fcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Fcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* pcresearch.c */
-extern void *Pcompile (char *, size_t, reg_syntax_t, bool);
-extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *);
+extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
+extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
 
 /* grep.c */
 extern struct localeinfo localeinfo;
-extern void fgrep_to_grep_pattern (char **, size_t *);
+extern void fgrep_to_grep_pattern (char **, idx_t *);
+
+/* Return the number of bytes in the character at the start of S, which
+   is of size N.  N must be positive.  MBS is the conversion state.
+   This acts like mbrlen, except it returns -1 and -2 instead of
+   (size_t) -1 and (size_t) -2.  */
+SEARCH_INLINE ptrdiff_t
+imbrlen (char const *s, idx_t n, mbstate_t *mbs)
+{
+  size_t len = mbrlen (s, n, mbs);
+
+  /* Convert result to ptrdiff_t portably, even on oddball platforms.
+     When optimizing, this typically uses no machine instructions.  */
+  if (len <= MB_LEN_MAX)
+    return len;
+  ptrdiff_t neglen = -len;
+  return -neglen;
+}
 
 /* Return the number of bytes in the character at the start of S, which
    is of size N.  N must be positive.  MBS is the conversion state.
    This acts like mbrlen, except it returns 1 when mbrlen would return 0,
+   it returns -1 and -2 instead of (size_t) -1 and (size_t) -2,
    and it is typically faster because of the cache.  */
-SEARCH_INLINE size_t
-mb_clen (char const *s, size_t n, mbstate_t *mbs)
+SEARCH_INLINE ptrdiff_t
+mb_clen (char const *s, idx_t n, mbstate_t *mbs)
 {
   signed char len = localeinfo.sbclen[to_uchar (*s)];
-  return len == -2 ? mbrlen (s, n, mbs) : len;
+  return len == -2 ? imbrlen (s, n, mbs) : len;
 }
 
 extern char const *input_filename (void);
diff --git a/src/searchutils.c b/src/searchutils.c
index 0080dd7..ebc4a11 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -47,7 +47,7 @@ kwsinit (bool mb_trans)
 
   if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
     {
-      trans = xmalloc (NCHAR);
+      trans = ximalloc (NCHAR);
       /* If I is a single-byte character that becomes a different
          single-byte character when uppercased, set trans[I]
          to that character.  Otherwise, set trans[I] to I.  */
@@ -88,7 +88,7 @@ kwsinit (bool mb_trans)
 
    Treat encoding errors as if they were single-byte characters.  */
 ptrdiff_t
-mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
+mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
            char const *end)
 {
   const char *p = *mb_start;
@@ -114,8 +114,8 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
               if (long_enough)
                 {
                   mbstate_t mbs = { 0 };
-                  size_t clen = mbrlen (cur - i, end - (cur - i), &mbs);
-                  if (clen <= MB_LEN_MAX)
+                  ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
+                  if (0 <= clen)
                     {
                       /* This multibyte character contains *CUR.  */
                       p0 = cur - i;
@@ -130,13 +130,13 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
       /* In non-UTF-8 encodings, to find character boundaries one must
          in general scan forward from the start of the buffer.  */
       mbstate_t mbs = { 0 };
-      size_t clen;
+      ptrdiff_t clen;
 
       do
         {
           clen = mb_clen (p, end - p, &mbs);
 
-          if (MB_LEN_MAX < clen)
+          if (clen < 0)
             {
               /* An invalid sequence, or a truncated multibyte character.
                  Treat it as a single byte character.  */
@@ -159,10 +159,10 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
 /* Examine the start of BUF (which goes to END) for word constituents.
    If COUNTALL, examine as many as possible; otherwise, examine at most one.
    Return the total number of bytes in the examined characters.  */
-static size_t
+static idx_t
 wordchars_count (char const *buf, char const *end, bool countall)
 {
-  size_t n = 0;
+  idx_t n = 0;
   mbstate_t mbs = { 0 };
   while (n < end - buf)
     {
@@ -188,7 +188,7 @@ wordchars_count (char const *buf, char const *end, bool countall)
 /* Examine the start of BUF for the longest prefix containing just
    word constituents.  Return the total number of bytes in the prefix.
    The buffer ends at END.  */
-size_t
+idx_t
 wordchars_size (char const *buf, char const *end)
 {
   return wordchars_count (buf, end, true);
@@ -196,7 +196,7 @@ wordchars_size (char const *buf, char const *end)
 
 /* If BUF starts with a word constituent, return the number of bytes
    used to represent it; otherwise, return zero.  The buffer ends at END.  */
-size_t
+idx_t
 wordchar_next (char const *buf, char const *end)
 {
   return wordchars_count (buf, end, false);
@@ -205,7 +205,7 @@ wordchar_next (char const *buf, char const *end)
 /* In the buffer BUF, return nonzero if the character whose encoding
    contains the byte before CUR is a word constituent.  The buffer
    ends at END.  */
-size_t
+idx_t
 wordchar_prev (char const *buf, char const *cur, char const *end)
 {
   if (buf == cur)

-----------------------------------------------------------------------

Summary of changes:
 bootstrap.conf    |   1 +
 src/dfasearch.c   |  75 ++++++++-------
 src/grep.c        | 282 ++++++++++++++++++++++++++----------------------------
 src/grep.h        |   3 +-
 src/kwsearch.c    |  30 +++---
 src/kwset.c       |  88 +++++++++--------
 src/kwset.h       |  17 ++--
 src/pcresearch.c  |   6 +-
 src/search.h      |  47 ++++++---
 src/searchutils.c |  22 ++---
 10 files changed, 294 insertions(+), 277 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]