grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.24-14-gaafef7e


From: Paul Eggert
Subject: grep branch, master, updated. v2.24-14-gaafef7e
Date: Tue, 19 Apr 2016 15:55:41 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  aafef7e2951c9f115b18022c72dc7ba5da147caa (commit)
      from  bed627fd8dc8192da1cf985253ded5ecffd253db (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=aafef7e2951c9f115b18022c72dc7ba5da147caa


commit aafef7e2951c9f115b18022c72dc7ba5da147caa
Author: Paul Eggert <address@hidden>
Date:   Tue Apr 19 08:54:32 2016 -0700

    dfa: remove dependency on btowc
    
    MirOS BSD btowc is a macro that (when GCC is being used) hardcodes
    btowc (0x80) == WEOF regardless of locale, which contradicts
    future POSIX in the C locale.  Instead of bothering to develop a
    Gnulib workaround for the btowc incompatibility, use mbrtowc,
    which we are using elsewhere and fixing anyway, and are caching so
    it is fast here.  Problem reported by Nelson H. F. Beebe via Jim
    Meyering in: http://bugs.gnu.org/23269#14
    * bootstrap.conf (gnulib_modules): Remove btowc.
    * src/dfa.c (struct dfa): Remove mbrtowc_cache member, replacing with ...
    (mbrtowc_cache): ... this new static var.  All uses changed.
    (dfambcache): Remove; now done by dfasyntax.  Call removed.
    (is_valid_unibyte_character): Remove.
    (IS_WORD_CONSTITUENT): Remove this macro, replacing it with ...
    (unibyte_word_constituent): ... this new function.  It uses
    mbrtowc_cache rather than btowc.
    (dfasyntax): Initialize mbrtowc_cache before using it.

diff --git a/bootstrap.conf b/bootstrap.conf
index 3bff7c3..9e76131 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -27,7 +27,6 @@ alloca
 announce-gen
 argmatch
 binary-io
-btowc
 c-ctype
 closeout
 do-release-commit-and-tag
diff --git a/src/dfa.c b/src/dfa.c
index adc5de3..98ee4ac 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -350,11 +350,6 @@ struct dfa
    */
   int *multibyte_prop;
 
-  /* A table indexed by byte values that contains the corresponding wide
-     character (if any) for that byte.  WEOF means the byte is not a
-     valid single-byte character.  */
-  wint_t mbrtowc_cache[NOTCHAR];
-
   /* Array of the bracket expression in the DFA.  */
   struct mb_char_classes *mbcsets;
   size_t nmbcsets;
@@ -431,19 +426,10 @@ struct dfa
 
 static void regexp (void);
 
-static void
-dfambcache (struct dfa *d)
-{
-  int i;
-  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
-    {
-      char c = i;
-      unsigned char uc = i;
-      mbstate_t s = { 0 };
-      wchar_t wc;
-      d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
-    }
-}
+/* A table indexed by byte values that contains the corresponding wide
+   character (if any) for that byte.  WEOF means the byte is not a
+   valid single-byte character.  */
+static wint_t mbrtowc_cache[NOTCHAR];
 
 /* Store into *PWC the result of converting the leading bytes of the
    multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
@@ -466,7 +452,7 @@ static size_t
 mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
 {
   unsigned char uc = s[0];
-  wint_t wc = d->mbrtowc_cache[uc];
+  wint_t wc = mbrtowc_cache[uc];
 
   if (wc == WEOF)
     {
@@ -671,25 +657,18 @@ static charclass letters;
 /* Set of characters that are newline.  */
 static charclass newline;
 
-/* Add this to the test for whether a byte is word-constituent, since on
-   BSD-based systems, many values in the 128..255 range are classified as
-   alphabetic, while on glibc-based systems, they are not.  */
-#ifdef __GLIBC__
-# define is_valid_unibyte_character(c) 1
-#else
-# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
-#endif
-
-/* C is a "word-constituent" byte.  */
-#define IS_WORD_CONSTITUENT(C) \
-  (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
+static bool
+unibyte_word_constituent (unsigned char c)
+{
+  return mbrtowc_cache[c] != WEOF && (isalnum (c) || (c) == '_');
+}
 
 static int
 char_context (unsigned char c)
 {
   if (c == eolbyte)
     return CTX_NEWLINE;
-  if (IS_WORD_CONSTITUENT (c))
+  if (unibyte_word_constituent (c))
     return CTX_LETTER;
   return CTX_NONE;
 }
@@ -708,23 +687,29 @@ wchar_context (wint_t wc)
 void
 dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 {
-  unsigned int i;
-
+  int i;
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold != 0;
   eolbyte = eol;
 
-  for (i = 0; i < NOTCHAR; ++i)
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
-      sbit[i] = char_context (i);
-      switch (sbit[i])
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      wchar_t wc;
+      mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
+
+      /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit.  */
+      sbit[uc] = char_context (uc);
+      switch (sbit[uc])
         {
         case CTX_LETTER:
-          setbit (i, letters);
+          setbit (uc, letters);
           break;
         case CTX_NEWLINE:
-          setbit (i, newline);
+          setbit (uc, newline);
           break;
         }
     }
@@ -1489,7 +1474,7 @@ lex (void)
             {
               zeroset (ccl);
               for (c2 = 0; c2 < NOTCHAR; ++c2)
-                if (IS_WORD_CONSTITUENT (c2))
+                if (unibyte_word_constituent (c2))
                   setbit (c2, ccl);
               if (c == 'W')
                 notset (ccl);
@@ -2714,7 +2699,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         state_letter = state;
 
       for (i = 0; i < NOTCHAR; ++i)
-        trans[i] = (IS_WORD_CONSTITUENT (i)) ? state_letter : state;
+        trans[i] = unibyte_word_constituent (i) ? state_letter : state;
       trans[eolbyte] = state_newline;
     }
   else
@@ -2820,7 +2805,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
 
               if (c == eolbyte)
                 trans[c] = state_newline;
-              else if (IS_WORD_CONSTITUENT (c))
+              else if (unibyte_word_constituent (c))
                 trans[c] = state_letter;
               else if (c < NOTCHAR)
                 trans[c] = state;
@@ -3626,7 +3611,6 @@ void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
   dfainit (d);
-  dfambcache (d);
   dfaparse (s, len, d);
   dfassbuild (d);
 

-----------------------------------------------------------------------

Summary of changes:
 bootstrap.conf |    1 -
 src/dfa.c      |   70 +++++++++++++++++++++----------------------------------
 2 files changed, 27 insertions(+), 44 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]