bug-glibc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

glibc regex patch for compatibility support for stray '{'s in EREs


From: Paul Eggert
Subject: glibc regex patch for compatibility support for stray '{'s in EREs
Date: Wed, 21 Mar 2001 19:47:11 -0800 (PST)

> From: Ulrich Drepper <address@hidden>
> Date: 09 Feb 2001 00:29:57 -0800
> 
> > For example, we could add a new flag to regex.h that would yield the
> > desired behavior.  Would that be an acceptable compromise?
> 
> If there is another bit for the flag left (haven't checked): yes.

Another bit is available, so here's a patch to achieve the desired
behavior.  This patch relies on the overflow-checking patch I sent to
bug-glibc a few minutes ago.  I've checked these two patches with the
latest GNU grep alpha version.

(PS to Alain: no change is needed to GNU grep, other than to slurp the
latest regex.c and regex.h after these patches are merged in.  I can
mail you the merged regex.c and regex.h separately if you like.)

2001-03-21  Paul Eggert  <address@hidden>

        * posix/regex.h (RE_INVALID_INTERVAL_ORD): New macro.
        (RE_SYNTAX_POSIX_EGREP): Use it.
        * posix/regex.c (regex_compile): Implement it.

===================================================================
RCS file: posix/regex.h,v
retrieving revision 1.20
retrieving revision 1.20.1.1
diff -pu -r1.20 -r1.20.1.1
--- posix/regex.h       2000/10/30 04:16:19     1.20
+++ posix/regex.h       2001/03/22 03:30:12     1.20.1.1
@@ -160,6 +160,11 @@ typedef unsigned long int reg_syntax_t;
    this bit set, and it won't affect anything in the normal case. */
 #define RE_DEBUG (RE_NO_GNU_OPS << 1)
 
+/* If this bit is set, a syntactically invalid interval is treated as
+   a string of ordinary characters.  For example, the ERE 'a{1' is
+   treated as 'a\{1'.  */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
 /* This global variable defines the particular regexp syntax to use (for
    some interfaces).  When a regexp is compiled, the syntax used is
    stored in the pattern buffer, so changing this does not affect
@@ -199,7 +204,8 @@ extern reg_syntax_t re_syntax_options;
    | RE_NO_BK_VBAR)
 
 #define RE_SYNTAX_POSIX_EGREP                                          \
-  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES                    \
+   | RE_INVALID_INTERVAL_ORD)
 
 /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
 #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
===================================================================
RCS file: regex.c,v
retrieving revision 1.91.0.1
retrieving revision 1.91.0.2
diff -pu -r1.91.0.1 -r1.91.0.2
--- posix/regex.c       2001/03/22 02:42:43     1.91.0.1
+++ posix/regex.c       2001/03/22 03:33:10     1.91.0.2
@@ -2326,14 +2326,6 @@ regex_compile (pattern, size, syntax, bu
   /* Address of beginning of regexp, or inside of last group.  */
   US_CHAR_TYPE *begalt;
 
-  /* Place in the uncompiled pattern (i.e., the {) to
-     which to go back if the interval is invalid.  */
-#ifdef MBS_SUPPORT
-  const US_CHAR_TYPE *beg_interval;
-#else
-  const char *beg_interval;
-#endif /* MBS_SUPPORT */
-
   /* Address of the place where a forward jump should go to the end of
      the containing expression.  Each alternative of an `or' -- except the
      last -- ends with a forward jump of this sort.  */
@@ -3827,25 +3819,19 @@ regex_compile (pattern, size, syntax, bu
 
                 /* At least (most) this many matches must be made.  */
                 int lower_bound = -1, upper_bound = -1;
-                beg_interval = p - 1;
+
+               /* Place in the uncompiled pattern (i.e., just after
+                  the '{') to go back to if the interval is invalid.  */
+               const CHAR_TYPE *beg_interval = p;
 
                 if (p == pend)
-                  {
-                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-                      goto unfetch_interval;
-                    else
-                      FREE_STACK_RETURN (REG_EBRACE);
-                  }
+                 goto invalid_interval;
 
                 GET_UNSIGNED_NUMBER (lower_bound);
 
                 if (c == ',')
                   {
                     GET_UNSIGNED_NUMBER (upper_bound);
-                   if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
-                       || ((syntax & RE_NO_BK_BRACES) && c != '}'))
-                     FREE_STACK_RETURN (REG_BADBR);
-
                    if (upper_bound < 0)
                      upper_bound = RE_DUP_MAX;
                   }
@@ -3853,36 +3839,24 @@ regex_compile (pattern, size, syntax, bu
                   /* Interval such as `{1}' => match exactly once. */
                   upper_bound = lower_bound;
 
-                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
-                    || lower_bound > upper_bound)
-                  {
-                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-                      goto unfetch_interval;
-                    else
-                      FREE_STACK_RETURN (REG_BADBR);
-                  }
+                if (! (0 <= lower_bound && lower_bound <= upper_bound))
+                 goto invalid_interval;
 
                 if (!(syntax & RE_NO_BK_BRACES))
                   {
-                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
-
+                   if (c != '\\' || p == pend)
+                     goto invalid_interval;
                     PATFETCH (c);
                   }
 
                 if (c != '}')
-                  {
-                    if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-                      goto unfetch_interval;
-                    else
-                      FREE_STACK_RETURN (REG_BADBR);
-                  }
-
-                /* We just parsed a valid interval.  */
+                 goto invalid_interval;
 
                 /* If it's invalid to have no preceding re.  */
                 if (!laststart)
                   {
-                    if (syntax & RE_CONTEXT_INVALID_OPS)
+                   if (syntax & RE_CONTEXT_INVALID_OPS
+                       && !(syntax & RE_INVALID_INTERVAL_ORD))
                       FREE_STACK_RETURN (REG_BADRPT);
                     else if (syntax & RE_CONTEXT_INDEP_OPS)
                       laststart = b;
@@ -3890,6 +3864,11 @@ regex_compile (pattern, size, syntax, bu
                       goto unfetch_interval;
                   }
 
+                /* We just parsed a valid interval.  */
+
+                if (RE_DUP_MAX < upper_bound)
+                 FREE_STACK_RETURN (REG_BADBR);
+
                 /* If the upper bound is zero, don't want to succeed at
                    all; jump from `laststart' to `b + 3', which will be
                   the end of the buffer after we insert the jump.  */
@@ -3975,25 +3954,20 @@ regex_compile (pattern, size, syntax, bu
                        }
                    }
                 pending_exact = 0;
-                beg_interval = NULL;
-              }
-              break;
+               break;
 
-            unfetch_interval:
-              /* If an invalid interval, match the characters as literals.  */
-               assert (beg_interval);
-               p = beg_interval;
-               beg_interval = NULL;
-
-               /* normal_char and normal_backslash need `c'.  */
-               PATFETCH (c);
-
-               if (!(syntax & RE_NO_BK_BRACES))
-                 {
-                   if (p > pattern  &&  p[-1] == '\\')
-                     goto normal_backslash;
-                 }
-               goto normal_char;
+             invalid_interval:
+               if (!(syntax & RE_INVALID_INTERVAL_ORD))
+                 FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
+             unfetch_interval:
+               /* Match the characters as literals.  */
+               p = beg_interval;
+               c = '{';
+               if (syntax & RE_NO_BK_BRACES)
+                 goto normal_char;
+               else
+                 goto normal_backslash;
+             }
 
 #ifdef emacs
             /* There is no way to specify the before_dot and after_dot



reply via email to

[Prev in Thread] Current Thread [Next in Thread]