[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
glibc regex patch for compatibility support for stray '{'s in EREs
From: Paul Eggert
Subject: glibc regex patch for compatibility support for stray '{'s in EREs
Date: Wed, 21 Mar 2001 19:47:11 -0800 (PST)
> From: Ulrich Drepper <address@hidden>
> Date: 09 Feb 2001 00:29:57 -0800
>
> > For example, we could add a new flag to regex.h that would yield the
> > desired behavior. Would that be an acceptable compromise?
>
> If there is another bit for the flag left (haven't checked): yes.
Another bit is available, so here's a patch to achieve the desired
behavior. This patch relies on the overflow-checking patch I sent to
bug-glibc a few minutes ago. I've checked these two patches with the
latest GNU grep alpha version.
(PS to Alain: no change is needed to GNU grep, other than to slurp the
latest regex.c and regex.h after these patches are merged in. I can
mail you the merged regex.c and regex.h separately if you like.)
2001-03-21 Paul Eggert <address@hidden>
* posix/regex.h (RE_INVALID_INTERVAL_ORD): New macro.
(RE_SYNTAX_POSIX_EGREP): Use it.
* posix/regex.c (regex_compile): Implement it.
===================================================================
RCS file: posix/regex.h,v
retrieving revision 1.20
retrieving revision 1.20.1.1
diff -pu -r1.20 -r1.20.1.1
--- posix/regex.h 2000/10/30 04:16:19 1.20
+++ posix/regex.h 2001/03/22 03:30:12 1.20.1.1
@@ -160,6 +160,11 @@ typedef unsigned long int reg_syntax_t;
this bit set, and it won't affect anything in the normal case. */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
@@ -199,7 +204,8 @@ extern reg_syntax_t re_syntax_options;
| RE_NO_BK_VBAR)
#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
===================================================================
RCS file: regex.c,v
retrieving revision 1.91.0.1
retrieving revision 1.91.0.2
diff -pu -r1.91.0.1 -r1.91.0.2
--- posix/regex.c 2001/03/22 02:42:43 1.91.0.1
+++ posix/regex.c 2001/03/22 03:33:10 1.91.0.2
@@ -2326,14 +2326,6 @@ regex_compile (pattern, size, syntax, bu
/* Address of beginning of regexp, or inside of last group. */
US_CHAR_TYPE *begalt;
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
-#ifdef MBS_SUPPORT
- const US_CHAR_TYPE *beg_interval;
-#else
- const char *beg_interval;
-#endif /* MBS_SUPPORT */
-
/* Address of the place where a forward jump should go to the end of
the containing expression. Each alternative of an `or' -- except the
last -- ends with a forward jump of this sort. */
@@ -3827,25 +3819,19 @@ regex_compile (pattern, size, syntax, bu
/* At least (most) this many matches must be made. */
int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1;
+
+ /* Place in the uncompiled pattern (i.e., just after
+ the '{') to go back to if the interval is invalid. */
+ const CHAR_TYPE *beg_interval = p;
if (p == pend)
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_EBRACE);
- }
+ goto invalid_interval;
GET_UNSIGNED_NUMBER (lower_bound);
if (c == ',')
{
GET_UNSIGNED_NUMBER (upper_bound);
- if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
- || ((syntax & RE_NO_BK_BRACES) && c != '}'))
- FREE_STACK_RETURN (REG_BADBR);
-
if (upper_bound < 0)
upper_bound = RE_DUP_MAX;
}
@@ -3853,36 +3839,24 @@ regex_compile (pattern, size, syntax, bu
/* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound)
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_BADBR);
- }
+ if (! (0 <= lower_bound && lower_bound <= upper_bound))
+ goto invalid_interval;
if (!(syntax & RE_NO_BK_BRACES))
{
- if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
-
+ if (c != '\\' || p == pend)
+ goto invalid_interval;
PATFETCH (c);
}
if (c != '}')
- {
- if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
- goto unfetch_interval;
- else
- FREE_STACK_RETURN (REG_BADBR);
- }
-
- /* We just parsed a valid interval. */
+ goto invalid_interval;
/* If it's invalid to have no preceding re. */
if (!laststart)
{
- if (syntax & RE_CONTEXT_INVALID_OPS)
+ if (syntax & RE_CONTEXT_INVALID_OPS
+ && !(syntax & RE_INVALID_INTERVAL_ORD))
FREE_STACK_RETURN (REG_BADRPT);
else if (syntax & RE_CONTEXT_INDEP_OPS)
laststart = b;
@@ -3890,6 +3864,11 @@ regex_compile (pattern, size, syntax, bu
goto unfetch_interval;
}
+ /* We just parsed a valid interval. */
+
+ if (RE_DUP_MAX < upper_bound)
+ FREE_STACK_RETURN (REG_BADBR);
+
/* If the upper bound is zero, don't want to succeed at
all; jump from `laststart' to `b + 3', which will be
the end of the buffer after we insert the jump. */
@@ -3975,25 +3954,20 @@ regex_compile (pattern, size, syntax, bu
}
}
pending_exact = 0;
- beg_interval = NULL;
- }
- break;
+ break;
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert (beg_interval);
- p = beg_interval;
- beg_interval = NULL;
-
- /* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
-
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (p > pattern && p[-1] == '\\')
- goto normal_backslash;
- }
- goto normal_char;
+ invalid_interval:
+ if (!(syntax & RE_INVALID_INTERVAL_ORD))
+ FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
+ unfetch_interval:
+ /* Match the characters as literals. */
+ p = beg_interval;
+ c = '{';
+ if (syntax & RE_NO_BK_BRACES)
+ goto normal_char;
+ else
+ goto normal_backslash;
+ }
#ifdef emacs
/* There is no way to specify the before_dot and after_dot
- glibc regex patch for compatibility support for stray '{'s in EREs,
Paul Eggert <=