[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] Changes to emacs/src/regex.c
From: |
Kenichi Handa |
Subject: |
[Emacs-diffs] Changes to emacs/src/regex.c |
Date: |
Mon, 07 Oct 2002 08:58:29 -0400 |
Index: emacs/src/regex.c
diff -c emacs/src/regex.c:1.180 emacs/src/regex.c:1.181
*** emacs/src/regex.c:1.180 Mon Sep 9 15:41:30 2002
--- emacs/src/regex.c Tue Sep 10 01:59:32 2002
***************
*** 19,27 ****
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA. */
! /* BUGS:
! - (x?)*y\1z should match both xxxxyxz and xxxyz.
! TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
--- 19,25 ----
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA. */
! /* TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
***************
*** 1520,1545 ****
} \
} while (0)
- /* Discard a saved register off the stack. */
- #define DISCARD_FAILURE_REG_OR_COUNT()
\
- do { \
- int reg = POP_FAILURE_INT ();
\
- if (reg == -1) \
- { \
- /* It's a counter. */ \
- POP_FAILURE_POINTER (); \
- reg = POP_FAILURE_INT ();
\
- DEBUG_PRINT3 (" Discard counter %p = %d\n", ptr, reg); \
- } \
- else
\
- { \
- POP_FAILURE_POINTER (); \
- POP_FAILURE_POINTER (); \
- DEBUG_PRINT4 (" Discard reg %d (spanning %p -> %p)\n", \
- reg, regstart[reg], regend[reg]); \
- } \
- } while (0)
-
/* Check that we are not stuck in an infinite loop. */
#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
do { \
--- 1518,1523 ----
***************
*** 1553,1568 ****
&& FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
if (FAILURE_PAT (failure) == pat_cur) \
{ \
! while (fail_stack.frame < fail_stack.avail) \
! DISCARD_FAILURE_REG_OR_COUNT (); \
! goto fail; \
} \
DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
failure = NEXT_FAILURE_HANDLE(failure); \
} \
DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure));
\
} while (0)
!
/* Push the information about the state we will need
if we ever fail back to it.
--- 1531,1545 ----
&& FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
if (FAILURE_PAT (failure) == pat_cur) \
{ \
! cycle = 1; \
! break; \
} \
DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
failure = NEXT_FAILURE_HANDLE(failure); \
} \
DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure));
\
} while (0)
!
/* Push the information about the state we will need
if we ever fail back to it.
***************
*** 2645,2652 ****
unsigned int startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
! (simple || !analyse_first (laststart, b, NULL, 0)) ?
! on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
if (!zero_times_ok && simple)
--- 2622,2629 ----
unsigned int startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
! (simple || !analyse_first (laststart, b, NULL, 0))
! ? on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
if (!zero_times_ok && simple)
***************
*** 2694,2701 ****
{
boolean emptyp = analyse_first (laststart, b, NULL, 0);
! /* The non-greedy multiple match looks like a repeat..until:
! we only need a conditional jump at the end of the loop */
if (emptyp) BUF_PUSH (no_op);
STORE_JUMP (emptyp ? on_failure_jump_nastyloop
: on_failure_jump, b, laststart);
--- 2671,2679 ----
{
boolean emptyp = analyse_first (laststart, b, NULL, 0);
! /* The non-greedy multiple match looks like
! a repeat..until: we only need a conditional jump
! at the end of the loop. */
if (emptyp) BUF_PUSH (no_op);
STORE_JUMP (emptyp ? on_failure_jump_nastyloop
: on_failure_jump, b, laststart);
***************
*** 2704,2710 ****
{
/* The repeat...until naturally matches one or more.
To also match zero times, we need to first jump to
! the end of the loop (its conditional jump). */
INSERT_JUMP (jump, laststart, b);
b += 3;
}
--- 2682,2688 ----
{
/* The repeat...until naturally matches one or more.
To also match zero times, we need to first jump to
! the end of the loop (its conditional jump). */
INSERT_JUMP (jump, laststart, b);
b += 3;
}
***************
*** 3241,3339 ****
goto unfetch_interval;
}
! if (upper_bound == 0)
! /* If the upper bound is zero, just drop the sub pattern
! altogether. */
! b = laststart;
! else if (lower_bound == 1 && upper_bound == 1)
! /* Just match it once: nothing to do here. */
! ;
!
! /* Otherwise, we have a nontrivial interval. When
! we're all done, the pattern will look like:
! set_number_at <jump count> <upper bound>
! set_number_at <succeed_n count> <lower bound>
! succeed_n <after jump addr> <succeed_n count>
! <body of loop>
! jump_n <succeed_n addr> <jump count>
! (The upper bound and `jump_n' are omitted if
! `upper_bound' is 1, though.) */
! else
! { /* If the upper bound is > 1, we need to insert
! more at the end of the loop. */
! unsigned int nbytes = (upper_bound < 0 ? 3
! : upper_bound > 1 ? 5 : 0);
! unsigned int startoffset = 0;
!
! GET_BUFFER_SPACE (20); /* We might use less. */
!
! if (lower_bound == 0)
! {
! /* A succeed_n that starts with 0 is really a
! a simple on_failure_jump_loop. */
! INSERT_JUMP (on_failure_jump_loop, laststart,
! b + 3 + nbytes);
! b += 3;
! }
! else
! {
! /* Initialize lower bound of the `succeed_n', even
! though it will be set during matching by its
! attendant `set_number_at' (inserted next),
! because `re_compile_fastmap' needs to know.
! Jump to the `jump_n' we might insert below. */
! INSERT_JUMP2 (succeed_n, laststart,
! b + 5 + nbytes,
! lower_bound);
! b += 5;
!
! /* Code to initialize the lower bound. Insert
! before the `succeed_n'. The `5' is the last two
! bytes of this `set_number_at', plus 3 bytes of
! the following `succeed_n'. */
! insert_op2 (set_number_at, laststart, 5, lower_bound,
b);
! b += 5;
! startoffset += 5;
! }
!
! if (upper_bound < 0)
! {
! /* A negative upper bound stands for infinity,
! in which case it degenerates to a plain jump. */
! STORE_JUMP (jump, b, laststart + startoffset);
! b += 3;
! }
! else if (upper_bound > 1)
! { /* More than one repetition is allowed, so
! append a backward jump to the `succeed_n'
! that starts this interval.
!
! When we've reached this during matching,
! we'll have matched the interval once, so
! jump back only `upper_bound - 1' times. */
! STORE_JUMP2 (jump_n, b, laststart + startoffset,
! upper_bound - 1);
! b += 5;
!
! /* The location we want to set is the second
! parameter of the `jump_n'; that is `b-2' as
! an absolute address. `laststart' will be
! the `set_number_at' we're about to insert;
! `laststart+3' the number to set, the source
! for the relative address. But we are
! inserting into the middle of the pattern --
! so everything is getting moved up by 5.
! Conclusion: (b - 2) - (laststart + 3) + 5,
! i.e., b - laststart.
!
! We insert this at the beginning of the loop
! so that if we fail during matching, we'll
! reinitialize the bounds. */
! insert_op2 (set_number_at, laststart, b - laststart,
! upper_bound - 1, b);
! b += 5;
! }
! }
pending_exact = 0;
beg_interval = NULL;
}
--- 3219,3317 ----
goto unfetch_interval;
}
! if (upper_bound == 0)
! /* If the upper bound is zero, just drop the sub pattern
! altogether. */
! b = laststart;
! else if (lower_bound == 1 && upper_bound == 1)
! /* Just match it once: nothing to do here. */
! ;
!
! /* Otherwise, we have a nontrivial interval. When
! we're all done, the pattern will look like:
! set_number_at <jump count> <upper bound>
! set_number_at <succeed_n count> <lower bound>
! succeed_n <after jump addr> <succeed_n count>
! <body of loop>
! jump_n <succeed_n addr> <jump count>
! (The upper bound and `jump_n' are omitted if
! `upper_bound' is 1, though.) */
! else
! { /* If the upper bound is > 1, we need to insert
! more at the end of the loop. */
! unsigned int nbytes = (upper_bound < 0 ? 3
! : upper_bound > 1 ? 5 : 0);
! unsigned int startoffset = 0;
!
! GET_BUFFER_SPACE (20); /* We might use less. */
!
! if (lower_bound == 0)
! {
! /* A succeed_n that starts with 0 is really a
! a simple on_failure_jump_loop. */
! INSERT_JUMP (on_failure_jump_loop, laststart,
! b + 3 + nbytes);
! b += 3;
! }
! else
! {
! /* Initialize lower bound of the `succeed_n', even
! though it will be set during matching by its
! attendant `set_number_at' (inserted next),
! because `re_compile_fastmap' needs to know.
! Jump to the `jump_n' we might insert below. */
! INSERT_JUMP2 (succeed_n, laststart,
! b + 5 + nbytes,
! lower_bound);
! b += 5;
!
! /* Code to initialize the lower bound. Insert
! before the `succeed_n'. The `5' is the last two
! bytes of this `set_number_at', plus 3 bytes of
! the following `succeed_n'. */
! insert_op2 (set_number_at, laststart, 5, lower_bound,
b);
! b += 5;
! startoffset += 5;
! }
!
! if (upper_bound < 0)
! {
! /* A negative upper bound stands for infinity,
! in which case it degenerates to a plain jump. */
! STORE_JUMP (jump, b, laststart + startoffset);
! b += 3;
! }
! else if (upper_bound > 1)
! { /* More than one repetition is allowed, so
! append a backward jump to the `succeed_n'
! that starts this interval.
!
! When we've reached this during matching,
! we'll have matched the interval once, so
! jump back only `upper_bound - 1' times. */
! STORE_JUMP2 (jump_n, b, laststart + startoffset,
! upper_bound - 1);
! b += 5;
!
! /* The location we want to set is the second
! parameter of the `jump_n'; that is `b-2' as
! an absolute address. `laststart' will be
! the `set_number_at' we're about to insert;
! `laststart+3' the number to set, the source
! for the relative address. But we are
! inserting into the middle of the pattern --
! so everything is getting moved up by 5.
! Conclusion: (b - 2) - (laststart + 3) + 5,
! i.e., b - laststart.
!
! We insert this at the beginning of the loop
! so that if we fail during matching, we'll
! reinitialize the bounds. */
! insert_op2 (set_number_at, laststart, b - laststart,
! upper_bound - 1, b);
! b += 5;
! }
! }
pending_exact = 0;
beg_interval = NULL;
}
***************
*** 5518,5524 ****
cycle detection cannot work. Worse yet, such a detection
can not only fail to detect a cycle, but it can also wrongly
detect a cycle (between different instantiations of the same
! loop.
So the method used for those nasty loops is a little different:
We use a special cycle-detection-stack-frame which is pushed
when the on_failure_jump_nastyloop failure-point is *popped*.
--- 5496,5502 ----
cycle detection cannot work. Worse yet, such a detection
can not only fail to detect a cycle, but it can also wrongly
detect a cycle (between different instantiations of the same
! loop).
So the method used for those nasty loops is a little different:
We use a special cycle-detection-stack-frame which is pushed
when the on_failure_jump_nastyloop failure-point is *popped*.
***************
*** 5532,5542 ****
mcnt, p + mcnt);
assert ((re_opcode_t)p[-4] == no_op);
! CHECK_INFINITE_LOOP (p - 4, d);
! PUSH_FAILURE_POINT (p - 3, d);
break;
-
/* Simple loop detecting on_failure_jump: just check on the
failure stack if the same spot was already hit earlier. */
case on_failure_jump_loop:
--- 5510,5527 ----
mcnt, p + mcnt);
assert ((re_opcode_t)p[-4] == no_op);
! {
! int cycle = 0;
! CHECK_INFINITE_LOOP (p - 4, d);
! if (!cycle)
! /* If there's a cycle, just continue without pushing
! this failure point. The failure point is the "try again"
! option, which shouldn't be tried.
! We want (x?)*?y\1z to match both xxyz and xxyxz. */
! PUSH_FAILURE_POINT (p - 3, d);
! }
break;
/* Simple loop detecting on_failure_jump: just check on the
failure stack if the same spot was already hit earlier. */
case on_failure_jump_loop:
***************
*** 5544,5552 ****
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
mcnt, p + mcnt);
!
! CHECK_INFINITE_LOOP (p - 3, d);
! PUSH_FAILURE_POINT (p - 3, d);
break;
--- 5529,5547 ----
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
mcnt, p + mcnt);
! {
! int cycle = 0;
! CHECK_INFINITE_LOOP (p - 3, d);
! if (cycle)
! /* If there's a cycle, get out of the loop, as if the matching
! had failed. We used to just `goto fail' here, but that was
! aborting the search a bit too early: we want to keep the
! empty-loop-match and keep matching after the loop.
! We want (x?)*y\1z to match both xxyz and xxyxz. */
! p += mcnt;
! else
! PUSH_FAILURE_POINT (p - 3, d);
! }
break;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] Changes to emacs/src/regex.c,
Kenichi Handa <=