>From b6386de361bc7b2b495b39d3f6a18944daf7538b Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Sun, 23 Sep 2012 15:43:00 +0200 Subject: [PATCH 2/2] Irregex: Fix problem with suffixes returned while searching instead of matching (thanks to Sven Hartrumpf for reporting this) (upstream changeset afae3f6a8f8a) Now, finalizers are run whenever we move from an accepting state to a non-accepting state. This allows memory slots to be modified even if we never reach an accepting state, returning a match found earlier. --- irregex-core.scm | 52 ++++++++++++++++++++++++++-------------------------- tests/re-tests.txt | 1 + 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/irregex-core.scm b/irregex-core.scm index edfbf01..ce3d2e1 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -2189,8 +2189,7 @@ (lp1 next (get-start next) state res-src res-index finalizer) (and index (%irregex-match-end-chunk matches index) - (or (not submatches?) - (finalize! finalizer memory matches)) + (or (not finalizer) (finalize! finalizer memory matches)) #t)))) (else (let* ((ch (string-ref str i)) @@ -2201,37 +2200,38 @@ (cdr state)))) (cond (cell - (cond - (submatches? - (let ((cmds (dfa-cell-commands dfa cell))) - (for-each (lambda (s) - (let ((slot (vector-ref memory (cdr s))) - (chunk&position (cons src (+ i 1)))) - (vector-set! slot (car s) chunk&position))) - (cdr cmds)) - (for-each (lambda (c) - (let* ((tag (vector-ref c 0)) - (ss (vector-ref memory (vector-ref c 1))) - (ds (vector-ref memory (vector-ref c 2)))) - (vector-set! ds tag (vector-ref ss tag)))) - (car cmds))))) - (let ((next (dfa-next-state dfa cell))) - (cond - ((dfa-finalizer dfa next) => - (lambda (new-finalizer) - (lp2 (+ i 1) next src (+ i 1) new-finalizer))) - (else (lp2 (+ i 1) next res-src res-index finalizer))))) + (let* ((next (dfa-next-state dfa cell)) + (new-finalizer (dfa-finalizer dfa next))) + (cond + (submatches? + (let ((cmds (dfa-cell-commands dfa cell))) + ;; Save match when we're moving from accepting state to + ;; rejecting state; this could be the last accepting one. + (cond ((and finalizer (not new-finalizer)) + (finalize! finalizer memory matches))) + (for-each (lambda (s) + (let ((slot (vector-ref memory (cdr s))) + (chunk&position (cons src (+ i 1)))) + (vector-set! slot (car s) chunk&position))) + (cdr cmds)) + (for-each (lambda (c) + (let* ((tag (vector-ref c 0)) + (ss (vector-ref memory (vector-ref c 1))) + (ds (vector-ref memory (vector-ref c 2)))) + (vector-set! ds tag (vector-ref ss tag)))) + (car cmds))))) + (if new-finalizer + (lp2 (+ i 1) next src (+ i 1) new-finalizer) + (lp2 (+ i 1) next res-src res-index #f)))) (res-src (cond (index (irregex-match-end-chunk-set! matches index res-src) (irregex-match-end-index-set! matches index res-index))) - (cond (submatches? - (finalize! finalizer memory matches))) + (cond (finalizer (finalize! finalizer memory matches))) #t) ((and index (%irregex-match-end-chunk matches index)) - (cond (submatches? - (finalize! finalizer memory matches))) + (cond (finalizer (finalize! finalizer memory matches))) #t) (else #f)))))))))) diff --git a/tests/re-tests.txt b/tests/re-tests.txt index 1cbc379..7b23357 100644 --- a/tests/re-tests.txt +++ b/tests/re-tests.txt @@ -104,6 +104,7 @@ a([bc]*)(c+d) abcd y &-\1-\2 abcd-b-cd a[bcd]*dcdcde adcdcde y & adcdcde a[bcd]+dcdcde adcdcde n - - (ab|a)b*c abc y &-\1 abc-ab +(.*)b abc y &-\1 ab-a ((a)(b)c)(d) abcd y \1-\2-\3-\4 abc-a-b-d ((a)(b)?c)(d) abcd y \1-\2-\3-\4 abc-a-b-d ((a)(b)?c)(d) acd y \1-\2-\3-\4 ac-a--d -- 1.7.9.1