>From a09b5a1c7543e2b36880e5f29f599264e9c81eab Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Wed, 28 Nov 2012 23:57:10 +0100 Subject: [PATCH 1/2] Irregex: Fixing folds on conditional begin patterns which aren't treated as searchers. This is the final fix for #686 and synchronizes with upstream version 0.9.2 (upstream changesets 01058fc79a16 and fad713187dbb) --- NEWS | 2 +- irregex-core.scm | 36 ++++++++++++++++++++---------------- tests/test-irregex.scm | 12 ++++++++++++ types.db | 2 +- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 4ff324f..719b96d 100644 --- a/NEWS +++ b/NEWS @@ -6,7 +6,7 @@ - Core libraries - Fixed EINTR handling in process-wait and when reading from file ports. - - Irregex is updated to 0.9.1, which includes bugfixes and faster submatches. + - Irregex is updated to 0.9.2, which includes bugfixes and faster submatches. 4.8.0 diff --git a/irregex-core.scm b/irregex-core.scm index c83f890..c088665 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -33,6 +33,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History ;; +;; 0.9.2: 2012/11/29 - fixed a bug in -fold on conditional bos patterns ;; 0.9.1: 2012/11/27 - various accumulated bugfixes ;; 0.9.0: 2012/06/03 - Using tags for match extraction from Peter Bex. ;; 0.8.3: 2011/12/18 - various accumulated bugfixes @@ -1954,11 +1955,11 @@ (i (if (pair? o) (car o) ((chunker-get-start cnk) src)))) (if (not (integer? i)) (%irregex-error 'irregex-search "not an integer" i)) (irregex-match-chunker-set! matches cnk) - (irregex-search/matches irx cnk src i matches))) + (irregex-search/matches irx cnk (cons src i) src i matches))) ;; internal routine, can be used in loops to avoid reallocating the ;; match vector -(define (irregex-search/matches irx cnk src i matches) +(define (irregex-search/matches irx cnk init src i matches) (cond ((irregex-dfa irx) (cond @@ -1992,16 +1993,15 @@ (else #f))) (else - (let ((res (irregex-search/backtrack irx cnk src i matches))) + (let ((res (irregex-search/backtrack irx cnk init src i matches))) (if res (%irregex-match-fail-set! res #f)) res)))) -(define (irregex-search/backtrack irx cnk src i matches) +(define (irregex-search/backtrack irx cnk init src i matches) (let ((matcher (irregex-nfa irx)) (str ((chunker-get-str cnk) src)) (end ((chunker-get-end cnk) src)) - (get-next (chunker-get-next cnk)) - (init (cons src i))) + (get-next (chunker-get-next cnk))) (if (flag-set? (irregex-flags irx) ~searcher?) (matcher cnk init src str i end matches (lambda () #f)) (let lp ((src2 src) @@ -3811,35 +3811,38 @@ (start (if (and (pair? o) (pair? (cdr o))) (cadr o) 0)) (end (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (caddr o) - (string-length str)))) + (string-length str))) + (init-src (list str start end)) + (init (cons init-src start))) (if (not (and (integer? start) (exact? start))) (%irregex-error 'irregex-fold "not an exact integer" start)) (if (not (and (integer? end) (exact? end))) (%irregex-error 'irregex-fold "not an exact integer" end)) (irregex-match-chunker-set! matches irregex-basic-string-chunker) - (let lp ((i start) (acc knil)) + (let lp ((src init-src) (i start) (acc knil)) (if (>= i end) (finish i acc) (let ((m (irregex-search/matches irx irregex-basic-string-chunker - (list str i end) + init + src i matches))) (if (not m) (finish i acc) - (let ((end (%irregex-match-end-index m 0))) - (if (= end i) + (let ((j (%irregex-match-end-index m 0))) + (if (= j i) ;; skip one char forward if we match the empty string - (lp (+ end 1) acc) + (lp (list str (+ j 1) end) (+ j 1) acc) (let ((acc (kons i m acc))) (irregex-reset-matches! matches) ;; no need to continue looping if this is a ;; searcher - it's already consumed the only ;; available match (if (flag-set? (irregex-flags irx) ~searcher?) - (finish end acc) - (lp end acc))))))))))) + (finish j acc) + (lp (list str j end) j acc))))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons)) @@ -3852,13 +3855,14 @@ (finish (or (and (pair? o) (car o)) (lambda (src i acc) acc))) (i (if (and (pair? o) (pair? (cdr o))) (cadr o) - ((chunker-get-start cnk) start)))) + ((chunker-get-start cnk) start))) + (init (cons start i))) (if (not (integer? i)) (%irregex-error 'irregex-fold/chunked "not an integer" i)) (irregex-match-chunker-set! matches cnk) (let lp ((start start) (i i) (acc knil)) (if (not start) (finish start i acc) - (let ((m (irregex-search/matches irx cnk start i matches))) + (let ((m (irregex-search/matches irx cnk init start i matches))) (if (not m) (finish start i acc) (let ((end-src (%irregex-match-end-chunk m 0)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 7440e18..d275421 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -375,6 +375,18 @@ (test-equal "***x***" (irregex-replace/all (irregex '(: #\space) 'dfa) " x " "*")) + (test-equal "xaac" + (irregex-replace/all + (irregex '(or (seq bos "a") (seq bos "b")) 'backtrack) "aaac" "x")) + (test-equal "xaac" + (irregex-replace/all + (irregex '(or (seq bos "a") (seq bos "b")) 'dfa) "aaac" "x")) + (test-equal "xaac" + (irregex-replace/all (irregex '(or (seq bos "a") "b") 'backtrack) + "aaac" "x")) + (test-equal "xaac" + (irregex-replace/all (irregex '(or (seq bos "a") "b") 'dfa) + "aaac" "x")) ) diff --git a/types.db b/types.db index da45786..ede5fa3 100644 --- a/types.db +++ b/types.db @@ -1386,7 +1386,7 @@ (irregex-replace/all (#(procedure #:enforce) irregex-replace/all (* string #!rest) string)) (irregex-reset-matches! (procedure irregex-reset-matches! (*) *)) (irregex-search (#(procedure #:enforce) irregex-search (* string #!optional fixnum fixnum) *)) -(irregex-search/matches (#(procedure #:enforce) irregex-search/matches (* string fixnum fixnum *) *)) +(irregex-search/matches (#(procedure #:enforce) irregex-search/matches (* string * fixnum fixnum *) *)) (irregex-split (#(procedure #:enforce) irregex-split (* string #!optional fixnum fixnum) list)) (irregex-search/chunked (#(procedure #:enforce) irregex-search/chunked (* procedure * #!optional fixnum fixnum *) *)) (irregex-match-valid-index? -- 1.7.12.2