>From 85930543f14bed93693c7565f3aee3b33e2a852a Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Fri, 9 Dec 2011 10:48:49 +0100 Subject: [PATCH] Apply upstream changesets ba70feace1dd and 78ba6b09e021 This fixes an infinite loop problem with irregex-fold when empty matches are found (reported by Manuel Serrano) and adds some extra tests for complemented UTF-8 character sets. --- irregex-core.scm | 28 ++++++++++++++++++---------- tests/test-irregex.scm | 15 ++++++++++++++- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/irregex-core.scm b/irregex-core.scm index 982f57e..54413bf 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1485,7 +1485,7 @@ (map (lambda (_) `(/ ,(integer->char #x80) ,(integer->char #xFF))) (zero-to (+ i lo-len)))))) - (zero-to (- (length hi-ls) lo-len 1))) + (zero-to (- (length hi-ls) (+ lo-len 1)))) (list (sre-sequence (cons `(/ ,(integer->char @@ -3752,10 +3752,13 @@ matches))) (if (not m) (finish i acc) - (let* ((end (%irregex-match-end-index m 0)) - (acc (kons i m acc))) - (irregex-reset-matches! matches) - (lp end acc)))))))) + (let ((end (%irregex-match-end-index m 0))) + (if (= end i) + ;; skip one char forward if we match the empty string + (lp (+ end 1) acc) + (let ((acc (kons i m acc))) + (irregex-reset-matches! matches) + (lp end acc)))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons)) @@ -3777,11 +3780,16 @@ (let ((m (irregex-search/matches irx cnk start i matches))) (if (not m) (finish start i acc) - (let* ((acc (kons start i m acc)) - (end-src (%irregex-match-end-chunk m 0)) - (end-index (%irregex-match-end-index m 0))) - (irregex-reset-matches! matches) - (lp end-src end-index acc)))))))) + (let ((end-src (%irregex-match-end-chunk m 0)) + (end-index (%irregex-match-end-index m 0))) + (if (and (eq? end-src start) (= end-index i)) + (if (>= end-index ((chunker-get-end cnk) end-src )) + (let ((next ((chunker-get-next cnk) end-src))) + (lp next ((chunker-get-start cnk) next) acc)) + (lp end-src (+ end-index 1) acc)) + (let ((acc (kons start i m acc))) + (irregex-reset-matches! matches) + (lp end-src end-index acc)))))))))) (define (irregex-fold/chunked irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index a06bc6b..fd2cb97 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -358,6 +358,11 @@ rope-chunker (rope "address@hidden and address@hidden") (lambda (src i s) (reverse s)))) + (test-equal '("poo poo ") + (irregex-fold '(* "poo ") + (lambda (i m s) (cons (irregex-match-substring m) s)) + '() + "poo poo platter")) ) @@ -499,5 +504,13 @@ (test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>"))) (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) -(test-end)(test-exit) +(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "")) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>"))) +(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "")) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) + +(test-end) -- 1.7.3.4