>From 11c078880d727a360251cf6f0cbe9798a6c16208 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Tue, 27 Nov 2012 20:40:04 +0100 Subject: [PATCH] Irregex: Ensure 'bos is matched only once in irregex/fold; this fixes #686 and updates irregex to 0.9.1 (upstream changesets a6444839100c through 1741bbf14d3e) Upstream changeset message for this bugfix: Don't bother looping in irregex-fold(/chunked)/fast if the irregex is a searcher. Either it only matches the beginning, or it already consumed the rightmost match. --- NEWS | 1 + irregex-core.scm | 18 ++++++++++++++++-- tests/test-irregex.scm | 16 ++++++++++++++-- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 39c5bb2..4ff324f 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ - Core libraries - Fixed EINTR handling in process-wait and when reading from file ports. + - Irregex is updated to 0.9.1, which includes bugfixes and faster submatches. 4.8.0 diff --git a/irregex-core.scm b/irregex-core.scm index 5531679..c83f890 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -27,10 +27,14 @@ ;; performance tuning, but you can only go so far while staying ;; portable. AND-LET*, SRFI-9 records and custom macros would've been ;; nice. +;; +;; Version 1.0 will be released as a portable R7RS library. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History ;; +;; 0.9.1: 2012/11/27 - various accumulated bugfixes +;; 0.9.0: 2012/06/03 - Using tags for match extraction from Peter Bex. ;; 0.8.3: 2011/12/18 - various accumulated bugfixes ;; 0.8.2: 2010/08/28 - (...)? submatch extraction fix and alternate ;; named submatches from Peter Bex @@ -3830,7 +3834,12 @@ (lp (+ end 1) acc) (let ((acc (kons i m acc))) (irregex-reset-matches! matches) - (lp end acc)))))))))) + ;; no need to continue looping if this is a + ;; searcher - it's already consumed the only + ;; available match + (if (flag-set? (irregex-flags irx) ~searcher?) + (finish end acc) + (lp end acc))))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons)) @@ -3861,7 +3870,12 @@ (lp end-src (+ end-index 1) acc)) (let ((acc (kons start i m acc))) (irregex-reset-matches! matches) - (lp end-src end-index acc)))))))))) + ;; no need to continue looping if this is a + ;; searcher - it's already consumed the only + ;; available match + (if (flag-set? (irregex-flags irx) ~searcher?) + (finish end-src end-index acc) + (lp end-src end-index acc))))))))))) (define (irregex-fold/chunked irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index cef431c..7440e18 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -78,8 +78,8 @@ (fast) )) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; chunked irregex +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; chunked irregex (define (rope . args) (map (lambda (x) (if (pair? x) x (list x 0 (string-length x)))) args)) @@ -363,6 +363,18 @@ (lambda (i m s) (cons (irregex-match-substring m) s)) '() "poo poo platter")) + (test-equal "* x " + (irregex-replace/all + (irregex '(: bos #\space) 'backtrack) " x " "*")) + (test-equal "* x " + (irregex-replace/all + (irregex '(: bos #\space) 'dfa) " x " "*")) + (test-equal "***x***" + (irregex-replace/all + (irregex '(: #\space) 'backtrack) " x " "*")) + (test-equal "***x***" + (irregex-replace/all + (irregex '(: #\space) 'dfa) " x " "*")) ) -- 1.7.12.2