>From c80002f2d8de8e4c06636e2b11a8fd2adafc49f1 Mon Sep 17 00:00:00 2001
From: LemonBoy
Date: Thu, 9 Nov 2017 13:29:08 +0100
Subject: [PATCH] Fix an error in unicode-range->utf8-pattern

The sequence generated for a utf8 character class contained an
unintended trailing '(), causing the code to fail when
`sre-length-ranges' is called.

Reported by Chunyang Xu at CHICKEN-users.
---
 irregex-core.scm       | 7 +++----
 tests/test-irregex.scm | 2 ++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/irregex-core.scm b/irregex-core.scm
index c83aff9b..bef8336e 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1402,12 +1402,11 @@
                              (unicode-range-up-to hi-ls)))
         (let lp ((lo-ls lo-ls) (hi-ls hi-ls))
           (cond
-           ((null? lo-ls)
-            '())
            ((= (car lo-ls) (car hi-ls))
             (sre-sequence
-             (list (integer->char (car lo-ls))
-                   (lp (cdr lo-ls) (cdr hi-ls)))))
+             (cons (integer->char (car lo-ls))
+                   (if (null? (cdr lo-ls)) '()
+                       (cons (lp (cdr lo-ls) (cdr hi-ls)) '())))))
            ((= (+ (car lo-ls) 1) (car hi-ls))
             (sre-alternate (list (unicode-range-up-from lo-ls)
                                  (unicode-range-up-to hi-ls))))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 19218bd8..d7bfaf59 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -539,6 +539,8 @@
 (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
 (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
 
+(test-assert (not (irregex-search (irregex "[一二]" 'utf8 #t) "三四")))
+
 (test-end)
 
 (test-exit)
 
-- 
2.11.0