[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 2/9] dfa: fix handling of ranges in multibyte character sets
From: Paolo Bonzini
Subject: [PATCH 2/9] dfa: fix handling of ranges in multibyte character sets
Date: Sun, 14 Mar 2010 16:35:07 +0100
* src/dfa.c (parse_bracket_exp_mb): Add separate ranges for
lowercase and uppercase endpoints if folding case.
* tests/Makefile.am (TESTS): Add case-fold-char-range.
* tests/case-fold-char-range: New.
---
src/dfa.c | 16 ++++++++++++++--
tests/Makefile.am | 1 +
tests/case-fold-char-range | 21 +++++++++++++++++++++
3 files changed, 36 insertions(+), 2 deletions(-)
create mode 100644 tests/case-fold-char-range
diff --git a/src/dfa.c b/src/dfa.c
index 6c7494e..3cc405a 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -576,10 +576,22 @@ parse_bracket_exp_mb (void)
}
REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
range_sts_al, work_mbc->nranges + 1);
- work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
range_ends_al, work_mbc->nranges + 1);
- work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
+ work_mbc->range_sts[work_mbc->nranges] =
+ case_fold ? towlower(wc) : (wchar_t)wc;
+ work_mbc->range_ends[work_mbc->nranges++] =
+ case_fold ? towlower(wc2) : (wchar_t)wc2;
+
+ if (case_fold)
+ {
+ REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
+ range_sts_al, work_mbc->nranges + 1);
+ work_mbc->range_sts[work_mbc->nranges] = towupper(wc);
+ REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
+ range_ends_al, work_mbc->nranges + 1);
+ work_mbc->range_ends[work_mbc->nranges++] = towupper(wc2);
+ }
}
else if (wc != WEOF)
/* build normal characters. */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9724b0d..ab5fd4e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -18,6 +18,7 @@ TESTS = \
backref.sh \
bre.sh \
case-fold-char-class \
+ case-fold-char-range \
case-fold-char-type \
dfaexec-multibyte \
empty.sh \
diff --git a/tests/case-fold-char-range b/tests/case-fold-char-range
new file mode 100644
index 0000000..e683da9
--- /dev/null
+++ b/tests/case-fold-char-range
@@ -0,0 +1,21 @@
+#!/bin/sh
+# This would fail for grep-2.5.3
+: ${srcdir=.}
+. "$srcdir/init.sh"; path_prepend_ ../src
+
+printf 'Y\n' > exp1 || framework_failure
+fail=0
+
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
+ printf '1\nY\n.\n' | LC_ALL=$LOC grep -i '[a-z]' > out1 || fail=1
+ compare out1 exp1 || fail=1
+done
+
+printf 'y\n' > exp2 || framework_failure
+
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
+ printf '1\ny\n.\n' | LC_ALL=$LOC grep -i '[A-Z]' > out2 || fail=1
+ compare out2 exp2 || fail=1
+done
+
+Exit $fail
--
1.6.6.1
[PATCH 3/9] dfa: rewrite handling of multibyte case_fold lexing, Paolo Bonzini, 2010/03/14
[PATCH 4/9] dfa: speed up handling of brackets, Paolo Bonzini, 2010/03/14