[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
From: |
Paolo Bonzini |
Subject: |
[PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]] |
Date: |
Tue, 4 May 2010 18:11:07 +0200 |
There's no "multibyte pain" in these two classes, since POSIX
and ISO C99 mandate their contents.
Timing for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words":
before: 1.5s, after: 0.07s (for comparison, sed takes 0.5s).
* src/dfa.c (prednames): Declare struct dfa_ctype separately
from the array definition. Add sb_only member.
(find_pred): Return const struct dfa_ctype *.
(parse_bracket_exp): Adjust to find_pred's new return type. Do
not fill MBCSET for sb_only character types.
---
src/dfa.c | 56 +++++++++++++++++++++++++++++---------------------------
1 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 44efc02..107866a 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -713,26 +713,29 @@ typedef int predicate (int);
/* The following list maps the names of the Posix named character classes
to predicate functions that determine whether a given character is in
the class. The leading [ has already been eaten by the lexical analyzer. */
-static struct {
+struct dfa_ctype {
const char *name;
- predicate *pred;
-} const prednames[] = {
- { "alpha", isalpha },
- { "upper", isupper },
- { "lower", islower },
- { "digit", isdigit },
- { "xdigit", isxdigit },
- { "space", isspace },
- { "punct", ispunct },
- { "alnum", isalnum },
- { "print", isprint },
- { "graph", isgraph },
- { "cntrl", iscntrl },
- { "blank", isblank },
- { NULL, NULL }
+ predicate *func;
+ bool sb_only;
};
-static predicate *
+static const struct dfa_ctype prednames[] = {
+ { "alpha", isalpha, false },
+ { "upper", isupper, false },
+ { "lower", islower, false },
+ { "digit", isdigit, true },
+ { "xdigit", isxdigit, true },
+ { "space", isspace, false },
+ { "punct", ispunct, false },
+ { "alnum", isalnum, false },
+ { "print", isprint, false },
+ { "graph", isgraph, false },
+ { "cntrl", iscntrl, false },
+ { "blank", isblank, false },
+ { NULL, NULL, false }
+};
+
+static const struct dfa_ctype *
find_pred (const char *str)
{
unsigned int i;
@@ -740,7 +743,7 @@ find_pred (const char *str)
if (STREQ (str, prednames[i].name))
break;
- return prednames[i].pred;
+ return &prednames[i];
}
/* Multibyte character handling sub-routine for lex.
@@ -837,8 +840,12 @@ parse_bracket_exp (void)
|| STREQ (str, "lower"))
? "alpha"
: str);
+ const struct dfa_ctype *pred = find_pred (class);
+ if (!pred)
+ dfaerror(_("invalid character class"));
+
#if MBS_SUPPORT
- if (MB_CUR_MAX > 1)
+ if (MB_CUR_MAX > 1 && !pred->sb_only)
{
/* Store the character class as wctype_t. */
wctype_t wt = wctype (class);
@@ -852,14 +859,9 @@ parse_bracket_exp (void)
}
#endif
- {
- predicate *pred = find_pred (class);
- if (!pred)
- dfaerror(_("invalid character class"));
- for (c2 = 0; c2 < NOTCHAR; ++c2)
- if ((*pred)(c2))
- setbit_case_fold (c2, ccl);
- }
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (pred->func(c2))
+ setbit_case_fold (c2, ccl);
}
#if MBS_SUPPORT
--
1.6.6.1
- [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]],
Paolo Bonzini <=