From 205a0e7aae41fb819c8dbd78e145f34161ec8a4c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 22 Apr 2014 23:34:22 -0700 Subject: [PATCH 2/2] kwset: simplify and speed up Boyer-Moore unibyte -i in some cases This improves the performance of, for example, yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk in a unibyte locale. * src/kwset.c (memchr_trans): New function. (bmexec): Use it. Simplify the code and remove some of the confusing gotos and breaks and labels. Do not treat glibc memchr as a special case; if non-glibc memchr is slow, that is lower priority and I suppose we can try to work around the problem in gnulib. --- src/kwset.c | 77 ++++++++++++++++++++++++++----------------------------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/src/kwset.c b/src/kwset.c index 78fb0b2..f86ee03 100644 --- a/src/kwset.c +++ b/src/kwset.c @@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len, return false; } +/* Return the address of the first byte in the buffer S that equals C. + S contains N bytes. If TRANS is nonnull, use it to transliterate + S's bytes before comparing them. */ +static char const * +memchr_trans (char const *s, char c, size_t n, char const *trans) +{ + if (! trans) + return memchr (s, c, n); + char const *slim = s + n; + for (; s < slim; s++) + if (trans[U(*s)] == c) + return s; + return NULL; +} /* Fast boyer-moore search. */ static size_t _GL_ATTRIBUTE_PURE @@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size) return -1; if (len == 1) { - if (trans) - { - for (tp = text; tp < text + size; tp++) - if (trans[U(*tp)] == kwset->target[0]) - return tp - text; - return -1; - } - else - { - tp = memchr (text, kwset->target[0], size); - return tp ? tp - text : -1; - } + tp = memchr_trans (text, kwset->target[0], size, trans); + return tp ? tp - text : -1; } d1 = kwset->delta; @@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size) /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */ if (size > 12 * len) /* 11 is not a bug, the initial offset happens only once. */ - for (ep = text + size - 11 * len;;) + for (ep = text + size - 11 * len; tp <= ep; ) { - while (tp <= ep) + d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + if (d != 0) { d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; - d = d1[U(tp[-1])], tp += d; - if (d == 0) - goto found; - /* memchar() of glibc is faster than seeking by delta1 on - some platforms. When there is no chance to match for a - while, use it on them. */ -#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__)) - if (!trans) - { - tp = memchr (tp - 1, gc1, size + text - tp + 1); - if (tp) - { - ++tp; - goto found; - } - else - return -1; - } - else -#endif + if (d != 0) { d = d1[U(tp[-1])], tp += d; d = d1[U(tp[-1])], tp += d; + d = d1[U(tp[-1])], tp += d; + if (d != 0) + { + /* Typically memchr is faster than seeking by + delta1 when there is no chance to match for + a while. */ + tp--; + tp = memchr_trans (tp, gc1, text + size - tp, trans); + if (! tp) + return -1; + tp++; + } } } - break; - found: if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset)) return tp - text; } -- 1.9.0