From 205a0e7aae41fb819c8dbd78e145f34161ec8a4c Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Tue, 22 Apr 2014 23:34:22 -0700
Subject: [PATCH 2/2] kwset: simplify and speed up Boyer-Moore unibyte -i in
 some cases

This improves the performance of, for example,
yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
in a unibyte locale.
* src/kwset.c (memchr_trans): New function.
(bmexec): Use it.  Simplify the code and remove some of the
confusing gotos and breaks and labels.  Do not treat glibc memchr
as a special case; if non-glibc memchr is slow, that is lower
priority and I suppose we can try to work around the problem in
gnulib.
---
 src/kwset.c | 77 ++++++++++++++++++++++++++-----------------------------------
 1 file changed, 33 insertions(+), 44 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 78fb0b2..f86ee03 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
   return false;
 }
 
+/* Return the address of the first byte in the buffer S that equals C.
+   S contains N bytes.  If TRANS is nonnull, use it to transliterate
+   S's bytes before comparing them.  */
+static char const *
+memchr_trans (char const *s, char c, size_t n, char const *trans)
+{
+  if (! trans)
+    return memchr (s, c, n);
+  char const *slim = s + n;
+  for (; s < slim; s++)
+    if (trans[U(*s)] == c)
+      return s;
+  return NULL;
+}
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      if (trans)
-        {
-          for (tp = text; tp < text + size; tp++)
-            if (trans[U(*tp)] == kwset->target[0])
-              return tp - text;
-          return -1;
-        }
-      else
-        {
-          tp = memchr (text, kwset->target[0], size);
-          return tp ? tp - text : -1;
-        }
+      tp = memchr_trans (text, kwset->target[0], size, trans);
+      return tp ? tp - text : -1;
     }
 
   d1 = kwset->delta;
@@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
     /* 11 is not a bug, the initial offset happens only once. */
-    for (ep = text + size - 11 * len;;)
+    for (ep = text + size - 11 * len; tp <= ep; )
       {
-        while (tp <= ep)
+        d = d1[U(tp[-1])], tp += d;
+        d = d1[U(tp[-1])], tp += d;
+        if (d != 0)
           {
             d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            /* memchar() of glibc is faster than seeking by delta1 on
-               some platforms.  When there is no chance to match for a
-               while, use it on them.  */
-#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
-            if (!trans)
-              {
-                tp = memchr (tp - 1, gc1, size + text - tp + 1);
-                if (tp)
-                  {
-                    ++tp;
-                    goto found;
-                  }
-                else
-                  return -1;
-              }
-            else
-#endif
+            if (d != 0)
               {
                 d = d1[U(tp[-1])], tp += d;
                 d = d1[U(tp[-1])], tp += d;
+                d = d1[U(tp[-1])], tp += d;
+                if (d != 0)
+                  {
+                    /* Typically memchr is faster than seeking by
+                       delta1 when there is no chance to match for
+                       a while.  */
+                    tp--;
+                    tp = memchr_trans (tp, gc1, text + size - tp, trans);
+                    if (! tp)
+                      return -1;
+                    tp++;
+                  }
               }
           }
-        break;
-      found:
         if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
           return tp - text;
       }
-- 
1.9.0