m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

speed up translit


From: Eric Blake
Subject: speed up translit
Date: Thu, 19 Feb 2009 06:36:54 -0700
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.19) Gecko/20081209 Thunderbird/2.0.0.19 Mnenhy/0.7.6.666

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

This one surprised me, in that the performance gain was actually
noticeable.  Using branch-1.4, and running autoconf on coreutils, I saw
the speed drop from 16.7s to 16.5, an improvement better than 1%.  I
coupled it with an autoconf patch to use reduce some heavily-used 3-byte
translits down to 2 bytes, for another half a percent improvement.  It
turns out that autoconf handles lots of large strings where only one or
two bytes need to be transformed (for example, m4_flatten converts just
newline to space).  The trick is recognizing that for a small conversion
set, the overhead of creating a 256-byte map and processing one byte at a
time is large compared to searching a word at a time for just the bytes
that need processing.

- --
Don't work too hard, make some time for fun as well!

Eric Blake             address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iEYEARECAAYFAkmdYHYACgkQ84KuGfSFAYDCrgCfbQpwSZIPmfGP1IDF3g/kmttL
cKUAnA7Zl9gX8udfWdjPVQOvcsKBodzZ
=xffh
-----END PGP SIGNATURE-----
>From 29b625aa941718e43cc04dfc217f314518bbc6d1 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 17:07:58 -0700
Subject: [PATCH] Speed up translit when from argument is short.

* m4/gnulib-cache.m4: Import memchr2 module.
* src/builtin.c (m4_translit): Use memchr2 when possible.
* doc/m4.texinfo (Translit): Add tests.
* NEWS: Document this.

Signed-off-by: Eric Blake <address@hidden>
---
 ChangeLog          |    6 +++++
 NEWS               |    3 ++
 doc/m4.texinfo     |   21 ++++++++++++++++++
 m4/gnulib-cache.m4 |    3 +-
 src/builtin.c      |   58 ++++++++++++++++++++++++++++++++++++---------------
 5 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 4f2f3d4..a80dfea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2009-02-18  Eric Blake  <address@hidden>

+       Speed up translit when from argument is short.
+       * m4/gnulib-cache.m4: Import memchr2 module.
+       * src/builtin.c (m4_translit): Use memchr2 when possible.
+       * doc/m4.texinfo (Translit): Add tests.
+       * NEWS: Document this.
+
        Update copyright year.
        * THANKS: Mention 2009 in copyright year.

diff --git a/NEWS b/NEWS
index 5a895bb..31821b7 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,9 @@ Software Foundation, Inc.
 ** The `divert' and `undivert' builtins have been made more efficient
    when using temporary files for large diversions.

+** The `translit' builtin has been made more efficient when the second
+   argument is short.
+
 ** The command line option `--debugfile', introduced in 1.4.7, now
    treats its argument as optional, in order to allow setting the debug
    output back to stderr when used without an argument; and order is now
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 08daafe..10e63ef 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -5780,6 +5780,27 @@ Translit
 translit(`«abc~', `~-»')
 @result{}abc
 @end example
+
address@hidden Stress test short arguments, since they use a different code
address@hidden path.
address@hidden
+translit(`abcdeabcde', `a')
address@hidden
+translit(`abcdeabcde', `ab')
address@hidden
+translit(`abcdeabcde', `a', `f')
address@hidden
+translit(`abcdeabcde', `a', `f')
address@hidden
+translit(`abcdeabcde', `a', `fg')
address@hidden
+translit(`abcdeabcde', `ab', `f')
address@hidden
+translit(`abcdeabcde', `ab', `fg')
address@hidden
+translit(`abcdeabcde', `ab', `ba')
address@hidden
address@hidden example
 @end ignore

 Omitting @var{chars} evokes a warning, but still produces output.
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index b439e8a..4ed53d0 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -15,7 +15,7 @@


 # Specification in the form of a command-line invocation:
-#   gnulib-tool --import --dir=. --local-dir=local --lib=libm4 
--source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests 
--aux-dir=build-aux --with-tests --avoid=lock-tests --avoid=tls-tests 
--no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset 
binary-io c-stack clean-temp cloexec close-stream closein config-h dirname 
error fdl-1.3 fflush filenamecat fopen fopen-safer fseeko gendocs getopt 
git-version-gen gnumakefile gnupload gpl-3.0 intprops mkstemp obstack progname 
regex sigaction stdbool stdint stdlib-safer strsignal strstr strtod strtol 
unlocked-io verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
+#   gnulib-tool --import --dir=. --local-dir=local --lib=libm4 
--source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests 
--aux-dir=build-aux --with-tests --avoid=lock-tests --avoid=tls-tests 
--no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset 
binary-io c-stack clean-temp cloexec close-stream closein config-h dirname 
error fdl-1.3 fflush filenamecat fopen fopen-safer fseeko gendocs getopt 
git-version-gen gnumakefile gnupload gpl-3.0 intprops memchr2 mkstemp obstack 
progname regex sigaction stdbool stdint stdlib-safer strsignal strstr strtod 
strtol unlocked-io verror version-etc version-etc-fsf xalloc xprintf 
xvasprintf-posix

 # Specification in the form of a few gnulib-tool.m4 macro invocations:
 gl_LOCAL_DIR([local])
@@ -46,6 +46,7 @@ gl_MODULES([
   gnupload
   gpl-3.0
   intprops
+  memchr2
   mkstemp
   obstack
   progname
diff --git a/src/builtin.c b/src/builtin.c
index a89b861..504075a 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -26,6 +26,7 @@

 extern FILE *popen ();

+#include "memchr2.h"
 #include "regex.h"

 #if HAVE_SYS_WAIT_H
@@ -1816,35 +1817,56 @@ expand_ranges (const char *s, struct obstack *obs)
 static void
 m4_translit (struct obstack *obs, int argc, token_data **argv)
 {
-  const char *data;
-  const char *from;
+  const char *data = ARG (1);
+  const char *from = ARG (2);
   const char *to;
-  char map[256] = {0};
-  char found[256] = {0};
+  char map[UCHAR_MAX + 1];
+  char found[UCHAR_MAX + 1];
   unsigned char ch;

-  if (bad_argc (argv[0], argc, 3, 4))
+  if (bad_argc (argv[0], argc, 3, 4) || !*from)
     {
       /* builtin(`translit') is blank, but translit(`abc') is abc.  */
-      if (argc == 2)
-       obstack_grow (obs, ARG (1), strlen (ARG (1)));
+      if (argc <= 2)
+       obstack_grow (obs, data, strlen (data));
       return;
     }

-  from = ARG (2);
-  if (strchr (from, '-') != NULL)
-    {
-      from = expand_ranges (from, obs);
-      if (from == NULL)
-       return;
-    }
-
   to = ARG (3);
   if (strchr (to, '-') != NULL)
     {
       to = expand_ranges (to, obs);
-      if (to == NULL)
-       return;
+      assert (to && *to);
+    }
+
+  /* If there are only one or two bytes to replace, it is faster to
+     use memchr2.  Using expand_ranges does nothing unless there are
+     at least three bytes.  */
+  if (!from[1] || !from[2])
+    {
+      const char *p;
+      size_t len = strlen (data);
+      while ((p = (char *) memchr2 (data, from[0], from[1], len)))
+       {
+         obstack_grow (obs, data, p - data);
+         len -= p - data;
+         if (!len)
+           return;
+         data = p + 1;
+         len--;
+         if (*p == from[0] && to[0])
+           obstack_1grow (obs, to[0]);
+         else if (*p == from[1] && to[0] && to[1])
+           obstack_1grow (obs, to[1]);
+       }
+      obstack_grow (obs, data, len);
+      return;
+    }
+
+  if (strchr (from, '-') != NULL)
+    {
+      from = expand_ranges (from, obs);
+      assert (from && *from);
     }

   /* Calling strchr(from) for each character in data is quadratic,
@@ -1853,6 +1875,8 @@ m4_translit (struct obstack *obs, int argc, token_data 
**argv)
      pass of data, for linear behavior.  Traditional behavior is that
      only the first instance of a character in from is consulted,
      hence the found map.  */
+  memset (map, 0, sizeof map);
+  memset (found, 0, sizeof found);
   for ( ; (ch = *from) != '\0'; from++)
     {
       if (! found[ch])
-- 
1.6.1.2


reply via email to

[Prev in Thread] Current Thread [Next in Thread]