m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, branch-1.6, updated. v1.5.89a-101-gd7aec39


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, branch-1.6, updated. v1.5.89a-101-gd7aec39
Date: Fri, 20 Feb 2009 00:08:21 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=d7aec39c918085b5d9ad90fd5f02b55492ff6f15

The branch, branch-1.6 has been updated
       via  d7aec39c918085b5d9ad90fd5f02b55492ff6f15 (commit)
      from  9429358695faabee31d0413cee0d2376c7c73048 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit d7aec39c918085b5d9ad90fd5f02b55492ff6f15
Author: Eric Blake <address@hidden>
Date:   Wed Feb 18 17:07:58 2009 -0700

    29b625aa941718e43cc04dfc217f314518bbc6d1
    Speed up translit when from argument is short.
    
    * src/builtin.c (m4_translit): Use memchr2 when possible.
    * doc/m4.texinfo (Translit): Add tests.
    * NEWS: Document this.
    
    Signed-off-by: Eric Blake <address@hidden>
    (cherry picked from commit 29b625aa941718e43cc04dfc217f314518bbc6d1)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog      |    7 +++++++
 NEWS           |    7 +++++--
 doc/m4.texinfo |   27 +++++++++++++++++++++++++++
 src/builtin.c  |   35 ++++++++++++++++++++++++++++++-----
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 376ef68..3e58472 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2009-02-19  Eric Blake  <address@hidden>
+
+       Speed up translit when from argument is short.
+       * src/builtin.c (m4_translit): Use memchr2 when possible.
+       * doc/m4.texinfo (Translit): Add tests.
+       * NEWS: Document this.
+
 2009-02-18  Eric Blake  <address@hidden>
 
        Prefer buffer over byte operations.
diff --git a/NEWS b/NEWS
index d4839f5..8cdd193 100644
--- a/NEWS
+++ b/NEWS
@@ -28,8 +28,6 @@ Software Foundation, Inc.
    be silenced by applying this patch:
      http://git.sv.gnu.org/gitweb/?p=autoconf.git;a=commitdiff;h=714eeee87
 
-** Improve the speed of the input engine.
-
 ** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as
    required by POSIX.  The manual mentions a way to restore the LIFO order
    present in earlier GNU M4 versions.  NOTE: this change exposes a bug
@@ -159,6 +157,11 @@ contains the following beta features that were deemed worth deferring until
 ** The `divert' and `undivert' builtins have been made more efficient
    when using temporary files for large diversions.
 
+** The `translit' builtin has been made more efficient when the second
+   argument is short.
+
+** The input engine has been optimized for faster processing.
+
 ** The command line option `--debugfile', introduced in 1.4.7, now
    treats its argument as optional, in order to allow setting the debug
    output back to stderr when used without an argument; and order is now
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 3da0443..6b0620c 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -6546,6 +6546,33 @@ resulting @samp{b} is not further remapped to @samp{g}; the @samp{d} and
 translit(`«abc~', `~-»')
 @result{}abc
 @end example
+
+@comment Stress test short arguments, since they use a different code
+@comment path.
+@example
+translit(`abcdeabcde', `a')
+@result{}bcdebcde
+translit(`abcdeabcde', `ab')
+@result{}cdecde
+translit(`abcdeabcde', `a', `f')
+@result{}fbcdefbcde
+translit(`abcdeabcde', `a', `f')
+@result{}fbcdefbcde
+translit(`abcdeabcde', `a', `fg')
+@result{}fbcdefbcde
+translit(`abcdeabcde', `ab', `f')
+@result{}fcdefcde
+translit(`abcdeabcde', `ab', `fg')
+@result{}fgcdefgcde
+translit(`abcdeabcde', `ab', `ba')
+@result{}bacdebacde
+translit(`abcdeabcde', `e', `f')
+@result{}abcdfabcdf
+translit(`abc', `', `cde')
+@result{}abc
+translit(`', `a', `bc')
+@result{}
+@end example
 @end ignore
 
 Omitting @var{chars} evokes a warning, but still produces output.
diff --git a/src/builtin.c b/src/builtin.c
index 0a62f99..6c1673d 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -24,6 +24,7 @@
 
 #include "m4.h"
 
+#include "memchr2.h"
 #include "regex.h"
 
 #if HAVE_SYS_WAIT_H
@@ -2086,8 +2087,8 @@ m4_translit (struct obstack *obs, int argc, macro_arguments *argv)
   const char *to;
   size_t from_len;
   size_t to_len;
-  char map[UCHAR_MAX + 1] = {0};
-  char found[UCHAR_MAX + 1] = {0};
+  char map[UCHAR_MAX + 1];
+  char found[UCHAR_MAX + 1];
   unsigned char ch;
 
   enum { ASIS, REPLACE, DELETE };
@@ -2103,15 +2104,37 @@ m4_translit (struct obstack *obs, int argc, macro_arguments *argv)
 
   from = ARG (2);
   from_len = ARG_LEN (2);
-  if (memchr (from, '-', from_len) != NULL)
-    from = expand_ranges (from, &from_len, arg_scratch ());
 
   to = ARG (3);
   to_len = ARG_LEN (3);
   if (memchr (to, '-', to_len) != NULL)
     to = expand_ranges (to, &to_len, arg_scratch ());
 
-  assert (from && to);
+  /* If there are only one or two bytes to replace, it is faster to
+     use memchr2.  Using expand_ranges does nothing unless there are
+     at least three bytes.  */
+  if (from_len <= 2)
+    {
+      const char *p;
+      size_t len = ARG_LEN (1);
+      int second = from[from_len / 2];
+      data = ARG (1);
+      while ((p = (char *) memchr2 (data, from[0], second, len)))
+       {
+         obstack_grow (obs, data, p - data);
+         len -= p - data + 1;
+         data = p + 1;
+         if (*p == from[0] && to_len)
+           obstack_1grow (obs, to[0]);
+         else if (*p == second && 1 < to_len)
+           obstack_1grow (obs, to[1]);
+       }
+      obstack_grow (obs, data, len);
+      return;
+    }
+
+  if (memchr (from, '-', from_len) != NULL)
+    from = expand_ranges (from, &from_len, arg_scratch ());
 
   /* Calling strchr(from) for each character in data is quadratic,
      since both strings can be arbitrarily long.  Instead, create a
@@ -2119,6 +2142,8 @@ m4_translit (struct obstack *obs, int argc, macro_arguments *argv)
      pass of data, for linear behavior.  Traditional behavior is that
      only the first instance of a character in from is consulted,
      hence the found map.  */
+  memset (map, 0, sizeof map);
+  memset (found, 0, sizeof found);
   while (from_len--)
     {
       ch = *from++;


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]