m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, branch-1_4, updated. 4f74b635626f


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, branch-1_4, updated. 4f74b635626f8628301a77b6a4926b1b0738ef2b
Date: Tue, 09 Oct 2007 16:50:29 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=4f74b635626f8628301a77b6a4926b1b0738ef2b

The branch, branch-1_4 has been updated
       via  4f74b635626f8628301a77b6a4926b1b0738ef2b (commit)
       via  10f18604f16b6dc60803230289a02151abf9c778 (commit)
      from  3be7d5f421c547a2118a4fc703c37ff648b55c55 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 4f74b635626f8628301a77b6a4926b1b0738ef2b
Author: Eric Blake <address@hidden>
Date:   Tue Oct 9 10:25:37 2007 -0600

    Fix regexp regression of 2007-09-29.
    
    * src/builtin.c (substitute): Allow NULL regs when no
    subexpressions were present.
    (m4_regexp): Handle \ escapes even with empty regex.
    * doc/m4.texinfo (Regexp, Patsubst): Catch this bug.
    
    Signed-off-by: Eric Blake <address@hidden>

commit 10f18604f16b6dc60803230289a02151abf9c778
Author: Eric Blake <address@hidden>
Date:   Tue Oct 9 08:31:33 2007 -0600

    Cache regex compilation for another autoconf speedup.
    
    * src/m4.h (free_macro_sequence): Rename...
    (free_regex): ...to this.
    * src/m4.c (main): Update caller.
    * src/builtin.c (REGEX_CACHE_SIZE, m4_regex, regex_cache): New
    declarations.
    (compile_pattern): New function; cache recent regexes.
    (free_regex): Rename, and clean up additional memory.
    (m4_regexp, m4_patsubst): Use new function.
    
    Signed-off-by: Eric Blake <address@hidden>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog      |   18 ++++++
 doc/m4.texinfo |    8 +-
 src/builtin.c  |  185 ++++++++++++++++++++++++++++++++++++++++++--------------
 src/m4.c       |    2 +-
 src/m4.h       |    2 +-
 5 files changed, 164 insertions(+), 51 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7c9755a..42bbaeb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2007-10-09  Eric Blake  <address@hidden>
+
+       Fix regexp regression of 2007-09-29.
+       * src/builtin.c (substitute): Allow NULL regs when no
+       subexpressions were present.
+       (m4_regexp): Handle \ escapes even with empty regex.
+       * doc/m4.texinfo (Regexp, Patsubst): Catch this bug.
+
+       Cache regex compilation for another autoconf speedup.
+       * src/m4.h (free_macro_sequence): Rename...
+       (free_regex): ...to this.
+       * src/m4.c (main): Update caller.
+       * src/builtin.c (REGEX_CACHE_SIZE, m4_regex, regex_cache): New
+       declarations.
+       (compile_pattern): New function; cache recent regexes.
+       (free_regex): Rename, and clean up additional memory.
+       (m4_regexp, m4_patsubst): Use new function.
+
 2007-10-02  Eric Blake  <address@hidden>
 
        Document quoting pitfalls in capitalize.
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 1bc5be9..64e37ba 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -4703,8 +4703,8 @@ regexp(`abc')
 @result{}0
 regexp(`abc', `')
 @result{}0
-regexp(`abc', `', `def')
address@hidden
+regexp(`abc', `', `\\def')
address@hidden
 @end example
 
 @node Substr
@@ -4951,8 +4951,8 @@ patsubst(`abc')
 @result{}abc
 patsubst(`abc', `')
 @result{}abc
-patsubst(`abc', `', `-')
address@hidden
+patsubst(`abc', `', `\\-')
address@hidden
 @end example
 
 @node Format
diff --git a/src/builtin.c b/src/builtin.c
index 8974b1a..75859bd 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -231,6 +231,102 @@ static struct re_registers macro_sequence_regs;
 /* True if --warn-macro-sequence is in effect.  */
 static bool macro_sequence_inuse;
 
+/* Maybe this is worth making runtime tunable.  Too small, and nothing
+   gets cached because the working set of active regex is larger than
+   the cache, and we are always swapping out entries.  Too large, and
+   the time spent searching the cache for a match overtakes the time
+   saved by caching.  For now, this size proved reasonable for the
+   typical working set of Autoconf 2.62.  */
+#define REGEX_CACHE_SIZE 16
+
+/* Structure for caching compiled regex.  */
+struct m4_regex {
+  unsigned count;                      /* usage counter */
+  size_t len;                          /* length of string */
+  char *str;                           /* copy of compiled string */
+  struct re_pattern_buffer *buf;       /* compiled regex, allocated */
+  struct re_registers regs;            /* match registers, reused */
+};
+typedef struct m4_regex m4_regex;
+
+/* Storage for the cache of regular expressions.  */
+static m4_regex regex_cache[REGEX_CACHE_SIZE];
+
+/*------------------------------------------------------------------.
+| Compile STR, with length LEN, into a regex.  On success, set BUF  |
+| and REGS to the compiled regex.  Compilation is cached, so do not |
+| free the results here; rather, use free_regex at the end of the   |
+| program.  Return NULL on success, or an error message.           |
+`------------------------------------------------------------------*/
+static const char *
+compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf,
+                struct re_registers **regs)
+{
+  int i;
+  m4_regex *victim;
+  unsigned victim_count;
+  struct re_pattern_buffer *new_buf;
+  struct re_registers *new_regs;
+  const char *msg;
+
+  /* First, check if STR is already cached.  If so, increase its use
+     count and return it.  */
+  for (i = 0; i < REGEX_CACHE_SIZE; i++)
+    if (len == regex_cache[i].len && regex_cache[i].str
+       && memcmp (str, regex_cache[i].str, len) == 0)
+      {
+       *buf = regex_cache[i].buf;
+       *regs = &regex_cache[i].regs;
+       regex_cache[i].count++;
+       return NULL;
+      }
+
+  /* Next, check if STR can be compiled.  */
+  new_buf = xzalloc (sizeof *new_buf);
+  msg = re_compile_pattern (str, len, new_buf);
+  if (msg)
+    {
+      regfree (new_buf);
+      free (new_buf);
+      return msg;
+    }
+
+  /* Now, find a victim slot.  Decrease the count of all entries, then
+     prime the count of the victim slot at REGEX_CACHE_SIZE.  This
+     way, frequently used entries and newly created entries are least
+     likely to be victims next time we have a cache miss.  */
+  victim = regex_cache;
+  victim_count = victim->count;
+  if (victim_count)
+    victim->count--;
+  for (i = 1; i < REGEX_CACHE_SIZE; i++)
+    {
+      if (regex_cache[i].count < victim_count)
+       {
+         victim_count = regex_cache[i].count;
+         victim = &regex_cache[i];
+       }
+      if (regex_cache[i].count)
+       regex_cache[i].count--;
+    }
+  victim->count = REGEX_CACHE_SIZE;
+  victim->len = len;
+  if (victim->str)
+    {
+      free (victim->str);
+      regfree (victim->buf);
+      free (victim->buf);
+    }
+  victim->str = xstrdup (str);
+  victim->buf = new_buf;
+  new_regs = &victim->regs;
+  re_set_registers (new_buf, new_regs, new_regs->num_regs,
+                   new_regs->start, new_regs->end);
+  *buf = new_buf;
+  *regs = new_regs;
+  return NULL;
+}
+
 /*----------------------------------------.
 | Clean up regular expression variables.  |
 `----------------------------------------*/
@@ -273,14 +369,21 @@ set_macro_sequence (const char *regexp)
   macro_sequence_inuse = true;
 }
 
-/*------------------------------------------------------------.
-| Free dynamic memory utilized by the define sequence regular |
-| expression.                                                |
-`------------------------------------------------------------*/
+/*------------------------------------------------------.
+| Free dynamic memory utilized by regular expressions.  |
+`------------------------------------------------------*/
 void
-free_macro_sequence (void)
+free_regex (void)
 {
+  int i;
   free_pattern_buffer (&macro_sequence_buf, &macro_sequence_regs);
+  for (i = 0; i < REGEX_CACHE_SIZE; i++)
+    if (regex_cache[i].str)
+      {
+       free (regex_cache[i].str);
+       free_pattern_buffer (regex_cache[i].buf, &regex_cache[i].regs);
+       free (regex_cache[i].buf);
+      }
 }
 
 /*-------------------------------------------------------------------------.
@@ -1906,14 +2009,15 @@ Warning: \\0 will disappear, use \\& instead in 
replacements"));
          /* Fall through.  */
 
        case '&':
-         obstack_grow (obs, victim + regs->start[0],
-                       regs->end[0] - regs->start[0]);
+         if (regs)
+           obstack_grow (obs, victim + regs->start[0],
+                         regs->end[0] - regs->start[0]);
          break;
 
        case '1': case '2': case '3': case '4': case '5': case '6':
        case '7': case '8': case '9':
          ch -= '0';
-         if (regs->num_regs - 1 <= ch)
+         if (!regs || regs->num_regs - 1 <= ch)
            M4ERROR ((warning_status, 0,
                      "Warning: sub-expression %d not present", ch));
          else if (regs->end[ch] > 0)
@@ -1951,12 +2055,12 @@ init_pattern_buffer (struct re_pattern_buffer *buf, 
struct re_registers *regs)
     }
 }
 
-/*--------------------------------------------------------------------------.
-| Regular expression version of index.  Given two arguments, expand to the  |
-| index of the first match of the second argument (a regexp) in the first.  |
-| Expand to -1 if here is no match.  Given a third argument, is changes        
    |
-| the expansion to this argument.                                          |
-`--------------------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Regular expression version of index.  Given two arguments, expand |
+| to the index of the first match of the second argument (a regexp) |
+| in the first.  Expand to -1 if there is no match.  Given a third  |
+| argument, a match is substituted according to this argument.      |
+`------------------------------------------------------------------*/
 
 static void
 m4_regexp (struct obstack *obs, int argc, token_data **argv)
@@ -1965,8 +2069,8 @@ m4_regexp (struct obstack *obs, int argc, token_data 
**argv)
   const char *regexp;          /* regular expression */
   const char *repl;            /* replacement string */
 
-  struct re_pattern_buffer buf;        /* compiled regular expression */
-  struct re_registers regs;    /* for subexpression matches */
+  struct re_pattern_buffer *buf;/* compiled regular expression */
+  struct re_registers *regs;   /* for subexpression matches */
   const char *msg;             /* error message from re_compile_pattern */
   int startpos;                        /* start position of match */
   int length;                  /* length of first argument */
@@ -1989,25 +2093,22 @@ m4_regexp (struct obstack *obs, int argc, token_data 
**argv)
       if (argc == 3)
        shipout_int (obs, 0);
       else
-       obstack_grow (obs, repl, strlen (repl));
+       substitute (obs, victim, repl, NULL);
       return;
     }
 
-  init_pattern_buffer (&buf, &regs);
-  msg = re_compile_pattern (regexp, strlen (regexp), &buf);
-
+  msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
   if (msg != NULL)
     {
       M4ERROR ((warning_status, 0,
                "bad regular expression: `%s': %s", regexp, msg));
-      free_pattern_buffer (&buf, &regs);
       return;
     }
 
   length = strlen (victim);
   /* Avoid overhead of allocating regs if we won't use it.  */
-  startpos = re_search (&buf, victim, length, 0, length,
-                       argc == 3 ? NULL : &regs);
+  startpos = re_search (buf, victim, length, 0, length,
+                       argc == 3 ? NULL : regs);
 
   if (startpos == -2)
     M4ERROR ((warning_status, 0,
@@ -2015,17 +2116,16 @@ m4_regexp (struct obstack *obs, int argc, token_data 
**argv)
   else if (argc == 3)
     shipout_int (obs, startpos);
   else if (startpos >= 0)
-    substitute (obs, victim, repl, &regs);
-
-  free_pattern_buffer (&buf, &regs);
+    substitute (obs, victim, repl, regs);
 }
 
-/*--------------------------------------------------------------------------.
-| Substitute all matches of a regexp occuring in a string.  Each match of   |
-| the second argument (a regexp) in the first argument is changed to the    |
-| third argument, with \& substituted by the matched text, and \N          |
-| substituted by the text matched by the Nth parenthesized sub-expression.  |
-`--------------------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Substitute all matches of a regexp occurring in a string.  Each   |
+| match of the second argument (a regexp) in the first argument is  |
+| changed to the third argument, with \& substituted by the matched |
+| text, and \N substituted by the text matched by the Nth           |
+| parenthesized sub-expression.                                     |
+`------------------------------------------------------------------*/
 
 static void
 m4_patsubst (struct obstack *obs, int argc, token_data **argv)
@@ -2034,8 +2134,8 @@ m4_patsubst (struct obstack *obs, int argc, token_data 
**argv)
   const char *regexp;          /* regular expression */
   const char *repl;
 
-  struct re_pattern_buffer buf;        /* compiled regular expression */
-  struct re_registers regs;    /* for subexpression matches */
+  struct re_pattern_buffer *buf;/* compiled regular expression */
+  struct re_registers *regs;   /* for subexpression matches */
   const char *msg;             /* error message from re_compile_pattern */
   int matchpos;                        /* start position of match */
   int offset;                  /* current match offset */
@@ -2061,14 +2161,11 @@ m4_patsubst (struct obstack *obs, int argc, token_data 
**argv)
       return;
     }
 
-  init_pattern_buffer (&buf, &regs);
-  msg = re_compile_pattern (regexp, strlen (regexp), &buf);
-
+  msg = compile_pattern (regexp, strlen (regexp), &buf, &regs);
   if (msg != NULL)
     {
       M4ERROR ((warning_status, 0,
                "bad regular expression `%s': %s", regexp, msg));
-      free (buf.buffer);
       return;
     }
 
@@ -2078,8 +2175,8 @@ m4_patsubst (struct obstack *obs, int argc, token_data 
**argv)
   matchpos = 0;
   while (offset <= length)
     {
-      matchpos = re_search (&buf, victim, length,
-                           offset, length - offset, &regs);
+      matchpos = re_search (buf, victim, length,
+                           offset, length - offset, regs);
       if (matchpos < 0)
        {
 
@@ -2102,19 +2199,17 @@ m4_patsubst (struct obstack *obs, int argc, token_data 
**argv)
 
       /* Handle the part of the string that was covered by the match.  */
 
-      substitute (obs, victim, repl, &regs);
+      substitute (obs, victim, repl, regs);
 
       /* Update the offset to the end of the match.  If the regexp
         matched a null string, advance offset one more, to avoid
         infinite loops.  */
 
-      offset = regs.end[0];
-      if (regs.start[0] == regs.end[0])
+      offset = regs->end[0];
+      if (regs->start[0] == regs->end[0])
        obstack_1grow (obs, victim[offset++]);
     }
   obstack_1grow (obs, '\0');
-
-  free_pattern_buffer (&buf, &regs);
 }
 
 /* Finally, a placeholder builtin.  This builtin is not installed by
diff --git a/src/m4.c b/src/m4.c
index 2d5ced0..daa15a8 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -590,6 +590,6 @@ main (int argc, char *const *argv, char *const *envp)
       undivert_all ();
     }
   output_exit ();
-  free_macro_sequence ();
+  free_regex ();
   exit (retcode);
 }
diff --git a/src/m4.h b/src/m4.h
index 3d1178f..38f9d09 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -416,7 +416,7 @@ struct re_registers;
 void builtin_init (void);
 void define_builtin (const char *, const builtin *, symbol_lookup);
 void set_macro_sequence (const char *);
-void free_macro_sequence (void);
+void free_regex (void);
 void define_user_macro (const char *, const char *, symbol_lookup);
 void undivert_all (void);
 void expand_user_macro (struct obstack *, symbol *, int, token_data **);


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]