m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Changes to m4/modules/gnu.c,v


From: Eric Blake
Subject: Changes to m4/modules/gnu.c,v
Date: Tue, 09 Oct 2007 20:06:52 +0000

CVSROOT:        /sources/m4
Module name:    m4
Changes by:     Eric Blake <ericb>      07/10/09 20:06:52

Index: modules/gnu.c
===================================================================
RCS file: /sources/m4/m4/modules/gnu.c,v
retrieving revision 1.76
retrieving revision 1.77
diff -u -b -r1.76 -r1.77
--- modules/gnu.c       30 Sep 2007 01:31:38 -0000      1.76
+++ modules/gnu.c       9 Oct 2007 20:06:52 -0000       1.77
@@ -102,62 +102,124 @@
 
 
 
-/* The regs_allocated field in an re_pattern_buffer refers to the
-   state of the re_registers struct used in successive matches with
-   the same compiled pattern:  */
+/* Regular expressions.  Reuse re_registers among multiple
+   re_pattern_buffer allocations to reduce malloc usage.  */
 
+/* Maybe this is worth making runtime tunable.  Too small, and nothing
+   gets cached because the working set of active regex is larger than
+   the cache, and we are always swapping out entries.  Too large, and
+   the time spent searching the cache for a match overtakes the time
+   saved by caching.  For now, this size proved reasonable for the
+   typical working set of Autoconf 2.62.  */
+#define REGEX_CACHE_SIZE 16
+
+/* Structure for using a compiled regex, as well as making it easier
+   to cache frequently used expressions.  */
 typedef struct {
-  struct re_pattern_buffer pat;        /* compiled regular expression */
-  struct re_registers regs;    /* match registers */
+  unsigned count;                      /* usage counter */
+  int resyntax;                                /* flavor of regex */
+  size_t len;                          /* length of string */
+  char *str;                           /* copy of compiled string */
+  struct re_pattern_buffer *pat;       /* compiled regex, allocated */
+  struct re_registers regs;            /* match registers, reused */
 } m4_pattern_buffer;
 
-static m4_pattern_buffer gnu_buf;      /* compiled regular expression */
+/* Storage for the cache of regular expressions.  */
+static m4_pattern_buffer regex_cache[REGEX_CACHE_SIZE];
 
-/* Compile a REGEXP using the RESYNTAX bits, and return the buffer.
-   Report errors on behalf of CALLER.  If NO_SUB, optimize the
-   compilation to skip filling out the regs member of the buffer.  */
+/* Compile a REGEXP using the RESYNTAX flavor, and return the buffer.
+   On error, report the problem on behalf of CALLER, and return
+   NULL.  */
 
 static m4_pattern_buffer *
-m4_regexp_compile (m4 *context, const char *caller,
-                  const char *regexp, int resyntax, bool no_sub)
+regexp_compile (m4 *context, const char *caller, const char *regexp,
+               int resyntax)
 {
-  /* gnu_buf is guaranteed to start life 0-initialized, which works in the
-     below algorithm.
+  /* regex_cache is guaranteed to start life 0-initialized, which
+     works in the algorithm below.
 
      FIXME - this method is not reentrant, since re_compile_pattern
      mallocs memory, depends on the global variable re_syntax_options
      for its syntax (but at least the compiled regex remembers its
      syntax even if the global variable changes later), and since we
      use a static variable.  To be reentrant, we would need a mutex in
-     this method, and move the storage for gnu_buf into context.  */
+     this method, and move the storage for regex_cache into context.  */
 
   const char *msg;             /* error message from re_compile_pattern */
+  int i;                       /* iterator */
+  m4_pattern_buffer *victim;   /* cache slot to replace */
+  unsigned victim_count;       /* track which victim to replace */
+  struct re_pattern_buffer *pat;/* newly compiled regex */
+  size_t len = strlen (regexp);        /* regex length */
+
+  /* First, check if REGEXP is already cached with the given RESYNTAX.
+     If so, increase its use count and return it.  */
+  for (i = 0; i < REGEX_CACHE_SIZE; i++)
+    if (len == regex_cache[i].len && resyntax == regex_cache[i].resyntax
+       && regex_cache[i].str && memcmp (regexp, regex_cache[i].str, len) == 0)
+      {
+       regex_cache[i].count++;
+       return &regex_cache[i];
+      }
 
+  /* Next, check if REGEXP can be compiled.  */
+  pat = xzalloc (sizeof *pat);
   re_set_syntax (resyntax);
-  regfree (&gnu_buf.pat);
-  gnu_buf.pat.no_sub = no_sub;
-  msg = re_compile_pattern (regexp, strlen (regexp), &gnu_buf.pat);
+  msg = re_compile_pattern (regexp, len, pat);
 
   if (msg != NULL)
     {
       m4_error (context, 0, 0, _("%s: bad regular expression `%s': %s"),
                caller, regexp, msg);
+      regfree (pat);
+      free (pat);
       return NULL;
     }
 
-  re_set_registers (&gnu_buf.pat, &gnu_buf.regs, gnu_buf.regs.num_regs,
-                   gnu_buf.regs.start, gnu_buf.regs.end);
-  return &gnu_buf;
+  /* Now, find a victim slot.  Decrease the count of all entries, then
+     prime the count of the victim slot at REGEX_CACHE_SIZE.  This
+     way, frequently used entries and newly created entries are least
+     likely to be victims next time we have a cache miss.  */
+  victim = regex_cache;
+  victim_count = victim->count;
+  if (victim_count)
+    victim->count--;
+  for (i = 1; i < REGEX_CACHE_SIZE; i++)
+    {
+      if (regex_cache[i].count < victim_count)
+       {
+         victim_count = regex_cache[i].count;
+         victim = &regex_cache[i];
+       }
+      if (regex_cache[i].count)
+       regex_cache[i].count--;
+    }
+  victim->count = REGEX_CACHE_SIZE;
+  victim->resyntax = resyntax;
+  victim->len = len;
+  if (victim->str)
+    {
+      free (victim->str);
+      regfree (victim->pat);
+      free (victim->pat);
+    }
+  victim->str = xstrdup (regexp);
+  victim->pat = pat;
+  re_set_registers (pat, &victim->regs, victim->regs.num_regs,
+                   victim->regs.start, victim->regs.end);
+  return victim;
 }
 
 
-/* Wrap up GNU Regex re_search call to work with an m4_pattern_buffer.  */
+/* Wrap up GNU Regex re_search call to work with an m4_pattern_buffer.
+   If NO_SUB, then storing matches in buf->regs is not necessary.  */
 
 static int
-m4_regexp_search (m4_pattern_buffer *buf, const char *string,
-                 const int size, const int start, const int range)
+regexp_search (m4_pattern_buffer *buf, const char *string, const int size,
+              const int start, const int range, bool no_sub)
 {
-  return re_search (&buf->pat, string, size, start, range, &buf->regs);
+  return re_search (buf->pat, string, size, start, range,
+                   no_sub ? NULL : &buf->regs);
 }
 
 
@@ -192,7 +254,7 @@
        case '1': case '2': case '3': case '4': case '5': case '6':
        case '7': case '8': case '9':
          ch -= '0';
-         if (buf->pat.re_nsub < ch)
+         if (buf->pat->re_nsub < ch)
            m4_warn (context, 0, _("%s: sub-expression %d not present"),
                     caller, ch);
          else if (buf->regs.end[ch] > 0)
@@ -214,14 +276,14 @@
 
 
 /* For each match against compiled REGEXP (held in BUF -- as returned
-   by m4_regexp_compile) in VICTIM, substitute REPLACE.  Non-matching
+   by regexp_compile) in VICTIM, substitute REPLACE.  Non-matching
    characters are copied verbatim, and the result copied to the
    obstack.  Errors are reported on behalf of CALLER.  Return true if
    a substitution was made.  If IGNORE_DUPLICATES is set, don't worry
    about completing the obstack when returning false.  */
 
 static bool
-m4_regexp_substitute (m4 *context, m4_obstack *obs, const char *caller,
+regexp_substitute (m4 *context, m4_obstack *obs, const char *caller,
                      const char *victim, const char *regexp,
                      m4_pattern_buffer *buf, const char *replace,
                      bool ignore_duplicates)
@@ -233,8 +295,8 @@
 
   while (offset <= length)
     {
-      matchpos = m4_regexp_search (buf, victim, length,
-                                  offset, length - offset);
+      matchpos = regexp_search (buf, victim, length, offset, length - offset,
+                               false);
 
       if (matchpos < 0)
        {
@@ -284,12 +346,19 @@
 /* Reclaim memory used by this module.  */
 M4FINISH_HANDLER(gnu)
 {
-  regfree (&gnu_buf.pat);
-  free (gnu_buf.regs.start);
-  free (gnu_buf.regs.end);
+  int i;
+  for (i = 0; i < REGEX_CACHE_SIZE; i++)
+    if (regex_cache[i].str)
+      {
+       free (regex_cache[i].str);
+       regfree (regex_cache[i].pat);
+       free (regex_cache[i].pat);
+       free (regex_cache[i].regs.start);
+       free (regex_cache[i].regs.end);
+      }
   /* If this module was preloaded, then we need to explicitly reset
      the memory in case it gets reloaded.  */
-  memset (&gnu_buf, 0, sizeof gnu_buf);
+  memset (&regex_cache, 0, sizeof regex_cache);
 }
 
 
@@ -672,11 +741,11 @@
       return;
     }
 
-  buf = m4_regexp_compile (context, me, pattern, resyntax, false);
+  buf = regexp_compile (context, me, pattern, resyntax);
   if (!buf)
     return;
 
-  m4_regexp_substitute (context, obs, me, M4ARG (1), pattern, buf,
+  regexp_substitute (context, obs, me, M4ARG (1), pattern, buf,
                        replace, false);
 }
 
@@ -741,12 +810,12 @@
       return;
     }
 
-  buf = m4_regexp_compile (context, me, pattern, resyntax, replace == NULL);
+  buf = regexp_compile (context, me, pattern, resyntax);
   if (!buf)
     return;
 
   length = strlen (M4ARG (1));
-  startpos = m4_regexp_search (buf, M4ARG (1), length, 0, length);
+  startpos = regexp_search (buf, M4ARG (1), length, 0, length, replace == NULL);
 
   if (startpos == -2)
     {
@@ -797,7 +866,7 @@
            return;
        }
 
-      buf = m4_regexp_compile (context, me, regexp, resyntax, false);
+      buf = regexp_compile (context, me, regexp, resyntax);
       if (!buf)
        return;
 
@@ -810,7 +879,7 @@
        {
          const char *name = data.base[0];
 
-         if (m4_regexp_substitute (context, &rename_obs, me, name, regexp,
+         if (regexp_substitute (context, &rename_obs, me, name, regexp,
                                    buf, replace, true))
            {
              const char *renamed = obstack_finish (&rename_obs);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]