m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

head: Re: branch-1_4 regexp coredump


From: Eric Blake
Subject: head: Re: branch-1_4 regexp coredump
Date: Mon, 21 Aug 2006 00:12:28 +0000 (UTC)
User-agent: Loom/3.14 (http://gmane.org/)

Eric Blake <ebb9 <at> byu.net> writes:

> 2006-08-18  Eric Blake  <ebb9 <at> byu.net>
> 
>       Regular expressions were leaking memory.
>       * src/builtin.c (init_pattern_buffer, free_pattern_buffer): New
>       helper methods.

Ported to head as follows (fortunately, I did not need to port the changeword
stuff, since that is no longer in head).

2006-08-20  Eric Blake  <address@hidden>

        * modules/gnu.c (includes): Assume stdlib.h, errno.
        (m4_regexp_compile): Add no_sub parameter, avoid memory leaks.
        (substitute): Add caller parameter, avoid out-of-bounds memory
        references.
        (m4_regexp_substitute, patsubst, regexp, renamesyms): Adjust
        callers.

Index: modules/gnu.c
===================================================================
RCS file: /sources/m4/m4/modules/gnu.c,v
retrieving revision 1.46
diff -u -p -p -r1.46 gnu.c
--- modules/gnu.c       9 Aug 2006 21:33:24 -0000       1.46
+++ modules/gnu.c       21 Aug 2006 00:08:09 -0000
@@ -21,21 +21,13 @@
 #  include <config.h>
 #endif
 
-#include <ctype.h>
-
-#if HAVE_STDLIB_H
-#  include <stdlib.h>
-#endif
-
 #include <m4module.h>
 #include <modules/m4.h>
 
-#include <errno.h>
-#ifndef errno
-int errno;
-#endif
-
 #include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
 
 #ifdef NDEBUG
 #  include "m4private.h"
@@ -64,7 +56,7 @@ int errno;
        BUILTIN(patsubst,       false,  true,   3,      5  )    \
        BUILTIN(regexp,         false,  true,   3,      5  )    \
        BUILTIN(renamesyms,     false,  true,   3,      4  )    \
-       BUILTIN(symbols,        false,  false,  0,      -1 )    \
+       BUILTIN(symbols,        false,  false,  1,      -1 )    \
        BUILTIN(syncoutput,     false,  true,   2,      2  )    \
 
 
@@ -114,26 +106,30 @@ typedef struct {
 
 
 /* Compile a REGEXP using the RESYNTAX bits, and return the buffer.
-   Report errors on behalf of CALLER.  */
+   Report errors on behalf of CALLER.  If NO_SUB, optimize the
+   compilation to skip filling out the regs member of the buffer.  */
 
 static m4_pattern_buffer *
 m4_regexp_compile (m4 *context, const char *caller,
-                  const char *regexp, int resyntax)
+                  const char *regexp, int resyntax, bool no_sub)
 {
+  /* buf is guaranteed to start life 0-initialized, which works in the
+     below algorithm.
+
+     FIXME - this method is not reentrant, since re_compile_pattern
+     mallocs memory, depends on the global variable re_syntax_options
+     for its syntax (but at least the compiled regex remembers its
+     syntax even if the global variable changes later), and since we
+     use a static variable.  To be reentrant, we would need a mutex in
+     this method, and we should have a way to free the memory used by
+     buf when this module is unloaded.  */
+  
   static m4_pattern_buffer buf;        /* compiled regular expression */
-  static bool buf_initialized = false;
   const char *msg;             /* error message from re_compile_pattern */
 
-  if (!buf_initialized)
-    {
-      buf_initialized  = true;
-      buf.pat.buffer   = NULL;
-      buf.pat.allocated        = 0;
-      buf.pat.fastmap  = NULL;
-      buf.pat.translate        = NULL;
-    }
-
   re_set_syntax (resyntax);
+  regfree (&buf.pat);
+  buf.pat.no_sub = no_sub;
   msg = re_compile_pattern (regexp, strlen (regexp), &buf.pat);
 
   if (msg != NULL)
@@ -143,6 +139,8 @@ m4_regexp_compile (m4 *context, const ch
       return NULL;
     }
 
+  re_set_registers (&buf.pat, &buf.regs, buf.regs.num_regs, buf.regs.start,
+                   buf.regs.end);
   return &buf;
 }
 
@@ -164,10 +162,10 @@ m4_regexp_search (m4_pattern_buffer *buf
    substituted by the text matched by the Nth parenthesized sub-expression.  */
 
 static void
-substitute (m4 *context, m4_obstack *obs, const char *victim,
-           const char *repl, m4_pattern_buffer *buf)
+substitute (m4 *context, m4_obstack *obs, const char *caller,
+           const char *victim, const char *repl, m4_pattern_buffer *buf)
 {
-  register unsigned int ch;
+  unsigned int ch;
 
   for (;;)
     {
@@ -188,11 +186,21 @@ substitute (m4 *context, m4_obstack *obs
        case '1': case '2': case '3': case '4': case '5': case '6':
        case '7': case '8': case '9':
          ch -= '0';
-         if (buf->regs.end[ch] > 0)
+         if (buf->pat.re_nsub < ch)
+           m4_warn (context, 0,
+                    _("Warning: %s: sub-expression %d not present"),
+                    caller, ch);
+         else if (buf->regs.end[ch] > 0)
            obstack_grow (obs, victim + buf->regs.start[ch],
                          buf->regs.end[ch] - buf->regs.start[ch]);
          break;
 
+       case '\0':
+         m4_warn (context, 0,
+                  _("Warning: %s: trailing \\ ignored in replacement"),
+                  caller);
+         return;
+
        default:
          obstack_1grow (obs, ch);
          break;
@@ -243,7 +251,7 @@ m4_regexp_substitute (m4 *context, m4_ob
 
       /* Handle the part of the string that was covered by the match.  */
 
-      substitute (context, obs, victim, replace, buf);
+      substitute (context, obs, caller, victim, replace, buf);
 
       /* Update the offset to the end of the match.  If the regexp
         matched a null string, advance offset one more, to avoid
@@ -526,7 +534,7 @@ M4BUILTIN_HANDLER (patsubst)
        return;
     }
 
-  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
+  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax, false);
   if (!buf)
     return;
 
@@ -583,14 +591,14 @@ M4BUILTIN_HANDLER (regexp)
     else
       regexp(VICTIM, REGEXP)  */
 
-  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
+  buf = m4_regexp_compile (context, me, M4ARG (2), resyntax, argc == 3);
   if (!buf)
     return;
 
   length = strlen (M4ARG (1));
   startpos = m4_regexp_search (buf, M4ARG (1), length, 0, length);
 
-  if (startpos  == -2)
+  if (startpos == -2)
     {
       m4_error (context, 0, 0, _("%s: error matching regular expression `%s'"),
                me, M4ARG (2));
@@ -600,7 +608,7 @@ M4BUILTIN_HANDLER (regexp)
   if ((argc == 3) || (replace == NULL))
     m4_shipout_int (obs, startpos);
   else if (startpos >= 0)
-    substitute (context, obs, M4ARG (1), replace, buf);
+    substitute (context, obs, me, M4ARG (1), replace, buf);
 
   return;
 }
@@ -642,7 +650,7 @@ M4BUILTIN_HANDLER (renamesyms)
            return;
        }
 
-      buf = m4_regexp_compile (context, me, regexp, resyntax);
+      buf = m4_regexp_compile (context, me, regexp, resyntax, false);
       if (!buf)
        return;
 
@@ -654,7 +662,7 @@ M4BUILTIN_HANDLER (renamesyms)
 
       for (; data.size > 0; --data.size, data.base++)
        {
-         const char *  name    = data.base[0];
+         const char *name = data.base[0];
 
          if (m4_regexp_substitute (context, &rename_obs, me, name, regexp,
                                    buf, replace, true))






reply via email to

[Prev in Thread] Current Thread [Next in Thread]