m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, branch-1.6, updated. v1.5.89a-46-ga3a7734


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, branch-1.6, updated. v1.5.89a-46-ga3a7734
Date: Mon, 04 Aug 2008 04:41:55 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=a3a7734d1beabbb438656461076258f5ff32c08b

The branch, branch-1.6 has been updated
       via  a3a7734d1beabbb438656461076258f5ff32c08b (commit)
      from  0d6fb01e76bc35550a00cbf7710d1471db9e7b00 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit a3a7734d1beabbb438656461076258f5ff32c08b
Author: Eric Blake <address@hidden>
Date:   Mon Jan 14 17:25:13 2008 -0700

    Stage 26: Allow embedded NUL in macro definitions.
    
    * src/m4.h (set_word_regexp, arg_len, define_user_macro): Add
    parameters.
    (SYMBOL_TEXT_LEN): New macro.
    (ARG_LEN): Adjust callers.
    * src/builtin.c (define_user_macro): Add a parameter.
    (builtin_init, define_macro): Adjust callers.
    (m4_dumpdef, m4_defn, m4_changeword): Handle embedded NULs.
    (expand_user_macro): Handle embedded NUL, and speed up search for
    embedded $.
    * src/macro.c (arg_len): Add parameter.
    * src/input.c (set_word_regexp): Add parameter.
    (input_init): Adjust caller.
    * src/m4.c (main): Likewise.
    * src/freeze.c (dump_symbol_CB): Preserve NUL on freeze.
    (reload_frozen_state): Retrieve NUL on load.
    * doc/m4.texinfo (Builtin, Using frozen files): Enhance tests.
    * examples/null.m4: Likewise.
    * examples/null.out: Update expected output.
    * examples/null.err: Likewise.
    
    (cherry picked from commit cb26d7cb8b438224908d53df59b1d394ba1928f8)
    
    Signed-off-by: Eric Blake <address@hidden>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog         |   26 ++++++++++++++++++++
 doc/m4.texinfo    |   13 ++++++++--
 examples/null.err |  Bin 572 -> 713 bytes
 examples/null.m4  |  Bin 6189 -> 6499 bytes
 examples/null.out |  Bin 468 -> 510 bytes
 src/builtin.c     |   67 +++++++++++++++++++++++++++++++---------------------
 src/freeze.c      |    6 ++--
 src/input.c       |   28 +++++++++++++--------
 src/m4.c          |    2 +-
 src/m4.h          |   10 ++++---
 src/macro.c       |   25 +++++++------------
 11 files changed, 112 insertions(+), 65 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 325bf7a..7a50b85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,31 @@
 2008-08-03  Eric Blake  <address@hidden>
 
+       Stage 26: Allow embedded NUL in macro definitions.
+       Track macro definitions by length, to allow embedded NUL.  Make
+       arg_len callers aware of the issue of flattening builtins when
+       determining length.  Optimize loops that scan a definition.
+       Memory impact: none.
+       Speed impact: slight improvement, due to faster scans.
+       * src/m4.h (set_word_regexp, arg_len, define_user_macro): Add
+       parameters.
+       (SYMBOL_TEXT_LEN): New macro.
+       (ARG_LEN): Adjust callers.
+       * src/builtin.c (define_user_macro): Add a parameter.
+       (builtin_init, define_macro): Adjust callers.
+       (m4_dumpdef, m4_defn, m4_changeword): Handle embedded NULs.
+       (expand_user_macro): Handle embedded NUL, and speed up search for
+       embedded $.
+       * src/macro.c (arg_len): Add parameter.
+       * src/input.c (set_word_regexp): Add parameter.
+       (input_init): Adjust caller.
+       * src/m4.c (main): Likewise.
+       * src/freeze.c (dump_symbol_CB): Preserve NUL on freeze.
+       (reload_frozen_state): Retrieve NUL on load.
+       * doc/m4.texinfo (Builtin, Using frozen files): Enhance tests.
+       * examples/null.m4: Likewise.
+       * examples/null.out: Update expected output.
+       * examples/null.err: Likewise.
+
        Fix regression in commenting unbalanced quotes, from 2008-02-16.
        * src/m4.h (enum token_type): Add TOKEN_COMMENT.
        * src/input.c (next_token, peek_token, token_type_string)
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index d8e2625..7f3cb49 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -2684,6 +2684,13 @@ builtin(`builtin')
 builtin(`builtin',)
 @error{}m4:stdin:4: Warning: builtin: undefined builtin `'
 @result{}
+builtin(`builtin', ``'
+')
+@error{}m4:stdin:5: Warning: builtin: undefined builtin ``\'\n'
+@result{}
+indir(`index')
+@error{}m4:stdin:7: Warning: index: too few arguments: 0 < 2
+@result{}
 @end example
 
 @ignore
@@ -7153,13 +7160,13 @@ ifdef(`__unix__', ,
       `errprint(` skipping: syscmd does not have unix semantics
 ')m4exit(`77')')dnl
 changequote(`[', `]')dnl
-syscmd([printf 'define(-\0-,hi)changequote([,\0])changecom(--\0)dnl
+syscmd([printf 'define(-\0-,\0-\0)changequote([,\0])changecom(--\0)dnl
 divert(1)undivert(null.out)' | ]__program__[ -F in.m4f \
-     && printf 'errprint([divnum\0] #-- indir(-\0-))' \
+     && printf 'errprint([divnum\0] #-- len(indir(-\0-)))' \
        | ]__program__[ -R in.m4f \
      && rm in.m4f])errprint([ ]sysval[
 ])dnl
address@hidden #-- hi 0
address@hidden #-- 3 0
 @end example
 @end ignore
 
diff --git a/examples/null.err b/examples/null.err
index 5f989ee..897ce34 100644
Binary files a/examples/null.err and b/examples/null.err differ
diff --git a/examples/null.m4 b/examples/null.m4
index de76742..1823073 100644
Binary files a/examples/null.m4 and b/examples/null.m4 differ
diff --git a/examples/null.out b/examples/null.out
index 5e90221..dd83416 100644
Binary files a/examples/null.out and b/examples/null.out differ
diff --git a/src/builtin.c b/src/builtin.c
index f8a3f3c..cc21ea2 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -429,26 +429,32 @@ free_regex (void)
       }
 }
 
-/*-----------------------------------------------------------------.
-| Define a predefined or user-defined macro, with name NAME of     |
-| length NAME_LEN, and expansion TEXT.  MODE is SYMBOL_INSERT for  |
-| "define" or SYMBOL_PUSHDEF for "pushdef".  This function is also |
-| used from main ().                                               |
-`-----------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Define a predefined or user-defined macro, with name NAME of      |
+| length NAME_LEN, and expansion TEXT of length LEN.  LEN may be    |
+| SIZE_MAX, to use the string length of TEXT instead.  MODE is      |
+| SYMBOL_INSERT for "define" or SYMBOL_PUSHDEF for "pushdef".  This |
+| function is also used from main ().                               |
+`------------------------------------------------------------------*/
 
 void
 define_user_macro (const char *name, size_t name_len, const char *text,
-                  symbol_lookup mode)
+                  size_t len, symbol_lookup mode)
 {
   symbol *s;
-  char *defn = xstrdup (text ? text : "");
+  char *defn;
 
+  assert (text);
+  if (len == SIZE_MAX)
+    len = strlen (text);
+  defn = xmemdup (text, len);
   s = lookup_symbol (name, name_len, mode);
   if (SYMBOL_TYPE (s) == TOKEN_TEXT)
     free (SYMBOL_TEXT (s));
 
   SYMBOL_TYPE (s) = TOKEN_TEXT;
   SYMBOL_TEXT (s) = defn;
+  SYMBOL_TEXT_LEN (s) = len;
   SYMBOL_MACRO_ARGS (s) = true;
 
   /* Implement --warn-macro-sequence.  */
@@ -456,7 +462,6 @@ define_user_macro (const char *name, size_t name_len, const char *text,
     {
       regoff_t offset = 0;
       struct re_registers *regs = &macro_sequence_regs;
-      size_t len = strlen (defn);
 
       while (offset < len
             && (offset = re_search (&macro_sequence_buf, defn, len, offset,
@@ -515,13 +520,13 @@ builtin_init (void)
       {
        if (pp->unix_name != NULL)
          define_user_macro (pp->unix_name, strlen (pp->unix_name),
-                            pp->func, SYMBOL_INSERT);
+                            pp->func, SIZE_MAX, SYMBOL_INSERT);
       }
     else
       {
        if (pp->gnu_name != NULL)
          define_user_macro (pp->gnu_name, strlen (pp->gnu_name),
-                            pp->func, SYMBOL_INSERT);
+                            pp->func, SIZE_MAX, SYMBOL_INSERT);
       }
 }
 
@@ -675,7 +680,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
 
   if (argc == 2)
     {
-      define_user_macro (ARG (1), ARG_LEN (1), "", mode);
+      define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode);
       return;
     }
 
@@ -685,7 +690,8 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
       m4_warn (0, me, _("cannot concatenate builtins"));
       /* fallthru */
     case TOKEN_TEXT:
-      define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), mode);
+      define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true),
+                        arg_len (argv, 2, true), mode);
       break;
 
     case TOKEN_FUNC:
@@ -914,7 +920,8 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv)
        case TOKEN_TEXT:
          if (debug_level & DEBUG_TRACE_QUOTE)
            fwrite (curr_quote.str1, 1, curr_quote.len1, debug);
-         fputs (SYMBOL_TEXT (data.base[0]), debug);
+         fwrite (SYMBOL_TEXT (data.base[0]), 1,
+                 SYMBOL_TEXT_LEN (data.base[0]), debug);
          if (debug_level & DEBUG_TRACE_QUOTE)
            fwrite (curr_quote.str2, 1, curr_quote.len2, debug);
          break;
@@ -1049,7 +1056,7 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv)
        {
        case TOKEN_TEXT:
          obstack_grow (obs, curr_quote.str1, curr_quote.len1);
-         obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
+         obstack_grow (obs, SYMBOL_TEXT (s), SYMBOL_TEXT_LEN (s));
          obstack_grow (obs, curr_quote.str2, curr_quote.len2);
          break;
 
@@ -1422,7 +1429,7 @@ m4_changeword (struct obstack *obs, int argc, macro_arguments *argv)
 
   if (bad_argc (me, argc, 1, 1))
     return;
-  set_word_regexp (me, ARG (1));
+  set_word_regexp (me, ARG (1), ARG_LEN (1));
 }
 
 #endif /* ENABLE_CHANGEWORD */
@@ -2305,29 +2312,31 @@ void
 expand_user_macro (struct obstack *obs, symbol *sym,
                   int argc, macro_arguments *argv)
 {
-  const char *text;
+  const char *text = SYMBOL_TEXT (sym);
+  size_t len = SYMBOL_TEXT_LEN (sym);
   int i;
+  const char *dollar = memchr (text, '$', len);
 
-  for (text = SYMBOL_TEXT (sym); *text != '\0';)
+  while (dollar)
     {
-      if (*text != '$')
-       {
-         obstack_1grow (obs, *text);
-         text++;
-         continue;
-       }
-      text++;
-      switch (*text)
+      obstack_grow (obs, text, dollar - text);
+      len -= dollar - text;
+      text = dollar;
+      if (len == 1)
+       break;
+      len--;
+      switch (*++text)
        {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          if (no_gnu_extensions)
            {
              i = *text++ - '0';
+             len--;
            }
          else
            {
-             for (i = 0; isdigit (to_uchar (*text)); text++)
+             for (i = 0; len && isdigit (to_uchar (*text)); text++, len--)
                i = i * 10 + (*text - '0');
            }
          push_arg (obs, argv, i);
@@ -2336,17 +2345,21 @@ expand_user_macro (struct obstack *obs, symbol *sym,
        case '#':               /* number of arguments */
          shipout_int (obs, argc - 1);
          text++;
+         len--;
          break;
 
        case '*':               /* all arguments */
        case '@':               /* ... same, but quoted */
          push_args (obs, argv, false, *text == '@');
          text++;
+         len--;
          break;
 
        default:
          obstack_1grow (obs, '$');
          break;
        }
+      dollar = memchr (text, '$', len);
     }
+  obstack_grow (obs, text, len);
 }
diff --git a/src/freeze.c b/src/freeze.c
index 2a7d9dc..c45722f 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -75,9 +75,9 @@ dump_symbol_CB (symbol *sym, void *f)
        case TOKEN_TEXT:
          xfprintf (file, "T%d,%d\n",
                    (int) SYMBOL_NAME_LEN (sym),
-                   (int) strlen (SYMBOL_TEXT (sym)));
+                   (int) SYMBOL_TEXT_LEN (sym));
          fwrite (SYMBOL_NAME (sym), 1, SYMBOL_NAME_LEN (sym), file);
-         fputs (SYMBOL_TEXT (sym), file);
+         fwrite (SYMBOL_TEXT (sym), 1, SYMBOL_TEXT_LEN (sym), file);
          fputc ('\n', file);
          break;
 
@@ -379,7 +379,7 @@ reload_frozen_state (const char *name)
 
              /* Enter a macro having an expansion text as a definition.  */
 
-             define_user_macro (string[0], number[0], string[1],
+             define_user_macro (string[0], number[0], string[1], number[1],
                                 SYMBOL_PUSHDEF);
              break;
 
diff --git a/src/input.c b/src/input.c
index 4f969b7..b967087 100644
--- a/src/input.c
+++ b/src/input.c
@@ -1309,7 +1309,7 @@ input_init (void)
   curr_comm.len2 = 1;
 
 #ifdef ENABLE_CHANGEWORD
-  set_word_regexp (NULL, user_word_regexp);
+  set_word_regexp (NULL, user_word_regexp, SIZE_MAX);
 #endif /* ENABLE_CHANGEWORD */
 
   set_quote_age ();
@@ -1406,19 +1406,24 @@ set_comment (const char *bc, size_t bc_len, const char *ec, size_t ec_len)
 
 #ifdef ENABLE_CHANGEWORD
 
-/*-------------------------------------------------------------------.
-| Set the regular expression for recognizing words to REGEXP, and    |
-| report errors on behalf of CALLER.  If REGEXP is NULL, revert back |
-| to the default parsing rules.                                      |
-`-------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Set the regular expression for recognizing words to REGEXP of    |
+| length LEN, and report errors on behalf of CALLER.  If REGEXP is |
+| NULL, revert back to the default parsing rules.  If LEN is       |
+| SIZE_MAX, use strlen(REGEXP) instead.                            |
+`-----------------------------------------------------------------*/
 
 void
-set_word_regexp (const call_info *caller, const char *regexp)
+set_word_regexp (const call_info *caller, const char *regexp, size_t len)
 {
   const char *msg;
   struct re_pattern_buffer new_word_regexp;
 
-  if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
+  if (len == SIZE_MAX)
+    len = strlen (regexp);
+  if (len == 0
+      || (len == strlen (DEFAULT_WORD_REGEXP)
+         && !memcmp (regexp, DEFAULT_WORD_REGEXP, len)))
     {
       default_word_regexp = true;
       set_quote_age ();
@@ -1427,12 +1432,13 @@ set_word_regexp (const call_info *caller, const char *regexp)
 
   /* Dry run to see whether the new expression is compilable.  */
   init_pattern_buffer (&new_word_regexp, NULL);
-  msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
+  msg = re_compile_pattern (regexp, len, &new_word_regexp);
   regfree (&new_word_regexp);
 
   if (msg != NULL)
     {
-      m4_warn (0, caller, _("bad regular expression `%s': %s"), regexp, msg);
+      m4_warn (0, caller, _("bad regular expression %s: %s"),
+              quotearg_style_mem (locale_quoting_style, regexp, len), msg);
       return;
     }
 
@@ -1442,7 +1448,7 @@ set_word_regexp (const call_info *caller, const char *regexp)
      by the final regfree.  */
   if (!word_regexp.fastmap)
     word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
-  msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
+  msg = re_compile_pattern (regexp, len, &word_regexp);
   assert (!msg);
   re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
   if (re_compile_fastmap (&word_regexp))
diff --git a/src/m4.c b/src/m4.c
index 551d80c..1bb1ec7 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -623,7 +623,7 @@ main (int argc, char *const *argv, char *const *envp)
            const char *value = strchr (defines->arg, '=');
            size_t len = value ? value - defines->arg : strlen (defines->arg);
            define_user_macro (defines->arg, len, value ? value + 1 : "",
-                              SYMBOL_INSERT);
+                              value ? SIZE_MAX : 0, SYMBOL_INSERT);
          }
          break;
 
diff --git a/src/m4.h b/src/m4.h
index 40aa5ec..8da7d3c 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -381,7 +381,7 @@ extern string_pair curr_quote;
 void set_quotes (const char *, size_t, const char *, size_t);
 void set_comment (const char *, size_t, const char *, size_t);
 #ifdef ENABLE_CHANGEWORD
-void set_word_regexp (const call_info *, const char *);
+void set_word_regexp (const call_info *, const char *, size_t);
 #endif
 unsigned int quote_age (void);
 bool safe_quotes (void);
@@ -438,6 +438,7 @@ struct symbol
 #define SYMBOL_NAME_LEN(S)     ((S)->len)
 #define SYMBOL_TYPE(S)         (TOKEN_DATA_TYPE (&(S)->data))
 #define SYMBOL_TEXT(S)         (TOKEN_DATA_TEXT (&(S)->data))
+#define SYMBOL_TEXT_LEN(S)     (TOKEN_DATA_LEN (&(S)->data))
 #define SYMBOL_FUNC(S)         (TOKEN_DATA_FUNC (&(S)->data))
 
 typedef enum symbol_lookup symbol_lookup;
@@ -467,7 +468,7 @@ token_data_type arg_type (macro_arguments *, unsigned int);
 const char *arg_text (macro_arguments *, unsigned int, bool);
 bool arg_equal (macro_arguments *, unsigned int, unsigned int);
 bool arg_empty (macro_arguments *, unsigned int);
-size_t arg_len (macro_arguments *, unsigned int);
+size_t arg_len (macro_arguments *, unsigned int, bool);
 builtin_func *arg_func (macro_arguments *, unsigned int);
 struct obstack *arg_scratch (void);
 bool arg_print (struct obstack *, macro_arguments *, unsigned int,
@@ -487,7 +488,7 @@ void wrap_args (macro_arguments *);
 
 /* Grab the text length at argv index I.  Assumes macro_argument *argv
    is in scope, and aborts if the argument is not text.  */
-#define ARG_LEN(i) arg_len (argv, i)
+#define ARG_LEN(i) arg_len (argv, i, false)
 
 
 /* File: builtin.c  --- builtins.  */
@@ -523,7 +524,8 @@ bool bad_argc (const call_info *, int, unsigned int, unsigned int);
 void define_builtin (const char *, size_t, const builtin *, symbol_lookup);
 void set_macro_sequence (const char *);
 void free_regex (void);
-void define_user_macro (const char *, size_t, const char *, symbol_lookup);
+void define_user_macro (const char *, size_t, const char *, size_t,
+                       symbol_lookup);
 void undivert_all (void);
 void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
 void m4_placeholder (struct obstack *, int, macro_arguments *);
diff --git a/src/macro.c b/src/macro.c
index 9d8ffbb..d1f70e9 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -1128,9 +1128,10 @@ arg_empty (macro_arguments *argv, unsigned int arg)
 }
 
 /* Given ARGV, return the length of argument ARG.  Abort if the
-   argument is not text.  Indices beyond argc return 0.  */
+   argument is not text.  Indices beyond argc return 0.  If FLATTEN,
+   builtins are ignored.  */
 size_t
-arg_len (macro_arguments *argv, unsigned int arg)
+arg_len (macro_arguments *argv, unsigned int arg, bool flatten)
 {
   token_data *token;
   token_chain *chain;
@@ -1143,7 +1144,7 @@ arg_len (macro_arguments *argv, unsigned int arg)
     }
   if (arg >= argv->argc)
     return 0;
-  token = arg_token (argv, arg, NULL, false);
+  token = arg_token (argv, arg, NULL, flatten);
   switch (TOKEN_DATA_TYPE (token))
     {
     case TOKEN_TEXT:
@@ -1163,9 +1164,8 @@ arg_len (macro_arguments *argv, unsigned int arg)
              len += chain->u.u_s.len;
              break;
            case CHAIN_FUNC:
-             /* TODO concatenate builtins.  */
-             assert (!"implemented");
-             abort ();
+             assert (flatten);
+             break;
            case CHAIN_ARGV:
              i = chain->u.u_a.index;
              limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last;
@@ -1176,15 +1176,8 @@ arg_len (macro_arguments *argv, unsigned int arg)
                len += (quotes->len1 + quotes->len2) * limit;
              len += limit - 1;
              while (limit--)
-               {
-                 /* TODO handle builtin concatenation.  */
-                 if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL,
-                                                 false)) == TOKEN_FUNC)
-                   assert (argv->flatten);
-                 else
-                   len += arg_len (chain->u.u_a.argv, i);
-                 i++;
-               }
+               len += arg_len (chain->u.u_a.argv, i++,
+                               flatten || chain->u.u_a.flatten);
              break;
            default:
              assert (!"arg_len");
@@ -1192,7 +1185,7 @@ arg_len (macro_arguments *argv, unsigned int arg)
            }
          chain = chain->next;
        }
-      assert (len);
+      assert (len || flatten);
       return len;
     case TOKEN_FUNC:
     default:


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]