m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, branch-1_4, updated. branch-cvs-readonly-40-gc2c0a7d


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, branch-1_4, updated. branch-cvs-readonly-40-gc2c0a7d
Date: Tue, 22 Jan 2008 20:39:45 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=c2c0a7ddc9f559d66a17184ea8be2c363dd4807c

The branch, branch-1_4 has been updated
       via  c2c0a7ddc9f559d66a17184ea8be2c363dd4807c (commit)
      from  5d61bd60454bca489dc2f4eb4ee0d9eba4f1f425 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit c2c0a7ddc9f559d66a17184ea8be2c363dd4807c
Author: Eric Blake <address@hidden>
Date:   Sat Oct 27 05:44:09 2007 -0600

    Stage 11: full circle for single argument references.
    
    Pass quoted strings through to argument collection in a single
    action, so that an argument can be reused throughout macro
    recursion if it remains unchanged.
    Memory impact: noticeable improvement, due to more reuse in
    argument collection stacks.
    Speed impact: noticeable improvement, due to less copying.
    * src/m4.h (struct token_chain): Add quote_age member.
    (struct token_data): Add end member to chain alternate.
    (make_text_link): New prototype.
    * src/input.c (CHAR_QUOTE): New macro.
    (word_start): Pre-allocate.
    (set_word_regexp): Simplify.
    (make_text_link): Export, and handle new fields.
    (next_char, next_char_1): Add parameter.
    (append_quote_token): New function.
    (match_input, next_token): Adjust callers to handle quoted input
    blocks.
    * src/macro.c (struct macro_arguments): Add wrapper member.
    (expand_argument): Accept composite blocks from input engine.
    (expand_macro): Reduce refcounts of composite arguments.
    (collect_arguments, arg_token, arg_mark, make_argv_ref): Update to
    use new fields.
    (arg_type, arg_text, arg_equal, arg_len): Treat composite
    arguments as text.
    (push_arg, push_args): Handle composites.
    
    (cherry picked from commit b1fef201f5d121e25e5dd61ec8ca3eac41a899ba)
    
    Signed-off-by: Eric Blake <address@hidden>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog   |   29 ++++++++
 src/input.c |  207 +++++++++++++++++++++++++++++++++--------------------
 src/m4.h    |   25 ++++---
 src/macro.c |  233 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 376 insertions(+), 118 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5ad26e3..15549a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2008-01-22  Eric Blake  <address@hidden>
+
+       Stage 11: full circle for single argument references.
+       Pass quoted strings through to argument collection in a single
+       action, so that an argument can be reused throughout macro
+       recursion if it remains unchanged.
+       Memory impact: noticeable improvement, due to more reuse in
+       argument collection stacks.
+       Speed impact: noticeable improvement, due to less copying.
+       * src/m4.h (struct token_chain): Add quote_age member.
+       (struct token_data): Add end member to chain alternate.
+       (make_text_link): New prototype.
+       * src/input.c (CHAR_QUOTE): New macro.
+       (word_start): Pre-allocate.
+       (set_word_regexp): Simplify.
+       (make_text_link): Export, and handle new fields.
+       (next_char, next_char_1): Add parameter.
+       (append_quote_token): New function.
+       (match_input, next_token): Adjust callers to handle quoted input
+       blocks.
+       * src/macro.c (struct macro_arguments): Add wrapper member.
+       (expand_argument): Accept composite blocks from input engine.
+       (expand_macro): Reduce refcounts of composite arguments.
+       (collect_arguments, arg_token, arg_mark, make_argv_ref): Update to
+       use new fields.
+       (arg_type, arg_text, arg_equal, arg_len): Treat composite
+       arguments as text.
+       (push_arg, push_args): Handle composites.
+
 2008-01-17  Eric Blake  <address@hidden>
 
        Stage 10: avoid extra copying of strings and comments.
diff --git a/src/input.c b/src/input.c
index bc73c6f..9f25e8f 100644
--- a/src/input.c
+++ b/src/input.c
@@ -153,6 +153,7 @@ static bool input_change;
 
 #define CHAR_EOF       256     /* Character return on EOF.  */
 #define CHAR_MACRO     257     /* Character return for MACRO token.  */
+#define CHAR_QUOTE     258     /* Character return for quoted string.  */
 
 /* Quote chars.  */
 STRING rquote;
@@ -167,7 +168,7 @@ STRING ecomm;
 # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
 
 /* Table of characters that can start a word.  */
-static char *word_start;
+static char word_start[256];
 
 /* Current regular expression for detecting words.  */
 static struct re_pattern_buffer word_regexp;
@@ -201,7 +202,7 @@ static const char *token_type_string (token_type);
 | chain that starts at *START and ends at *END.  START may be NULL   |
 | if *END is non-NULL.                                               |
 `-------------------------------------------------------------------*/
-static void
+void
 make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
 {
   token_chain *chain;
@@ -218,6 +219,7 @@ make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
        *start = chain;
       *end = chain;
       chain->next = NULL;
+      chain->quote_age = 0;
       chain->str = str;
       chain->len = len;
       chain->level = -1;
@@ -361,6 +363,7 @@ push_token (token_data *token, int level)
     next->u.u_c.chain = chain;
   next->u.u_c.end = chain;
   chain->next = NULL;
+  chain->quote_age = TOKEN_DATA_QUOTE_AGE (token);
   chain->str = TOKEN_DATA_TEXT (token);
   chain->len = TOKEN_DATA_LEN (token);
   chain->level = level;
@@ -563,19 +566,6 @@ pop_wrapup (void)
   return true;
 }
 
-/*-------------------------------------------------------------------.
-| When a MACRO token is seen, next_token () uses init_macro_token () |
-| to retrieve the value of the function pointer and store it in TD.  |
-`-------------------------------------------------------------------*/
-
-static void
-init_macro_token (token_data *td)
-{
-  assert (isp->type == INPUT_MACRO);
-  TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
-  TOKEN_DATA_FUNC (td) = isp->u.func;
-}
-
 /*--------------------------------------------------------------.
 | Dump a representation of INPUT to the obstack OBS, for use in |
 | tracing.                                                      |
@@ -699,16 +689,19 @@ peek_input (void)
 | consisting of a newline alone is taken as belonging to the line it |
 | ends, and the current line number is not incremented until the     |
 | next character is read.  99.9% of all calls will read from a       |
-| string, so factor that out into a macro for speed.                 |
+| string, so factor that out into a macro for speed.  If             |
+| ALLOW_QUOTE, and the current input matches the current quote age,  |
+| return CHAR_QUOTE and leave consumption of data for                |
+| append_quote_token.                                                |
 `-------------------------------------------------------------------*/
 
-#define next_char()                                                    \
+#define next_char(AQ)                                                  \
   (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \
    ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++))                  \
-   : next_char_1 ())
+   : next_char_1 (AQ))
 
 static int
-next_char_1 (void)
+next_char_1 (bool allow_quote)
 {
   int ch;
   token_chain *chain;
@@ -765,10 +758,14 @@ next_char_1 (void)
          chain = isp->u.u_c.chain;
          while (chain)
            {
+             if (allow_quote && chain->quote_age == current_quote_age)
+               return CHAR_QUOTE;
              if (chain->str)
                {
                  if (chain->len)
                    {
+                     /* Partial consumption invalidates quote age.  */
+                     chain->quote_age = 0;
                      chain->len--;
                      return to_uchar (*chain->str++);
                    }
@@ -808,7 +805,7 @@ skip_line (const char *name)
   const char *file = current_file;
   int line = current_line;
 
-  while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
+  while ((ch = next_char (false)) != CHAR_EOF && ch != '\n')
     ;
   if (ch == CHAR_EOF)
     /* current_file changed to "" if we see CHAR_EOF, use the
@@ -825,6 +822,49 @@ skip_line (const char *name)
 }
 
 
+/*-------------------------------------------------------------------.
+| When a MACRO token is seen, next_token () uses init_macro_token () |
+| to retrieve the value of the function pointer and store it in TD.  |
+`-------------------------------------------------------------------*/
+
+static void
+init_macro_token (token_data *td)
+{
+  assert (isp->type == INPUT_MACRO);
+  TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+  TOKEN_DATA_FUNC (td) = isp->u.func;
+}
+
+/*-------------------------------------------------------------------.
+| When a QUOTE token is seen, convert TD to a composite (if it is    |
+| not one already), consisting of any unfinished text on OBS, as     |
+| well as the quoted token from the top of the input stack.  Use OBS |
+| for any additional allocations needed to store the token chain.    |
+`-------------------------------------------------------------------*/
+static void
+append_quote_token (struct obstack *obs, token_data *td)
+{
+  token_chain *src_chain = isp->u.u_c.chain;
+  token_chain *chain;
+  assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
+
+  if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
+    {
+      TOKEN_DATA_TYPE (td) = TOKEN_COMP;
+      td->u.u_c.chain = td->u.u_c.end = NULL;
+    }
+  assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP);
+  make_text_link (obs, &td->u.u_c.chain, &td->u.u_c.end);
+  chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+  if (td->u.u_c.end)
+    td->u.u_c.end->next = chain;
+  else
+    td->u.u_c.chain = chain;
+  td->u.u_c.end = chain;
+  td->u.u_c.end->next = NULL;
+  isp->u.u_c.chain = src_chain->next;
+}
+
 /*------------------------------------------------------------------.
 | This function is for matching a string against a prefix of the    |
 | input stream.  If the string S matches the input and CONSUME is   |
@@ -848,14 +888,14 @@ match_input (const char *s, bool consume)
   if (s[1] == '\0')
     {
       if (consume)
-       (void) next_char ();
+       next_char (false);
       return true;                     /* short match */
     }
 
-  (void) next_char ();
+  next_char (false);
   for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
     {
-      (void) next_char ();
+      next_char (false);
       n++;
       if (*s == '\0')          /* long match */
        {
@@ -1016,7 +1056,6 @@ void
 set_word_regexp (const char *caller, const char *regexp)
 {
   int i;
-  char test[2];
   const char *msg;
   struct re_pattern_buffer new_word_regexp;
 
@@ -1048,15 +1087,10 @@ set_word_regexp (const char *caller, const char *regexp)
   default_word_regexp = false;
   set_quote_age ();
 
-  if (word_start == NULL)
-    word_start = (char *) xmalloc (256);
-
-  word_start[0] = '\0';
-  test[1] = '\0';
   for (i = 1; i < 256; i++)
     {
-      test[0] = i;
-      word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
+      char test = i;
+      word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0;
     }
 }
 
@@ -1140,16 +1174,17 @@ safe_quotes (void)
 
 
 /*--------------------------------------------------------------------.
-| Parse and return a single token from the input stream.  A token     |
-| can either be TOKEN_EOF, if the input_stack is empty; it can be     |
-| TOKEN_STRING for a quoted string or comment; TOKEN_WORD for         |
-| something that is a potential macro name; and TOKEN_SIMPLE for any  |
-| single character that is not a part of any of the previous types.   |
-| If LINE is not NULL, set *LINE to the line where the token starts.  |
-| If OBS is not NULL, expand TOKEN_STRING directly into OBS rather    |
-| than in token_stack temporary storage area.  Report errors          |
-| (unterminated comments or strings) on behalf of CALLER, if          |
-| non-NULL.                                                           |
+| Parse a single token from the input stream, set TD to its           |
+| contents, and return its type.  A token is TOKEN_EOF if the         |
+| input_stack is empty; TOKEN_STRING for a quoted string or comment;  |
+| TOKEN_WORD for something that is a potential macro name; and        |
+| TOKEN_SIMPLE for any single character that is not a part of any of  |
+| the previous types.  If LINE is not NULL, set *LINE to the line     |
+| where the token starts.  If OBS is not NULL, expand TOKEN_STRING    |
+| directly into OBS rather than in token_stack temporary storage      |
+| area, and TD could be a TOKEN_COMP instead of the usual             |
+| TOKEN_TEXT.  Report errors (unterminated comments or strings) on    |
+| behalf of CALLER, if non-NULL.                                      |
 |                                                                     |
 | Next_token () returns the token type, and passes back a pointer to  |
 | the token data through TD.  Non-string token text is collected on   |
@@ -1165,7 +1200,6 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
   int quote_level;
   token_type type;
 #ifdef ENABLE_CHANGEWORD
-  int startpos;
   char *orig_text = NULL;
 #endif /* ENABLE_CHANGEWORD */
   const char *file;
@@ -1181,19 +1215,20 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
     line = &dummy;
 
   /* Can't consume character until after CHAR_MACRO is handled.  */
+  TOKEN_DATA_TYPE (td) = TOKEN_VOID;
   ch = peek_input ();
   if (ch == CHAR_EOF)
     {
 #ifdef DEBUG_INPUT
       xfprintf (stderr, "next_token -> EOF\n");
 #endif /* DEBUG_INPUT */
-      next_char ();
+      next_char (false);
       return TOKEN_EOF;
     }
   if (ch == CHAR_MACRO)
     {
       init_macro_token (td);
-      next_char ();
+      next_char (false);
 #ifdef DEBUG_INPUT
       xfprintf (stderr, "next_token -> MACDEF (%s)\n",
                find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
@@ -1201,7 +1236,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       return TOKEN_MACDEF;
     }
 
-  next_char (); /* Consume character we already peeked at.  */
+  next_char (false); /* Consume character we already peeked at.  */
   file = current_file;
   *line = current_line;
   if (MATCH (ch, bcomm.string, true))
@@ -1209,11 +1244,14 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       if (obs)
        obs_td = obs;
       obstack_grow (obs_td, bcomm.string, bcomm.length);
-      while ((ch = next_char ()) != CHAR_EOF
+      while ((ch = next_char (false)) < CHAR_EOF
             && !MATCH (ch, ecomm.string, true))
        obstack_1grow (obs_td, ch);
       if (ch != CHAR_EOF)
-       obstack_grow (obs_td, ecomm.string, ecomm.length);
+       {
+         assert (ch < CHAR_EOF);
+         obstack_grow (obs_td, ecomm.string, ecomm.length);
+       }
       else
        /* Current_file changed to "" if we see CHAR_EOF, use the
           previous value we stored earlier.  */
@@ -1225,10 +1263,10 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
   else if (default_word_regexp && (isalpha (ch) || ch == '_'))
     {
       obstack_1grow (&token_stack, ch);
-      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
+      while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
-         (void) next_char ();
+         next_char (false);
        }
       type = TOKEN_WORD;
     }
@@ -1241,20 +1279,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       while (1)
        {
          ch = peek_input ();
-         if (ch == CHAR_EOF)
+         if (ch >= CHAR_EOF)
            break;
          obstack_1grow (&token_stack, ch);
-         startpos = re_search (&word_regexp,
-                               (char *) obstack_base (&token_stack),
-                               obstack_object_size (&token_stack), 0, 0,
-                               &regs);
-         if (startpos != 0 ||
-             regs.end [0] != obstack_object_size (&token_stack))
+         if (re_match (&word_regexp, (char *) obstack_base (&token_stack),
+                       obstack_object_size (&token_stack), 0, &regs)
+             != obstack_object_size (&token_stack))
            {
              obstack_blank (&token_stack, -1);
              break;
            }
-         next_char ();
+         next_char (false);
        }
 
       obstack_1grow (&token_stack, '\0');
@@ -1297,14 +1332,16 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       quote_level = 1;
       while (1)
        {
-         ch = next_char ();
+         ch = next_char (obs != NULL && current_quote_age);
          if (ch == CHAR_EOF)
            /* Current_file changed to "" if we see CHAR_EOF, use
               the previous value we stored earlier.  */
            m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
                              _("end of file in string"));
 
-         if (MATCH (ch, rquote.string, true))
+         if (ch == CHAR_QUOTE)
+           append_quote_token (obs, td);
+         else if (MATCH (ch, rquote.string, true))
            {
              if (--quote_level == 0)
                break;
@@ -1316,35 +1353,49 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
              obstack_grow (obs_td, lquote.string, lquote.length);
            }
          else
-           obstack_1grow (obs_td, ch);
+           {
+             assert (ch < CHAR_EOF);
+             obstack_1grow (obs_td, ch);
+           }
        }
       type = TOKEN_STRING;
     }
 
-  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
-  TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
-  if (obs_td != obs)
+  if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
     {
-      obstack_1grow (obs_td, '\0');
-      TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
-    }
-  else
-    TOKEN_DATA_TEXT (td) = NULL;
-  TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
+      TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
+      TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
+      if (obs_td != obs)
+       {
+         obstack_1grow (obs_td, '\0');
+         TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
+       }
+      else
+       TOKEN_DATA_TEXT (td) = NULL;
+      TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
 #ifdef ENABLE_CHANGEWORD
-  if (orig_text == NULL)
-    TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+      if (orig_text == NULL)
+       TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+      else
+       {
+         TOKEN_DATA_ORIG_TEXT (td) = orig_text;
+         TOKEN_DATA_LEN (td) = strlen (orig_text);
+       }
+#endif /* ENABLE_CHANGEWORD */
+#ifdef DEBUG_INPUT
+      xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
+               token_type_string (type), TOKEN_DATA_TEXT (td),
+               TOKEN_DATA_LEN (td));
+#endif /* DEBUG_INPUT */
+    }
   else
     {
-      TOKEN_DATA_ORIG_TEXT (td) = orig_text;
-      TOKEN_DATA_LEN (td) = strlen (orig_text);
-    }
-#endif /* ENABLE_CHANGEWORD */
+      assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING);
 #ifdef DEBUG_INPUT
-  xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
-           token_type_string (type), TOKEN_DATA_TEXT (td),
-           TOKEN_DATA_LEN (td));
+      xfprintf (stderr, "next_token -> %s <chain>\n",
+               token_type_string (type));
 #endif /* DEBUG_INPUT */
+    }
   return type;
 }
 
diff --git a/src/m4.h b/src/m4.h
index ea3947f..474338b 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -271,19 +271,20 @@ enum token_data_type
   TOKEN_VOID,  /* Token still being constructed, u is invalid.  */
   TOKEN_TEXT,  /* Straight text, u.u_t is valid.  */
   TOKEN_FUNC,  /* Builtin function definition, u.func is valid.  */
-  TOKEN_COMP   /* Composite argument, u.chain is valid.  */
+  TOKEN_COMP   /* Composite argument, u.u_c is valid.  */
 };
 
 /* Composite tokens are built of a linked list of chains.  */
 struct token_chain
 {
-  token_chain *next;   /* Pointer to next link of chain.  */
-  const char *str;     /* NUL-terminated string if text, else NULL.  */
-  size_t len;          /* Length of str, else 0.  */
-  int level;           /* Expansion level of link content, or -1.  */
-  macro_arguments *argv;/* Reference to earlier $@.  */
-  unsigned int index;  /* Argument index within argv.  */
-  bool flatten;                /* True to treat builtins as text.  */
+  token_chain *next;           /* Pointer to next link of chain.  */
+  unsigned int quote_age;      /* Quote_age of this link of chain, or 0.  */
+  const char *str;             /* NUL-terminated string if text, or NULL.  */
+  size_t len;                  /* Length of str, else 0.  */
+  int level;                   /* Expansion level of link content, or -1.  */
+  macro_arguments *argv;       /* Reference to earlier $@.  */
+  unsigned int index;          /* Argument index within argv.  */
+  bool flatten;                        /* True to treat builtins as text.  */
 };
 
 /* The content of a token or macro argument.  */
@@ -319,7 +320,12 @@ struct token_data
 
       /* Composite text: a linked list of straight text and $@
         placeholders.  */
-      token_chain *chain;
+      struct
+       {
+         token_chain *chain;   /* First link of the chain.  */
+         token_chain *end;     /* Last link of the chain.  */
+       }
+      u_c;
     }
   u;
 };
@@ -342,6 +348,7 @@ token_type next_token (token_data *, int *, struct obstack *, const char *);
 void skip_line (const char *);
 
 /* push back input */
+void make_text_link (struct obstack *, token_chain **, token_chain **);
 void push_file (FILE *, const char *, bool);
 void push_macro (builtin_func *);
 struct obstack *push_string_init (void);
diff --git a/src/macro.c b/src/macro.c
index ef18b8f..62af398 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -45,6 +45,9 @@ struct macro_arguments
   bool_bitfield inuse : 1;
   /* False if all arguments are just text or func, true if this argv
      refers to another one.  */
+  bool_bitfield wrapper : 1;
+  /* False if all arguments belong to this argv, true if some of them
+     include references to another.  */
   bool_bitfield has_ref : 1;
   const char *argv0; /* The macro name being expanded.  */
   size_t argv0_len; /* Length of argv0.  */
@@ -382,11 +385,16 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
                    return t == TOKEN_COMMA;
                  warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
                }
-             obstack_1grow (obs, '\0');
-             TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
-             TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
-             TOKEN_DATA_LEN (argp) = len;
-             TOKEN_DATA_QUOTE_AGE (argp) = age;
+             if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+               {
+                 obstack_1grow (obs, '\0');
+                 TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
+                 TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
+                 TOKEN_DATA_LEN (argp) = len;
+                 TOKEN_DATA_QUOTE_AGE (argp) = age;
+               }
+             else
+               make_text_link (obs, NULL, &argp->u.u_c.end);
              return t == TOKEN_COMMA;
            }
          /* fallthru */
@@ -411,6 +419,23 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
        case TOKEN_STRING:
          if (!expand_token (obs, t, &td, line, first))
            age = 0;
+         if (TOKEN_DATA_TYPE (&td) == TOKEN_COMP)
+           {
+             if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+               {
+                 if (TOKEN_DATA_TYPE (argp) == TOKEN_FUNC)
+                   warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
+                 TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
+                 argp->u.u_c.chain = td.u.u_c.chain;
+                 argp->u.u_c.end = td.u.u_c.end;
+               }
+             else
+               {
+                 assert (argp->u.u_c.end);
+                 argp->u.u_c.end->next = td.u.u_c.chain;
+                 argp->u.u_c.end = td.u.u_c.end;
+               }
+           }
          break;
 
        case TOKEN_MACDEF:
@@ -459,6 +484,7 @@ collect_arguments (symbol *sym, struct obstack *arguments,
 
   args.argc = 1;
   args.inuse = false;
+  args.wrapper = false;
   args.has_ref = false;
   args.argv0 = SYMBOL_NAME (sym);
   args.argv0_len = strlen (args.argv0);
@@ -490,11 +516,14 @@ collect_arguments (symbol *sym, struct obstack *arguments,
              && TOKEN_DATA_LEN (tdp) > 0
              && TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age)
            args.quote_age = 0;
+         else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP)
+           args.has_ref = true;
        }
       while (more_args);
     }
   argv = (macro_arguments *) obstack_finish (argv_stack);
   argv->argc = args.argc;
+  argv->has_ref = args.has_ref;
   if (args.quote_age != quote_age ())
     argv->quote_age = 0;
   argv->arraylen = args.arraylen;
@@ -633,8 +662,23 @@ expand_macro (symbol *sym)
   if (SYMBOL_DELETED (sym))
     free_symbol (sym);
 
-  /* If argv contains references, those refcounts can be reduced now.  */
-  /* TODO - support references in argv.  */
+  /* If argv contains references, those refcounts must be reduced now.  */
+  if (argv->has_ref)
+    {
+      token_chain *chain;
+      size_t i;
+      for (i = 0; i < argv->arraylen; i++)
+       if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP)
+         {
+           chain = argv->array[i]->u.u_c.chain;
+           while (chain)
+             {
+               if (chain->level >= 0)
+                 adjust_refcount (chain->level, false);
+               chain = chain->next;
+             }
+         }
+    }
 
   /* We no longer need argv, so reduce the refcount.  Additionally, if
      no other references to argv were created, we can free our portion
@@ -698,7 +742,7 @@ arg_token (macro_arguments *argv, unsigned int index)
   token_data *token;
 
   assert (index && index < argv->argc);
-  if (!argv->has_ref)
+  if (!argv->wrapper)
     return argv->array[index - 1];
   /* Must cycle through all tokens, until we find index, since a ref
      may occupy multiple indices.  */
@@ -707,7 +751,7 @@ arg_token (macro_arguments *argv, unsigned int index)
       token = argv->array[i];
       if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
        {
-         token_chain *chain = token->u.chain;
+         token_chain *chain = token->u.u_c.chain;
          /* TODO - for now we support only a single-length $@ chain.  */
          assert (!chain->next && !chain->str);
          if (index < chain->argv->argc - (chain->index - 1))
@@ -731,14 +775,14 @@ static void
 arg_mark (macro_arguments *argv)
 {
   argv->inuse = true;
-  if (argv->has_ref)
+  if (argv->wrapper)
     {
       /* TODO for now we support only a single-length $@ chain.  */
       assert (argv->arraylen == 1
              && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP
-             && !argv->array[0]->u.chain->next
-             && !argv->array[0]->u.chain->str);
-      argv->array[0]->u.chain->argv->inuse = true;
+             && !argv->array[0]->u.u_c.chain->next
+             && !argv->array[0]->u.u_c.chain->str);
+      argv->array[0]->u.u_c.chain->argv->inuse = true;
     }
 }
 
@@ -761,17 +805,22 @@ arg_type (macro_arguments *argv, unsigned int index)
     return TOKEN_TEXT;
   token = arg_token (argv, index);
   type = TOKEN_DATA_TYPE (token);
-  assert (type != TOKEN_COMP);
+  /* Composite tokens are currently sequences of text only.  */
+  if (type == TOKEN_COMP)
+    type = TOKEN_TEXT;
   return type;
 }
 
 /* Given ARGV, return the text at argument INDEX.  Abort if the
    argument is not text.  Index 0 is always text, and indices beyond
-   argc return the empty string.  */
+   argc return the empty string.  The result is always NUL-terminated,
+   even if it includes embedded NUL characters.  */
 const char *
 arg_text (macro_arguments *argv, unsigned int index)
 {
   token_data *token;
+  token_chain *chain;
+  struct obstack *obs;
 
   if (index == 0)
     return argv->argv0;
@@ -783,8 +832,18 @@ arg_text (macro_arguments *argv, unsigned int index)
     case TOKEN_TEXT:
       return TOKEN_DATA_TEXT (token);
     case TOKEN_COMP:
-      /* TODO - how to concatenate multiple arguments?  For now, we expect
-        only one element in the chain, and arg_token dereferences it.  */
+      /* TODO - concatenate multiple arguments?  For now, we assume
+        all elements are text.  */
+      chain = token->u.u_c.chain;
+      obs = arg_scratch ();
+      while (chain)
+       {
+         assert (chain->str);
+         obstack_grow (obs, chain->str, chain->len);
+         chain = chain->next;
+       }
+      obstack_1grow (obs, '\0');
+      return (char *) obstack_finish (obs);
     default:
       break;
     }
@@ -801,14 +860,84 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
 {
   token_data *ta = arg_token (argv, indexa);
   token_data *tb = arg_token (argv, indexb);
+  token_chain tmpa;
+  token_chain tmpb;
+  token_chain *ca = &tmpa;
+  token_chain *cb = &tmpb;
 
+  /* Quick tests.  */
   if (ta == &empty_token || tb == &empty_token)
     return ta == tb;
+  if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
+      && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+    return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
+           && memcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb),
+                      TOKEN_DATA_LEN (ta)) == 0);
+
+  /* Convert both arguments to chains, if not one already.  */
   /* TODO - allow builtin tokens in the comparison?  */
-  assert (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
-         && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT);
-  return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
-         && strcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb)) == 0);
+  if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT)
+    {
+      tmpa.next = NULL;
+      tmpa.str = TOKEN_DATA_TEXT (ta);
+      tmpa.len = TOKEN_DATA_LEN (ta);
+    }
+  else
+    {
+      assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP);
+      ca = ta->u.u_c.chain;
+    }
+  if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+    {
+      tmpb.next = NULL;
+      tmpb.str = TOKEN_DATA_TEXT (tb);
+      tmpb.len = TOKEN_DATA_LEN (tb);
+    }
+  else
+    {
+      assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP);
+      cb = tb->u.u_c.chain;
+    }
+
+  /* Compare each link of the chain.  */
+  while (ca && cb)
+    {
+      /* TODO support comparison against $@ refs.  */
+      assert (ca->str && cb->str);
+      if (ca->len == cb->len)
+       {
+         if (memcmp (ca->str, cb->str, ca->len) != 0)
+           return false;
+         ca = ca->next;
+         cb = cb->next;
+       }
+      else if (ca->len < cb->len)
+       {
+         if (memcmp (ca->str, cb->str, ca->len) != 0)
+           return false;
+         tmpb.next = cb->next;
+         tmpb.str = cb->str + ca->len;
+         tmpb.len = cb->len - ca->len;
+         ca = ca->next;
+         cb = &tmpb;
+       }
+      else
+       {
+         assert (ca->len > cb->len);
+         if (memcmp (ca->str, cb->str, cb->len) != 0)
+           return false;
+         tmpa.next = ca->next;
+         tmpa.str = ca->str + cb->len;
+         tmpa.len = ca->len - cb->len;
+         ca = &tmpa;
+         cb = cb->next;
+       }
+    }
+
+  /* If we get this far, the two tokens are equal only if both chains
+     are exhausted.  */
+  assert (ca != cb || ca == NULL);
+  return ca == cb;
 }
 
 /* Given ARGV, return true if argument INDEX is the empty string.
@@ -830,6 +959,8 @@ size_t
 arg_len (macro_arguments *argv, unsigned int index)
 {
   token_data *token;
+  token_chain *chain;
+  size_t len;
 
   if (index == 0)
     return argv->argv0_len;
@@ -842,8 +973,18 @@ arg_len (macro_arguments *argv, unsigned int index)
       assert ((token == &empty_token) == (TOKEN_DATA_LEN (token) == 0));
       return TOKEN_DATA_LEN (token);
     case TOKEN_COMP:
-      /* TODO - how to concatenate multiple arguments?  For now, we expect
-        only one element in the chain, and arg_token dereferences it.  */
+      /* TODO - concatenate multiple arguments?  For now, we assume
+        all elements are text.  */
+      chain = token->u.u_c.chain;
+      len = 0;
+      while (chain)
+       {
+         assert (chain->str);
+         len += chain->len;
+         chain = chain->next;
+       }
+      assert (len);
+      return len;
     default:
       break;
     }
@@ -892,12 +1033,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
 
   /* When making a reference through a reference, point to the
      original if possible.  */
-  if (argv->has_ref)
+  if (argv->wrapper)
     {
       /* TODO - for now we support only a single-length $@ chain.  */
       assert (argv->arraylen == 1
              && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
-      chain = argv->array[0]->u.chain;
+      chain = argv->array[0]->u.u_c.chain;
       assert (!chain->next && !chain->str);
       argv = chain->argv;
       index += chain->index - 1;
@@ -907,6 +1048,7 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
       new_argv = (macro_arguments *)
        obstack_alloc (obs, offsetof (macro_arguments, array));
       new_argv->arraylen = 0;
+      new_argv->wrapper = false;
       new_argv->has_ref = false;
     }
   else
@@ -918,10 +1060,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
       chain = (token_chain *) obstack_alloc (obs, sizeof *chain);
       new_argv->arraylen = 1;
       new_argv->array[0] = token;
+      new_argv->wrapper = true;
       new_argv->has_ref = true;
       TOKEN_DATA_TYPE (token) = TOKEN_COMP;
-      token->u.chain = chain;
+      token->u.u_c.chain = token->u.u_c.end = chain;
       chain->next = NULL;
+      chain->quote_age = argv->quote_age;
       chain->str = NULL;
       chain->len = 0;
       chain->level = expansion_level - 1;
@@ -955,9 +1099,23 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
     return;
   token = arg_token (argv, index);
   /* TODO handle func tokens?  */
-  assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
-  if (push_token (token, expansion_level - 1))
-    arg_mark (argv);
+  if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+    {
+      if (push_token (token, expansion_level - 1))
+       arg_mark (argv);
+    }
+  else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
+    {
+      /* TODO - concatenate multiple arguments?  For now, we assume
+        all elements are text.  */
+      token_chain *chain = token->u.u_c.chain;
+      while (chain)
+       {
+         assert (chain->str);
+         obstack_grow (obs, chain->str, chain->len);
+         chain = chain->next;
+       }
+    }
 }
 
 /* Push series of comma-separated arguments from ARGV, which should
@@ -968,6 +1126,7 @@ void
 push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
 {
   token_data *token;
+  token_chain *chain;
   unsigned int i = skip ? 2 : 1;
   const char *sep = ",";
   size_t sep_len = 1;
@@ -1007,8 +1166,20 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
       else
        use_sep = true;
       /* TODO handle func tokens?  */
-      assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
-      inuse |= push_token (token, expansion_level - 1);
+      if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+       inuse |= push_token (token, expansion_level - 1);
+      else
+       {
+         /* TODO - handle composite text in push_token.  */
+         assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP);
+         chain = token->u.u_c.chain;
+         while (chain)
+           {
+             assert (chain->str);
+             obstack_grow (obs, chain->str, chain->len);
+             chain = chain->next;
+           }
+       }
     }
   if (quote)
     obstack_grow (obs, rquote.string, rquote.length);


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]