m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, master, updated. cvs-readonly-47-gccc250d


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, master, updated. cvs-readonly-47-gccc250d
Date: Sun, 27 Jan 2008 05:05:35 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=ccc250d2238b57b313c54921b57fc078e5bb8220

The branch, master has been updated
       via  ccc250d2238b57b313c54921b57fc078e5bb8220 (commit)
      from  88382ff9ef2efddf6279fb8af908ddd07210e70c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit ccc250d2238b57b313c54921b57fc078e5bb8220
Author: Eric Blake <address@hidden>
Date:   Sat Jan 26 21:39:25 2008 -0700

    Stage 13: push composite text tokens.
    
    * m4/m4private.h (m4__push_symbol): Adjust prototype.
    * m4/input.c (m4__push_symbol): Add parameter, and support
    composite tokens.
    (append_quote_token): Add parameter, and support inlining of short
    text.
    (m4__next_token): Adjust caller.
    * m4/macro.c (m4_push_arg, m4_push_args): Likewise.
    
    Signed-off-by: Eric Blake <address@hidden>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog      |   17 ++++++
 m4/input.c     |  175 +++++++++++++++++++++++++++++++++++++++++++-------------
 m4/m4private.h |    3 +-
 m4/macro.c     |   38 ++-----------
 4 files changed, 160 insertions(+), 73 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 25c408e..34b9491 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2008-01-27  Eric Blake  <address@hidden>
+
+       Stage 13: push composite text tokens.
+       Support pushing composite tokens, allowing back-references to be
+       reused through multiple macro expansions.  Add heuristic that
+       avoids creating new reference when pushing existing references.
+       Memory impact: noticeable improvement due to better reference
+       reuse, except for boxed recursion doing more copying.
+       Speed impact: slight penalty, due to more bookkeeping.
+       * m4/m4private.h (m4__push_symbol): Adjust prototype.
+       * m4/input.c (m4__push_symbol): Add parameter, and support
+       composite tokens.
+       (append_quote_token): Add parameter, and support inlining of short
+       text.
+       (m4__next_token): Adjust caller.
+       * m4/macro.c (m4_push_arg, m4_push_args): Likewise.
+
 2008-01-26  Eric Blake  <address@hidden>
 
        Stage 12c: add macro for m4_arg_len.
diff --git a/m4/input.c b/m4/input.c
index 4adce9d..9616d37 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -112,7 +112,8 @@ static      bool    composite_clean         (m4_input_block *, m4 *, bool);
 static void    composite_print         (m4_input_block *, m4 *, m4_obstack *);
 
 static void    init_builtin_token      (m4 *, m4_symbol_value *);
-static void    append_quote_token      (m4_obstack *, m4_symbol_value *);
+static void    append_quote_token      (m4 *, m4_obstack *,
+                                        m4_symbol_value *);
 static bool    match_input             (m4 *, const char *, bool);
 static int     next_char               (m4 *, bool, bool);
 static int     peek_char               (m4 *);
@@ -526,35 +527,72 @@ m4_push_string_init (m4 *context)
   return current_input;
 }
 
-/* If VALUE contains text, then convert the current string into a
+/* This function allows gathering input from multiple locations,
+   rather than copying everything consecutively onto the input stack.
+   Must be called between push_string_init and push_string_finish.
+
+   If VALUE contains text, then convert the current input block into a
    chain if it is not one already, and add the contents of VALUE as a
    new link in the chain.  LEVEL describes the current expansion
-   level, or SIZE_MAX if the contents of VALUE reside entirely on the
-   current_input stack and VALUE lives in temporary storage.  Allows
-   gathering input from multiple locations, rather than copying
-   everything consecutively onto the input stack.  Must be called
-   between push_string_init and push_string_finish.  Return true only
-   if LEVEL is less than SIZE_MAX and a reference was created to
-   VALUE, in which case, the lifetime of the contents of VALUE must
-   last as long as the input engine can parse references from it.  */
+   level, or SIZE_MAX if VALUE is composite, its contents reside
+   entirely on the current_input stack, and VALUE lives in temporary
+   storage.  If VALUE is a simple string, then it belongs to the
+   current macro expansion.  If VALUE is composite, then each text link
+   has a level of SIZE_MAX if it belongs to the current macro
+   expansion, otherwise it is a back-reference where level tracks
+   which stack it came from.  The resulting input block chain contains
+   links with a level of SIZE_MAX if the text belongs to the input
+   stack, otherwise the level where the back-reference comes from.
+
+   Return true only if a reference was created to the contents of
+   VALUE, in which case, LEVEL is less than SIZE_MAX and the lifetime
+   of VALUE and its contents must last as long as the input engine can
+   parse references from it.  INUSE determines whether composite
+   symbols should favor creating back-references or copying text.  */
 bool
-m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level)
+m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level, bool inuse)
 {
+  m4__symbol_chain *src_chain = NULL;
   m4__symbol_chain *chain;
-  bool result = false;
 
   assert (next);
-  /* TODO - also accept TOKEN_COMP chains.  */
-  assert (m4_is_symbol_value_text (value));
+  /* TODO - also accept composite chains with $@ refs.  */
 
   /* Speed consideration - for short enough symbols, the speed and
      memory overhead of parsing another INPUT_CHAIN link outweighs the
-     time to inline the symbol text.  */
-  if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD)
+     time to inline the symbol text.  But don't copy text if it
+     already lives on the obstack.  */
+  if (m4_is_symbol_value_text (value))
     {
-      obstack_grow (current_input, m4_get_symbol_value_text (value),
-                   m4_get_symbol_value_len (value));
-      return false;
+      assert (level < SIZE_MAX);
+      if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD)
+       {
+         obstack_grow (current_input, m4_get_symbol_value_text (value),
+                       m4_get_symbol_value_len (value));
+         return false;
+       }
+    }
+  else
+    {
+      /* For composite values, if argv is already in use, creating
+        additional references for long text segments is more
+        efficient in time.  But if argv is not yet in use, and we
+        have a composite value, then the value must already contain a
+        back-reference, and memory usage is more efficient if we can
+        avoid using the current expand_macro, even if it means larger
+        copies.  */
+      assert (value->type == M4_SYMBOL_COMP);
+      src_chain = value->u.u_c.chain;
+      while (level < SIZE_MAX && src_chain && src_chain->type == M4__CHAIN_STR
+            && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
+                || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
+       {
+         obstack_grow (current_input, src_chain->u.u_s.str,
+                       src_chain->u.u_s.len);
+         src_chain = src_chain->next;
+       }
+      if (!src_chain)
+       return false;
     }
 
   if (next->funcs == &string_funcs)
@@ -563,24 +601,72 @@ m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level)
       next->u.u_c.chain = next->u.u_c.end = NULL;
     }
   m4__make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
-  chain = (m4__symbol_chain *) obstack_alloc (current_input, sizeof *chain);
-  if (next->u.u_c.end)
-    next->u.u_c.end->next = chain;
-  else
-    next->u.u_c.chain = chain;
-  next->u.u_c.end = chain;
-  chain->next = NULL;
-  chain->type = M4__CHAIN_STR;
-  chain->quote_age = m4_get_symbol_value_quote_age (value);
-  chain->u.u_s.str = m4_get_symbol_value_text (value);
-  chain->u.u_s.len = m4_get_symbol_value_len (value);
-  chain->u.u_s.level = level;
-  if (level < SIZE_MAX)
+  if (m4_is_symbol_value_text (value))
     {
+      chain = (m4__symbol_chain *) obstack_alloc (current_input,
+                                                 sizeof *chain);
+      if (next->u.u_c.end)
+       next->u.u_c.end->next = chain;
+      else
+       next->u.u_c.chain = chain;
+      next->u.u_c.end = chain;
+      chain->next = NULL;
+      chain->type = M4__CHAIN_STR;
+      chain->quote_age = m4_get_symbol_value_quote_age (value);
+      chain->u.u_s.str = m4_get_symbol_value_text (value);
+      chain->u.u_s.len = m4_get_symbol_value_len (value);
+      chain->u.u_s.level = level;
       m4__adjust_refcount (context, level, true);
-      result = true;
+      inuse = true;
     }
-  return result;
+  while (src_chain)
+    {
+      if (level == SIZE_MAX)
+       {
+         /* Nothing to copy, since link already lives on obstack.  */
+         assert (src_chain->type != M4__CHAIN_STR
+                 || src_chain->u.u_s.level == SIZE_MAX);
+         chain = src_chain;
+       }
+      else
+       {
+         /* Allow inlining the final link with subsequent text.  */
+         if (!src_chain->next && src_chain->type == M4__CHAIN_STR
+             && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
+                 || (!inuse && src_chain->u.u_s.level == SIZE_MAX)))
+           {
+             obstack_grow (current_input, src_chain->u.u_s.str,
+                           src_chain->u.u_s.len);
+             break;
+           }
+         /* We must clone each link in the chain, since next_char
+            destructively modifies the chain it is parsing.  */
+         chain = (m4__symbol_chain *) obstack_copy (current_input, src_chain,
+                                                    sizeof *chain);
+         if (chain->type == M4__CHAIN_STR && chain->u.u_s.level == SIZE_MAX)
+           {
+             if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse)
+               chain->u.u_s.str = (char *) obstack_copy (current_input,
+                                                         chain->u.u_s.str,
+                                                         chain->u.u_s.len);
+             else
+               {
+                 chain->u.u_s.level = level;
+                 inuse = true;
+               }
+           }
+       }
+      if (next->u.u_c.end)
+       next->u.u_c.end->next = chain;
+      else
+       next->u.u_c.chain = chain;
+      next->u.u_c.end = chain;
+      assert (chain->type == M4__CHAIN_STR);
+      if (chain->u.u_s.level < SIZE_MAX)
+       m4__adjust_refcount (context, chain->u.u_s.level, true);
+      src_chain = src_chain->next;
+    }
+  return inuse;
 }
 
 /* Last half of m4_push_string ().  If next is now NULL, a call to
@@ -925,11 +1011,23 @@ init_builtin_token (m4 *context, m4_symbol_value *token)
    as the quoted token from the top of the input stack.  Use OBS for
    any additional allocations needed to store the token chain.  */
 static void
-append_quote_token (m4_obstack *obs, m4_symbol_value *value)
+append_quote_token (m4 *context, m4_obstack *obs, m4_symbol_value *value)
 {
   m4__symbol_chain *src_chain = isp->u.u_c.chain;
   m4__symbol_chain *chain;
-  assert (isp->funcs == &composite_funcs && obs);
+  assert (isp->funcs == &composite_funcs && obs && m4__quote_age (M4SYNTAX)
+         && src_chain->type == M4__CHAIN_STR
+         && src_chain->u.u_s.level <= SIZE_MAX);
+  isp->u.u_c.chain = src_chain->next;
+
+  /* Speed consideration - for short enough symbols, the speed and
+     memory overhead of parsing another INPUT_CHAIN link outweighs the
+     time to inline the symbol text.  */
+  if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+    {
+      obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
+      m4__adjust_refcount (context, src_chain->u.u_s.level, false);
+    }
 
   if (value->type == M4_SYMBOL_VOID)
     {
@@ -944,8 +1042,7 @@ append_quote_token (m4_obstack *obs, m4_symbol_value *value)
   else
     value->u.u_c.chain = chain;
   value->u.u_c.end = chain;
-  value->u.u_c.end->next = NULL;
-  isp->u.u_c.chain = src_chain->next;
+  chain->next = NULL;
 }
 
 
@@ -1293,7 +1390,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
              m4_error_at_line (context, EXIT_FAILURE, 0, file, *line, caller,
                                _("end of file in string"));
            if (ch == CHAR_QUOTE)
-             append_quote_token (obs, token);
+             append_quote_token (context, obs, token);
            else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_RQUOTE))
              {
                if (--quote_level == 0)
diff --git a/m4/m4private.h b/m4/m4private.h
index 4261c4c..5304682 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -475,7 +475,8 @@ typedef enum {
 
 extern void            m4__make_text_link (m4_obstack *, m4__symbol_chain **,
                                            m4__symbol_chain **);
-extern bool            m4__push_symbol (m4 *, m4_symbol_value *, size_t);
+extern bool            m4__push_symbol (m4 *, m4_symbol_value *, size_t,
+                                        bool);
 extern m4__token_type  m4__next_token (m4 *, m4_symbol_value *, int *,
                                        m4_obstack *, const char *);
 extern bool            m4__next_token_is_open (m4 *);
diff --git a/m4/macro.c b/m4/macro.c
index 88ee391..f91923c 100644
--- a/m4/macro.c
+++ b/m4/macro.c
@@ -1319,23 +1319,9 @@ m4_push_arg (m4 *context, m4_obstack *obs, m4_macro_args *argv,
        return;
     }
   /* TODO handle builtin tokens?  */
-  if (value->type == M4_SYMBOL_TEXT)
-    {
-      if (m4__push_symbol (context, value, context->expansion_level - 1))
-       arg_mark (argv);
-    }
-  else if (value->type == M4_SYMBOL_COMP)
-    {
-      /* TODO - really handle composites; for now, just flatten the
-        composite and push its text.  */
-      m4__symbol_chain *chain = value->u.u_c.chain;
-      while (chain)
-       {
-         assert (chain->type == M4__CHAIN_STR);
-         obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
-         chain = chain->next;
-       }
-    }
+  if (m4__push_symbol (context, value, context->expansion_level - 1,
+                      argv->inuse))
+    arg_mark (argv);
 }
 
 /* Push series of comma-separated arguments from ARGV, which should
@@ -1347,7 +1333,6 @@ m4_push_args (m4 *context, m4_obstack *obs, m4_macro_args *argv, bool skip,
              bool quote)
 {
   m4_symbol_value *value;
-  m4__symbol_chain *chain;
   unsigned int i = skip ? 2 : 1;
   const char *sep = ",";
   size_t sep_len = 1;
@@ -1389,21 +1374,8 @@ m4_push_args (m4 *context, m4_obstack *obs, m4_macro_args *argv, bool skip,
       else
        use_sep = true;
       /* TODO handle builtin tokens?  */
-      if (value->type == M4_SYMBOL_TEXT)
-       inuse |= m4__push_symbol (context, value,
-                                 context->expansion_level - 1);
-      else
-       {
-         /* TODO handle composite text.  */
-         assert (value->type == M4_SYMBOL_COMP);
-         chain = value->u.u_c.chain;
-         while (chain)
-           {
-             assert (chain->type == M4__CHAIN_STR);
-             obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
-             chain = chain->next;
-           }
-       }
+      inuse |= m4__push_symbol (context, value,
+                               context->expansion_level - 1, inuse);
     }
   if (quote)
     obstack_grow (obs, quotes->str2, quotes->len2);


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]