m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, argv_ref, updated. branch-cvs-rea


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, argv_ref, updated. branch-cvs-readonly-53-g4504da8
Date: Tue, 15 Jul 2008 15:37:15 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=4504da8cb8c5006044634a7b6a4df7c90487ec16

The branch, argv_ref has been updated
       via  4504da8cb8c5006044634a7b6a4df7c90487ec16 (commit)
      from  ffac5851f2b3a287e0308502e79532432553bcf0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 4504da8cb8c5006044634a7b6a4df7c90487ec16
Author: Eric Blake <address@hidden>
Date:   Tue Jul 15 09:33:43 2008 -0600

    Stage33: make appending to existing macros linear

-----------------------------------------------------------------------

Summary of changes:
 doc/m4.texinfo |   12 ++++++
 src/builtin.c  |  120 +++++++++++++++++++++++++++++++++++++++++---------------
 src/m4.h       |    9 ++++
 src/macro.c    |   89 +++++++++++++++++++++++++++++------------
 4 files changed, 172 insertions(+), 58 deletions(-)

diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 755725d..7b34589 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -8116,6 +8116,18 @@ include(`loop.m4')dnl
 @result{}10000
 @end example
 
address@hidden appending to macro definition
+
address@hidden examples
address@hidden options: -Dlimit=50000
address@hidden
+$ @kbd {m4 -I examples -Dlimit=50000}
+define(`next', `.')define(`debug', `divert`'len(var)')
address@hidden
+include(`append.m4')dnl
address@hidden
address@hidden example
+
 @end ignore
 
 @node Improved m4wrap
diff --git a/src/builtin.c b/src/builtin.c
index 4e29d00..6c7526a 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -424,6 +424,47 @@ set_macro_sequence (const char *regexp)
   macro_sequence_inuse = true;
 }
 
+/*----------------------------------------------------------------.
+| Check the contents DEFN of length LEN, of the new definition of |
+| macro NAME of length NAME_LEN, for any match of                 |
+| --warn-macro-sequence.                                          |
+`----------------------------------------------------------------*/
+static void
+check_macro_sequence (const char *name, size_t name_len, const char *defn,
+                     size_t len)
+{
+  if (macro_sequence_inuse && len)
+    {
+      regoff_t offset = 0;
+      struct re_registers *regs = &macro_sequence_regs;
+
+      while (offset < len
+            && (offset = re_search (&macro_sequence_buf, defn, len, offset,
+                                    len - offset, regs)) >= 0)
+       {
+         /* Skip empty matches.  */
+         if (regs->start[0] == regs->end[0])
+           offset++;
+         else
+           {
+             offset = regs->end[0];
+             /* Safe to use slot 1 since we don't pass a macro name
+                to m4_warn.  */
+             m4_warn (0, NULL, _("definition of %s contains sequence %s"),
+                      quotearg_style_mem (locale_quoting_style, name,
+                                          name_len),
+                      quotearg_n_style_mem (1, locale_quoting_style,
+                                            defn + regs->start[0],
+                                            regs->end[0] - regs->start[0]));
+           }
+       }
+      if (offset == -2)
+       m4_warn (0, NULL,
+                _("problem checking --warn-macro-sequence for macro %s"),
+                quotearg_style_mem (locale_quoting_style, name, name_len));
+    }
+}
+
 /*------------------------------------------------------.
 | Free dynamic memory utilized by regular expressions.  |
 `------------------------------------------------------*/
@@ -469,40 +510,10 @@ define_user_macro (const char *name, size_t name_len, 
const char *text,
   SYMBOL_TYPE (s) = TOKEN_TEXT;
   SYMBOL_TEXT (s) = defn;
   SYMBOL_TEXT_LEN (s) = len;
+  SYMBOL_TEXT_ALLOCATED (s) = len;
   SYMBOL_TEXT_QUOTE_AGE (s) = quote_age;
   SYMBOL_MACRO_ARGS (s) = true;
-
-  /* Implement --warn-macro-sequence.  */
-  if (macro_sequence_inuse && text)
-    {
-      regoff_t offset = 0;
-      struct re_registers *regs = &macro_sequence_regs;
-
-      while (offset < len
-            && (offset = re_search (&macro_sequence_buf, defn, len, offset,
-                                    len - offset, regs)) >= 0)
-       {
-         /* Skip empty matches.  */
-         if (regs->start[0] == regs->end[0])
-           offset++;
-         else
-           {
-             offset = regs->end[0];
-             /* Safe to use slot 1 since we don't pass a macro name
-                to m4_warn.  */
-             m4_warn (0, NULL, _("definition of %s contains sequence %s"),
-                      quotearg_style_mem (locale_quoting_style, name,
-                                          name_len),
-                      quotearg_n_style_mem (1, locale_quoting_style,
-                                            defn + regs->start[0],
-                                            regs->end[0] - regs->start[0]));
-           }
-       }
-      if (offset == -2)
-       m4_warn (0, NULL,
-                _("problem checking --warn-macro-sequence for macro %s"),
-                quotearg_style_mem (locale_quoting_style, name, name_len));
-    }
+  check_macro_sequence (name, name_len, defn, len);
 }
 
 /*-----------------------------------------------.
@@ -727,6 +738,51 @@ define_macro (int argc, macro_arguments *argv, 
symbol_lookup mode)
 static void
 m4_define (struct obstack *obs, int argc, macro_arguments *argv)
 {
+  /* Special case if we detect that the user is appending to an
+     existing text macro: try to copy only the suffix into the
+     over-allocated slop, to avoid quadratic behavior of always
+     copying the prefix on each append.
+
+     Pending expansions of the existing macro must be exactly 1 (the
+     macro's existing definition is in use as the prefix of the
+     argument); if pending expansions is zero, we are not appending to
+     the current macro, and if it is larger than 1, then either the
+     macro's existing definition is in use elsewhere and must not be
+     invalidated, or we are at least doubling the size of the macro's
+     defintion which means the suffix wouldn't fit in the
+     pre-allocated slop anyway.  */
+  if (argc > 2 && arg_type (argv, 1) == TOKEN_TEXT
+      && arg_type (argv, 2) == TOKEN_TEXT)
+    {
+      const char *name = ARG (1);
+      size_t name_len = ARG_LEN (1);
+      symbol *s = lookup_symbol (name, name_len, SYMBOL_LOOKUP);
+      const char *tail;
+      if (s && SYMBOL_TYPE (s) == TOKEN_TEXT
+         && SYMBOL_PENDING_EXPANSIONS (s) == 1
+         && (tail = arg_has_prefix (argv, 2, SYMBOL_TEXT (s),
+                                    SYMBOL_TEXT_LEN (s))))
+       {
+         size_t len = ARG_LEN (2);
+         if (SYMBOL_TEXT_ALLOCATED (s) < len)
+           {
+             size_t new_len = len * 2;
+             char *old_defn = SYMBOL_TEXT (s);
+             if (new_len < len)
+               new_len = len;
+             SYMBOL_TEXT (s) = xcharalloc (new_len);
+             memcpy (SYMBOL_TEXT (s), old_defn, SYMBOL_TEXT_LEN (s));
+             free (old_defn);
+             SYMBOL_TEXT_ALLOCATED (s) = new_len;
+           }
+         memcpy (SYMBOL_TEXT (s) + SYMBOL_TEXT_LEN (s), tail,
+                 len - SYMBOL_TEXT_LEN (s));
+         SYMBOL_TEXT_LEN (s) = len;
+         SYMBOL_TEXT_QUOTE_AGE (s) = arg_quote_age (argv);
+         check_macro_sequence (name, name_len, SYMBOL_TEXT (s), len);
+         return;
+       }
+    }
   define_macro (argc, argv, SYMBOL_INSERT);
 }
 
diff --git a/src/m4.h b/src/m4.h
index 3305fc2..69234ca 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -431,6 +431,12 @@ struct symbol
   char *name;
   size_t len;
   token_data data;  /* Type should be only TOKEN_TEXT or TOKEN_FUNC.  */
+  /* In order to make appending an amortized O(n) rather than O(n^2),
+     we must geometrically over-allocate and modify only the suffix
+     when possible, rather than reallocating on every insertion.  This
+     is only valid for TOKEN_TEXT, and is at least as large as
+     SYMBOL_TEXT_LEN.  */
+  size_t allocated;
 };
 
 #define SYMBOL_NEXT(S)         ((S)->next)
@@ -447,6 +453,7 @@ struct symbol
 /* Only safe when SYMBOL_TYPE(S) == TOKEN_TEXT:  */
 #define SYMBOL_TEXT(S)                 (TOKEN_DATA_TEXT (&(S)->data))
 #define SYMBOL_TEXT_LEN(S)             (TOKEN_DATA_LEN (&(S)->data))
+#define SYMBOL_TEXT_ALLOCATED(S)       ((S)->allocated)
 #define SYMBOL_TEXT_QUOTE_AGE(S)       (TOKEN_DATA_QUOTE_AGE (&(S)->data))
 
 /* Only safe when SYMBOL_TYPE(S) == TOKEN_FUNC:  */
@@ -479,6 +486,8 @@ unsigned int arg_quote_age (macro_arguments *);
 token_data_type arg_type (macro_arguments *, unsigned int);
 const char *arg_text (macro_arguments *, unsigned int, bool);
 bool arg_equal (macro_arguments *, unsigned int, unsigned int);
+const char *arg_has_prefix (macro_arguments *, unsigned int, const char *,
+                           size_t);
 bool arg_empty (macro_arguments *, unsigned int);
 size_t arg_len (macro_arguments *, unsigned int, bool);
 builtin_func *arg_func (macro_arguments *, unsigned int);
diff --git a/src/macro.c b/src/macro.c
index 4d16f76..00d4e17 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -912,6 +912,37 @@ arg_type (macro_arguments *argv, unsigned int arg)
   return type;
 }
 
+/* Given a CHAIN, convert its links into text on OBS.  If FLATTEN,
+   builtins are ignored.  */
+static void
+collect_chain (struct obstack *obs, token_chain *chain, bool flatten)
+{
+  while (chain)
+    {
+      switch (chain->type)
+       {
+       case CHAIN_STR:
+         obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+         break;
+       case CHAIN_FUNC:
+         if (flatten)
+           break;
+         assert (!"flatten_chain");
+         abort ();
+       case CHAIN_ARGV:
+         assert (!chain->u.u_a.has_func || flatten);
+         arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
+                    quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes),
+                    flatten || chain->u.u_a.flatten, NULL, NULL, NULL, false);
+         break;
+       default:
+         assert (!"flatten_chain");
+         abort ();
+       }
+      chain = chain->next;
+    }
+}
+
 /* Given ARGV, return the text at argument ARG.  Abort if the argument
    is not text.  Arg 0 is always text, and indices beyond argc return
    the empty string.  If FLATTEN, builtins are ignored.  The result is
@@ -939,32 +970,7 @@ arg_text (macro_arguments *argv, unsigned int arg, bool 
flatten)
     case TOKEN_COMP:
       chain = token->u.u_c.chain;
       obs = arg_scratch ();
-      while (chain)
-       {
-         switch (chain->type)
-           {
-           case CHAIN_STR:
-             obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
-             break;
-           case CHAIN_FUNC:
-             if (flatten)
-               break;
-             assert (!"arg_text");
-             abort ();
-           case CHAIN_ARGV:
-             assert (!chain->u.u_a.has_func || flatten || argv->flatten);
-             arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
-                        quote_cache (NULL, chain->quote_age,
-                                     chain->u.u_a.quotes),
-                        flatten || argv->flatten || chain->u.u_a.flatten,
-                        NULL, NULL, NULL, false);
-             break;
-           default:
-             assert (!"arg_text");
-             abort ();
-           }
-         chain = chain->next;
-       }
+      collect_chain (obs, chain, flatten || argv->flatten);
       obstack_1grow (obs, '\0');
       return (char *) obstack_finish (obs);
     case TOKEN_FUNC:
@@ -1124,6 +1130,37 @@ arg_equal (macro_arguments *argv, unsigned int indexa, 
unsigned int indexb)
   return ca == cb;
 }
 
+/* Given index ARG within ARGV, if the argument is determined to be
+   appending text onto the existing definition DEFN of length LEN,
+   then return only the text occurring after LEN.  Otherwise, return
+   NULL.  This is useful for optimizing the builtin define, making
+   appending O(n) rather than O(n^2).  */
+const char *
+arg_has_prefix (macro_arguments *argv, unsigned int arg, const char *defn,
+               size_t len)
+{
+  token_data *token;
+  token_chain *chain;
+  struct obstack *obs; /* Scratch space; cleaned at end of macro_expand.  */
+
+  if (arg == 0 || arg >= argv->argc)
+    return NULL;
+  token = arg_token (argv, arg, NULL, false);
+  if (TOKEN_DATA_TYPE (token) != TOKEN_COMP
+      || (!argv->flatten && token->u.u_c.has_func))
+    return NULL;
+  chain = token->u.u_c.chain;
+  assert (chain);
+  if (chain->type == CHAIN_STR && chain->u.u_s.str == defn
+      && chain->u.u_s.len == len)
+    {
+      obs = arg_scratch ();
+      collect_chain (obs, chain->next, argv->flatten);
+      return (char *) obstack_finish (obs);
+    }
+  return NULL;
+}
+
 /* Given ARGV, return true if argument ARG is the empty string.  This
    gives the same result as comparing arg_len against 0, but is often
    faster.  */


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]