m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] GNU M4 source repository branch, master, updated. cvs-readonly-148


From: Eric Blake
Subject: [SCM] GNU M4 source repository branch, master, updated. cvs-readonly-148-gaefa7ad
Date: Mon, 04 Aug 2008 00:45:52 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU M4 source repository".

http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=aefa7adecf8d255496f5d06ffa35491c86793d47

The branch, master has been updated
       via  aefa7adecf8d255496f5d06ffa35491c86793d47 (commit)
      from  0db7c1504d4bec63381e1bd6385d8a8ded8f309b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit aefa7adecf8d255496f5d06ffa35491c86793d47
Author: Eric Blake <address@hidden>
Date:   Sun Aug 3 17:51:31 2008 -0600

    Fix regression in commenting unbalanced quotes, from 2008-02-16.
    
    * m4/m4private.h (m4__token_type): Add M4_TOKEN_COMMENT.
    * m4/input.c (m4__next_token, m4_print_token): Supply new token
    type for comments.
    * m4/macro.c (expand_token): Penalize comments, as they can
    contain unbalanced quotes; latent bug since 2007-12-07, exposed by
    passing $@ references built from comments.
    (expand_argument): Adjust caller.
    * tests/others.at (Comments): Enhance test to catch it.
    * NEWS: Mention the fix.
    
    Signed-off-by: Eric Blake <address@hidden>

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog       |   13 +++++++++++++
 NEWS            |   14 +++++++-------
 m4/input.c      |   12 ++++++++----
 m4/m4private.h  |    4 ++--
 m4/macro.c      |   49 ++++++++++++++++++++++++++++++++++---------------
 tests/others.at |   30 ++++++++++++++++++++++++++++--
 6 files changed, 92 insertions(+), 30 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 55843b8..c08f6f1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2008-08-03  Eric Blake  <address@hidden>
+
+       Fix regression in commenting unbalanced quotes, from 2008-02-16.
+       * m4/m4private.h (m4__token_type): Add M4_TOKEN_COMMENT.
+       * m4/input.c (m4__next_token, m4_print_token): Supply new token
+       type for comments.
+       * m4/macro.c (expand_token): Penalize comments, as they can
+       contain unbalanced quotes; latent bug since 2007-12-07, exposed by
+       passing $@ references built from comments.
+       (expand_argument): Adjust caller.
+       * tests/others.at (Comments): Enhance test to catch it.
+       * NEWS: Mention the fix.
+
 2008-07-30  Eric Blake  <address@hidden>
 
        Fix regression in trace output, introduced 2008-05-08.
diff --git a/NEWS b/NEWS
index 9aa47db..562bc7c 100644
--- a/NEWS
+++ b/NEWS
@@ -222,13 +222,13 @@ promoted to 2.0.
    a macro.  This was most noticeable with `traceon(`traceon')', but
    would also happen in cases such as `foo(traceon(`foo'))'.
 
-** Fix regression introduced in 1.4.10b (but not present in 1.4.11) where
-   using `builtin' or `indir' to perform nested `shift' calls triggered an
-   assertion failure.
-
-** Fix regression introduced in 1.4.10b (but not present in 1.4.11) where
-   the command-line option -dV, as well as the builtin `debugmode(V)',
-   failed to enable `t' and `c' debug options.
+** Fix regressions introduced in 1.4.10b but not present in 1.4.11:
+*** Using `builtin' or `indir' to perform nested `shift' calls triggered
+    an assertion failure.
+*** The command-line option -dV, as well as the builtin `debugmode(V)',
+    failed to enable `t' and `c' debug options.
+*** Comments that contain unbalanced quotes were not rescanned correctly
+    when passed through `$@'.
 
 ** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as
    required by POSIX.  The manual mentions a way to restore the LIFO order
diff --git a/m4/input.c b/m4/input.c
index de4a175..71e48a5 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -1714,7 +1714,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
            obstack_1grow (obs_safe, ch);
          }
        type = (m4_get_discard_comments_opt (context)
-               ? M4_TOKEN_NONE : M4_TOKEN_STRING);
+               ? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
       }
     else if (!m4_is_syntax_single_comments (M4SYNTAX)
             && MATCH (context, ch, context->syntax->comm.str1,
@@ -1754,7 +1754,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
            obstack_1grow (obs_safe, ch);
          }
        type = (m4_get_discard_comments_opt (context)
-               ? M4_TOKEN_NONE : M4_TOKEN_STRING);
+               ? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
       }
     else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
       {                                        /* ACTIVE CHARACTER */
@@ -1843,10 +1843,11 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
                                    m4__quote_age (M4SYNTAX));
        }
       else
-       assert (type == M4_TOKEN_STRING);
+       assert (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT);
     }
   else
-    assert (token->type == M4_SYMBOL_COMP && type == M4_TOKEN_STRING);
+    assert (token->type == M4_SYMBOL_COMP
+           && (type == M4_TOKEN_STRING || type == M4_TOKEN_COMMENT));
   VALUE_MAX_ARGS (token) = -1;
 
 #ifdef DEBUG_INPUT
@@ -1914,6 +1915,9 @@ m4_print_token (m4 *context, const char *s, m4__token_type type,
     case M4_TOKEN_STRING:
       fputs ("string\t", stderr);
       break;
+    case M4_TOKEN_COMMENT:
+      fputs ("comment\t", stderr);
+      break;
     case M4_TOKEN_SPACE:
       fputs ("space\t", stderr);
       break;
diff --git a/m4/m4private.h b/m4/m4private.h
index 1a3c0c0..603af64 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -540,8 +540,8 @@ extern const m4_string_pair *m4__quote_cache (m4_syntax_table *,
 typedef enum {
   M4_TOKEN_EOF,                /* End of file, M4_SYMBOL_VOID.  */
   M4_TOKEN_NONE,       /* Discardable token, M4_SYMBOL_VOID.  */
-  M4_TOKEN_STRING,     /* Quoted string or comment, M4_SYMBOL_TEXT or
-                          M4_SYMBOL_COMP.  */
+  M4_TOKEN_STRING,     /* Quoted string, M4_SYMBOL_TEXT or M4_SYMBOL_COMP.  */
+  M4_TOKEN_COMMENT,    /* Comment, M4_SYMBOL_TEXT or M4_SYMBOL_COMP.  */
   M4_TOKEN_SPACE,      /* Whitespace, M4_SYMBOL_TEXT.  */
   M4_TOKEN_WORD,       /* An identifier, M4_SYMBOL_TEXT.  */
   M4_TOKEN_OPEN,       /* Argument list start, M4_SYMBOL_TEXT.  */
diff --git a/m4/macro.c b/m4/macro.c
index c638023..5653576 100644
--- a/m4/macro.c
+++ b/m4/macro.c
@@ -196,7 +196,7 @@ expand_token (m4 *context, m4_obstack *obs, m4__token_type type,
              m4_symbol_value *token, int line, bool first)
 {
   m4_symbol *symbol;
-  bool result;
+  bool result = false;
   const char *text = (m4_is_symbol_value_text (token)
                      ? m4_get_symbol_value_text (token) : NULL);
 
@@ -208,14 +208,21 @@ expand_token (m4 *context, m4_obstack *obs, m4__token_type type,
       return true;
 
     case M4_TOKEN_STRING:
-      /* Tokens and comments are safe in isolation (since quote_age
-        detects any change in delimiters).  This is also returned for
-        sequences of benign characters, such as digits.  But if other
-        text is already present, multi-character delimiters could be
-        formed by concatenation, so use a conservative heuristic.  If
-        obstack was provided, the string was already expanded into it
-        during m4__next_token.  */
+      /* Strings are safe in isolation (since quote_age detects any
+        change in delimiters), or when safe_quotes is true.  This is
+        also returned for sequences of benign characters, such as
+        digits.  When safe_quotes is false, we could technically
+        return true if we can prove that the concatenation of this
+        string to prior text does not form a multi-byte quote
+        delimiter, but that is a lot of overhead, so we give the
+        conservative answer of false.  */
       result = first || m4__safe_quotes (M4SYNTAX);
+      /* fallthru */
+    case M4_TOKEN_COMMENT:
+      /* Comments can contain unbalanced quote delimiters.  Rather
+        than search for one, we return the conservative answer of
+        false.  If obstack is provided, the string or comment was
+        already expanded into it during next_token.  */
       if (obs)
        return result;
       break;
@@ -224,15 +231,24 @@ expand_token (m4 *context, m4_obstack *obs, m4__token_type type,
     case M4_TOKEN_COMMA:
     case M4_TOKEN_CLOSE:
     case M4_TOKEN_SPACE:
-      /* Conservative heuristic, thanks to multi-character delimiter
-        concatenation.  */
+      /* If safe_quotes is true, then these do not form a quote
+        delimiter.  If it is false, we give the conservative answer
+        of false rather than taking time to prove that no multi-byte
+        quote delimiter is formed.  */
       result = m4__safe_quotes (M4SYNTAX);
       break;
 
     case M4_TOKEN_SIMPLE:
-      /* No guarantees here.  */
-      assert (m4_get_symbol_value_len (token) == 1);
-      result = false;
+      /* If safe_quotes is true, then all but the single-byte end
+        quote delimiter is safe in a quoted context; a single-byte
+        start delimiter will trigger M4_TOKEN_STRING instead.  If
+        safe_quotes is false, we give the conservative answer of
+        false rather than taking time to prove that no multi-byte
+        quote delimiter is formed.  */
+      result = (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_RQUOTE)
+               && m4__safe_quotes (M4SYNTAX));
+      if (result)
+       assert (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_LQUOTE));
       break;
 
     case M4_TOKEN_WORD:
@@ -255,8 +271,10 @@ expand_token (m4 *context, m4_obstack *obs, m4__token_type type,
                && !m4__next_token_is_open (context)))
          {
            m4_divert_text (context, obs, text, len, line);
-           /* The word just output is unquoted, but we can trust the
-              heuristics of safe_quote.  */
+           /* If safe_quotes is true, then words do not overlap with
+              quote delimiters.  If it is false, we give the
+              conservative answer of false rather than prove that no
+              multi-byte delimiters are formed.  */
            return m4__safe_quotes (M4SYNTAX);
          }
        expand_macro (context, textp, len2, symbol);
@@ -363,6 +381,7 @@ expand_argument (m4 *context, m4_obstack *obs, m4_symbol_value *argp,
        case M4_TOKEN_WORD:
        case M4_TOKEN_SPACE:
        case M4_TOKEN_STRING:
+       case M4_TOKEN_COMMENT:
        case M4_TOKEN_MACDEF:
          if (!expand_token (context, obs, type, &token, line, first))
            age = 0;
diff --git a/tests/others.at b/tests/others.at
index 7928b0d..a1111c6 100644
--- a/tests/others.at
+++ b/tests/others.at
@@ -51,7 +51,9 @@ This Sentence Should Be Capitalized
 ## comments ##
 ## -------- ##
 
-AT_TEST_M4([Comments],
+AT_SETUP([Comments])
+
+AT_DATA([input.m4],
 [[# An ordinary comment
 define(`foo', # A comment in a macro
 `Macro `foo' expansion')
@@ -59,7 +61,9 @@ foo
 define(`comment', `*** Macro `comment' expansion ***')
 changecom(`@', `@')
 foo
-]],
+]])
+
+AT_CHECK_M4([input.m4], [0],
 [[# An ordinary comment
 
 # A comment in a macro
@@ -70,6 +74,28 @@ Macro foo expansion
 Macro foo expansion
 ]])
 
+dnl Detect regression in 1.4.10b in regards to reparsing comments.
+AT_DATA([input.m4],
+[[define(`e', `$@')define(`q', ``$@'')define(`foo', `bar')
+q(e(`one
+',#two ' foo
+))
+changecom(`<', `>')define(`n', `$#')
+n(e(<`>, <'>))
+len(e(<`>, ,<'>))
+]])
+
+AT_CHECK_M4([input.m4], [0],
+[[
+`one
+',`#two  bar
+''
+
+1
+12
+]])
+
+AT_CLEANUP
 
 ## --------- ##
 ## countdown ##


hooks/post-receive
--
GNU M4 source repository




reply via email to

[Prev in Thread] Current Thread [Next in Thread]