m4-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Changes to m4/m4/input.c,v


From: Eric Blake
Subject: Changes to m4/m4/input.c,v
Date: Thu, 07 Sep 2006 23:53:05 +0000

CVSROOT:        /sources/m4
Module name:    m4
Changes by:     Eric Blake <ericb>      06/09/07 23:53:04

Index: m4/input.c
===================================================================
RCS file: /sources/m4/m4/m4/input.c,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -b -r1.46 -r1.47
--- m4/input.c  5 Sep 2006 23:16:39 -0000       1.46
+++ m4/input.c  7 Sep 2006 23:53:04 -0000       1.47
@@ -28,42 +28,44 @@
 /*#define DEBUG_INPUT */
 
 /*
-   Unread input can be either files, that should be read (eg. included
-   files), strings, which should be rescanned (eg. macro expansion
-   text), single characters or quoted builtin definitions (as returned by
-   the builtin "defn").  Unread input are organised in a stack,
+   Unread input can be either files that should be read (eg. included
+   files), strings which should be rescanned (eg. macro expansion
+   text), single characters, or quoted builtin definitions (as returned by
+   the builtin "defn").  Unread input is organized in a stack,
    implemented with an obstack.  Each input source is described by a
    "struct input_block".  The obstack is "input_stack".  The top of the
    input stack is "isp".
 
    Each input_block has an associated struct input_funcs, that defines
-   functions for peeking, reading, unget and cleanup.  All input is done
-   through the functions pointers of the input_funcs of the top most
-   input_block.  When a input_block is exausted, its reader returns
-   CHAR_RETRY which causes the input_block to be popped from the
-   input_stack.
-
-   The macro "m4wrap" places the text to be saved on another input stack,
-   on the obstack "wrapup_stack", whose top is "wsp".  When EOF is seen
-   on normal input (eg, when "input_stack" is empty), input is switched
-   over to "wrapup_stack".  To make this easier, all references to the
-   current input stack, whether it be "input_stack" or "wrapup_stack",
-   are done through a pointer "current_input", which points to either
-   "input_stack" or "wrapup_stack".
+   functions for peeking, reading, unget and cleanup.  All input is
+   done through the function pointers of the input_funcs of the top
+   most input_block, and all characters are unsigned.  When a
+   input_block is exausted, its reader returns CHAR_RETRY which causes
+   the input_block to be popped from the input_stack.
+
+   The macro "m4wrap" places the text to be saved on another input
+   stack, on the obstack "wrapup_stack", whose top is "wsp".  When EOF
+   is seen on normal input (eg, when "current_input" is empty), input is
+   switched over to "wrapup_stack", and the original "current_input" is
+   freed.  A new stack is allocated for "wrapup_stack", which will
+   accept any text produced by calls to "m4wrap" from within the
+   wrapped text.  This process of shuffling "wrapup_stack" to
+   "current_input" can continue indefinitely, even generating infinite
+   loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
 
    Pushing new input on the input stack is done by m4_push_file (),
    m4_push_string (), m4_push_single () or m4_push_wrapup () (for wrapup
    text), and m4_push_builtin () (for builtin definitions).  Because
    macro expansion needs direct access to the current input obstack (for
-   optimisation), m4_push_string () are split in two functions,
+   optimization), m4_push_string () is split in two functions,
    push_string_init (), which returns a pointer to the current input
-   stack, and push_string_finish (), which return a pointer to the final
+   stack, and push_string_finish (), which returns a pointer to the final
    text.  The input_block *next is used to manage the coordination
    between the different push routines.
 
-   The current file and line number are stored in two global variables,
+   The current file and line number are stored in the context,
    for use by the error handling functions in m4.c.  Whenever a file
-   input_block is pushed, the current file name and line number is saved
+   input_block is pushed, the current file name and line number are saved
    in the input_block, and the two variables are reset to match the new
    input file.  */
 
@@ -74,8 +76,10 @@
 static void  init_builtin_token        (m4 *context, m4_symbol_value *token);
 static int   builtin_peek              (void);
 static int   builtin_read              (m4 *);
-static int   match_input               (m4 *context, const unsigned char *s);
+static bool  match_input               (m4 *context, const unsigned char *s,
+                                        bool);
 static int   next_char                 (m4 *context);
+static int   peek_char                 (m4 *context);
 static void  pop_input                 (m4 *context);
 static int   single_peek               (void);
 static int   single_read               (m4 *);
@@ -113,20 +117,22 @@
       struct
        {
          FILE *file;           /* input file handle */
+         bool end;             /* true iff peek returned EOF */
+         bool close;           /* true if file should be closed on EOF */
          const char *name;     /* name of PREVIOUS input file */
-         int lineno;           /* current line number for do */
-         /* Yet another attack of "The curse of global variables" (sigh) */
-         int out_lineno;       /* current output line number do */
+         int lineno;           /* current line of previous file */
+         int out_lineno;       /* current output line of previous file */
          bool advance_line;    /* start_of_input_line from next_char () */
        }
       u_f;
       struct
        {
-         m4_builtin_func *func;  /* pointer to builtins function. */
+         m4_builtin_func *func;  /* pointer to builtin's function. */
          lt_dlhandle handle;     /* originating module. */
          int flags;              /* flags associated with the builtin. */
          m4_hash *arg_signature; /* argument signature for builtin.  */
-         int min_args, max_args; /* argv maxima and minima for the builtin. */
+         unsigned int min_args;  /* argv minima for the builtin. */
+         unsigned int max_args;  /* argv maxima for the builtin. */
          bool traced;    /* true iff builtin is traced. */
          bool read;              /* true iff block has been read. */
        }
@@ -141,13 +147,14 @@
 /* Obstack for storing individual tokens.  */
 static m4_obstack token_stack;
 
-/* Normal input stack.  */
-static m4_obstack input_stack;
+/* Wrapup input stack.
 
-/* Wrapup input stack.  */
-static m4_obstack wrapup_stack;
+   FIXME - m4wrap should be FIFO, which implies a queue, not a stack.
+   While fixing this, m4wrap should also remember what the current
+   file and line are for each chunk of wrapped text.  */
+static m4_obstack *wrapup_stack;
 
-/* Input or wrapup.  */
+/* Current stack, from input or wrapup.  */
 static m4_obstack *current_input;
 
 /* Bottom of token_stack, for obstack_free.  */
@@ -168,15 +175,6 @@
 
 
 
-/* m4_push_file () pushes an input file on the input stack, saving the
-  current file name and line number.  If next is non-NULL, this push
-  invalidates a call to m4_push_string_init (), whose storage are
-  consequentely released.
-
-  file_read () manages line numbers for error messages, so they do not
-  get wrong, due to lookahead.  The token consisting of a newline
-  alone is taken as belonging to the line it ends, and the current
-  line number is not incremented until the next character is read.  */
 static int
 file_peek (void)
 {
@@ -184,7 +182,10 @@
 
   ch = getc (isp->u.u_f.file);
   if (ch == EOF)
+    {
+      isp->u.u_f.end = true;
     return CHAR_RETRY;
+    }
 
   ungetc (ch, isp->u.u_f.file);
   return ch;
@@ -201,7 +202,9 @@
       m4_set_current_line (context, m4_get_current_line (context) + 1);
     }
 
-  ch = getc (isp->u.u_f.file);
+  /* If stdin is a terminal, calling getc after peek_input already
+     called it would make the user have to hit ^D twice to quit.  */
+  ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.file);
   if (ch == EOF)
     return CHAR_RETRY;
 
@@ -211,10 +214,10 @@
 }
 
 static void
-file_unget (ch)
-     int ch;
+file_unget (int ch)
 {
   ungetc (ch, isp->u.u_f.file);
+  isp->u.u_f.end = false;
   if (ch == '\n')
     start_of_input_line = false;
 }
@@ -229,7 +232,15 @@
   else
     m4_debug_message (context, M4_DEBUG_TRACE_INPUT, _("input exhausted"));
 
+  if (ferror (isp->u.u_f.file))
+    {
+      m4_error (context, 0, 0, _("error reading file `%s'"),
+               m4_get_current_file (context));
   fclose (isp->u.u_f.file);
+    }
+  else if (isp->u.u_f.close && fclose (isp->u.u_f.file) == EOF)
+    m4_error (context, 0, errno, _("error reading file `%s'"),
+             m4_get_current_file (context));
   m4_set_current_file (context, isp->u.u_f.name);
   m4_set_current_line (context, isp->u.u_f.lineno);
   m4_output_current_line = isp->u.u_f.out_lineno;
@@ -242,8 +253,18 @@
   file_peek, file_read, file_unget, file_clean
 };
 
+/* m4_push_file () pushes an input file FP with name TITLE on the
+  input stack, saving the current file name and line number.  If next
+  is non-NULL, this push invalidates a call to m4_push_string_init (),
+  whose storage is consequently released.  If CLOSE, then close FP at
+  end of file.
+
+  file_read () manages line numbers for error messages, so they do not
+  get wrong due to lookahead.  The token consisting of a newline
+  alone is taken as belonging to the line it ends, and the current
+  line number is not incremented until the next character is read.  */
 void
-m4_push_file (m4 *context, FILE *fp, const char *title)
+m4_push_file (m4 *context, FILE *fp, const char *title, bool close)
 {
   input_block *i;
 
@@ -261,6 +282,7 @@
   i->funcs = &file_funcs;
 
   i->u.u_f.file = fp;
+  i->u.u_f.end = false;
   i->u.u_f.name = m4_get_current_file (context);
   i->u.u_f.lineno = m4_get_current_line (context);
   i->u.u_f.out_lineno = m4_output_current_line;
@@ -275,9 +297,6 @@
   isp = i;
 }
 
-/* m4_push_builtin () pushes a builtins definition on the input stack.  If
-   next is non-NULL, this push invalidates a call to m4_push_string_init (),
-   whose storage are consequentely released.  */
 static int
 builtin_peek (void)
 {
@@ -301,6 +320,10 @@
   builtin_peek, builtin_read, NULL, NULL
 };
 
+/* m4_push_builtin () pushes TOKEN, which contains a builtin's
+   definition, on the input stack.  If next is non-NULL, this push
+   invalidates a call to m4_push_string_init (), whose storage is
+   consequently released.  */
 void
 m4_push_builtin (m4_symbol_value *token)
 {
@@ -331,7 +354,6 @@
   isp = i;
 }
 
-/* Push a single character on to the input stack.  */
 static int
 single_peek (void)
 {
@@ -353,6 +375,7 @@
   single_peek, single_read, NULL, NULL
 };
 
+/* Push a single character CH on to the input stack.  */
 void
 m4_push_single (int ch)
 {
@@ -375,12 +398,10 @@
   isp = i;
 }
 
-/* First half of m4_push_string ().  The pointer next points to the new
-   input_block.  */
 static int
 string_peek (void)
 {
-  int ch = *isp->u.u_s.current;
+  int ch = (unsigned char) *isp->u.u_s.current;
 
   return (ch == '\0') ? CHAR_RETRY : ch;
 }
@@ -388,7 +409,7 @@
 static int
 string_read (m4 *context M4_GNUC_UNUSED)
 {
-  int ch = *isp->u.u_s.current++;
+  int ch = (unsigned char) *isp->u.u_s.current++;
 
   return (ch == '\0') ? CHAR_RETRY : ch;
 
@@ -400,13 +421,15 @@
   if (isp->u.u_s.current > isp->u.u_s.start)
     *--isp->u.u_s.current = ch;
   else
-    m4_push_single(ch);
+    m4_push_single (ch);
 }
 
 static struct input_funcs string_funcs = {
   string_peek, string_read, string_unget, NULL
 };
 
+/* First half of m4_push_string ().  The pointer next points to the new
+   input_block.  */
 m4_obstack *
 m4_push_string_init (m4 *context)
 {
@@ -459,17 +482,19 @@
    the input stack, and m4_push_string () and m4_push_file () will
    operate on wrapup_stack.  M4_push_wrapup should be done as
    m4_push_string (), but this will suffice, as long as arguments to
-   m4_m4wrap () are moderate in size.  */
+   m4_m4wrap () are moderate in size.
+
+   FIXME - we should allow pushing builtins as well as text.  */
 void
 m4_push_wrapup (const char *s)
 {
-  input_block *i = (input_block *) obstack_alloc (&wrapup_stack,
+  input_block *i = (input_block *) obstack_alloc (wrapup_stack,
                                                  sizeof (struct input_block));
   i->prev = wsp;
 
   i->funcs = &string_funcs;
 
-  i->u.u_s.start = obstack_copy0 (&wrapup_stack, s, strlen (s));
+  i->u.u_s.start = obstack_copy0 (wrapup_stack, s, strlen (s));
   i->u.u_s.current = i->u.u_s.start;
 
   wsp = i;
@@ -478,7 +503,7 @@
 
 /* The function pop_input () pops one level of input sources.  If the
    popped input_block is a file, current_file and current_line are
-   reset to the saved values before the memory for the input_block are
+   reset to the saved values before the memory for the input_block is
    released.  */
 static void
 pop_input (m4 *context)
@@ -488,22 +513,37 @@
   if (isp->funcs->clean_func != NULL)
     (*isp->funcs->clean_func) (context);
 
+  if (tmp != NULL)
+    {
   obstack_free (current_input, isp);
   next = NULL;                 /* might be set in m4_push_string_init () */
+    }
 
   isp = tmp;
 }
 
-/* To switch input over to the wrapup stack, main () calls pop_wrapup
+/* To switch input over to the wrapup stack, main () calls pop_wrapup.
    Since wrapup text can install new wrapup text, pop_wrapup () returns
    false when there is no wrapup text on the stack, and true otherwise.  */
 bool
 m4_pop_wrapup (void)
 {
+  next = NULL;
+  obstack_free (current_input, NULL);
+  free (current_input);
+
   if (wsp == NULL)
+    {
+      obstack_free (wrapup_stack, NULL);
+      current_input = NULL;
+      DELETE (wrapup_stack);
     return false;
+    }
+
+  current_input = wrapup_stack;
+  wrapup_stack = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+  obstack_init (wrapup_stack);
 
-  current_input = &wrapup_stack;
   isp = wsp;
   wsp = NULL;
 
@@ -524,7 +564,7 @@
   m4_set_symbol_value_func (token, isp->u.u_b.func);
   VALUE_HANDLE (token)         = isp->u.u_b.handle;
   VALUE_FLAGS (token)          = isp->u.u_b.flags;
-  VALUE_ARG_SIGNATURE(token)   = isp->u.u_b.arg_signature;
+  VALUE_ARG_SIGNATURE (token)  = isp->u.u_b.arg_signature;
   VALUE_MIN_ARGS (token)       = isp->u.u_b.min_args;
   VALUE_MAX_ARGS (token)       = isp->u.u_b.max_args;
 }
@@ -549,7 +589,7 @@
        {
          while ((ch = f (context)) != CHAR_RETRY)
            {
-             /* if (!IS_IGNORE(ch)) */
+             /* if (!IS_IGNORE (ch)) */
                return ch;
            }
        }
@@ -564,11 +604,11 @@
     }
 }
 
-/* The function m4_peek_input () is used to look at the next character in
+/* The function peek_char () is used to look at the next character in
    the input stream.  At any given time, it reads from the input_block
    on the top of the current input stack.  */
-int
-m4_peek_input (m4 *context)
+static int
+peek_char (m4 *context)
 {
   int ch;
   int (*f) (void);
@@ -581,97 +621,121 @@
       f = isp->funcs->peek_func;
       if (f != NULL)
        {
-         if ((ch = (*f)()) != CHAR_RETRY)
+         if ((ch = f ()) != CHAR_RETRY)
            {
-             return /* (IS_IGNORE(ch)) ? next_char () : */ ch;
+             return /* (IS_IGNORE (ch)) ? next_char () : */ ch;
            }
        }
       else
        {
-         assert (!"INTERNAL ERROR: input stack botch in m4_peek_input ()");
+         assert (!"INTERNAL ERROR: input stack botch in peek_char ()");
          abort ();
        }
 
-      /* End of input source --- pop one level.  */
+      /* End of current input source --- pop one level if another
+        level of input still exists.  */
+      if (isp->prev != NULL)
       pop_input (context);
+      else
+       return CHAR_EOF;
     }
 }
 
 /* The function unget_input () puts back a character on the input
-   stack, using an existing input_block if possible.  */
+   stack, using an existing input_block if possible.  This is not safe
+   to call more than once without an intervening next_char.  */
 static void
 unget_input (int ch)
 {
   if (isp != NULL && isp->funcs->unget_func != NULL)
-    (*isp->funcs->unget_func)(ch);
+    isp->funcs->unget_func (ch);
   else
-    m4_push_single(ch);
+    m4_push_single (ch);
 }
 
-/* skip_line () simply discards all immediately following characters, upto
+/* skip_line () simply discards all immediately following characters, up to
    the first newline.  It is only used from m4_dnl ().  */
 void
 m4_skip_line (m4 *context)
 {
   int ch;
+  const char *file = m4_get_current_file (context);
+  int line = m4_get_current_line (context);
 
   while ((ch = next_char (context)) != CHAR_EOF && ch != '\n')
     ;
+  if (ch == CHAR_EOF)
+    /* current_file changed; use the previous value we cached.  */
+    m4_warn_at_line (context, 0, file, line,
+                    _("end of file treated as newline"));
 }
 
 
 
 /* This function is for matching a string against a prefix of the
-   input stream.  If the string matches the input, the input is
-   discarded, otherwise the characters read are pushed back again.
-   The function is used only when multicharacter quotes or comment
-   delimiters are used.
+   input stream.  If the string S matches the input and CONSUME is
+   true, the input is discarded; otherwise any characters read are
+   pushed back again.  The function is used only when multicharacter
+   quotes or comment delimiters are used.
 
    All strings herein should be unsigned.  Otherwise sign-extension
    of individual chars might break quotes with 8-bit chars in it.  */
-static int
-match_input (m4 *context, const unsigned char *s)
+static bool
+match_input (m4 *context, const unsigned char *s, bool consume)
 {
   int n;                       /* number of characters matched */
   int ch;                      /* input character */
   const unsigned char *t;
   m4_obstack *st;
+  bool result = false;
 
-  ch = m4_peek_input (context);
+  ch = peek_char (context);
   if (ch != *s)
-    return 0;                  /* fail */
-  (void) next_char (context);
+    return false;                      /* fail */
 
   if (s[1] == '\0')
-    return 1;                  /* short match */
+    {
+      if (consume)
+       next_char (context);
+      return true;                     /* short match */
+    }
 
-  for (n = 1, t = s++; (ch = m4_peek_input (context)) == *s++; n++)
+  next_char (context);
+  for (n = 1, t = s++; (ch = peek_char (context)) == *s++; )
     {
-      (void) next_char (context);
+      next_char (context);
+      n++;
       if (*s == '\0')          /* long match */
-       return 1;
+       {
+         if (consume)
+           return true;
+         result = true;
+         break;
+       }
     }
 
-  /* Failed, push back input.  */
+  /* Failed or shouldn't consume, push back input.  */
   st = m4_push_string_init (context);
   obstack_grow (st, t, n);
   m4_push_string_finish ();
-  return 0;
+  return result;
 }
 
-/* The macro MATCH() is used to match a string against the input.  The
-  first character is handled inline, for speed.  Hopefully, this will not
-  hurt efficiency too much when single character quotes and comment
-  delimiters are used.  */
-#define MATCH(C, ch, s) \
+/* The macro MATCH() is used to match an unsigned char string S
+  against the input.  The first character is handled inline, for
+  speed.  Hopefully, this will not hurt efficiency too much when
+  single character quotes and comment delimiters are used.  If
+  CONSUME, then CH is the result of next_char, and a successful match
+  will discard the matched string.  Otherwise, CH is the result of
+  peek_char, and the input stream is effectively unchanged.  */
+#define MATCH(C, ch, s, consume)                                       \
   ((s)[0] == (ch) \
    && (ch) != '\0' \
-   && ((s)[1] == '\0' \
-       || (match_input ((C), (s) + 1) ? (ch) = m4_peek_input (C), 1 : 0)))
+   && ((s)[1] == '\0' || (match_input (C, (s) + (consume), consume))))
 
 
 
-/* Inititialise input stacks, and quote/comment characters.  */
+/* Inititialize input stacks, and quote/comment characters.  */
 void
 m4_input_init (m4 *context)
 {
@@ -681,10 +745,11 @@
   m4_set_current_line (context, 0);
 
   obstack_init (&token_stack);
-  obstack_init (&input_stack);
-  obstack_init (&wrapup_stack);
 
-  current_input = &input_stack;
+  current_input = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+  obstack_init (current_input);
+  wrapup_stack = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+  obstack_init (wrapup_stack);
 
   obstack_1grow (&token_stack, '\0');
   token_bottom = obstack_finish (&token_stack);
@@ -699,9 +764,8 @@
 void
 m4_input_exit (void)
 {
-  obstack_free (&wrapup_stack, NULL);
-  obstack_free (&input_stack, NULL);
-  obstack_free (&token_stack, NULL);
+  assert (current_input == NULL);
+  assert (wrapup_stack == NULL);
 }
 
 
@@ -713,9 +777,9 @@
    that is not a part of any of the previous types.
 
    M4__next_token () returns the token type, and passes back a pointer to
-   the token data through VALUE.  The token text is collected on the obstack
+   the token data through TOKEN.  The token text is collected on the obstack
    token_stack, which never contains more than one token text at a time.
-   The storage pointed to by the fields in VALUE is therefore subject to
+   The storage pointed to by the fields in TOKEN is therefore subject to
    change the next time m4__next_token () is called.  */
 m4__token_type
 m4__next_token (m4 *context, m4_symbol_value *token)
@@ -725,30 +789,39 @@
   m4__token_type type;
 
   do {
+    const char *file = m4_get_current_file (context);
+    int line = m4_get_current_line (context);
+
     obstack_free (&token_stack, token_bottom);
     obstack_1grow (&token_stack, '\0');
     token_bottom = obstack_finish (&token_stack);
 
-    ch = m4_peek_input (context);
+    /* Must consume an input character, but not until CHAR_BUILTIN is
+       handled.  */
+    ch = peek_char (context);
     if (ch == CHAR_EOF)                        /* EOF */
       {
 #ifdef DEBUG_INPUT
        fprintf (stderr, "next_token -> EOF\n");
 #endif
+       next_char (context);
        return M4_TOKEN_EOF;
       }
 
     if (ch == CHAR_BUILTIN)            /* BUILTIN TOKEN */
       {
        init_builtin_token (context, token);
-       (void) next_char (context);
+       next_char (context);
 #ifdef DEBUG_INPUT
        m4_print_token ("next_token", M4_TOKEN_MACDEF, token);
 #endif
        return M4_TOKEN_MACDEF;
       }
 
-    (void) next_char (context);
+    next_char (context); /* Consume character we already peeked at.  */
+    /* FIXME - other implementations, such as Solaris, parse macro
+       names, then quotes, then comments.  We should probably
+       rearrange this to match.  */
     if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_BCOMM))
       {                                        /* COMMENT, SHORT DELIM */
        obstack_1grow (&token_stack, ch);
@@ -757,22 +830,28 @@
          obstack_1grow (&token_stack, ch);
        if (ch != CHAR_EOF)
          obstack_1grow (&token_stack, ch);
-       type = m4_get_discard_comments_opt (context)
-               ? M4_TOKEN_NONE : M4_TOKEN_STRING;
+       else
+         m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
+                           _("end of file in comment"));
+       type = (m4_get_discard_comments_opt (context)
+               ? M4_TOKEN_NONE : M4_TOKEN_STRING);
       }
     else if (!m4_is_syntax_single_comments (M4SYNTAX)
-            && MATCH (context, ch, context->syntax->bcomm.string))
+            && MATCH (context, ch, context->syntax->bcomm.string, true))
       {                                        /* COMMENT, LONGER DELIM */
        obstack_grow (&token_stack, context->syntax->bcomm.string,
                      context->syntax->bcomm.length);
        while ((ch = next_char (context)) != CHAR_EOF
-              && !MATCH (context, ch, context->syntax->ecomm.string))
+              && !MATCH (context, ch, context->syntax->ecomm.string, true))
          obstack_1grow (&token_stack, ch);
        if (ch != CHAR_EOF)
          obstack_grow (&token_stack, context->syntax->ecomm.string,
                        context->syntax->ecomm.length);
-       type = m4_get_discard_comments_opt (context)
-               ? M4_TOKEN_NONE : M4_TOKEN_STRING;
+       else
+         m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
+                           _("end of file in comment"));
+       type = (m4_get_discard_comments_opt (context)
+               ? M4_TOKEN_NONE : M4_TOKEN_STRING);
       }
     else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
       {                                        /* ESCAPED WORD */
@@ -790,7 +869,7 @@
                  }
 
                if (ch != CHAR_EOF)
-                 unget_input(ch);
+                 unget_input (ch);
              }
            else
              {
@@ -814,22 +893,19 @@
            obstack_1grow (&token_stack, ch);
          }
        if (ch != CHAR_EOF)
-         unget_input(ch);
+         unget_input (ch);
 
-       type = m4_is_syntax_macro_escaped (M4SYNTAX)
-               ? M4_TOKEN_STRING : M4_TOKEN_WORD;
+       type = (m4_is_syntax_macro_escaped (M4SYNTAX)
+               ? M4_TOKEN_STRING : M4_TOKEN_WORD);
       }
     else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE))
       {                                        /* QUOTED STRING, SINGLE QUOTES 
*/
-       const char *current_file = m4_get_current_file (context);
-       int current_line = m4_get_current_line (context);
        quote_level = 1;
        while (1)
          {
            ch = next_char (context);
            if (ch == CHAR_EOF)
-             error_at_line (EXIT_FAILURE, 0,
-                             current_file, current_line,
+             m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
                              _("end of file in string"));
 
            if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_RQUOTE))
@@ -849,26 +925,23 @@
        type = M4_TOKEN_STRING;
       }
     else if (!m4_is_syntax_single_quotes (M4SYNTAX)
-            && MATCH (context, ch, context->syntax->lquote.string))
+            && MATCH (context, ch, context->syntax->lquote.string, true))
       {                                        /* QUOTED STRING, LONGER QUOTES 
*/
-       const char *current_file = m4_get_current_file (context);
-       int current_line = m4_get_current_line (context);
        quote_level = 1;
        while (1)
          {
            ch = next_char (context);
            if (ch == CHAR_EOF)
-             error_at_line (EXIT_FAILURE, 0,
-                             current_file, current_line,
+             m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
                              _("end of file in string"));
-           if (MATCH (context, ch, context->syntax->rquote.string))
+           if (MATCH (context, ch, context->syntax->rquote.string, true))
              {
                if (--quote_level == 0)
                  break;
                obstack_grow (&token_stack, context->syntax->rquote.string,
                              context->syntax->rquote.length);
              }
-           else if (MATCH (context, ch, context->syntax->lquote.string))
+           else if (MATCH (context, ch, context->syntax->lquote.string, true))
              {
                quote_level++;
                obstack_grow (&token_stack, context->syntax->lquote.string,
@@ -879,6 +952,26 @@
          }
        type = M4_TOKEN_STRING;
       }
+    else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
+      {                                        /* ACTIVE CHARACTER */
+       obstack_1grow (&token_stack, ch);
+       type = M4_TOKEN_WORD;
+      }
+    else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
+      {                                        /* OPEN PARENTHESIS */
+       obstack_1grow (&token_stack, ch);
+       type = M4_TOKEN_OPEN;
+      }
+    else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
+      {                                        /* COMMA */
+       obstack_1grow (&token_stack, ch);
+       type = M4_TOKEN_COMMA;
+      }
+    else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
+      {                                        /* CLOSE PARENTHESIS */
+       obstack_1grow (&token_stack, ch);
+       type = M4_TOKEN_CLOSE;
+      }
     else if (m4_is_syntax_single_quotes (M4SYNTAX)
             && m4_is_syntax_single_comments (M4SYNTAX))
       {                        /* EVERYTHING ELSE (SHORT QUOTES AND COMMENTS) 
*/
@@ -888,7 +981,7 @@
            || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
            || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR))
          {
-           while (((ch = next_char(context)) != CHAR_EOF)
+           while (((ch = next_char (context)) != CHAR_EOF)
                   && (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER)
                       || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
                       || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR)))
@@ -897,7 +990,7 @@
              }
 
            if (ch != CHAR_EOF)
-             unget_input(ch);
+             unget_input (ch);
            type = M4_TOKEN_STRING;
          }
        else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
@@ -909,12 +1002,10 @@
                  obstack_1grow (&token_stack, ch);
 
                if (ch != CHAR_EOF)
-                 unget_input(ch);
+                 unget_input (ch);
              }
            type = M4_TOKEN_SPACE;
          }
-       else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
-         type = M4_TOKEN_WORD;
        else
          type = M4_TOKEN_SIMPLE;
       }
@@ -928,8 +1019,6 @@
          type = M4_TOKEN_STRING;
        else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
          type = M4_TOKEN_SPACE;
-       else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
-         type = M4_TOKEN_WORD;
        else
          type = M4_TOKEN_SIMPLE;
       }
@@ -943,12 +1032,125 @@
   VALUE_MAX_ARGS (token)       = -1;
 
 #ifdef DEBUG_INPUT
-  m4_print_token("next_token", type, token);
+  m4_print_token ("next_token", type, token);
 #endif
 
   return type;
 }
 
+/* Peek and return the type of the next single token from the input
+   stream.  When peeking to see if changequote (or friends) are
+   followed by an open parentheses, it is possible that the token type
+   we peek at now will change by the time we parse it with
+   next_token.  */
+m4__token_type
+m4__peek_token (m4 *context)
+{
+  int ch = peek_char (context);
+
+  if (ch == CHAR_EOF)
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> EOF\n");
+#endif
+      return M4_TOKEN_EOF;
+    }
+  if (ch == CHAR_BUILTIN)
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> BUILTIN\n");
+#endif
+      return M4_TOKEN_MACDEF;
+    }
+  if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_BCOMM)
+      || (!m4_is_syntax_single_comments (M4SYNTAX)
+         && MATCH (context, ch, context->syntax->bcomm.string, false)))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> COMMENT\n");
+#endif
+      return M4_TOKEN_STRING;
+    }
+  if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
+    {
+      int c;
+      next_char (context);
+      c = peek_char (context);
+      unget_input (ch);
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> %s\n",
+              c == CHAR_EOF ? "SIMPLE" : "ESCAPE_WORD");
+#endif
+      return c == CHAR_EOF ? M4_TOKEN_SIMPLE : M4_TOKEN_WORD;
+    }
+  if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> %s\n",
+              m4_is_syntax_macro_escaped (M4SYNTAX) ? "STRING" : "WORD");
+#endif
+      return (m4_is_syntax_macro_escaped (M4SYNTAX)
+             ? M4_TOKEN_STRING : M4_TOKEN_WORD);
+    }
+  if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE)
+      || (!m4_is_syntax_single_quotes (M4SYNTAX)
+         && MATCH (context, ch, context->syntax->lquote.string, false)))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> QUOTE\n");
+#endif
+      return M4_TOKEN_STRING;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> ACTIVE\n");
+#endif
+      return M4_TOKEN_WORD;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> OPEN\n");
+#endif
+      return M4_TOKEN_OPEN;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> COMMA\n");
+#endif
+      return M4_TOKEN_COMMA;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> CLOSE\n");
+#endif
+      return M4_TOKEN_CLOSE;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER)
+      || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
+      || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> STRING\n");
+#endif
+      return M4_TOKEN_STRING;
+    }
+  if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
+    {
+#ifdef DEBUG_INPUT
+      fprintf (stderr, "peek_token -> SPACE\n");
+#endif
+      return M4_TOKEN_SPACE;
+    }
+#ifdef DEBUG_INPUT
+  fprintf (stderr, "peek_token -> SIMPLE\n");
+#endif
+  return M4_TOKEN_SIMPLE;
+}
+
 
 
 #ifdef DEBUG_INPUT




reply via email to

[Prev in Thread] Current Thread [Next in Thread]