[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Changes to m4/m4/input.c,v
From: Eric Blake
Subject: Changes to m4/m4/input.c,v
Date: Thu, 07 Sep 2006 23:53:05 +0000
CVSROOT: /sources/m4
Module name: m4
Changes by: Eric Blake <ericb> 06/09/07 23:53:04
Index: m4/input.c
===================================================================
RCS file: /sources/m4/m4/m4/input.c,v
retrieving revision 1.46
retrieving revision 1.47
diff -u -b -r1.46 -r1.47
--- m4/input.c 5 Sep 2006 23:16:39 -0000 1.46
+++ m4/input.c 7 Sep 2006 23:53:04 -0000 1.47
@@ -28,42 +28,44 @@
/*#define DEBUG_INPUT */
/*
- Unread input can be either files, that should be read (eg. included
- files), strings, which should be rescanned (eg. macro expansion
- text), single characters or quoted builtin definitions (as returned by
- the builtin "defn"). Unread input are organised in a stack,
+ Unread input can be either files that should be read (e.g. included
+ files), strings which should be rescanned (e.g. macro expansion
+ text), single characters, or quoted builtin definitions (as returned by
+ the builtin "defn"). Unread input is organized in a stack,
implemented with an obstack. Each input source is described by a
"struct input_block". The obstack is "input_stack". The top of the
input stack is "isp".
Each input_block has an associated struct input_funcs, that defines
- functions for peeking, reading, unget and cleanup. All input is done
- through the functions pointers of the input_funcs of the top most
- input_block. When a input_block is exausted, its reader returns
- CHAR_RETRY which causes the input_block to be popped from the
- input_stack.
-
- The macro "m4wrap" places the text to be saved on another input stack,
- on the obstack "wrapup_stack", whose top is "wsp". When EOF is seen
- on normal input (eg, when "input_stack" is empty), input is switched
- over to "wrapup_stack". To make this easier, all references to the
- current input stack, whether it be "input_stack" or "wrapup_stack",
- are done through a pointer "current_input", which points to either
- "input_stack" or "wrapup_stack".
+ functions for peeking, reading, unget and cleanup. All input is
+ done through the function pointers of the input_funcs of the top
+ most input_block, and all characters are unsigned. When an
+ input_block is exhausted, its reader returns CHAR_RETRY which causes
+ the input_block to be popped from the input_stack.
+
+ The macro "m4wrap" places the text to be saved on another input
+ stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
+ is seen on normal input (e.g. when "current_input" is empty), input is
+ switched over to "wrapup_stack", and the original "current_input" is
+ freed. A new stack is allocated for "wrapup_stack", which will
+ accept any text produced by calls to "m4wrap" from within the
+ wrapped text. This process of shuffling "wrapup_stack" to
+ "current_input" can continue indefinitely, even generating infinite
+ loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
Pushing new input on the input stack is done by m4_push_file (),
m4_push_string (), m4_push_single () or m4_push_wrapup () (for wrapup
text), and m4_push_builtin () (for builtin definitions). Because
macro expansion needs direct access to the current input obstack (for
- optimisation), m4_push_string () are split in two functions,
+ optimization), m4_push_string () is split in two functions,
push_string_init (), which returns a pointer to the current input
- stack, and push_string_finish (), which return a pointer to the final
+ stack, and push_string_finish (), which returns a pointer to the final
text. The input_block *next is used to manage the coordination
between the different push routines.
- The current file and line number are stored in two global variables,
+ The current file and line number are stored in the context,
for use by the error handling functions in m4.c. Whenever a file
- input_block is pushed, the current file name and line number is saved
+ input_block is pushed, the current file name and line number are saved
in the input_block, and the two variables are reset to match the new
input file. */
@@ -74,8 +76,10 @@
static void init_builtin_token (m4 *context, m4_symbol_value *token);
static int builtin_peek (void);
static int builtin_read (m4 *);
-static int match_input (m4 *context, const unsigned char *s);
+static bool match_input (m4 *context, const unsigned char *s,
+ bool);
static int next_char (m4 *context);
+static int peek_char (m4 *context);
static void pop_input (m4 *context);
static int single_peek (void);
static int single_read (m4 *);
@@ -113,20 +117,22 @@
struct
{
FILE *file; /* input file handle */
+ bool end; /* true iff peek returned EOF */
+ bool close; /* true if file should be closed on EOF */
const char *name; /* name of PREVIOUS input file */
- int lineno; /* current line number for do */
- /* Yet another attack of "The curse of global variables" (sigh) */
- int out_lineno; /* current output line number do */
+ int lineno; /* current line of previous file */
+ int out_lineno; /* current output line of previous file */
bool advance_line; /* start_of_input_line from next_char () */
}
u_f;
struct
{
- m4_builtin_func *func; /* pointer to builtins function. */
+ m4_builtin_func *func; /* pointer to builtin's function. */
lt_dlhandle handle; /* originating module. */
int flags; /* flags associated with the builtin. */
m4_hash *arg_signature; /* argument signature for builtin. */
- int min_args, max_args; /* argv maxima and minima for the builtin. */
+ unsigned int min_args; /* argv minima for the builtin. */
+ unsigned int max_args; /* argv maxima for the builtin. */
bool traced; /* true iff builtin is traced. */
bool read; /* true iff block has been read. */
}
@@ -141,13 +147,14 @@
/* Obstack for storing individual tokens. */
static m4_obstack token_stack;
-/* Normal input stack. */
-static m4_obstack input_stack;
+/* Wrapup input stack.
-/* Wrapup input stack. */
-static m4_obstack wrapup_stack;
+ FIXME - m4wrap should be FIFO, which implies a queue, not a stack.
+ While fixing this, m4wrap should also remember what the current
+ file and line are for each chunk of wrapped text. */
+static m4_obstack *wrapup_stack;
-/* Input or wrapup. */
+/* Current stack, from input or wrapup. */
static m4_obstack *current_input;
/* Bottom of token_stack, for obstack_free. */
@@ -168,15 +175,6 @@
-/* m4_push_file () pushes an input file on the input stack, saving the
- current file name and line number. If next is non-NULL, this push
- invalidates a call to m4_push_string_init (), whose storage are
- consequentely released.
-
- file_read () manages line numbers for error messages, so they do not
- get wrong, due to lookahead. The token consisting of a newline
- alone is taken as belonging to the line it ends, and the current
- line number is not incremented until the next character is read. */
static int
file_peek (void)
{
@@ -184,7 +182,10 @@
ch = getc (isp->u.u_f.file);
if (ch == EOF)
+ {
+ isp->u.u_f.end = true;
return CHAR_RETRY;
+ }
ungetc (ch, isp->u.u_f.file);
return ch;
@@ -201,7 +202,9 @@
m4_set_current_line (context, m4_get_current_line (context) + 1);
}
- ch = getc (isp->u.u_f.file);
+ /* If stdin is a terminal, calling getc after peek_input already
+ called it would make the user have to hit ^D twice to quit. */
+ ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.file);
if (ch == EOF)
return CHAR_RETRY;
@@ -211,10 +214,10 @@
}
static void
-file_unget (ch)
- int ch;
+file_unget (int ch)
{
ungetc (ch, isp->u.u_f.file);
+ isp->u.u_f.end = false;
if (ch == '\n')
start_of_input_line = false;
}
@@ -229,7 +232,15 @@
else
m4_debug_message (context, M4_DEBUG_TRACE_INPUT, _("input exhausted"));
+ if (ferror (isp->u.u_f.file))
+ {
+ m4_error (context, 0, 0, _("error reading file `%s'"),
+ m4_get_current_file (context));
fclose (isp->u.u_f.file);
+ }
+ else if (isp->u.u_f.close && fclose (isp->u.u_f.file) == EOF)
+ m4_error (context, 0, errno, _("error reading file `%s'"),
+ m4_get_current_file (context));
m4_set_current_file (context, isp->u.u_f.name);
m4_set_current_line (context, isp->u.u_f.lineno);
m4_output_current_line = isp->u.u_f.out_lineno;
@@ -242,8 +253,18 @@
file_peek, file_read, file_unget, file_clean
};
+/* m4_push_file () pushes an input file FP with name TITLE on the
+ input stack, saving the current file name and line number. If next
+ is non-NULL, this push invalidates a call to m4_push_string_init (),
+ whose storage is consequently released. If CLOSE, then close FP at
+ end of file.
+
+ file_read () manages line numbers for error messages, so they do not
+ go wrong due to lookahead. The token consisting of a newline
+ alone is taken as belonging to the line it ends, and the current
+ line number is not incremented until the next character is read. */
void
-m4_push_file (m4 *context, FILE *fp, const char *title)
+m4_push_file (m4 *context, FILE *fp, const char *title, bool close)
{
input_block *i;
@@ -261,6 +282,7 @@
i->funcs = &file_funcs;
i->u.u_f.file = fp;
+ i->u.u_f.end = false;
i->u.u_f.name = m4_get_current_file (context);
i->u.u_f.lineno = m4_get_current_line (context);
i->u.u_f.out_lineno = m4_output_current_line;
@@ -275,9 +297,6 @@
isp = i;
}
-/* m4_push_builtin () pushes a builtins definition on the input stack. If
- next is non-NULL, this push invalidates a call to m4_push_string_init (),
- whose storage are consequentely released. */
static int
builtin_peek (void)
{
@@ -301,6 +320,10 @@
builtin_peek, builtin_read, NULL, NULL
};
+/* m4_push_builtin () pushes TOKEN, which contains a builtin's
+ definition, on the input stack. If next is non-NULL, this push
+ invalidates a call to m4_push_string_init (), whose storage is
+ consequently released. */
void
m4_push_builtin (m4_symbol_value *token)
{
@@ -331,7 +354,6 @@
isp = i;
}
-/* Push a single character on to the input stack. */
static int
single_peek (void)
{
@@ -353,6 +375,7 @@
single_peek, single_read, NULL, NULL
};
+/* Push a single character CH on to the input stack. */
void
m4_push_single (int ch)
{
@@ -375,12 +398,10 @@
isp = i;
}
-/* First half of m4_push_string (). The pointer next points to the new
- input_block. */
static int
string_peek (void)
{
- int ch = *isp->u.u_s.current;
+ int ch = (unsigned char) *isp->u.u_s.current;
return (ch == '\0') ? CHAR_RETRY : ch;
}
@@ -388,7 +409,7 @@
static int
string_read (m4 *context M4_GNUC_UNUSED)
{
- int ch = *isp->u.u_s.current++;
+ int ch = (unsigned char) *isp->u.u_s.current++;
return (ch == '\0') ? CHAR_RETRY : ch;
@@ -400,13 +421,15 @@
if (isp->u.u_s.current > isp->u.u_s.start)
*--isp->u.u_s.current = ch;
else
- m4_push_single(ch);
+ m4_push_single (ch);
}
static struct input_funcs string_funcs = {
string_peek, string_read, string_unget, NULL
};
+/* First half of m4_push_string (). The pointer next points to the new
+ input_block. */
m4_obstack *
m4_push_string_init (m4 *context)
{
@@ -459,17 +482,19 @@
the input stack, and m4_push_string () and m4_push_file () will
operate on wrapup_stack. M4_push_wrapup should be done as
m4_push_string (), but this will suffice, as long as arguments to
- m4_m4wrap () are moderate in size. */
+ m4_m4wrap () are moderate in size.
+
+ FIXME - we should allow pushing builtins as well as text. */
void
m4_push_wrapup (const char *s)
{
- input_block *i = (input_block *) obstack_alloc (&wrapup_stack,
+ input_block *i = (input_block *) obstack_alloc (wrapup_stack,
sizeof (struct input_block));
i->prev = wsp;
i->funcs = &string_funcs;
- i->u.u_s.start = obstack_copy0 (&wrapup_stack, s, strlen (s));
+ i->u.u_s.start = obstack_copy0 (wrapup_stack, s, strlen (s));
i->u.u_s.current = i->u.u_s.start;
wsp = i;
@@ -478,7 +503,7 @@
/* The function pop_input () pops one level of input sources. If the
popped input_block is a file, current_file and current_line are
- reset to the saved values before the memory for the input_block are
+ reset to the saved values before the memory for the input_block is
released. */
static void
pop_input (m4 *context)
@@ -488,22 +513,37 @@
if (isp->funcs->clean_func != NULL)
(*isp->funcs->clean_func) (context);
+ if (tmp != NULL)
+ {
obstack_free (current_input, isp);
next = NULL; /* might be set in m4_push_string_init () */
+ }
isp = tmp;
}
-/* To switch input over to the wrapup stack, main () calls pop_wrapup
+/* To switch input over to the wrapup stack, main () calls pop_wrapup.
Since wrapup text can install new wrapup text, pop_wrapup () returns
false when there is no wrapup text on the stack, and true otherwise. */
bool
m4_pop_wrapup (void)
{
+ next = NULL;
+ obstack_free (current_input, NULL);
+ free (current_input);
+
if (wsp == NULL)
+ {
+ obstack_free (wrapup_stack, NULL);
+ current_input = NULL;
+ DELETE (wrapup_stack);
return false;
+ }
+
+ current_input = wrapup_stack;
+ wrapup_stack = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+ obstack_init (wrapup_stack);
- current_input = &wrapup_stack;
isp = wsp;
wsp = NULL;
@@ -524,7 +564,7 @@
m4_set_symbol_value_func (token, isp->u.u_b.func);
VALUE_HANDLE (token) = isp->u.u_b.handle;
VALUE_FLAGS (token) = isp->u.u_b.flags;
- VALUE_ARG_SIGNATURE(token) = isp->u.u_b.arg_signature;
+ VALUE_ARG_SIGNATURE (token) = isp->u.u_b.arg_signature;
VALUE_MIN_ARGS (token) = isp->u.u_b.min_args;
VALUE_MAX_ARGS (token) = isp->u.u_b.max_args;
}
@@ -549,7 +589,7 @@
{
while ((ch = f (context)) != CHAR_RETRY)
{
- /* if (!IS_IGNORE(ch)) */
+ /* if (!IS_IGNORE (ch)) */
return ch;
}
}
@@ -564,11 +604,11 @@
}
}
-/* The function m4_peek_input () is used to look at the next character in
+/* The function peek_char () is used to look at the next character in
the input stream. At any given time, it reads from the input_block
on the top of the current input stack. */
-int
-m4_peek_input (m4 *context)
+static int
+peek_char (m4 *context)
{
int ch;
int (*f) (void);
@@ -581,97 +621,121 @@
f = isp->funcs->peek_func;
if (f != NULL)
{
- if ((ch = (*f)()) != CHAR_RETRY)
+ if ((ch = f ()) != CHAR_RETRY)
{
- return /* (IS_IGNORE(ch)) ? next_char () : */ ch;
+ return /* (IS_IGNORE (ch)) ? next_char () : */ ch;
}
}
else
{
- assert (!"INTERNAL ERROR: input stack botch in m4_peek_input ()");
+ assert (!"INTERNAL ERROR: input stack botch in peek_char ()");
abort ();
}
- /* End of input source --- pop one level. */
+ /* End of current input source --- pop one level if another
+ level of input still exists. */
+ if (isp->prev != NULL)
pop_input (context);
+ else
+ return CHAR_EOF;
}
}
/* The function unget_input () puts back a character on the input
- stack, using an existing input_block if possible. */
+ stack, using an existing input_block if possible. This is not safe
+ to call more than once without an intervening next_char. */
static void
unget_input (int ch)
{
if (isp != NULL && isp->funcs->unget_func != NULL)
- (*isp->funcs->unget_func)(ch);
+ isp->funcs->unget_func (ch);
else
- m4_push_single(ch);
+ m4_push_single (ch);
}
-/* skip_line () simply discards all immediately following characters, upto
+/* skip_line () simply discards all immediately following characters, up to
the first newline. It is only used from m4_dnl (). */
void
m4_skip_line (m4 *context)
{
int ch;
+ const char *file = m4_get_current_file (context);
+ int line = m4_get_current_line (context);
while ((ch = next_char (context)) != CHAR_EOF && ch != '\n')
;
+ if (ch == CHAR_EOF)
+ /* current_file changed; use the previous value we cached. */
+ m4_warn_at_line (context, 0, file, line,
+ _("end of file treated as newline"));
}
/* This function is for matching a string against a prefix of the
- input stream. If the string matches the input, the input is
- discarded, otherwise the characters read are pushed back again.
- The function is used only when multicharacter quotes or comment
- delimiters are used.
+ input stream. If the string S matches the input and CONSUME is
+ true, the input is discarded; otherwise any characters read are
+ pushed back again. The function is used only when multicharacter
+ quotes or comment delimiters are used.
All strings herein should be unsigned. Otherwise sign-extension
of individual chars might break quotes with 8-bit chars in it. */
-static int
-match_input (m4 *context, const unsigned char *s)
+static bool
+match_input (m4 *context, const unsigned char *s, bool consume)
{
int n; /* number of characters matched */
int ch; /* input character */
const unsigned char *t;
m4_obstack *st;
+ bool result = false;
- ch = m4_peek_input (context);
+ ch = peek_char (context);
if (ch != *s)
- return 0; /* fail */
- (void) next_char (context);
+ return false; /* fail */
if (s[1] == '\0')
- return 1; /* short match */
+ {
+ if (consume)
+ next_char (context);
+ return true; /* short match */
+ }
- for (n = 1, t = s++; (ch = m4_peek_input (context)) == *s++; n++)
+ next_char (context);
+ for (n = 1, t = s++; (ch = peek_char (context)) == *s++; )
{
- (void) next_char (context);
+ next_char (context);
+ n++;
if (*s == '\0') /* long match */
- return 1;
+ {
+ if (consume)
+ return true;
+ result = true;
+ break;
+ }
}
- /* Failed, push back input. */
+ /* Failed or shouldn't consume, push back input. */
st = m4_push_string_init (context);
obstack_grow (st, t, n);
m4_push_string_finish ();
- return 0;
+ return result;
}
-/* The macro MATCH() is used to match a string against the input. The
- first character is handled inline, for speed. Hopefully, this will not
- hurt efficiency too much when single character quotes and comment
- delimiters are used. */
-#define MATCH(C, ch, s) \
+/* The macro MATCH() is used to match an unsigned char string S
+ against the input. The first character is handled inline, for
+ speed. Hopefully, this will not hurt efficiency too much when
+ single character quotes and comment delimiters are used. If
+ CONSUME, then CH is the result of next_char, and a successful match
+ will discard the matched string. Otherwise, CH is the result of
+ peek_char, and the input stream is effectively unchanged. */
+#define MATCH(C, ch, s, consume) \
((s)[0] == (ch) \
&& (ch) != '\0' \
- && ((s)[1] == '\0' \
- || (match_input ((C), (s) + 1) ? (ch) = m4_peek_input (C), 1 : 0)))
+ && ((s)[1] == '\0' || (match_input (C, (s) + (consume), consume))))
-/* Inititialise input stacks, and quote/comment characters. */
+/* Initialize input stacks, and quote/comment characters. */
void
m4_input_init (m4 *context)
{
@@ -681,10 +745,11 @@
m4_set_current_line (context, 0);
obstack_init (&token_stack);
- obstack_init (&input_stack);
- obstack_init (&wrapup_stack);
- current_input = &input_stack;
+ current_input = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+ obstack_init (current_input);
+ wrapup_stack = (m4_obstack *) xmalloc (sizeof (m4_obstack));
+ obstack_init (wrapup_stack);
obstack_1grow (&token_stack, '\0');
token_bottom = obstack_finish (&token_stack);
@@ -699,9 +764,8 @@
void
m4_input_exit (void)
{
- obstack_free (&wrapup_stack, NULL);
- obstack_free (&input_stack, NULL);
- obstack_free (&token_stack, NULL);
+ assert (current_input == NULL);
+ assert (wrapup_stack == NULL);
}
@@ -713,9 +777,9 @@
that is not a part of any of the previous types.
M4__next_token () returns the token type, and passes back a pointer to
- the token data through VALUE. The token text is collected on the obstack
+ the token data through TOKEN. The token text is collected on the obstack
token_stack, which never contains more than one token text at a time.
- The storage pointed to by the fields in VALUE is therefore subject to
+ The storage pointed to by the fields in TOKEN is therefore subject to
change the next time m4__next_token () is called. */
m4__token_type
m4__next_token (m4 *context, m4_symbol_value *token)
@@ -725,30 +789,39 @@
m4__token_type type;
do {
+ const char *file = m4_get_current_file (context);
+ int line = m4_get_current_line (context);
+
obstack_free (&token_stack, token_bottom);
obstack_1grow (&token_stack, '\0');
token_bottom = obstack_finish (&token_stack);
- ch = m4_peek_input (context);
+ /* Must consume an input character, but not until CHAR_BUILTIN is
+ handled. */
+ ch = peek_char (context);
if (ch == CHAR_EOF) /* EOF */
{
#ifdef DEBUG_INPUT
fprintf (stderr, "next_token -> EOF\n");
#endif
+ next_char (context);
return M4_TOKEN_EOF;
}
if (ch == CHAR_BUILTIN) /* BUILTIN TOKEN */
{
init_builtin_token (context, token);
- (void) next_char (context);
+ next_char (context);
#ifdef DEBUG_INPUT
m4_print_token ("next_token", M4_TOKEN_MACDEF, token);
#endif
return M4_TOKEN_MACDEF;
}
- (void) next_char (context);
+ next_char (context); /* Consume character we already peeked at. */
+ /* FIXME - other implementations, such as Solaris, parse macro
+ names, then quotes, then comments. We should probably
+ rearrange this to match. */
if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_BCOMM))
{ /* COMMENT, SHORT DELIM */
obstack_1grow (&token_stack, ch);
@@ -757,22 +830,28 @@
obstack_1grow (&token_stack, ch);
if (ch != CHAR_EOF)
obstack_1grow (&token_stack, ch);
- type = m4_get_discard_comments_opt (context)
- ? M4_TOKEN_NONE : M4_TOKEN_STRING;
+ else
+ m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
+ _("end of file in comment"));
+ type = (m4_get_discard_comments_opt (context)
+ ? M4_TOKEN_NONE : M4_TOKEN_STRING);
}
else if (!m4_is_syntax_single_comments (M4SYNTAX)
- && MATCH (context, ch, context->syntax->bcomm.string))
+ && MATCH (context, ch, context->syntax->bcomm.string, true))
{ /* COMMENT, LONGER DELIM */
obstack_grow (&token_stack, context->syntax->bcomm.string,
context->syntax->bcomm.length);
while ((ch = next_char (context)) != CHAR_EOF
- && !MATCH (context, ch, context->syntax->ecomm.string))
+ && !MATCH (context, ch, context->syntax->ecomm.string, true))
obstack_1grow (&token_stack, ch);
if (ch != CHAR_EOF)
obstack_grow (&token_stack, context->syntax->ecomm.string,
context->syntax->ecomm.length);
- type = m4_get_discard_comments_opt (context)
- ? M4_TOKEN_NONE : M4_TOKEN_STRING;
+ else
+ m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
+ _("end of file in comment"));
+ type = (m4_get_discard_comments_opt (context)
+ ? M4_TOKEN_NONE : M4_TOKEN_STRING);
}
else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
{ /* ESCAPED WORD */
@@ -790,7 +869,7 @@
}
if (ch != CHAR_EOF)
- unget_input(ch);
+ unget_input (ch);
}
else
{
@@ -814,22 +893,19 @@
obstack_1grow (&token_stack, ch);
}
if (ch != CHAR_EOF)
- unget_input(ch);
+ unget_input (ch);
- type = m4_is_syntax_macro_escaped (M4SYNTAX)
- ? M4_TOKEN_STRING : M4_TOKEN_WORD;
+ type = (m4_is_syntax_macro_escaped (M4SYNTAX)
+ ? M4_TOKEN_STRING : M4_TOKEN_WORD);
}
else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE))
{ /* QUOTED STRING, SINGLE QUOTES
*/
- const char *current_file = m4_get_current_file (context);
- int current_line = m4_get_current_line (context);
quote_level = 1;
while (1)
{
ch = next_char (context);
if (ch == CHAR_EOF)
- error_at_line (EXIT_FAILURE, 0,
- current_file, current_line,
+ m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
_("end of file in string"));
if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_RQUOTE))
@@ -849,26 +925,23 @@
type = M4_TOKEN_STRING;
}
else if (!m4_is_syntax_single_quotes (M4SYNTAX)
- && MATCH (context, ch, context->syntax->lquote.string))
+ && MATCH (context, ch, context->syntax->lquote.string, true))
{ /* QUOTED STRING, LONGER QUOTES
*/
- const char *current_file = m4_get_current_file (context);
- int current_line = m4_get_current_line (context);
quote_level = 1;
while (1)
{
ch = next_char (context);
if (ch == CHAR_EOF)
- error_at_line (EXIT_FAILURE, 0,
- current_file, current_line,
+ m4_error_at_line (context, EXIT_FAILURE, 0, file, line,
_("end of file in string"));
- if (MATCH (context, ch, context->syntax->rquote.string))
+ if (MATCH (context, ch, context->syntax->rquote.string, true))
{
if (--quote_level == 0)
break;
obstack_grow (&token_stack, context->syntax->rquote.string,
context->syntax->rquote.length);
}
- else if (MATCH (context, ch, context->syntax->lquote.string))
+ else if (MATCH (context, ch, context->syntax->lquote.string, true))
{
quote_level++;
obstack_grow (&token_stack, context->syntax->lquote.string,
@@ -879,6 +952,26 @@
}
type = M4_TOKEN_STRING;
}
+ else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
+ { /* ACTIVE CHARACTER */
+ obstack_1grow (&token_stack, ch);
+ type = M4_TOKEN_WORD;
+ }
+ else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
+ { /* OPEN PARENTHESIS */
+ obstack_1grow (&token_stack, ch);
+ type = M4_TOKEN_OPEN;
+ }
+ else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
+ { /* COMMA */
+ obstack_1grow (&token_stack, ch);
+ type = M4_TOKEN_COMMA;
+ }
+ else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
+ { /* CLOSE PARENTHESIS */
+ obstack_1grow (&token_stack, ch);
+ type = M4_TOKEN_CLOSE;
+ }
else if (m4_is_syntax_single_quotes (M4SYNTAX)
&& m4_is_syntax_single_comments (M4SYNTAX))
{ /* EVERYTHING ELSE (SHORT QUOTES AND COMMENTS)
*/
@@ -888,7 +981,7 @@
|| m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
|| m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR))
{
- while (((ch = next_char(context)) != CHAR_EOF)
+ while (((ch = next_char (context)) != CHAR_EOF)
&& (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER)
|| m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
|| m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR)))
@@ -897,7 +990,7 @@
}
if (ch != CHAR_EOF)
- unget_input(ch);
+ unget_input (ch);
type = M4_TOKEN_STRING;
}
else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
@@ -909,12 +1002,10 @@
obstack_1grow (&token_stack, ch);
if (ch != CHAR_EOF)
- unget_input(ch);
+ unget_input (ch);
}
type = M4_TOKEN_SPACE;
}
- else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
- type = M4_TOKEN_WORD;
else
type = M4_TOKEN_SIMPLE;
}
@@ -928,8 +1019,6 @@
type = M4_TOKEN_STRING;
else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
type = M4_TOKEN_SPACE;
- else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
- type = M4_TOKEN_WORD;
else
type = M4_TOKEN_SIMPLE;
}
@@ -943,12 +1032,125 @@
VALUE_MAX_ARGS (token) = -1;
#ifdef DEBUG_INPUT
- m4_print_token("next_token", type, token);
+ m4_print_token ("next_token", type, token);
#endif
return type;
}
+/* Peek and return the type of the next single token from the input
+ stream. When peeking to see if changequote (or friends) are
+ followed by an open parenthesis, it is possible that the token type
+ we peek at now will change by the time we parse it with
+ next_token. */
+m4__token_type
+m4__peek_token (m4 *context)
+{
+ int ch = peek_char (context);
+
+ if (ch == CHAR_EOF)
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> EOF\n");
+#endif
+ return M4_TOKEN_EOF;
+ }
+ if (ch == CHAR_BUILTIN)
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> BUILTIN\n");
+#endif
+ return M4_TOKEN_MACDEF;
+ }
+ if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_BCOMM)
+ || (!m4_is_syntax_single_comments (M4SYNTAX)
+ && MATCH (context, ch, context->syntax->bcomm.string, false)))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> COMMENT\n");
+#endif
+ return M4_TOKEN_STRING;
+ }
+ if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ESCAPE))
+ {
+ int c;
+ next_char (context);
+ c = peek_char (context);
+ unget_input (ch);
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> %s\n",
+ c == CHAR_EOF ? "SIMPLE" : "ESCAPE_WORD");
+#endif
+ return c == CHAR_EOF ? M4_TOKEN_SIMPLE : M4_TOKEN_WORD;
+ }
+ if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_ALPHA))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> %s\n",
+ m4_is_syntax_macro_escaped (M4SYNTAX) ? "STRING" : "WORD");
+#endif
+ return (m4_is_syntax_macro_escaped (M4SYNTAX)
+ ? M4_TOKEN_STRING : M4_TOKEN_WORD);
+ }
+ if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_LQUOTE)
+ || (!m4_is_syntax_single_quotes (M4SYNTAX)
+ && MATCH (context, ch, context->syntax->lquote.string, false)))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> QUOTE\n");
+#endif
+ return M4_TOKEN_STRING;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_ACTIVE))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> ACTIVE\n");
+#endif
+ return M4_TOKEN_WORD;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> OPEN\n");
+#endif
+ return M4_TOKEN_OPEN;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_COMMA))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> COMMA\n");
+#endif
+ return M4_TOKEN_COMMA;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_CLOSE))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> CLOSE\n");
+#endif
+ return M4_TOKEN_CLOSE;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER)
+ || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_NUM)
+ || m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_DOLLAR))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> STRING\n");
+#endif
+ return M4_TOKEN_STRING;
+ }
+ if (m4_is_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> SPACE\n");
+#endif
+ return M4_TOKEN_SPACE;
+ }
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> SIMPLE\n");
+#endif
+ return M4_TOKEN_SIMPLE;
+}
+
#ifdef DEBUG_INPUT