[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Changes to m4/src/Attic/input.c,v [branch-1_4]
From: |
Eric Blake |
Subject: |
Changes to m4/src/Attic/input.c,v [branch-1_4] |
Date: |
Thu, 03 Aug 2006 13:31:42 +0000 |
CVSROOT: /sources/m4
Module name: m4
Branch: branch-1_4
Changes by: Eric Blake <ericb> 06/08/03 13:31:40
Index: src/input.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/input.c,v
retrieving revision 1.1.1.1.2.16
retrieving revision 1.1.1.1.2.17
diff -u -b -r1.1.1.1.2.16 -r1.1.1.1.2.17
--- src/input.c 2 Aug 2006 15:11:58 -0000 1.1.1.1.2.16
+++ src/input.c 3 Aug 2006 13:31:40 -0000 1.1.1.1.2.17
@@ -140,14 +140,20 @@
#ifdef ENABLE_CHANGEWORD
-#define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
+# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
static char *word_start;
static struct re_pattern_buffer word_regexp;
static int default_word_regexp;
static struct re_registers regs;
-#endif /* ENABLE_CHANGEWORD */
+#else /* ! ENABLE_CHANGEWORD */
+# define default_word_regexp 1
+#endif /* ! ENABLE_CHANGEWORD */
+
+#ifdef DEBUG_INPUT
+static const char *token_type_string (token_type);
+#endif
/*-------------------------------------------------------------------------.
@@ -409,7 +415,7 @@
| input stack. |
`------------------------------------------------------------------------*/
-int
+static int
peek_input (void)
{
int ch;
@@ -536,36 +542,48 @@
}
-/*----------------------------------------------------------------------.
-| This function is for matching a string against a prefix of the input |
-| stream. If the string matches the input, the input is discarded, |
-| otherwise the characters read are pushed back again. The function is |
-| used only when multicharacter quotes or comment delimiters are used. |
-`----------------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| This function is for matching a string against a prefix of the |
+| input stream. If the string matches the input and consume is |
+| TRUE, the input is discarded; otherwise any characters read are |
+| pushed back again. The function is used only when multicharacter |
+| quotes or comment delimiters are used. |
+`------------------------------------------------------------------*/
-static int
-match_input (const char *s)
+static boolean
+match_input (const char *s, boolean consume)
{
int n; /* number of characters matched */
int ch; /* input character */
const char *t;
+ boolean result = FALSE;
ch = peek_input ();
if (ch != to_uchar (*s))
- return 0; /* fail */
- (void) next_char ();
+ return FALSE; /* fail */
if (s[1] == '\0')
- return 1; /* short match */
+ {
+ if (consume)
+ (void) next_char ();
+ return TRUE; /* short match */
+ }
- for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); n++)
+ (void) next_char ();
+ for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
{
(void) next_char ();
+ n++;
if (*s == '\0') /* long match */
- return 1;
+ {
+ if (consume)
+ return TRUE;
+ result = TRUE;
+ break;
+ }
}
- /* Failed, push back input. */
+ /* Failed or shouldn't consume, push back input. */
{
struct obstack *h = push_string_init ();
@@ -573,20 +591,23 @@
obstack_grow (h, t, n);
}
push_string_finish ();
- return 0;
+ return result;
}
-/*------------------------------------------------------------------------.
-| The macro MATCH() is used to match a string against the input. The |
-| first character is handled inline, for speed. Hopefully, this will not |
-| hurt efficiency too much when single character quotes and comment |
-| delimiters are used. |
-`------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------.
+| The macro MATCH() is used to match a string S against the input. |
+| The first character is handled inline, for speed. Hopefully, this |
+| will not hurt efficiency too much when single character quotes and |
+| comment delimiters are used. If CONSUME, then CH is the result of |
+| next_char, and a successful match will discard the matched string. |
+| Otherwise, CH is the result of peek_char, and the input stream is |
+| effectively unchanged. |
+`--------------------------------------------------------------------*/
-#define MATCH(ch, s) \
+#define MATCH(ch, s, consume) \
(to_uchar ((s)[0]) == (ch) \
&& (ch) != '\0' \
- && ((s)[1] == '\0' || (match_input ((s) + 1))))
+ && ((s)[1] == '\0' || (match_input ((s) + (consume), consume))))
/*----------------------------------------------------------.
@@ -776,10 +797,11 @@
}
(void) next_char ();
- if (MATCH (ch, bcomm.string))
+ if (MATCH (ch, bcomm.string, TRUE))
{
obstack_grow (&token_stack, bcomm.string, bcomm.length);
- while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string))
+ while ((ch = next_char ()) != CHAR_EOF
+ && !MATCH (ch, ecomm.string, TRUE))
obstack_1grow (&token_stack, ch);
if (ch != CHAR_EOF)
obstack_grow (&token_stack, ecomm.string, ecomm.length);
@@ -791,11 +813,7 @@
type = TOKEN_STRING;
}
-#ifdef ENABLE_CHANGEWORD
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
-#else
- else if (isalpha (ch) || ch == '_')
-#endif
{
obstack_1grow (&token_stack, ch);
while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
@@ -844,9 +862,23 @@
#endif /* ENABLE_CHANGEWORD */
- else if (!MATCH (ch, lquote.string))
+ else if (!MATCH (ch, lquote.string, TRUE))
+ {
+ switch (ch)
{
+ case '(':
+ type = TOKEN_OPEN;
+ break;
+ case ',':
+ type = TOKEN_COMMA;
+ break;
+ case ')':
+ type = TOKEN_CLOSE;
+ break;
+ default:
type = TOKEN_SIMPLE;
+ break;
+ }
obstack_1grow (&token_stack, ch);
}
else
@@ -861,13 +893,13 @@
error_at_line (EXIT_FAILURE, 0, file, line,
"ERROR: end of file in string");
- if (MATCH (ch, rquote.string))
+ if (MATCH (ch, rquote.string, TRUE))
{
if (--quote_level == 0)
break;
obstack_grow (&token_stack, rquote.string, rquote.length);
}
- else if (MATCH (ch, lquote.string))
+ else if (MATCH (ch, lquote.string, TRUE))
{
quote_level++;
obstack_grow (&token_stack, lquote.string, lquote.length);
@@ -888,20 +920,127 @@
TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
- fprintf (stderr, "next_token -> %d (%s)\n", type, TOKEN_DATA_TEXT (td));
+ fprintf (stderr, "next_token -> %s (%s)\n",
+ token_type_string (type), TOKEN_DATA_TEXT (td));
#endif
return type;
}
+/*-----------------------------------------------.
+| Peek at the next token from the input stream. |
+`-----------------------------------------------*/
+
+token_type
+peek_token (void)
+{
+ int ch = peek_input ();
+
+ if (ch == CHAR_EOF)
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> EOF\n");
+#endif
+ return TOKEN_EOF;
+ }
+ if (ch == CHAR_MACRO)
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> MACDEF\n");
+#endif
+ return TOKEN_MACDEF;
+ }
+
+ if (MATCH (ch, bcomm.string, FALSE))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> COMMENT\n");
+#endif
+ return TOKEN_STRING;
+ }
+
+ if ((default_word_regexp && (isalpha (ch) || ch == '_'))
+#ifdef ENABLE_CHANGEWORD
+ || (! default_word_regexp && strchr (word_start, ch))
+#endif /* ENABLE_CHANGEWORD */
+ )
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> WORD\n");
+#endif
+ return TOKEN_WORD;
+ }
+
+ if (MATCH (ch, lquote.string, FALSE))
+ {
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> QUOTE\n");
+#endif
+ return TOKEN_STRING;
+ }
+
+ switch (ch)
+ {
+ case '(':
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> OPEN\n");
+#endif
+ return TOKEN_OPEN;
+ case ',':
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> COMMA\n");
+#endif
+ return TOKEN_COMMA;
+ case ')':
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> CLOSE\n");
+#endif
+ return TOKEN_CLOSE;
+ default:
+#ifdef DEBUG_INPUT
+ fprintf (stderr, "peek_token -> SIMPLE\n");
+#endif
+ return TOKEN_SIMPLE;
+ }
+}
+
#ifdef DEBUG_INPUT
+static const char *
+token_type_string (token_type t)
+{
+ switch (t)
+ { /* TOKSW */
+ case TOKEN_EOF:
+ return "EOF";
+ case TOKEN_STRING:
+ return "STRING";
+ case TOKEN_WORD:
+ return "WORD";
+ case TOKEN_OPEN:
+ return "OPEN";
+ case TOKEN_COMMA:
+ return "COMMA";
+ case TOKEN_CLOSE:
+ return "CLOSE";
+ case TOKEN_SIMPLE:
+ return "SIMPLE";
+ case TOKEN_MACDEF:
+ return "MACDEF";
+ default:
+ abort ();
+ }
+ }
+
static void
print_token (const char *s, token_type t, token_data *td)
{
fprintf (stderr, "%s: ", s);
switch (t)
{ /* TOKSW */
+ case TOKEN_OPEN:
+ case TOKEN_COMMA:
+ case TOKEN_CLOSE:
case TOKEN_SIMPLE:
fprintf (stderr, "char:");
break;
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/01
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/02
- Changes to m4/src/Attic/input.c,v [branch-1_4],
Eric Blake <=
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/04
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/08
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/18
- Changes to m4/src/Attic/input.c,v [branch-1_4], Eric Blake, 2006/08/23