[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
fix for dfa when mixed with libsigsegv on Windows
From: |
Aharon Robbins |
Subject: |
fix for dfa when mixed with libsigsegv on Windows |
Date: |
Tue, 06 Apr 2010 20:02:27 +0300 |
Hi. The Cygwin maintainer tells me that libsigsegv on Windows pulls in the
dreaded <windows.h> header file which defines WCHAR as wchar_t, causing a
conflict with the WCHAR in the enum in dfa.h. I propose the following diff
which compiles OK under Linux and moves the details into dfa.c.
Thanks,
Arnold
---------------------------------------------------------------
Index: dfa.h
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/dfa.h,v
retrieving revision 1.5
diff -u -r1.5 dfa.h
--- dfa.h 23 Mar 2010 17:46:27 -0000 1.5
+++ dfa.h 4 Apr 2010 12:26:36 -0000
@@ -50,98 +50,8 @@
are operators and others are terminal symbols. Most (but not all) of these
codes are returned by the lexical analyzer. */
-typedef enum
-{
- END = -1, /* END is a terminal symbol that matches the
- end of input; any value of END or less in
- the parse tree is such a symbol. Accepting
- states of the DFA are those that would have
- a transition on END. */
-
- /* Ordinary character values are terminal symbols that match themselves. */
-
- EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
- the empty string. */
-
- BACKREF, /* BACKREF is generated by \<digit>; it
- it not completely handled. If the scanner
- detects a transition on backref, it returns
- a kind of "semi-success" indicating that
- the match will have to be verified with
- a backtracking matcher. */
-
- BEGLINE, /* BEGLINE is a terminal symbol that matches
- the empty string if it is at the beginning
- of a line. */
-
- ENDLINE, /* ENDLINE is a terminal symbol that matches
- the empty string if it is at the end of
- a line. */
-
- BEGWORD, /* BEGWORD is a terminal symbol that matches
- the empty string if it is at the beginning
- of a word. */
-
- ENDWORD, /* ENDWORD is a terminal symbol that matches
- the empty string if it is at the end of
- a word. */
-
- LIMWORD, /* LIMWORD is a terminal symbol that matches
- the empty string if it is at the beginning
- or the end of a word. */
-
- NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
- matches the empty string if it is not at
- the beginning or end of a word. */
-
- QMARK, /* QMARK is an operator of one argument that
- matches zero or one occurences of its
- argument. */
-
- STAR, /* STAR is an operator of one argument
that
- matches the Kleene closure (zero or more
- occurrences) of its argument. */
-
- PLUS, /* PLUS is an operator of one argument
that
- matches the positive closure (one or more
- occurrences) of its argument. */
-
- REPMN, /* REPMN is a lexical token corresponding
- to the {m,n} construct. REPMN never
- appears in the compiled token vector. */
-
- CAT, /* CAT is an operator of two arguments that
- matches the concatenation of its
- arguments. CAT is never returned by the
- lexical analyzer. */
-
- OR, /* OR is an operator of two arguments that
- matches either of its arguments. */
-
- ORTOP, /* OR at the toplevel in the parse tree.
- This is used for a boyer-moore heuristic. */
-
- LPAREN, /* LPAREN never appears in the parse tree,
- it is only a lexeme. */
-
- RPAREN, /* RPAREN never appears in the parse tree. */
-
-#ifdef MBS_SUPPORT
- ANYCHAR, /* ANYCHAR is a terminal symbol that matches
- any multibyte (or single byte) characters.
- It is used only if MB_CUR_MAX > 1. */
-
- MBCSET, /* MBCSET is similar to CSET, but for
- multibyte characters. */
-
- WCHAR, /* Only returned by lex. wctok contains
- the wide character representation. */
-#endif /* MBS_SUPPORT */
-
- CSET /* CSET and (and any value greater) is a
- terminal symbol that matches any of a
- class of characters. */
-} token;
+enum token_enum;
+typedef enum token_enum token;
/* Sets are stored in an array in the compiled dfa; the index of the
array corresponding to a given set token is given by SET_INDEX(t). */
Index: dfa.c
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/dfa.c,v
retrieving revision 1.29
diff -u -r1.29 dfa.c
--- dfa.c 2 Apr 2010 09:33:41 -0000 1.29
+++ dfa.c 4 Apr 2010 12:29:47 -0000
@@ -102,6 +102,99 @@
# undef clrbit
#endif
+enum token_enum
+{
+ END = -1, /* END is a terminal symbol that matches the
+ end of input; any value of END or less in
+ the parse tree is such a symbol. Accepting
+ states of the DFA are those that would have
+ a transition on END. */
+
+ /* Ordinary character values are terminal symbols that match themselves. */
+
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
+ the empty string. */
+
+ BACKREF, /* BACKREF is generated by \<digit>; it
+ it not completely handled. If the scanner
+ detects a transition on backref, it returns
+ a kind of "semi-success" indicating that
+ the match will have to be verified with
+ a backtracking matcher. */
+
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a line. */
+
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
+ the empty string if it is at the end of
+ a line. */
+
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a word. */
+
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
+ the empty string if it is at the end of
+ a word. */
+
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ or the end of a word. */
+
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
+ matches the empty string if it is not at
+ the beginning or end of a word. */
+
+ QMARK, /* QMARK is an operator of one argument that
+ matches zero or one occurences of its
+ argument. */
+
+ STAR, /* STAR is an operator of one argument
that
+ matches the Kleene closure (zero or more
+ occurrences) of its argument. */
+
+ PLUS, /* PLUS is an operator of one argument
that
+ matches the positive closure (one or more
+ occurrences) of its argument. */
+
+ REPMN, /* REPMN is a lexical token corresponding
+ to the {m,n} construct. REPMN never
+ appears in the compiled token vector. */
+
+ CAT, /* CAT is an operator of two arguments that
+ matches the concatenation of its
+ arguments. CAT is never returned by the
+ lexical analyzer. */
+
+ OR, /* OR is an operator of two arguments that
+ matches either of its arguments. */
+
+ ORTOP, /* OR at the toplevel in the parse tree.
+ This is used for a boyer-moore heuristic. */
+
+ LPAREN, /* LPAREN never appears in the parse tree,
+ it is only a lexeme. */
+
+ RPAREN, /* RPAREN never appears in the parse tree. */
+
+#ifdef MBS_SUPPORT
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ any multibyte (or single byte) characters.
+ It is used only if MB_CUR_MAX > 1. */
+
+ MBCSET, /* MBCSET is similar to CSET, but for
+ multibyte characters. */
+
+ WCHAR, /* Only returned by lex. wctok contains
+ the wide character representation. */
+#endif /* MBS_SUPPORT */
+
+ CSET /* CSET and (and any value greater) is a
+ terminal symbol that matches any of a
+ class of characters. */
+};
+
static void dfamust (struct dfa *dfa);
static void regexp (int toplevel);
- fix for dfa when mixed with libsigsegv on Windows,
Aharon Robbins <=
Re: fix for dfa when mixed with libsigsegv on Windows, Jim Meyering, 2010/04/06