[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, gawk-4.0-stable, updated. 19a22c41661c3b
From: |
Arnold Robbins |
Subject: |
[gawk-diffs] [SCM] gawk branch, gawk-4.0-stable, updated. 19a22c41661c3b032c9cdcfd6913373526c025c8 |
Date: |
Thu, 29 Mar 2012 19:33:47 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, gawk-4.0-stable has been updated
via 19a22c41661c3b032c9cdcfd6913373526c025c8 (commit)
from b85b04e80466b60892bb1533e33af267ecb1c2a6 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=19a22c41661c3b032c9cdcfd6913373526c025c8
commit 19a22c41661c3b032c9cdcfd6913373526c025c8
Author: Arnold D. Robbins <address@hidden>
Date: Thu Mar 29 21:32:53 2012 +0200
Sync dfa with grep. Update configure machinery.
diff --git a/ChangeLog b/ChangeLog
index 40a5c0e..bc2c677 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-03-29 Arnold D. Robbins <address@hidden>
+
+ * dfa.h, dfa.c: Sync with grep. Major cleanups and some changes
+ there.
+ * re.c (research): Pass size_t* to dfaexec to match type change.
+ * configure.ac (AH_VERBATIM[_Noreturn]): Added from Paul Eggert to
+ ease compiling.
+ (AC_INIT): Bump version.
+ * configure, configh.in, version.c: Regenerated.
+
2012-03-28 Arnold D. Robbins <address@hidden>
* 4.0.1: Release tar ball made.
diff --git a/configh.in b/configh.in
index 5ab1832..557a57c 100644
--- a/configh.in
+++ b/configh.in
@@ -411,6 +411,19 @@
/* Define to 1 if on MINIX. */
#undef _MINIX
+/* The _Noreturn keyword of C11. */
+#ifndef _Noreturn
+# if (3 <= __GNUC__ || (__GNUC__ == 2 && 8 <= __GNUC_MINOR__) \
+ || 0x5110 <= __SUNPRO_C)
+# define _Noreturn __attribute__ ((__noreturn__))
+# elif defined _MSC_VER && 1200 <= _MSC_VER
+# define _Noreturn __declspec (noreturn)
+# else
+# define _Noreturn
+# endif
+#endif
+
+
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
#undef _POSIX_1_SOURCE
diff --git a/configure b/configure
index 54c4431..c7873bc 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for GNU Awk 4.0.1.
+# Generated by GNU Autoconf 2.68 for GNU Awk 4.0.1a.
#
# Report bugs to <address@hidden>.
#
@@ -560,8 +560,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='GNU Awk'
PACKAGE_TARNAME='gawk'
-PACKAGE_VERSION='4.0.1'
-PACKAGE_STRING='GNU Awk 4.0.1'
+PACKAGE_VERSION='4.0.1a'
+PACKAGE_STRING='GNU Awk 4.0.1a'
PACKAGE_BUGREPORT='address@hidden'
PACKAGE_URL='http://www.gnu.org/software/gawk/'
@@ -1290,7 +1290,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures GNU Awk 4.0.1 to adapt to many kinds of systems.
+\`configure' configures GNU Awk 4.0.1a to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1360,7 +1360,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of GNU Awk 4.0.1:";;
+ short | recursive ) echo "Configuration of GNU Awk 4.0.1a:";;
esac
cat <<\_ACEOF
@@ -1472,7 +1472,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-GNU Awk configure 4.0.1
+GNU Awk configure 4.0.1a
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2176,7 +2176,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by GNU Awk $as_me 4.0.1, which was
+It was created by GNU Awk $as_me 4.0.1a, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
@@ -3011,7 +3011,7 @@ fi
# Define the identity of the package.
PACKAGE='gawk'
- VERSION='4.0.1'
+ VERSION='4.0.1a'
cat >>confdefs.h <<_ACEOF
@@ -8816,6 +8816,9 @@ fi
+
+
+
for ac_func in vprintf
do :
ac_fn_c_check_func "$LINENO" "vprintf" "ac_cv_func_vprintf"
@@ -11221,7 +11224,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by GNU Awk $as_me 4.0.1, which was
+This file was extended by GNU Awk $as_me 4.0.1a, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -11289,7 +11292,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //;
s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-GNU Awk config.status 4.0.1
+GNU Awk config.status 4.0.1a
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index bc317cf..8fe1d23 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@ dnl
dnl Process this file with autoconf to produce a configure script.
-AC_INIT([GNU Awk], 4.0.1, address@hidden, gawk)
+AC_INIT([GNU Awk], 4.0.1a, address@hidden, gawk)
# This is a hack. Different versions of install on different systems
# are just too different. Chuck it and use install-sh.
@@ -237,6 +237,44 @@ dnl AC_MSG_ERROR([Cannot find a type to use in place of socklen_t])
])
TYPE_SOCKLEN_T
+dnl Date: Sun, 26 Feb 2012 11:31:50 -0800
+dnl From: Paul Eggert <address@hidden>
+dnl To: address@hidden
+dnl CC: address@hidden
+dnl Subject: Re: avoid gcc 4.6.2 'may be used before set' warnings in dfa.c
+dnl
+dnl On 02/26/2012 01:18 AM, address@hidden wrote:
+dnl > It looks like I can just use the code as it is now in grep. I have asked
+dnl > for compile failures and haven't gotten any.
+dnl
+dnl Sure, but the gnulib support for this is better
+dnl than what's in the dfa code. It could be that
+dnl your correspondents aren't using the less-common hosts
+dnl or compiler warning options that gnulib is ported to.
+dnl
+dnl In the long run stdnoreturn.h or _Noreturn is the way to go,
+dnl since they're part of the C standard.
+dnl
+dnl If you'd rather not create a separate file, how about if
+dnl we change the dfa code to use _Noreturn instead of
+dnl <stdnoreturn.h> and noreturn, and you can put the
+dnl following into your configure.ac so that config.h
+dnl defines _Noreturn the same way gnulib does:
+
+AH_VERBATIM([_Noreturn],
+[/* The _Noreturn keyword of C11. */
+#ifndef _Noreturn
+# if (3 <= __GNUC__ || (__GNUC__ == 2 && 8 <= __GNUC_MINOR__) \
+ || 0x5110 <= __SUNPRO_C)
+# define _Noreturn __attribute__ ((__noreturn__))
+# elif defined _MSC_VER && 1200 <= _MSC_VER
+# define _Noreturn __declspec (noreturn)
+# else
+# define _Noreturn
+# endif
+#endif
+])
+
dnl checks for functions
AC_FUNC_VPRINTF
AC_FUNC_MKTIME
diff --git a/dfa.c b/dfa.c
index 64ce8f7..21cabc3 100644
--- a/dfa.c
+++ b/dfa.c
@@ -58,18 +58,25 @@
/* We can handle multibyte strings. */
#include <wchar.h>
#include <wctype.h>
-
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
#endif
#ifdef GAWK
#define bool int
#define true (1)
#define false (0)
+
+/* The __pure__ attribute was added in gcc 2.96. */
+#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
+#else
+# define _GL_ATTRIBUTE_PURE /* empty */
+#endif
#endif /* GAWK */
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
#include "regex.h"
#include "dfa.h"
#include "xalloc.h"
@@ -112,15 +119,23 @@ typedef int charclass[CHARCLASS_INTS];
/* Convert a possibly-signed character to an unsigned character. This is
a bit safer than casting to unsigned char, since it catches some type
errors that the cast doesn't. */
-static inline unsigned char to_uchar (char ch) { return ch; }
+static inline unsigned char
+to_uchar (char ch)
+{
+ return ch;
+}
/* Contexts tell us whether a character is a newline or a word constituent.
Word-constituent characters are those that satisfy iswalnum(), plus '_'.
+ Each character has a single CTX_* value; bitmasks of CTX_* values denote
+ a particular character class.
- A state also stores a context value, which is nonzero if its
- predecessors always matches a newline or a word constituent.
- The definition of a state's context is a bit unclear, but will be
- modified soon anyway. */
+ A state also stores a context value, which is a bitmask of CTX_* values.
+ A state's context represents a set of characters that the state's
+ predecessors must match. For example, a state whose context does not
+ include CTX_LETTER will never have transitions where the previous
+ character is a word constituent. A state whose context is CTX_ANY
+ might have transitions from any character. */
#define CTX_NONE 1
#define CTX_LETTER 2
@@ -134,53 +149,55 @@ static inline unsigned char to_uchar (char ch) { return ch; }
is set indicates that the constraint succeeds in the corresponding
context.
- bit 7 - previous and current are newlines
- bit 6 - previous was newline, current isn't
- bit 5 - previous wasn't newline, current is
- bit 4 - neither previous nor current is a newline
- bit 3 - previous and current are word-constituents
- bit 2 - previous was word-constituent, current isn't
- bit 1 - previous wasn't word-constituent, current is
- bit 0 - neither previous nor current is word-constituent
+ bit 8-11 - valid contexts when next character is CTX_NEWLINE
+ bit 4-7 - valid contexts when next character is CTX_LETTER
+ bit 0-3 - valid contexts when next character is CTX_NONE
The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
- succeeds in a particular context. Prev is the context value for
- the previous character, curr is the context value for the lookahead
- character. */
-#define MATCHES_NEWLINE_CONTEXT(constraint, prev, curr) \
- ((constraint) & \
- 1 << (((prev & CTX_NEWLINE) ? 2 : 0) + ((curr & CTX_NEWLINE) ? 1 : 0) + 4))
-#define MATCHES_LETTER_CONTEXT(constraint, prev, curr) \
- ((constraint) & \
- 1 << (((prev & CTX_LETTER) ? 2 : 0) + ((curr & CTX_LETTER) ? 1 : 0)))
+ succeeds in a particular context. Prev is a bitmask of possible
+ context values for the previous character, curr is the (single-bit)
+ context value for the lookahead character. */
+#define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf)
+#define LETTER_CONSTRAINT(constraint) (((constraint) >> 4) & 0xf)
+#define OTHER_CONSTRAINT(constraint) ((constraint) & 0xf)
+
#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \
- (MATCHES_NEWLINE_CONTEXT(constraint, prev, curr) \
- && MATCHES_LETTER_CONTEXT(constraint, prev, curr))
+ ((((curr) & CTX_NONE ? OTHER_CONSTRAINT(constraint) : 0) \
+ | ((curr) & CTX_LETTER ? LETTER_CONSTRAINT(constraint) : 0) \
+ | ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT(constraint) : 0)) & (prev))
/* The following macros give information about what a constraint depends on. */
+#define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111)
+#define PREV_LETTER_CONSTRAINT(constraint) (((constraint) >> 1) & 0x111)
+#define PREV_OTHER_CONSTRAINT(constraint) ((constraint) & 0x111)
+
#define PREV_NEWLINE_DEPENDENT(constraint) \
- (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
+ (PREV_NEWLINE_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
#define PREV_LETTER_DEPENDENT(constraint) \
- (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
+ (PREV_LETTER_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
/* Tokens that match the empty string subject to some constraint actually
work by applying that constraint to determine what may follow them,
taking into account what has gone before. The following values are
the constraints corresponding to the special tokens previously defined. */
-#define NO_CONSTRAINT 0xff
-#define BEGLINE_CONSTRAINT 0xcf
-#define ENDLINE_CONSTRAINT 0xaf
-#define BEGWORD_CONSTRAINT 0xf2
-#define ENDWORD_CONSTRAINT 0xf4
-#define LIMWORD_CONSTRAINT 0xf6
-#define NOTLIMWORD_CONSTRAINT 0xf9
+#define NO_CONSTRAINT 0x777
+#define BEGLINE_CONSTRAINT 0x444
+#define ENDLINE_CONSTRAINT 0x700
+#define BEGWORD_CONSTRAINT 0x050
+#define ENDWORD_CONSTRAINT 0x202
+#define LIMWORD_CONSTRAINT 0x252
+#define NOTLIMWORD_CONSTRAINT 0x525
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
are operators and others are terminal symbols. Most (but not all) of these
codes are returned by the lexical analyzer. */
-typedef enum
+
+typedef ptrdiff_t token;
+
+/* Predefined token values. */
+enum
{
- END = -1, /* END is a terminal symbol that matches the
+ END = -1, /* END is a terminal symbol that matches the
end of input; any value of END or less in
the parse tree is such a symbol. Accepting
states of the DFA are those that would have
@@ -188,83 +205,83 @@ typedef enum
/* Ordinary character values are terminal symbols that match themselves. */
- EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
the empty string. */
- BACKREF, /* BACKREF is generated by \<digit>; it
+ BACKREF, /* BACKREF is generated by \<digit>; it
is not completely handled. If the scanner
detects a transition on backref, it returns
a kind of "semi-success" indicating that
the match will have to be verified with
a backtracking matcher. */
- BEGLINE, /* BEGLINE is a terminal symbol that matches
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
the empty string if it is at the beginning
of a line. */
- ENDLINE, /* ENDLINE is a terminal symbol that matches
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
the empty string if it is at the end of
a line. */
- BEGWORD, /* BEGWORD is a terminal symbol that matches
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
the empty string if it is at the beginning
of a word. */
- ENDWORD, /* ENDWORD is a terminal symbol that matches
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
the empty string if it is at the end of
a word. */
- LIMWORD, /* LIMWORD is a terminal symbol that matches
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
the empty string if it is at the beginning
or the end of a word. */
- NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
matches the empty string if it is not at
the beginning or end of a word. */
- QMARK, /* QMARK is an operator of one argument that
- matches zero or one occurences of its
+ QMARK, /* QMARK is an operator of one argument that
+ matches zero or one occurrences of its
argument. */
- STAR, /* STAR is an operator of one argument that
+ STAR, /* STAR is an operator of one argument that
matches the Kleene closure (zero or more
occurrences) of its argument. */
- PLUS, /* PLUS is an operator of one argument that
+ PLUS, /* PLUS is an operator of one argument that
matches the positive closure (one or more
occurrences) of its argument. */
- REPMN, /* REPMN is a lexical token corresponding
+ REPMN, /* REPMN is a lexical token corresponding
to the {m,n} construct. REPMN never
appears in the compiled token vector. */
- CAT, /* CAT is an operator of two arguments that
+ CAT, /* CAT is an operator of two arguments that
matches the concatenation of its
arguments. CAT is never returned by the
lexical analyzer. */
- OR, /* OR is an operator of two arguments that
+ OR, /* OR is an operator of two arguments that
matches either of its arguments. */
- LPAREN, /* LPAREN never appears in the parse tree,
+ LPAREN, /* LPAREN never appears in the parse tree,
it is only a lexeme. */
- RPAREN, /* RPAREN never appears in the parse tree. */
+ RPAREN, /* RPAREN never appears in the parse tree. */
- ANYCHAR, /* ANYCHAR is a terminal symbol that matches
- any multibyte (or single byte) characters.
- It is used only if MB_CUR_MAX > 1. */
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ any multibyte (or single byte) characters.
+ It is used only if MB_CUR_MAX > 1. */
- MBCSET, /* MBCSET is similar to CSET, but for
+ MBCSET, /* MBCSET is similar to CSET, but for
multibyte characters. */
- WCHAR, /* Only returned by lex. wctok contains
+ WCHAR, /* Only returned by lex. wctok contains
the wide character representation. */
- CSET /* CSET and (and any value greater) is a
+ CSET /* CSET and (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
-} token;
+};
/* States of the recognizer correspond to sets of positions in the parse
@@ -273,23 +290,23 @@ typedef enum
a constraint. */
typedef struct
{
- unsigned int index; /* Index into the parse array. */
- unsigned int constraint; /* Constraint for matching this position. */
+ size_t index; /* Index into the parse array. */
+ unsigned int constraint; /* Constraint for matching this position. */
} position;
/* Sets of positions are stored as arrays. */
typedef struct
{
- position *elems; /* Elements of this position set. */
- size_t nelem; /* Number of elements in this set. */
- size_t alloc; /* Number of elements allocated in ELEMS. */
+ position *elems; /* Elements of this position set. */
+ size_t nelem; /* Number of elements in this set. */
+ size_t alloc; /* Number of elements allocated in ELEMS. */
} position_set;
/* Sets of leaves are also stored as arrays. */
typedef struct
{
- unsigned int *elems; /* Elements of this position set. */
- size_t nelem; /* Number of elements in this set. */
+ size_t *elems; /* Elements of this position set. */
+ size_t nelem; /* Number of elements in this set. */
} leaf_set;
/* A state of the dfa consists of a set of positions, some flags,
@@ -297,99 +314,103 @@ typedef struct
contains an END token. */
typedef struct
{
- int hash; /* Hash of the positions of this state. */
- position_set elems; /* Positions this state could match. */
- unsigned char context; /* Context from previous state. */
- char backref; /* True if this state matches a \<digit>. */
- unsigned char constraint; /* Constraint for this state to accept. */
- int first_end; /* Token value of the first END in elems. */
- position_set mbps; /* Positions which can match multibyte
- characters. e.g. period.
- These staff are used only if
- MB_CUR_MAX > 1. */
+ size_t hash; /* Hash of the positions of this state. */
+ position_set elems; /* Positions this state could match. */
+ unsigned char context; /* Context from previous state. */
+ char backref; /* True if this state matches a \<digit>. */
+ unsigned short constraint; /* Constraint for this state to accept. */
+ token first_end; /* Token value of the first END in elems. */
+ position_set mbps; /* Positions which can match multibyte
+ characters. e.g. period.
+ These staff are used only if
+ MB_CUR_MAX > 1. */
} dfa_state;
+/* States are indexed by state_num values. These are normally
+ nonnegative but -1 is used as a special value. */
+typedef ptrdiff_t state_num;
+
/* A bracket operator.
e.g. [a-c], [[:alpha:]], etc. */
struct mb_char_classes
{
- int cset;
+ ptrdiff_t cset;
int invert;
- wchar_t *chars; /* Normal characters. */
- int nchars;
- wctype_t *ch_classes; /* Character classes. */
- int nch_classes;
- wchar_t *range_sts; /* Range characters (start of the range). */
- wchar_t *range_ends; /* Range characters (end of the range). */
- int nranges;
- char **equivs; /* Equivalent classes. */
- int nequivs;
+ wchar_t *chars; /* Normal characters. */
+ size_t nchars;
+ wctype_t *ch_classes; /* Character classes. */
+ size_t nch_classes;
+ wchar_t *range_sts; /* Range characters (start of the range). */
+ wchar_t *range_ends; /* Range characters (end of the range). */
+ size_t nranges;
+ char **equivs; /* Equivalent classes. */
+ size_t nequivs;
char **coll_elems;
- int ncoll_elems; /* Collating elements. */
+ size_t ncoll_elems; /* Collating elements. */
};
/* A compiled regular expression. */
struct dfa
{
/* Fields filled by the scanner. */
- charclass *charclasses; /* Array of character sets for CSET tokens. */
- int cindex; /* Index for adding new charclasses. */
- int calloc; /* Number of charclasses currently allocated. */
+ charclass *charclasses; /* Array of character sets for CSET tokens. */
+ size_t cindex; /* Index for adding new charclasses. */
+ size_t calloc; /* Number of charclasses currently allocated. */
/* Fields filled by the parser. */
- token *tokens; /* Postfix parse array. */
- int tindex; /* Index for adding new tokens. */
- int talloc; /* Number of tokens currently allocated. */
- int depth; /* Depth required of an evaluation stack
+ token *tokens; /* Postfix parse array. */
+ size_t tindex; /* Index for adding new tokens. */
+ size_t talloc; /* Number of tokens currently allocated. */
+ size_t depth; /* Depth required of an evaluation stack
used for depth-first traversal of the
parse tree. */
- int nleaves; /* Number of leaves on the parse tree. */
- int nregexps; /* Count of parallel regexps being built
+ size_t nleaves; /* Number of leaves on the parse tree. */
+ size_t nregexps; /* Count of parallel regexps being built
with dfaparse(). */
- unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
- int utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
+ unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
+ token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
/* The following are used only if MB_CUR_MAX > 1. */
/* The value of multibyte_prop[i] is defined by following rule.
- if tokens[i] < NOTCHAR
- bit 0 : tokens[i] is the first byte of a character, including
- single-byte characters.
- bit 1 : tokens[i] is the last byte of a character, including
- single-byte characters.
+ if tokens[i] < NOTCHAR
+ bit 0 : tokens[i] is the first byte of a character, including
+ single-byte characters.
+ bit 1 : tokens[i] is the last byte of a character, including
+ single-byte characters.
- if tokens[i] = MBCSET
- ("the index of mbcsets correspnd to this operator" << 2) + 3
+ if tokens[i] = MBCSET
+ ("the index of mbcsets corresponding to this operator" << 2) + 3
e.g.
tokens
- = 'single_byte_a', 'multi_byte_A', single_byte_b'
- = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+ = 'single_byte_a', 'multi_byte_A', single_byte_b'
+ = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
multibyte_prop
- = 3 , 1 , 0 , 2 , 3
- */
- int nmultibyte_prop;
+ = 3 , 1 , 0 , 2 , 3
+ */
+ size_t nmultibyte_prop;
int *multibyte_prop;
/* Array of the bracket expression in the DFA. */
struct mb_char_classes *mbcsets;
- int nmbcsets;
- int mbcsets_alloc;
+ size_t nmbcsets;
+ size_t mbcsets_alloc;
/* Fields filled by the state builder. */
- dfa_state *states; /* States of the dfa. */
- int sindex; /* Index for adding new states. */
- int salloc; /* Number of states currently allocated. */
+ dfa_state *states; /* States of the dfa. */
+ state_num sindex; /* Index for adding new states. */
+ state_num salloc; /* Number of states currently allocated. */
/* Fields filled by the parse tree->NFA conversion. */
- position_set *follows; /* Array of follow sets, indexed by position
+ position_set *follows; /* Array of follow sets, indexed by position
index. The follow of a position is the set
of positions containing characters that
could conceivably follow a character
matching the given position in a string
matching the regexp. Allocated to the
maximum possible position index. */
- int searchflag; /* True if we are supposed to build a searching
+ int searchflag; /* True if we are supposed to build a searching
as opposed to an exact matcher. A searching
matcher finds the first and shortest string
matching a regexp anywhere in the buffer,
@@ -398,29 +419,29 @@ struct dfa
beginning of the buffer. */
/* Fields filled by dfaexec. */
- int tralloc; /* Number of transition tables that have
+ state_num tralloc; /* Number of transition tables that have
slots so far. */
- int trcount; /* Number of transition tables that have
+ int trcount; /* Number of transition tables that have
actually been built. */
- int **trans; /* Transition tables for states that can
+ state_num **trans; /* Transition tables for states that can
never accept. If the transitions for a
state have not yet been computed, or the
state could possibly accept, its entry in
this table is NULL. */
- int **realtrans; /* Trans always points to realtrans + 1; this
+ state_num **realtrans; /* Trans always points to realtrans + 1; this
is so trans[-1] can contain NULL. */
- int **fails; /* Transition tables after failing to accept
+ state_num **fails; /* Transition tables after failing to accept
on a state that potentially could do so. */
- int *success; /* Table of acceptance conditions used in
+ int *success; /* Table of acceptance conditions used in
dfaexec and computed in build_state. */
- int *newlines; /* Transitions on newlines. The entry for a
+ state_num *newlines; /* Transitions on newlines. The entry for a
newline in any transition table is always
-1 so we can count lines without wasting
too many cycles. The transition for a
newline is stored separately and handled
as a special case. Newline is also used
as a sentinel at the end of the buffer. */
- struct dfamust *musts; /* List of strings, at least one of which
+ struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
};
@@ -433,7 +454,7 @@ struct dfa
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
specified context. */
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
- SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, prev, curr)
+ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
static void dfamust (struct dfa *dfa);
static void regexp (void);
@@ -481,33 +502,72 @@ prtok (token t)
char const *s;
if (t < 0)
- fprintf(stderr, "END");
+ fprintf (stderr, "END");
else if (t < NOTCHAR)
- fprintf(stderr, "%c", t);
+ {
+ int ch = t;
+ fprintf (stderr, "%c", ch);
+ }
else
{
switch (t)
{
- case EMPTY: s = "EMPTY"; break;
- case BACKREF: s = "BACKREF"; break;
- case BEGLINE: s = "BEGLINE"; break;
- case ENDLINE: s = "ENDLINE"; break;
- case BEGWORD: s = "BEGWORD"; break;
- case ENDWORD: s = "ENDWORD"; break;
- case LIMWORD: s = "LIMWORD"; break;
- case NOTLIMWORD: s = "NOTLIMWORD"; break;
- case QMARK: s = "QMARK"; break;
- case STAR: s = "STAR"; break;
- case PLUS: s = "PLUS"; break;
- case CAT: s = "CAT"; break;
- case OR: s = "OR"; break;
- case LPAREN: s = "LPAREN"; break;
- case RPAREN: s = "RPAREN"; break;
- case ANYCHAR: s = "ANYCHAR"; break;
- case MBCSET: s = "MBCSET"; break;
- default: s = "CSET"; break;
+ case EMPTY:
+ s = "EMPTY";
+ break;
+ case BACKREF:
+ s = "BACKREF";
+ break;
+ case BEGLINE:
+ s = "BEGLINE";
+ break;
+ case ENDLINE:
+ s = "ENDLINE";
+ break;
+ case BEGWORD:
+ s = "BEGWORD";
+ break;
+ case ENDWORD:
+ s = "ENDWORD";
+ break;
+ case LIMWORD:
+ s = "LIMWORD";
+ break;
+ case NOTLIMWORD:
+ s = "NOTLIMWORD";
+ break;
+ case QMARK:
+ s = "QMARK";
+ break;
+ case STAR:
+ s = "STAR";
+ break;
+ case PLUS:
+ s = "PLUS";
+ break;
+ case CAT:
+ s = "CAT";
+ break;
+ case OR:
+ s = "OR";
+ break;
+ case LPAREN:
+ s = "LPAREN";
+ break;
+ case RPAREN:
+ s = "RPAREN";
+ break;
+ case ANYCHAR:
+ s = "ANYCHAR";
+ break;
+ case MBCSET:
+ s = "MBCSET";
+ break;
+ default:
+ s = "CSET";
+ break;
}
- fprintf(stderr, "%s", s);
+ fprintf (stderr, "%s", s);
}
}
#endif /* DEBUG */
@@ -563,17 +623,17 @@ equal (charclass const s1, charclass const s2)
static struct dfa *dfa;
/* Find the index of charclass s in dfa->charclasses, or allocate a new
charclass. */
-static int
+static size_t
charclass_index (charclass const s)
{
- int i;
+ size_t i;
for (i = 0; i < dfa->cindex; ++i)
- if (equal(s, dfa->charclasses[i]))
+ if (equal (s, dfa->charclasses[i]))
return i;
- REALLOC_IF_NECESSARY(dfa->charclasses, dfa->calloc, dfa->cindex + 1);
+ REALLOC_IF_NECESSARY (dfa->charclasses, dfa->calloc, dfa->cindex + 1);
++dfa->cindex;
- copyset(s, dfa->charclasses[i]);
+ copyset (s, dfa->charclasses[i]);
return i;
}
@@ -619,9 +679,9 @@ char_context (unsigned char c)
}
static int
-wchar_context(wint_t wc)
+wchar_context (wint_t wc)
{
- if (wc == (wchar_t)eolbyte || wc == 0)
+ if (wc == (wchar_t) eolbyte || wc == 0)
return CTX_NEWLINE;
if (wc == L'_' || iswalnum (wc))
return CTX_LETTER;
@@ -687,8 +747,7 @@ static inline bool
setbit_wc (wint_t wc, charclass c)
{
abort ();
- /*NOTREACHED*/
- return false;
+ /*NOTREACHED*/ return false;
}
#endif
@@ -741,38 +800,38 @@ using_utf8 (void)
reader is referred to the GNU Regex documentation for the
meaning of the @address@hidden@ syntax bits. */
-static char const *lexptr; /* Pointer to next input character. */
-static int lexleft; /* Number of characters remaining. */
-static token lasttok; /* Previous token returned; initially END. */
-static int laststart; /* True if we're separated from beginning or (, |
+static char const *lexptr; /* Pointer to next input character. */
+static size_t lexleft; /* Number of characters remaining. */
+static token lasttok; /* Previous token returned; initially END. */
+static int laststart; /* True if we're separated from beginning or (, |
only by zero-width characters. */
-static int parens; /* Count of outstanding left parens. */
-static int minrep, maxrep; /* Repeat counts for {m,n}. */
+static size_t parens; /* Count of outstanding left parens. */
+static int minrep, maxrep; /* Repeat counts for {m,n}. */
-static int cur_mb_len = 1; /* Length of the multibyte representation of
+static int cur_mb_len = 1; /* Length of the multibyte representation of
wctok. */
/* These variables are used only if (MB_CUR_MAX > 1). */
-static mbstate_t mbs; /* Mbstate for mbrlen(). */
-static wchar_t wctok; /* Wide character representation of the current
+static mbstate_t mbs; /* Mbstate for mbrlen(). */
+static wchar_t wctok; /* Wide character representation of the current
multibyte character. */
-static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
- Each element store the amount of remain
- byte of corresponding multibyte character
- in the input string. A element's value
- is 0 if corresponding character is a
- single byte chracter.
- e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
- mblen_buf : 0, 3, 2, 1
- */
-static wchar_t *inputwcs; /* Wide character representation of input
+static unsigned char *mblen_buf; /* Correspond to the input buffer in dfaexec().
+ Each element store the amount of remain
+ byte of corresponding multibyte character
+ in the input string. A element's value
+ is 0 if corresponding character is a
+ single byte character.
+ e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
+ mblen_buf : 0, 3, 2, 1
+ */
+static wchar_t *inputwcs; /* Wide character representation of input
string in dfaexec().
The length of this array is same as
the length of input string(char array).
inputstring[i] is a single-byte char,
or 1st byte of a multibyte char.
And inputwcs[i] is the codepoint. */
-static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */
-static unsigned char const *buf_end; /* reference to end in dfaexec(). */
+static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */
+static unsigned char const *buf_end; /* reference to end in dfaexec(). */
#if MBS_SUPPORT
@@ -789,7 +848,7 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
else \
{ \
wchar_t _wc; \
- cur_mb_len = mbrtowc(&_wc, lexptr, lexleft, &mbs); \
+ cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
if (cur_mb_len <= 0) \
{ \
cur_mb_len = 1; \
@@ -801,7 +860,7 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
lexptr += cur_mb_len; \
lexleft -= cur_mb_len; \
(wc) = _wc; \
- (c) = wctob(wc); \
+ (c) = wctob (wc); \
} \
} \
} while(0)
@@ -809,8 +868,8 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
# define FETCH(c, eoferr) \
do { \
wint_t wc; \
- FETCH_WC(c, wc, eoferr); \
- } while(0)
+ FETCH_WC (c, wc, eoferr); \
+ } while (0)
#else
/* Note that characters become unsigned here. */
@@ -831,34 +890,39 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
#endif /* MBS_SUPPORT */
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
typedef int predicate (int);
/* The following list maps the names of the Posix named character classes
to predicate functions that determine whether a given character is in
the class. The leading [ has already been eaten by the lexical analyzer. */
-struct dfa_ctype {
+struct dfa_ctype
+{
const char *name;
predicate *func;
bool single_byte_only;
};
static const struct dfa_ctype prednames[] = {
- { "alpha", isalpha, false },
- { "upper", isupper, false },
- { "lower", islower, false },
- { "digit", isdigit, true },
- { "xdigit", isxdigit, true },
- { "space", isspace, false },
- { "punct", ispunct, false },
- { "alnum", isalnum, false },
- { "print", isprint, false },
- { "graph", isgraph, false },
- { "cntrl", iscntrl, false },
- { "blank", is_blank, false },
- { NULL, NULL, false }
+ {"alpha", isalpha, false},
+ {"upper", isupper, false},
+ {"lower", islower, false},
+ {"digit", isdigit, true},
+ {"xdigit", isxdigit, true},
+ {"space", isspace, false},
+ {"punct", ispunct, false},
+ {"alnum", isalnum, false},
+ {"print", isprint, false},
+ {"graph", isgraph, false},
+ {"cntrl", iscntrl, false},
+ {"blank", is_blank, false},
+ {NULL, NULL, false}
};
-static const struct dfa_ctype * _GL_ATTRIBUTE_PURE
+static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
find_pred (const char *str)
{
unsigned int i;
@@ -892,7 +956,7 @@ parse_bracket_exp (void)
/* Work area to build a mb_char_classes. */
struct mb_char_classes *work_mbc;
- int chars_al, range_sts_al, range_ends_al, ch_classes_al,
+ size_t chars_al, range_sts_al, range_ends_al, ch_classes_al,
equivs_al, coll_elems_al;
chars_al = 0;
@@ -900,7 +964,8 @@ parse_bracket_exp (void)
ch_classes_al = equivs_al = coll_elems_al = 0;
if (MB_CUR_MAX > 1)
{
- REALLOC_IF_NECESSARY(dfa->mbcsets, dfa->mbcsets_alloc, dfa->nmbcsets + 1);
+ REALLOC_IF_NECESSARY (dfa->mbcsets, dfa->mbcsets_alloc,
+ dfa->nmbcsets + 1);
/* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
We will update dfa->multibyte_prop[] in addtok(), because we can't
@@ -926,7 +991,7 @@ parse_bracket_exp (void)
colon_warning_state = (c == ':');
do
{
- c1 = EOF; /* mark c1 is not initialized". */
+ c1 = EOF; /* mark c1 is not initialized". */
colon_warning_state &= ~2;
/* Note that if we're looking at some other [:...:] construct,
@@ -942,8 +1007,7 @@ parse_bracket_exp (void)
/* If pattern contains `[[:', `[[.', or `[[='. */
if (c1 == ':'
/* TODO: handle `[[.' and `[[=' also for MB_CUR_MAX == 1. */
- || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '='))
- )
+ || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '=')))
{
size_t len = 0;
for (;;)
@@ -965,51 +1029,48 @@ parse_bracket_exp (void)
/* build character class. */
{
char const *class
- = (case_fold && (STREQ (str, "upper")
- || STREQ (str, "lower"))
- ? "alpha"
- : str);
+ = (case_fold && (STREQ (str, "upper")
+ || STREQ (str, "lower")) ? "alpha" : str);
const struct dfa_ctype *pred = find_pred (class);
if (!pred)
- dfaerror(_("invalid character class"));
+ dfaerror (_("invalid character class"));
if (MB_CUR_MAX > 1 && !pred->single_byte_only)
{
/* Store the character class as wctype_t. */
wctype_t wt = wctype (class);
- REALLOC_IF_NECESSARY(work_mbc->ch_classes,
- ch_classes_al,
- work_mbc->nch_classes + 1);
+ REALLOC_IF_NECESSARY (work_mbc->ch_classes,
+ ch_classes_al,
+ work_mbc->nch_classes + 1);
work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
}
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (pred->func(c2))
+ if (pred->func (c2))
setbit_case_fold_c (c2, ccl);
}
else if (MBS_SUPPORT && (c1 == '=' || c1 == '.'))
{
char *elem;
- MALLOC(elem, len + 1);
- strncpy(elem, str, len + 1);
+ MALLOC (elem, len + 1);
+ strncpy (elem, str, len + 1);
if (c1 == '=')
/* build equivalent class. */
{
- REALLOC_IF_NECESSARY(work_mbc->equivs,
- equivs_al,
- work_mbc->nequivs + 1);
+ REALLOC_IF_NECESSARY (work_mbc->equivs,
+ equivs_al, work_mbc->nequivs + 1);
work_mbc->equivs[work_mbc->nequivs++] = elem;
}
if (c1 == '.')
/* build collating element. */
{
- REALLOC_IF_NECESSARY(work_mbc->coll_elems,
- coll_elems_al,
- work_mbc->ncoll_elems + 1);
+ REALLOC_IF_NECESSARY (work_mbc->coll_elems,
+ coll_elems_al,
+ work_mbc->ncoll_elems + 1);
work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
}
}
@@ -1025,15 +1086,15 @@ parse_bracket_exp (void)
}
if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC(c, wc, _("unbalanced ["));
+ FETCH_WC (c, wc, _("unbalanced ["));
if (c1 == EOF)
- FETCH_WC(c1, wc1, _("unbalanced ["));
+ FETCH_WC (c1, wc1, _("unbalanced ["));
if (c1 == '-')
/* build range characters. */
{
- FETCH_WC(c2, wc2, _("unbalanced ["));
+ FETCH_WC (c2, wc2, _("unbalanced ["));
if (c2 == ']')
{
/* In the case [x-], the - is an ordinary hyphen,
@@ -1045,32 +1106,31 @@ parse_bracket_exp (void)
if (c1 == '-' && c2 != ']')
{
- if (c2 == '\\'
- && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC(c2, wc2, _("unbalanced ["));
+ if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (c2, wc2, _("unbalanced ["));
if (MB_CUR_MAX > 1)
{
/* When case folding map a range, say [m-z] (or even [M-z])
to the pair of ranges, [m-z] [M-Z]. */
- REALLOC_IF_NECESSARY(work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- REALLOC_IF_NECESSARY(work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
+ REALLOC_IF_NECESSARY (work_mbc->range_sts,
+ range_sts_al, work_mbc->nranges + 1);
+ REALLOC_IF_NECESSARY (work_mbc->range_ends,
+ range_ends_al, work_mbc->nranges + 1);
work_mbc->range_sts[work_mbc->nranges] =
- case_fold ? towlower(wc) : (wchar_t)wc;
+ case_fold ? towlower (wc) : (wchar_t) wc;
work_mbc->range_ends[work_mbc->nranges++] =
- case_fold ? towlower(wc2) : (wchar_t)wc2;
+ case_fold ? towlower (wc2) : (wchar_t) wc2;
#ifndef GREP
- if (case_fold && (iswalpha(wc) || iswalpha(wc2)))
+ if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
{
- REALLOC_IF_NECESSARY(work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- work_mbc->range_sts[work_mbc->nranges] = towupper(wc);
- REALLOC_IF_NECESSARY(work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
- work_mbc->range_ends[work_mbc->nranges++] = towupper(wc2);
+ REALLOC_IF_NECESSARY (work_mbc->range_sts,
+ range_sts_al, work_mbc->nranges + 1);
+ work_mbc->range_sts[work_mbc->nranges] = towupper (wc);
+ REALLOC_IF_NECESSARY (work_mbc->range_ends,
+ range_ends_al, work_mbc->nranges + 1);
+ work_mbc->range_ends[work_mbc->nranges++] = towupper (wc2);
}
#endif
}
@@ -1087,7 +1147,7 @@ parse_bracket_exp (void)
}
colon_warning_state |= 8;
- FETCH_WC(c1, wc1, _("unbalanced ["));
+ FETCH_WC (c1, wc1, _("unbalanced ["));
continue;
}
@@ -1099,25 +1159,25 @@ parse_bracket_exp (void)
continue;
}
- if (case_fold && iswalpha(wc))
+ if (case_fold && iswalpha (wc))
{
- wc = towlower(wc);
+ wc = towlower (wc);
if (!setbit_wc (wc, ccl))
{
- REALLOC_IF_NECESSARY(work_mbc->chars, chars_al,
- work_mbc->nchars + 1);
+ REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
+ work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
#ifdef GREP
continue;
#else
- wc = towupper(wc);
+ wc = towupper (wc);
#endif
}
if (!setbit_wc (wc, ccl))
{
- REALLOC_IF_NECESSARY(work_mbc->chars, chars_al,
- work_mbc->nchars + 1);
+ REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
+ work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
}
@@ -1130,19 +1190,19 @@ parse_bracket_exp (void)
{
static charclass zeroclass;
work_mbc->invert = invert;
- work_mbc->cset = equal(ccl, zeroclass) ? -1 : charclass_index(ccl);
+ work_mbc->cset = equal (ccl, zeroclass) ? -1 : charclass_index (ccl);
return MBCSET;
}
if (invert)
{
- assert(MB_CUR_MAX == 1);
- notset(ccl);
+ assert (MB_CUR_MAX == 1);
+ notset (ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
- clrbit(eolbyte, ccl);
+ clrbit (eolbyte, ccl);
}
- return CSET + charclass_index(ccl);
+ return CSET + charclass_index (ccl);
}
static token
@@ -1164,11 +1224,11 @@ lex (void)
if (MB_CUR_MAX > 1)
{
FETCH_WC (c, wctok, NULL);
- if ((int)c == EOF)
+ if ((int) c == EOF)
goto normal_char;
}
else
- FETCH(c, NULL);
+ FETCH (c, NULL);
switch (c)
{
@@ -1176,7 +1236,7 @@ lex (void)
if (backslash)
goto normal_char;
if (lexleft == 0)
- dfaerror(_("unfinished \\ escape"));
+ dfaerror (_("unfinished \\ escape"));
backslash = 1;
break;
@@ -1184,9 +1244,7 @@ lex (void)
if (backslash)
goto normal_char;
if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
- || lasttok == END
- || lasttok == LPAREN
- || lasttok == OR)
+ || lasttok == END || lasttok == LPAREN || lasttok == OR)
return lasttok = BEGLINE;
goto normal_char;
@@ -1224,12 +1282,12 @@ lex (void)
case '`':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ return lasttok = BEGLINE; /* FIXME: should be beginning of string */
goto normal_char;
case '\'':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = ENDLINE; /* FIXME: should be end of string */
+ return lasttok = ENDLINE; /* FIXME: should be end of string */
goto normal_char;
case '<':
@@ -1285,73 +1343,53 @@ lex (void)
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
- if (syntax_bits & RE_NO_BK_BRACES)
- {
- /* Scan ahead for a valid interval; if it's not valid,
- treat it as a literal '{'. */
- int lo = -1, hi = -1;
- char const *p = lexptr;
- char const *lim = p + lexleft;
- for (; p != lim && ISASCIIDIGIT (*p); p++)
- lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
- if (p != lim && *p == ',')
- while (++p != lim && ISASCIIDIGIT (*p))
- hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
- else
- hi = lo;
- if (p == lim || *p != '}'
- || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
- goto normal_char;
- }
-
- minrep = 0;
/* Cases:
{M} - exact count
{M,} - minimum count, maximum is infinity
+ {,N} - 0 through N
+ {,} - 0 to infinity (same as '*')
{M,N} - M through N */
- FETCH(c, _("unfinished repeat count"));
- if (ISASCIIDIGIT (c))
- {
- minrep = c - '0';
- for (;;)
- {
- FETCH(c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
- break;
- minrep = 10 * minrep + c - '0';
- }
- }
- else
- dfaerror(_("malformed repeat count"));
- if (c == ',')
- {
- FETCH (c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
- maxrep = -1;
- else
- {
- maxrep = c - '0';
- for (;;)
- {
- FETCH (c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
- break;
- maxrep = 10 * maxrep + c - '0';
- }
- if (0 <= maxrep && maxrep < minrep)
- dfaerror (_("malformed repeat count"));
- }
- }
- else
- maxrep = minrep;
- if (!(syntax_bits & RE_NO_BK_BRACES))
- {
- if (c != '\\')
- dfaerror(_("malformed repeat count"));
- FETCH(c, _("unfinished repeat count"));
- }
- if (c != '}')
- dfaerror(_("malformed repeat count"));
+ {
+ char const *p = lexptr;
+ char const *lim = p + lexleft;
+ minrep = maxrep = -1;
+ for (; p != lim && ISASCIIDIGIT (*p); p++)
+ {
+ if (minrep < 0)
+ minrep = *p - '0';
+ else
+ minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0');
+ }
+ if (p != lim)
+ {
+ if (*p != ',')
+ maxrep = minrep;
+ else
+ {
+ if (minrep < 0)
+ minrep = 0;
+ while (++p != lim && ISASCIIDIGIT (*p))
+ {
+ if (maxrep < 0)
+ maxrep = *p - '0';
+ else
+ maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0');
+ }
+ }
+ }
+ if (! ((! backslash || (p != lim && *p++ == '\\'))
+ && p != lim && *p++ == '}'
+ && 0 <= minrep && (maxrep < 0 || minrep <= maxrep)))
+ {
+ if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+ goto normal_char;
+ dfaerror (_("Invalid content of \\{\\}"));
+ }
+ if (RE_DUP_MAX < maxrep)
+ dfaerror (_("Regular expression too big"));
+ lexptr = p;
+ lexleft = lim - p;
+ }
laststart = 0;
return lasttok = REPMN;
@@ -1365,8 +1403,7 @@ lex (void)
case '\n':
if (syntax_bits & RE_LIMITED_OPS
- || backslash
- || !(syntax_bits & RE_NEWLINE_ALT))
+ || backslash || !(syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
laststart = 1;
return lasttok = OR;
@@ -1397,46 +1434,46 @@ lex (void)
laststart = 0;
return lasttok = ANYCHAR;
}
- zeroset(ccl);
- notset(ccl);
+ zeroset (ccl);
+ notset (ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
- clrbit(eolbyte, ccl);
+ clrbit (eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
- clrbit('\0', ccl);
+ clrbit ('\0', ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case 's':
case 'S':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- zeroset(ccl);
+ zeroset (ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (isspace(c2))
- setbit(c2, ccl);
+ if (isspace (c2))
+ setbit (c2, ccl);
if (c == 'S')
- notset(ccl);
+ notset (ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case 'w':
case 'W':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- zeroset(ccl);
+ zeroset (ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (IS_WORD_CONSTITUENT(c2))
- setbit(c2, ccl);
+ if (IS_WORD_CONSTITUENT (c2))
+ setbit (c2, ccl);
if (c == 'W')
- notset(ccl);
+ notset (ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case '[':
if (backslash)
goto normal_char;
laststart = 0;
- return lasttok = parse_bracket_exp();
+ return lasttok = parse_bracket_exp ();
default:
normal_char:
@@ -1446,11 +1483,11 @@ lex (void)
if (MB_CUR_MAX > 1)
return lasttok = WCHAR;
- if (case_fold && isalpha(c))
+ if (case_fold && isalpha (c))
{
- zeroset(ccl);
+ zeroset (ccl);
setbit_case_fold_c (c, ccl);
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
}
return lasttok = c;
@@ -1459,14 +1496,14 @@ lex (void)
/* The above loop should consume at most a backslash
and some other character. */
- abort();
- return END; /* keeps pedantic compilers happy. */
+ abort ();
+ return END; /* keeps pedantic compilers happy. */
}
/* Recursive descent parser for regular expressions. */
-static token tok; /* Lookahead token. */
-static int depth; /* Current depth of a hypothetical stack
+static token tok; /* Lookahead token. */
+static size_t depth; /* Current depth of a hypothetical stack
holding deferred productions. This is
used to determine the depth that will be
required of the real stack later on in
@@ -1477,12 +1514,12 @@ addtok_mb (token t, int mbprop)
{
if (MB_CUR_MAX > 1)
{
- REALLOC_IF_NECESSARY(dfa->multibyte_prop, dfa->nmultibyte_prop,
- dfa->tindex + 1);
+ REALLOC_IF_NECESSARY (dfa->multibyte_prop, dfa->nmultibyte_prop,
+ dfa->tindex + 1);
dfa->multibyte_prop[dfa->tindex] = mbprop;
}
- REALLOC_IF_NECESSARY(dfa->tokens, dfa->talloc, dfa->tindex + 1);
+ REALLOC_IF_NECESSARY (dfa->tokens, dfa->talloc, dfa->tindex + 1);
dfa->tokens[dfa->tindex++] = t;
switch (t)
@@ -1523,7 +1560,7 @@ addtok (token t)
This does not require UTF-8. */
if (!work_mbc->invert)
{
- int i;
+ size_t i;
for (i = 0; i < work_mbc->nchars; i++)
{
addtok_wc (work_mbc->chars[i]);
@@ -1536,12 +1573,11 @@ addtok (token t)
/* UTF-8 allows treating a simple, non-inverted MBCSET like a CSET. */
if (work_mbc->invert
- || (!using_utf8() && work_mbc->cset != -1)
+ || (!using_utf8 () && work_mbc->cset != -1)
|| work_mbc->nchars != 0
|| work_mbc->nch_classes != 0
|| work_mbc->nranges != 0
- || work_mbc->nequivs != 0
- || work_mbc->ncoll_elems != 0)
+ || work_mbc->nequivs != 0 || work_mbc->ncoll_elems != 0)
{
addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
if (need_or)
@@ -1588,15 +1624,18 @@ addtok_wc (wint_t wc)
if (cur_mb_len <= 0)
buf[0] = 0;
- addtok_mb(buf[0], cur_mb_len == 1 ? 3 : 1);
+ addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
for (i = 1; i < cur_mb_len; i++)
{
- addtok_mb(buf[i], i == cur_mb_len - 1 ? 2 : 0);
- addtok(CAT);
+ addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0);
+ addtok (CAT);
}
}
#else
-static void addtok_wc (wint_t wc) {}
+static void
+addtok_wc (wint_t wc)
+{
+}
#endif
static void
@@ -1604,11 +1643,11 @@ add_utf8_anychar (void)
{
#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
- { 0, 0, 0, 0, ~0, ~0, 0, 0 }, /* 80-bf: non-lead bytes */
- { ~0, ~0, ~0, ~0, 0, 0, 0, 0 }, /* 00-7f: 1-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0xfffffffcU, 0 }, /* c2-df: 2-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0, 0xffff }, /* e0-ef: 3-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0, 0xff0000 } /* f0-f7: 4-byte sequence */
+ {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-lead bytes */
+ {~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0xfffffffcU, 0}, /* c2-df: 2-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0, 0xff0000} /* f0-f7: 4-byte sequence */
};
const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
unsigned int i;
@@ -1626,15 +1665,15 @@ add_utf8_anychar (void)
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
- dfa->utf8_anychar_classes[i] = CSET + charclass_index(c);
+ dfa->utf8_anychar_classes[i] = CSET + charclass_index (c);
}
/* A valid UTF-8 character is
- ([0x00-0x7f]
- |[0xc2-0xdf][0x80-0xbf]
- |[0xe0-0xef[0x80-0xbf][0x80-0xbf]
- |[0xf0-f7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
+ ([0x00-0x7f]
+ |[0xc2-0xdf][0x80-0xbf]
+ |[0xe0-0xef[0x80-0xbf][0x80-0xbf]
+ |[0xf0-f7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x00-0x7f]
and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
@@ -1694,28 +1733,28 @@ atom (void)
}
else if (MBS_SUPPORT && tok == WCHAR)
{
- addtok_wc (case_fold ? towlower(wctok) : wctok);
+ addtok_wc (case_fold ? towlower (wctok) : wctok);
#ifndef GREP
- if (case_fold && iswalpha(wctok))
+ if (case_fold && iswalpha (wctok))
{
- addtok_wc (towupper(wctok));
+ addtok_wc (towupper (wctok));
addtok (OR);
}
#endif
- tok = lex();
+ tok = lex ();
}
- else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8())
+ else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
pretty sure it would be both profitable and correct to do it for
any encoding; however, the optimization must be done manually as
- it is done above in add_utf8_anychar. So, let's start with
+ it is done above in add_utf8_anychar. So, let's start with
UTF-8: it is the most used, and the structure of the encoding
makes the correctness more obvious. */
- add_utf8_anychar();
- tok = lex();
+ add_utf8_anychar ();
+ tok = lex ();
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@@ -1724,26 +1763,26 @@ atom (void)
#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
- addtok(tok);
- tok = lex();
+ addtok (tok);
+ tok = lex ();
}
else if (tok == LPAREN)
{
- tok = lex();
- regexp();
+ tok = lex ();
+ regexp ();
if (tok != RPAREN)
- dfaerror(_("unbalanced ("));
- tok = lex();
+ dfaerror (_("unbalanced ("));
+ tok = lex ();
}
else
- addtok(EMPTY);
+ addtok (EMPTY);
}
/* Return the number of tokens in the given subexpression. */
-static int _GL_ATTRIBUTE_PURE
-nsubtoks (int tindex)
+static size_t _GL_ATTRIBUTE_PURE
+nsubtoks (size_t tindex)
{
- int ntoks1;
+ size_t ntoks1;
switch (dfa->tokens[tindex - 1])
{
@@ -1752,23 +1791,23 @@ nsubtoks (int tindex)
case QMARK:
case STAR:
case PLUS:
- return 1 + nsubtoks(tindex - 1);
+ return 1 + nsubtoks (tindex - 1);
case CAT:
case OR:
- ntoks1 = nsubtoks(tindex - 1);
- return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1);
+ ntoks1 = nsubtoks (tindex - 1);
+ return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1);
}
}
/* Copy the given subexpression to the top of the tree. */
static void
-copytoks (int tindex, int ntokens)
+copytoks (size_t tindex, size_t ntokens)
{
- int i;
+ size_t i;
for (i = 0; i < ntokens; ++i)
{
- addtok(dfa->tokens[tindex + i]);
+ addtok (dfa->tokens[tindex + i]);
/* Update index into multibyte csets. */
if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET)
dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i];
@@ -1778,64 +1817,65 @@ copytoks (int tindex, int ntokens)
static void
closure (void)
{
- int tindex, ntokens, i;
+ int i;
+ size_t tindex, ntokens;
- atom();
+ atom ();
while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
if (tok == REPMN && (minrep || maxrep))
{
- ntokens = nsubtoks(dfa->tindex);
+ ntokens = nsubtoks (dfa->tindex);
tindex = dfa->tindex - ntokens;
if (maxrep < 0)
- addtok(PLUS);
+ addtok (PLUS);
if (minrep == 0)
- addtok(QMARK);
+ addtok (QMARK);
for (i = 1; i < minrep; ++i)
{
- copytoks(tindex, ntokens);
- addtok(CAT);
+ copytoks (tindex, ntokens);
+ addtok (CAT);
}
for (; i < maxrep; ++i)
{
- copytoks(tindex, ntokens);
- addtok(QMARK);
- addtok(CAT);
+ copytoks (tindex, ntokens);
+ addtok (QMARK);
+ addtok (CAT);
}
- tok = lex();
+ tok = lex ();
}
else if (tok == REPMN)
{
- dfa->tindex -= nsubtoks(dfa->tindex);
- tok = lex();
- closure();
+ dfa->tindex -= nsubtoks (dfa->tindex);
+ tok = lex ();
+ closure ();
}
else
{
- addtok(tok);
- tok = lex();
+ addtok (tok);
+ tok = lex ();
}
}
static void
branch (void)
{
- closure();
+ closure ();
while (tok != RPAREN && tok != OR && tok >= 0)
{
- closure();
- addtok(CAT);
+ closure ();
+ addtok (CAT);
}
}
static void
regexp (void)
{
- branch();
+ branch ();
while (tok == OR)
{
- tok = lex();
- branch();
- addtok(OR);
+ tok = lex ();
+ branch ();
+ addtok (OR);
}
}
@@ -1854,25 +1894,25 @@ dfaparse (char const *s, size_t len, struct dfa *d)
if (MB_CUR_MAX > 1)
{
cur_mb_len = 0;
- memset(&mbs, 0, sizeof mbs);
+ memset (&mbs, 0, sizeof mbs);
}
- if (! syntax_bits_set)
- dfaerror(_("no syntax specified"));
+ if (!syntax_bits_set)
+ dfaerror (_("no syntax specified"));
- tok = lex();
+ tok = lex ();
depth = d->depth;
- regexp();
+ regexp ();
if (tok != END)
- dfaerror(_("unbalanced )"));
+ dfaerror (_("unbalanced )"));
- addtok(END - d->nregexps);
- addtok(CAT);
+ addtok (END - d->nregexps);
+ addtok (CAT);
if (d->nregexps)
- addtok(OR);
+ addtok (OR);
++d->nregexps;
}
@@ -1881,17 +1921,17 @@ dfaparse (char const *s, size_t len, struct dfa *d)
/* Copy one set to another; the destination must be large enough. */
static void
-copy (position_set const *src, position_set *dst)
+copy (position_set const *src, position_set * dst)
{
- REALLOC_IF_NECESSARY(dst->elems, dst->alloc, src->nelem);
- memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * src->nelem);
+ REALLOC_IF_NECESSARY (dst->elems, dst->alloc, src->nelem);
+ memcpy (dst->elems, src->elems, sizeof (dst->elems[0]) * src->nelem);
dst->nelem = src->nelem;
}
static void
-alloc_position_set (position_set *s, size_t size)
+alloc_position_set (position_set * s, size_t size)
{
- MALLOC(s->elems, size);
+ MALLOC (s->elems, size);
s->alloc = size;
s->nelem = 0;
}
@@ -1901,14 +1941,14 @@ alloc_position_set (position_set *s, size_t size)
then merge (logically-OR) P's constraints into the one in S.
S->elems must point to an array large enough to hold the resulting set. */
static void
-insert (position p, position_set *s)
+insert (position p, position_set * s)
{
- int count = s->nelem;
- int lo = 0, hi = count;
- int i;
+ size_t count = s->nelem;
+ size_t lo = 0, hi = count;
+ size_t i;
while (lo < hi)
{
- int mid = ((unsigned) lo + (unsigned) hi) >> 1;
+ size_t mid = (lo + hi) >> 1;
if (s->elems[mid].index > p.index)
lo = mid + 1;
else
@@ -1921,7 +1961,7 @@ insert (position p, position_set *s)
return;
}
- REALLOC_IF_NECESSARY(s->elems, s->alloc, count + 1);
+ REALLOC_IF_NECESSARY (s->elems, s->alloc, count + 1);
for (i = count; i > lo; i--)
s->elems[i] = s->elems[i - 1];
s->elems[lo] = p;
@@ -1931,11 +1971,11 @@ insert (position p, position_set *s)
/* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */
static void
-merge (position_set const *s1, position_set const *s2, position_set *m)
+merge (position_set const *s1, position_set const *s2, position_set * m)
{
- int i = 0, j = 0;
+ size_t i = 0, j = 0;
- REALLOC_IF_NECESSARY(m->elems, m->alloc, s1->nelem + s2->nelem);
+ REALLOC_IF_NECESSARY (m->elems, m->alloc, s1->nelem + s2->nelem);
m->nelem = 0;
while (i < s1->nelem && j < s2->nelem)
if (s1->elems[i].index > s2->elems[j].index)
@@ -1955,9 +1995,9 @@ merge (position_set const *s1, position_set const *s2, position_set *m)
/* Delete a position from a set. */
static void
-delete (position p, position_set *s)
+delete (position p, position_set * s)
{
- int i;
+ size_t i;
for (i = 0; i < s->nelem; ++i)
if (p.index == s->elems[i].index)
@@ -1970,14 +2010,13 @@ delete (position p, position_set *s)
/* Find the index of the state corresponding to the given position set with
the given preceding context, or create a new state if there is no such
state. Context tells whether we got here on a newline or letter. */
-static int
+static state_num
state_index (struct dfa *d, position_set const *s, int context)
{
- int hash = 0;
+ size_t hash = 0;
int constraint;
- int i, j;
+ state_num i, j;
- context &= ~CTX_NONE;
for (i = 0; i < s->nelem; ++i)
hash ^= s->elems[i].index + s->elems[i].constraint;
@@ -1997,10 +2036,10 @@ state_index (struct dfa *d, position_set const *s, int context)
}
/* We'll have to create a new state. */
- REALLOC_IF_NECESSARY(d->states, d->salloc, d->sindex + 1);
+ REALLOC_IF_NECESSARY (d->states, d->salloc, d->sindex + 1);
d->states[i].hash = hash;
- alloc_position_set(&d->states[i].elems, s->nelem);
- copy(s, &d->states[i].elems);
+ alloc_position_set (&d->states[i].elems, s->nelem);
+ copy (s, &d->states[i].elems);
d->states[i].context = context;
d->states[i].backref = 0;
d->states[i].constraint = 0;
@@ -2014,11 +2053,9 @@ state_index (struct dfa *d, position_set const *s, int context)
if (d->tokens[s->elems[j].index] < 0)
{
constraint = s->elems[j].constraint;
- if (SUCCEEDS_IN_CONTEXT(constraint, context, CTX_NONE)
- || SUCCEEDS_IN_CONTEXT(constraint, context, CTX_NEWLINE)
- || SUCCEEDS_IN_CONTEXT(constraint, context, CTX_LETTER))
+ if (SUCCEEDS_IN_CONTEXT (constraint, context, CTX_ANY))
d->states[i].constraint |= constraint;
- if (! d->states[i].first_end)
+ if (!d->states[i].first_end)
d->states[i].first_end = d->tokens[s->elems[j].index];
}
else if (d->tokens[s->elems[j].index] == BACKREF)
@@ -2038,13 +2075,13 @@ state_index (struct dfa *d, position_set const *s, int context)
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void
-epsclosure (position_set *s, struct dfa const *d)
+epsclosure (position_set * s, struct dfa const *d)
{
- int i, j;
- char *visited; /* array of booleans, enough to use char, not int */
+ size_t i, j;
+ char *visited; /* array of booleans, enough to use char, not int */
position p, old;
- CALLOC(visited, d->tindex);
+ CALLOC (visited, d->tindex);
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
@@ -2057,7 +2094,7 @@ epsclosure (position_set *s, struct dfa const *d)
{
old = s->elems[i];
p.constraint = old.constraint;
- delete(s->elems[i], s);
+ delete (s->elems[i], s);
if (visited[old.index])
{
--i;
@@ -2090,25 +2127,25 @@ epsclosure (position_set *s, struct dfa const *d)
for (j = 0; j < d->follows[old.index].nelem; ++j)
{
p.index = d->follows[old.index].elems[j].index;
- insert(p, s);
+ insert (p, s);
}
/* Force rescan to start at the beginning. */
i = -1;
}
- free(visited);
+ free (visited);
}
/* Returns the set of contexts for which there is at least one
character included in C. */
static int
-charclass_context(charclass c)
+charclass_context (charclass c)
{
int context = 0;
unsigned int j;
- if (tstbit(eolbyte, c))
+ if (tstbit (eolbyte, c))
context |= CTX_NEWLINE;
for (j = 0; j < CHARCLASS_INTS; ++j)
@@ -2122,29 +2159,27 @@ charclass_context(charclass c)
return context;
}
-/* Returns the subset of POSSIBLE_CONTEXTS on which the position set S
- depends. Each context in the set of returned contexts (let's call it
- SC) may have a different follow set than other contexts in SC, and
- also different from the follow set of the complement set. However,
- all contexts in the complement set will have the same follow set. */
+/* Returns the contexts on which the position set S depends. Each context
+ in the set of returned contexts (let's call it SC) may have a different
+ follow set than other contexts in SC, and also different from the
+ follow set of the complement set (sc ^ CTX_ANY). However, all contexts
+ in the complement set will have the same follow set. */
static int _GL_ATTRIBUTE_PURE
-state_separate_contexts (position_set *s, int possible_contexts)
+state_separate_contexts (position_set const *s)
{
- int separate_context = 0;
- unsigned int j;
+ int separate_contexts = 0;
+ size_t j;
for (j = 0; j < s->nelem; ++j)
{
- if ((possible_contexts & CTX_NEWLINE)
- && PREV_NEWLINE_DEPENDENT(s->elems[j].constraint))
- separate_context |= CTX_NEWLINE;
- if ((possible_contexts & CTX_LETTER)
- && PREV_LETTER_DEPENDENT(s->elems[j].constraint))
- separate_context |= CTX_LETTER;
+ if (PREV_NEWLINE_DEPENDENT (s->elems[j].constraint))
+ separate_contexts |= CTX_NEWLINE;
+ if (PREV_LETTER_DEPENDENT (s->elems[j].constraint))
+ separate_contexts |= CTX_LETTER;
}
- return separate_context;
+ return separate_contexts;
}
@@ -2203,167 +2238,167 @@ state_separate_contexts (position_set *s, int possible_contexts)
void
dfaanalyze (struct dfa *d, int searchflag)
{
- int *nullable; /* Nullable stack. */
- int *nfirstpos; /* Element count stack for firstpos sets. */
- position *firstpos; /* Array where firstpos elements are stored. */
- int *nlastpos; /* Element count stack for lastpos sets. */
- position *lastpos; /* Array where lastpos elements are stored. */
- position_set tmp; /* Temporary set for merging sets. */
- position_set merged; /* Result of merging sets. */
- int separate_contexts; /* Context wanted by some position. */
+ int *nullable; /* Nullable stack. */
+ size_t *nfirstpos; /* Element count stack for firstpos sets. */
+ position *firstpos; /* Array where firstpos elements are stored. */
+ size_t *nlastpos; /* Element count stack for lastpos sets. */
+ position *lastpos; /* Array where lastpos elements are stored. */
+ position_set tmp; /* Temporary set for merging sets. */
+ position_set merged; /* Result of merging sets. */
+ int separate_contexts; /* Context wanted by some position. */
int *o_nullable;
- int *o_nfirst, *o_nlast;
+ size_t *o_nfirst, *o_nlast;
position *o_firstpos, *o_lastpos;
- int i, j;
+ size_t i, j;
position *pos;
#ifdef DEBUG
- fprintf(stderr, "dfaanalyze:\n");
+ fprintf (stderr, "dfaanalyze:\n");
for (i = 0; i < d->tindex; ++i)
{
- fprintf(stderr, " %d:", i);
- prtok(d->tokens[i]);
+ fprintf (stderr, " %zd:", i);
+ prtok (d->tokens[i]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
d->searchflag = searchflag;
- MALLOC(nullable, d->depth);
+ MALLOC (nullable, d->depth);
o_nullable = nullable;
- MALLOC(nfirstpos, d->depth);
+ MALLOC (nfirstpos, d->depth);
o_nfirst = nfirstpos;
- MALLOC(firstpos, d->nleaves);
+ MALLOC (firstpos, d->nleaves);
o_firstpos = firstpos, firstpos += d->nleaves;
- MALLOC(nlastpos, d->depth);
+ MALLOC (nlastpos, d->depth);
o_nlast = nlastpos;
- MALLOC(lastpos, d->nleaves);
+ MALLOC (lastpos, d->nleaves);
o_lastpos = lastpos, lastpos += d->nleaves;
- alloc_position_set(&merged, d->nleaves);
+ alloc_position_set (&merged, d->nleaves);
- CALLOC(d->follows, d->tindex);
+ CALLOC (d->follows, d->tindex);
for (i = 0; i < d->tindex; ++i)
{
- switch (d->tokens[i])
- {
- case EMPTY:
- /* The empty set is nullable. */
- *nullable++ = 1;
+ switch (d->tokens[i])
+ {
+ case EMPTY:
+ /* The empty set is nullable. */
+ *nullable++ = 1;
- /* The firstpos and lastpos of the empty leaf are both empty. */
- *nfirstpos++ = *nlastpos++ = 0;
- break;
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ *nfirstpos++ = *nlastpos++ = 0;
+ break;
- case STAR:
- case PLUS:
- /* Every element in the firstpos of the argument is in the follow
- of every element in the lastpos. */
- tmp.nelem = nfirstpos[-1];
- tmp.elems = firstpos;
- pos = lastpos;
- for (j = 0; j < nlastpos[-1]; ++j)
- {
- merge(&tmp, &d->follows[pos[j].index], &merged);
- copy(&merged, &d->follows[pos[j].index]);
- }
+ case STAR:
+ case PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < nlastpos[-1]; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
- case QMARK:
- /* A QMARK or STAR node is automatically nullable. */
- if (d->tokens[i] != PLUS)
- nullable[-1] = 1;
- break;
+ case QMARK:
+ /* A QMARK or STAR node is automatically nullable. */
+ if (d->tokens[i] != PLUS)
+ nullable[-1] = 1;
+ break;
- case CAT:
- /* Every element in the firstpos of the second argument is in the
- follow of every element in the lastpos of the first argument. */
- tmp.nelem = nfirstpos[-1];
- tmp.elems = firstpos;
- pos = lastpos + nlastpos[-1];
- for (j = 0; j < nlastpos[-2]; ++j)
- {
- merge(&tmp, &d->follows[pos[j].index], &merged);
- copy(&merged, &d->follows[pos[j].index]);
- }
+ case CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos + nlastpos[-1];
+ for (j = 0; j < nlastpos[-2]; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
- /* The firstpos of a CAT node is the firstpos of the first argument,
- union that of the second argument if the first is nullable. */
- if (nullable[-2])
- nfirstpos[-2] += nfirstpos[-1];
- else
- firstpos += nfirstpos[-1];
- --nfirstpos;
+ /* The firstpos of a CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (nullable[-2])
+ nfirstpos[-2] += nfirstpos[-1];
+ else
+ firstpos += nfirstpos[-1];
+ --nfirstpos;
- /* The lastpos of a CAT node is the lastpos of the second argument,
- union that of the first argument if the second is nullable. */
- if (nullable[-1])
- nlastpos[-2] += nlastpos[-1];
- else
- {
- pos = lastpos + nlastpos[-2];
- for (j = nlastpos[-1] - 1; j >= 0; --j)
- pos[j] = lastpos[j];
- lastpos += nlastpos[-2];
- nlastpos[-2] = nlastpos[-1];
- }
- --nlastpos;
+ /* The lastpos of a CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (nullable[-1])
+ nlastpos[-2] += nlastpos[-1];
+ else
+ {
+ pos = lastpos + nlastpos[-2];
+ for (j = nlastpos[-1]; j-- > 0;)
+ pos[j] = lastpos[j];
+ lastpos += nlastpos[-2];
+ nlastpos[-2] = nlastpos[-1];
+ }
+ --nlastpos;
- /* A CAT node is nullable if both arguments are nullable. */
- nullable[-2] = nullable[-1] && nullable[-2];
- --nullable;
- break;
+ /* A CAT node is nullable if both arguments are nullable. */
+ nullable[-2] = nullable[-1] && nullable[-2];
+ --nullable;
+ break;
- case OR:
- /* The firstpos is the union of the firstpos of each argument. */
- nfirstpos[-2] += nfirstpos[-1];
- --nfirstpos;
+ case OR:
+ /* The firstpos is the union of the firstpos of each argument. */
+ nfirstpos[-2] += nfirstpos[-1];
+ --nfirstpos;
- /* The lastpos is the union of the lastpos of each argument. */
- nlastpos[-2] += nlastpos[-1];
- --nlastpos;
+ /* The lastpos is the union of the lastpos of each argument. */
+ nlastpos[-2] += nlastpos[-1];
+ --nlastpos;
- /* An OR node is nullable if either argument is nullable. */
- nullable[-2] = nullable[-1] || nullable[-2];
- --nullable;
- break;
+ /* An OR node is nullable if either argument is nullable. */
+ nullable[-2] = nullable[-1] || nullable[-2];
+ --nullable;
+ break;
- default:
- /* Anything else is a nonempty position. (Note that special
- constructs like \< are treated as nonempty strings here;
- an "epsilon closure" effectively makes them nullable later.
- Backreferences have to get a real position so we can detect
- transitions on them later. But they are nullable. */
- *nullable++ = d->tokens[i] == BACKREF;
-
- /* This position is in its own firstpos and lastpos. */
- *nfirstpos++ = *nlastpos++ = 1;
- --firstpos, --lastpos;
- firstpos->index = lastpos->index = i;
- firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
-
- /* Allocate the follow set for this position. */
- alloc_position_set(&d->follows[i], 1);
- break;
- }
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable. */
+ *nullable++ = d->tokens[i] == BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ *nfirstpos++ = *nlastpos++ = 1;
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ alloc_position_set (&d->follows[i], 1);
+ break;
+ }
#ifdef DEBUG
- /* ... balance the above nonsyntactic #ifdef goo... */
- fprintf(stderr, "node %d:", i);
- prtok(d->tokens[i]);
- putc('\n', stderr);
- fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
- fprintf(stderr, " firstpos:");
- for (j = nfirstpos[-1] - 1; j >= 0; --j)
+ /* ... balance the above nonsyntactic #ifdef goo... */
+ fprintf (stderr, "node %zd:", i);
+ prtok (d->tokens[i]);
+ putc ('\n', stderr);
+ fprintf (stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+ fprintf (stderr, " firstpos:");
+ for (j = nfirstpos[-1]; j-- > 0;)
{
- fprintf(stderr, " %d:", firstpos[j].index);
- prtok(d->tokens[firstpos[j].index]);
+ fprintf (stderr, " %zd:", firstpos[j].index);
+ prtok (d->tokens[firstpos[j].index]);
}
- fprintf(stderr, "\n lastpos:");
- for (j = nlastpos[-1] - 1; j >= 0; --j)
+ fprintf (stderr, "\n lastpos:");
+ for (j = nlastpos[-1]; j-- > 0;)
{
- fprintf(stderr, " %d:", lastpos[j].index);
- prtok(d->tokens[lastpos[j].index]);
+ fprintf (stderr, " %zd:", lastpos[j].index);
+ prtok (d->tokens[lastpos[j].index]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
}
@@ -2372,48 +2407,49 @@ dfaanalyze (struct dfa *d, int searchflag)
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
#if MBS_SUPPORT
- || d->tokens[i] == ANYCHAR
- || d->tokens[i] == MBCSET
+ || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
- fprintf(stderr, "follows(%d:", i);
- prtok(d->tokens[i]);
- fprintf(stderr, "):");
- for (j = d->follows[i].nelem - 1; j >= 0; --j)
+ fprintf (stderr, "follows(%zd:", i);
+ prtok (d->tokens[i]);
+ fprintf (stderr, "):");
+ for (j = d->follows[i].nelem; j-- > 0;)
{
- fprintf(stderr, " %d:", d->follows[i].elems[j].index);
- prtok(d->tokens[d->follows[i].elems[j].index]);
+ fprintf (stderr, " %zd:", d->follows[i].elems[j].index);
+ prtok (d->tokens[d->follows[i].elems[j].index]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
- copy(&d->follows[i], &merged);
- epsclosure(&merged, d);
- copy(&merged, &d->follows[i]);
+ copy (&d->follows[i], &merged);
+ epsclosure (&merged, d);
+ copy (&merged, &d->follows[i]);
}
/* Get the epsilon closure of the firstpos of the regexp. The result will
be the set of positions of state 0. */
merged.nelem = 0;
for (i = 0; i < nfirstpos[-1]; ++i)
- insert(firstpos[i], &merged);
- epsclosure(&merged, d);
+ insert (firstpos[i], &merged);
+ epsclosure (&merged, d);
/* Build the initial state. */
d->salloc = 1;
d->sindex = 0;
- MALLOC(d->states, d->salloc);
+ MALLOC (d->states, d->salloc);
- separate_contexts = state_separate_contexts(&merged, CTX_NEWLINE);
- state_index(d, &merged, separate_contexts);
+ separate_contexts = state_separate_contexts (&merged);
+ state_index (d, &merged,
+ (separate_contexts & CTX_NEWLINE
+ ? CTX_NEWLINE : separate_contexts ^ CTX_ANY));
- free(o_nullable);
- free(o_nfirst);
- free(o_firstpos);
- free(o_nlast);
- free(o_lastpos);
- free(merged.elems);
+ free (o_nullable);
+ free (o_nfirst);
+ free (o_firstpos);
+ free (o_nlast);
+ free (o_lastpos);
+ free (merged.elems);
}
@@ -2448,40 +2484,40 @@ dfaanalyze (struct dfa *d, int searchflag)
create a new group labeled with the characters of C and insert this
position in that group. */
void
-dfastate (int s, struct dfa *d, int trans[])
-{
- leaf_set *grps; /* As many as will ever be needed. */
- charclass *labels; /* Labels corresponding to the groups. */
- int ngrps = 0; /* Number of groups actually used. */
- position pos; /* Current position being considered. */
- charclass matches; /* Set of matching characters. */
- int matchesf; /* True if matches is nonempty. */
- charclass intersect; /* Intersection with some label set. */
- int intersectf; /* True if intersect is nonempty. */
- charclass leftovers; /* Stuff in the label that didn't match. */
- int leftoversf; /* True if leftovers is nonempty. */
- position_set follows; /* Union of the follows of some group. */
- position_set tmp; /* Temporary space for merging sets. */
- int possible_contexts; /* Contexts that this group can match. */
- int separate_contexts; /* Context that new state wants to know. */
- int state; /* New state. */
- int state_newline; /* New state on a newline transition. */
- int state_letter; /* New state on a letter transition. */
- int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
- int i, j, k;
+dfastate (state_num s, struct dfa *d, state_num trans[])
+{
+ leaf_set *grps; /* As many as will ever be needed. */
+ charclass *labels; /* Labels corresponding to the groups. */
+ size_t ngrps = 0; /* Number of groups actually used. */
+ position pos; /* Current position being considered. */
+ charclass matches; /* Set of matching characters. */
+ int matchesf; /* True if matches is nonempty. */
+ charclass intersect; /* Intersection with some label set. */
+ int intersectf; /* True if intersect is nonempty. */
+ charclass leftovers; /* Stuff in the label that didn't match. */
+ int leftoversf; /* True if leftovers is nonempty. */
+ position_set follows; /* Union of the follows of some group. */
+ position_set tmp; /* Temporary space for merging sets. */
+ int possible_contexts; /* Contexts that this group can match. */
+ int separate_contexts; /* Context that new state wants to know. */
+ state_num state; /* New state. */
+ state_num state_newline; /* New state on a newline transition. */
+ state_num state_letter; /* New state on a letter transition. */
+ int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
+ size_t i, j, k;
MALLOC (grps, NOTCHAR);
MALLOC (labels, NOTCHAR);
- zeroset(matches);
+ zeroset (matches);
for (i = 0; i < d->states[s].elems.nelem; ++i)
{
pos = d->states[s].elems.elems[i];
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
- setbit(d->tokens[pos.index], matches);
+ setbit (d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
- copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
+ copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
else if (MBS_SUPPORT
&& (d->tokens[pos.index] == ANYCHAR
|| d->tokens[pos.index] == MBCSET))
@@ -2491,8 +2527,8 @@ dfastate (int s, struct dfa *d, int trans[])
must put it to d->states[s].mbps, which contains the positions
which can match with a single character not a byte. */
if (d->states[s].mbps.nelem == 0)
- alloc_position_set(&d->states[s].mbps, 1);
- insert(pos, &(d->states[s].mbps));
+ alloc_position_set (&d->states[s].mbps, 1);
+ insert (pos, &(d->states[s].mbps));
continue;
}
else
@@ -2500,25 +2536,20 @@ dfastate (int s, struct dfa *d, int trans[])
/* Some characters may need to be eliminated from matches because
they fail in the current context. */
- if (pos.constraint != 0xFF)
+ if (pos.constraint != NO_CONSTRAINT)
{
- if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
- d->states[s].context & CTX_NEWLINE,
- CTX_NEWLINE))
- clrbit(eolbyte, matches);
- if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
- d->states[s].context & CTX_NEWLINE, 0))
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_NEWLINE))
for (j = 0; j < CHARCLASS_INTS; ++j)
- matches[j] &= newline[j];
- if (! MATCHES_LETTER_CONTEXT(pos.constraint,
- d->states[s].context & CTX_LETTER,
- CTX_LETTER))
+ matches[j] &= ~newline[j];
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_LETTER))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~letters[j];
- if (! MATCHES_LETTER_CONTEXT(pos.constraint,
- d->states[s].context & CTX_LETTER, 0))
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_NONE))
for (j = 0; j < CHARCLASS_INTS; ++j)
- matches[j] &= letters[j];
+ matches[j] &= letters[j] | newline[j];
/* If there are no characters left, there's no point in going on. */
for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
@@ -2533,7 +2564,7 @@ dfastate (int s, struct dfa *d, int trans[])
group's label doesn't contain that character, go on to the
next group. */
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
- && !tstbit(d->tokens[pos.index], labels[j]))
+ && !tstbit (d->tokens[pos.index], labels[j]))
continue;
/* Check if this group's label has a nonempty intersection with
@@ -2541,7 +2572,7 @@ dfastate (int s, struct dfa *d, int trans[])
intersectf = 0;
for (k = 0; k < CHARCLASS_INTS; ++k)
(intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
- if (! intersectf)
+ if (!intersectf)
continue;
/* It does; now find the set differences both ways. */
@@ -2558,11 +2589,11 @@ dfastate (int s, struct dfa *d, int trans[])
/* If there were leftovers, create a new group labeled with them. */
if (leftoversf)
{
- copyset(leftovers, labels[ngrps]);
- copyset(intersect, labels[j]);
- MALLOC(grps[ngrps].elems, d->nleaves);
- memcpy(grps[ngrps].elems, grps[j].elems,
- sizeof (grps[j].elems[0]) * grps[j].nelem);
+ copyset (leftovers, labels[ngrps]);
+ copyset (intersect, labels[j]);
+ MALLOC (grps[ngrps].elems, d->nleaves);
+ memcpy (grps[ngrps].elems, grps[j].elems,
+ sizeof (grps[j].elems[0]) * grps[j].nelem);
grps[ngrps].nelem = grps[j].nelem;
++ngrps;
}
@@ -2573,7 +2604,7 @@ dfastate (int s, struct dfa *d, int trans[])
/* If every character matching the current position has been
accounted for, we're done. */
- if (! matchesf)
+ if (!matchesf)
break;
}
@@ -2581,17 +2612,17 @@ dfastate (int s, struct dfa *d, int trans[])
unaccounted for, then we'll have to create a new group. */
if (j == ngrps)
{
- copyset(matches, labels[ngrps]);
- zeroset(matches);
- MALLOC(grps[ngrps].elems, d->nleaves);
+ copyset (matches, labels[ngrps]);
+ zeroset (matches);
+ MALLOC (grps[ngrps].elems, d->nleaves);
grps[ngrps].nelem = 1;
grps[ngrps].elems[0] = pos.index;
++ngrps;
}
}
- alloc_position_set(&follows, d->nleaves);
- alloc_position_set(&tmp, d->nleaves);
+ alloc_position_set (&follows, d->nleaves);
+ alloc_position_set (&tmp, d->nleaves);
/* If we are a searching matcher, the default transition is to a state
containing the positions of state 0, otherwise the default transition
@@ -2599,20 +2630,20 @@ dfastate (int s, struct dfa *d, int trans[])
if (d->searchflag)
{
/* Find the state(s) corresponding to the positions of state 0. */
- copy(&d->states[0].elems, &follows);
- separate_contexts = state_separate_contexts(&follows, CTX_ANY);
- state = state_index(d, &follows, 0);
+ copy (&d->states[0].elems, &follows);
+ separate_contexts = state_separate_contexts (&follows);
+ state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
if (separate_contexts & CTX_NEWLINE)
- state_newline = state_index(d, &follows, CTX_NEWLINE);
+ state_newline = state_index (d, &follows, CTX_NEWLINE);
else
state_newline = state;
if (separate_contexts & CTX_LETTER)
- state_letter = state_index(d, &follows, CTX_LETTER);
+ state_letter = state_index (d, &follows, CTX_LETTER);
else
state_letter = state;
for (i = 0; i < NOTCHAR; ++i)
- trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+ trans[i] = (IS_WORD_CONSTITUENT (i)) ? state_letter : state;
trans[eolbyte] = state_newline;
}
else
@@ -2627,7 +2658,7 @@ dfastate (int s, struct dfa *d, int trans[])
This is a hideously inefficient loop. Fix it someday. */
for (j = 0; j < grps[i].nelem; ++j)
for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
- insert(d->follows[grps[i].elems[j]].elems[k], &follows);
+ insert (d->follows[grps[i].elems[j]].elems[k], &follows);
if (d->mb_cur_max > 1)
{
@@ -2663,23 +2694,25 @@ dfastate (int s, struct dfa *d, int trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
if (d->searchflag
- && (! MBS_SUPPORT
- || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
+ && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
for (j = 0; j < d->states[0].elems.nelem; ++j)
- insert(d->states[0].elems.elems[j], &follows);
+ insert (d->states[0].elems.elems[j], &follows);
/* Find out if the new state will want any context information. */
- possible_contexts = charclass_context(labels[i]);
- separate_contexts = state_separate_contexts(&follows, possible_contexts);
+ possible_contexts = charclass_context (labels[i]);
+ separate_contexts = state_separate_contexts (&follows);
/* Find the state(s) corresponding to the union of the follows. */
- state = state_index(d, &follows, 0);
- if (separate_contexts & CTX_NEWLINE)
- state_newline = state_index(d, &follows, CTX_NEWLINE);
+ if ((separate_contexts & possible_contexts) != possible_contexts)
+ state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
+ else
+ state = -1;
+ if (separate_contexts & possible_contexts & CTX_NEWLINE)
+ state_newline = state_index (d, &follows, CTX_NEWLINE);
else
state_newline = state;
- if (separate_contexts & CTX_LETTER)
- state_letter = state_index(d, &follows, CTX_LETTER);
+ if (separate_contexts & possible_contexts & CTX_LETTER)
+ state_letter = state_index (d, &follows, CTX_LETTER);
else
state_letter = state;
@@ -2692,7 +2725,7 @@ dfastate (int s, struct dfa *d, int trans[])
if (c == eolbyte)
trans[c] = state_newline;
- else if (IS_WORD_CONSTITUENT(c))
+ else if (IS_WORD_CONSTITUENT (c))
trans[c] = state_letter;
else if (c < NOTCHAR)
trans[c] = state;
@@ -2700,11 +2733,11 @@ dfastate (int s, struct dfa *d, int trans[])
}
for (i = 0; i < ngrps; ++i)
- free(grps[i].elems);
- free(follows.elems);
- free(tmp.elems);
- free(grps);
- free(labels);
+ free (grps[i].elems);
+ free (follows.elems);
+ free (tmp.elems);
+ free (grps);
+ free (labels);
}
/* Some routines for manipulating a compiled dfa's transition tables.
@@ -2715,10 +2748,10 @@ dfastate (int s, struct dfa *d, int trans[])
TODO: Improve this comment, get rid of the unnecessary redundancy. */
static void
-build_state (int s, struct dfa *d)
+build_state (state_num s, struct dfa *d)
{
- int *trans; /* The new transition table. */
- int i;
+ state_num *trans; /* The new transition table. */
+ state_num i;
/* Set an upper limit on the number of transition tables that will ever
exist at once. 1024 is arbitrary. The idea is that the frequently
@@ -2728,8 +2761,8 @@ build_state (int s, struct dfa *d)
{
for (i = 0; i < d->tralloc; ++i)
{
- free(d->trans[i]);
- free(d->fails[i]);
+ free (d->trans[i]);
+ free (d->fails[i]);
d->trans[i] = d->fails[i] = NULL;
}
d->trcount = 0;
@@ -2739,15 +2772,15 @@ build_state (int s, struct dfa *d)
/* Set up the success bits for this state. */
d->success[s] = 0;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_NEWLINE, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s, *d))
d->success[s] |= CTX_NEWLINE;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_LETTER, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_LETTER, s, *d))
d->success[s] |= CTX_LETTER;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_NONE, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
d->success[s] |= CTX_NONE;
- MALLOC(trans, NOTCHAR);
- dfastate(s, d, trans);
+ MALLOC (trans, NOTCHAR);
+ dfastate (s, d, trans);
/* Now go through the new transition table, and make sure that the trans
and fail arrays are allocated large enough to hold a pointer for the
@@ -2755,15 +2788,15 @@ build_state (int s, struct dfa *d)
for (i = 0; i < NOTCHAR; ++i)
if (trans[i] >= d->tralloc)
{
- int oldalloc = d->tralloc;
+ state_num oldalloc = d->tralloc;
while (trans[i] >= d->tralloc)
d->tralloc *= 2;
- REALLOC(d->realtrans, d->tralloc + 1);
+ REALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- REALLOC(d->fails, d->tralloc);
- REALLOC(d->success, d->tralloc);
- REALLOC(d->newlines, d->tralloc);
+ REALLOC (d->fails, d->tralloc);
+ REALLOC (d->success, d->tralloc);
+ REALLOC (d->newlines, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
@@ -2776,7 +2809,7 @@ build_state (int s, struct dfa *d)
d->newlines[s] = trans[eolbyte];
trans[eolbyte] = -1;
- if (ACCEPTING(s, *d))
+ if (ACCEPTING (s, *d))
d->fails[s] = trans;
else
d->trans[s] = trans;
@@ -2787,12 +2820,12 @@ build_state_zero (struct dfa *d)
{
d->tralloc = 1;
d->trcount = 0;
- CALLOC(d->realtrans, d->tralloc + 1);
+ CALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- CALLOC(d->fails, d->tralloc);
- MALLOC(d->success, d->tralloc);
- MALLOC(d->newlines, d->tralloc);
- build_state(0, d);
+ CALLOC (d->fails, d->tralloc);
+ MALLOC (d->success, d->tralloc);
+ MALLOC (d->newlines, d->tralloc);
+ build_state (0, d);
}
/* Multibyte character handling sub-routines for dfaexec. */
@@ -2813,29 +2846,29 @@ build_state_zero (struct dfa *d)
++p; \
if ((char *) p >= end) \
{ \
- free(mblen_buf); \
- free(inputwcs); \
+ free (mblen_buf); \
+ free (inputwcs); \
*end = saved_end; \
return NULL; \
} \
}
static void
-realloc_trans_if_necessary(struct dfa *d, int new_state)
+realloc_trans_if_necessary (struct dfa *d, state_num new_state)
{
/* Make sure that the trans and fail arrays are allocated large enough
to hold a pointer for the new state. */
if (new_state >= d->tralloc)
{
- int oldalloc = d->tralloc;
+ state_num oldalloc = d->tralloc;
while (new_state >= d->tralloc)
d->tralloc *= 2;
- REALLOC(d->realtrans, d->tralloc + 1);
+ REALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- REALLOC(d->fails, d->tralloc);
- REALLOC(d->success, d->tralloc);
- REALLOC(d->newlines, d->tralloc);
+ REALLOC (d->fails, d->tralloc);
+ REALLOC (d->success, d->tralloc);
+ REALLOC (d->newlines, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
@@ -2848,9 +2881,9 @@ realloc_trans_if_necessary(struct dfa *d, int new_state)
transit_state_consume_1char. */
typedef enum
{
- TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
- TRANSIT_STATE_DONE, /* State transition has finished. */
- TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
+ TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
+ TRANSIT_STATE_DONE, /* State transition has finished. */
+ TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
} status_transit_state;
/* Consume a single byte and transit state from 's' to '*next_state'.
@@ -2858,11 +2891,11 @@ typedef enum
But state transition is done just once, otherwise matching succeed or
reach the end of the buffer. */
static status_transit_state
-transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
- int *next_state)
+transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
+ state_num * next_state)
{
- int *t;
- int works = s;
+ state_num *t;
+ state_num works = s;
status_transit_state rval = TRANSIT_STATE_IN_PROGRESS;
@@ -2891,7 +2924,7 @@ transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
}
else
{
- build_state(works, d);
+ build_state (works, d);
}
}
*next_state = works;
@@ -2902,29 +2935,29 @@ transit_state_singlebyte (struct dfa *d, int s, unsigned char const *p,
current position. Return the length of the match, in bytes.
POS is the position of the ".". */
static int
-match_anychar (struct dfa *d, int s, position pos, int idx)
+match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
{
int context;
wchar_t wc;
int mbclen;
wc = inputwcs[idx];
- mbclen = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
+ mbclen = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Check syntax bits. */
- if (wc == (wchar_t)eolbyte)
+ if (wc == (wchar_t) eolbyte)
{
if (!(syntax_bits & RE_DOT_NEWLINE))
return 0;
}
- else if (wc == (wchar_t)'\0')
+ else if (wc == (wchar_t) '\0')
{
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
- context = wchar_context(wc);
- if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].context, context))
+ context = wchar_context (wc);
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
return 0;
return mbclen;
@@ -2935,66 +2968,66 @@ match_anychar (struct dfa *d, int s, position pos, int idx)
Return the length of the match, in bytes.
POS is the position of the bracket expression. */
static int
-match_mb_charset (struct dfa *d, int s, position pos, int idx)
+match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
{
- int i;
- int match; /* Flag which represent that matching succeed. */
- int match_len; /* Length of the character (or collating element)
- with which this operator match. */
- int op_len; /* Length of the operator. */
+ size_t i;
+ int match; /* Flag which represent that matching succeed. */
+ int match_len; /* Length of the character (or collating element)
+ with which this operator match. */
+ int op_len; /* Length of the operator. */
char buffer[128];
- /* Pointer to the structure to which we are currently refering. */
+ /* Pointer to the structure to which we are currently referring. */
struct mb_char_classes *work_mbc;
int context;
- wchar_t wc; /* Current refering character. */
+ wchar_t wc; /* Current referring character. */
wc = inputwcs[idx];
/* Check syntax bits. */
- if (wc == (wchar_t)eolbyte)
+ if (wc == (wchar_t) eolbyte)
{
if (!(syntax_bits & RE_DOT_NEWLINE))
return 0;
}
- else if (wc == (wchar_t)'\0')
+ else if (wc == (wchar_t) '\0')
{
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
- context = wchar_context(wc);
- if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].context, context))
+ context = wchar_context (wc);
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
return 0;
- /* Assign the current refering operator to work_mbc. */
+ /* Assign the current referring operator to work_mbc. */
work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
match = !work_mbc->invert;
- match_len = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
+ match_len = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Match in range 0-255? */
if (wc < NOTCHAR && work_mbc->cset != -1
- && tstbit((unsigned char)wc, d->charclasses[work_mbc->cset]))
+ && tstbit ((unsigned char) wc, d->charclasses[work_mbc->cset]))
goto charset_matched;
/* match with a character class? */
- for (i = 0; i<work_mbc->nch_classes; i++)
+ for (i = 0; i < work_mbc->nch_classes; i++)
{
- if (iswctype((wint_t)wc, work_mbc->ch_classes[i]))
+ if (iswctype ((wint_t) wc, work_mbc->ch_classes[i]))
goto charset_matched;
}
- strncpy(buffer, (char const *) buf_begin + idx, match_len);
+ strncpy (buffer, (char const *) buf_begin + idx, match_len);
buffer[match_len] = '\0';
/* match with an equivalent class? */
- for (i = 0; i<work_mbc->nequivs; i++)
+ for (i = 0; i < work_mbc->nequivs; i++)
{
- op_len = strlen(work_mbc->equivs[i]);
- strncpy(buffer, (char const *) buf_begin + idx, op_len);
+ op_len = strlen (work_mbc->equivs[i]);
+ strncpy (buffer, (char const *) buf_begin + idx, op_len);
buffer[op_len] = '\0';
- if (strcoll(work_mbc->equivs[i], buffer) == 0)
+ if (strcoll (work_mbc->equivs[i], buffer) == 0)
{
match_len = op_len;
goto charset_matched;
@@ -3002,13 +3035,13 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
}
/* match with a collating element? */
- for (i = 0; i<work_mbc->ncoll_elems; i++)
+ for (i = 0; i < work_mbc->ncoll_elems; i++)
{
- op_len = strlen(work_mbc->coll_elems[i]);
- strncpy(buffer, (char const *) buf_begin + idx, op_len);
+ op_len = strlen (work_mbc->coll_elems[i]);
+ strncpy (buffer, (char const *) buf_begin + idx, op_len);
buffer[op_len] = '\0';
- if (strcoll(work_mbc->coll_elems[i], buffer) == 0)
+ if (strcoll (work_mbc->coll_elems[i], buffer) == 0)
{
match_len = op_len;
goto charset_matched;
@@ -3016,7 +3049,7 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
}
/* match with a range? */
- for (i = 0; i<work_mbc->nranges; i++)
+ for (i = 0; i < work_mbc->nranges; i++)
{
if (work_mbc->range_sts[i] <= wc &&
wc <= work_mbc->range_ends[i])
@@ -3024,7 +3057,7 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
}
/* match with a character? */
- for (i = 0; i<work_mbc->nchars; i++)
+ for (i = 0; i < work_mbc->nchars; i++)
{
if (wc == work_mbc->chars[i])
goto charset_matched;
@@ -3032,7 +3065,7 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
match = !match;
- charset_matched:
+charset_matched:
return match ? match_len : 0;
}
@@ -3043,26 +3076,26 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx)
`idx' is the index from the buf_begin, and it is the current position
in the buffer.
Caller MUST free the array which this function return. */
-static int*
-check_matching_with_multibyte_ops (struct dfa *d, int s, int idx)
+static int *
+check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
{
- int i;
- int* rarray;
+ size_t i;
+ int *rarray;
- MALLOC(rarray, d->states[s].mbps.nelem);
+ MALLOC (rarray, d->states[s].mbps.nelem);
for (i = 0; i < d->states[s].mbps.nelem; ++i)
{
position pos = d->states[s].mbps.elems[i];
- switch(d->tokens[pos.index])
+ switch (d->tokens[pos.index])
{
case ANYCHAR:
- rarray[i] = match_anychar(d, s, pos, idx);
+ rarray[i] = match_anychar (d, s, pos, idx);
break;
case MBCSET:
- rarray[i] = match_mb_charset(d, s, pos, idx);
+ rarray[i] = match_mb_charset (d, s, pos, idx);
break;
default:
- break; /* cannot happen. */
+ break; /* cannot happen. */
}
}
return rarray;
@@ -3075,64 +3108,64 @@ check_matching_with_multibyte_ops (struct dfa *d, int s, int idx)
`mbclen' and `pps' are the output. `mbclen' is the length of the
character consumed, and `pps' is the set this function enumerate. */
static status_transit_state
-transit_state_consume_1char (struct dfa *d, int s, unsigned char const **pp,
- int *match_lens, int *mbclen, position_set *pps)
-{
- int i, j;
- int s1, s2;
- int* work_mbls;
+transit_state_consume_1char (struct dfa *d, state_num s,
+ unsigned char const **pp,
+ int *match_lens, int *mbclen, position_set * pps)
+{
+ size_t i, j;
+ int k;
+ state_num s1, s2;
+ int *work_mbls;
status_transit_state rs = TRANSIT_STATE_DONE;
/* Calculate the length of the (single/multi byte) character
to which p points. */
- *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1
- : mblen_buf[*pp - buf_begin];
+ *mbclen = (mblen_buf[*pp - buf_begin] == 0) ? 1 : mblen_buf[*pp - buf_begin];
/* Calculate the state which can be reached from the state `s' by
consuming `*mbclen' single bytes from the buffer. */
s1 = s;
- for (i = 0; i < *mbclen; i++)
+ for (k = 0; k < *mbclen; k++)
{
s2 = s1;
- rs = transit_state_singlebyte(d, s2, (*pp)++, &s1);
+ rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
}
/* Copy the positions contained by `s1' to the set `pps'. */
- copy(&(d->states[s1].elems), pps);
+ copy (&(d->states[s1].elems), pps);
- /* Check (inputed)match_lens, and initialize if it is NULL. */
+ /* Check (input) match_lens, and initialize if it is NULL. */
if (match_lens == NULL && d->states[s].mbps.nelem != 0)
- work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+ work_mbls = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
else
work_mbls = match_lens;
/* Add all of the positions which can be reached from `s' by consuming
a single character. */
- for (i = 0; i < d->states[s].mbps.nelem ; i++)
- {
+ for (i = 0; i < d->states[s].mbps.nelem; i++)
+ {
if (work_mbls[i] == *mbclen)
for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
j++)
- insert(d->follows[d->states[s].mbps.elems[i].index].elems[j],
- pps);
+ insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], pps);
}
if (match_lens == NULL && work_mbls != NULL)
- free(work_mbls);
+ free (work_mbls);
return rs;
}
/* Transit state from s, then return new state and update the pointer of the
buffer. This function is for some operator which can match with a multi-
byte character or a collating element (which may be multi characters). */
-static int
-transit_state (struct dfa *d, int s, unsigned char const **pp)
+static state_num
+transit_state (struct dfa *d, state_num s, unsigned char const **pp)
{
- int s1;
- int mbclen; /* The length of current input multibyte character. */
+ state_num s1;
+ int mbclen; /* The length of current input multibyte
character. */
int maxlen = 0;
- int i, j;
+ size_t i, j;
int *match_lens = NULL;
- int nelem = d->states[s].mbps.nelem; /* Just a alias. */
+ size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */
position_set follows;
unsigned char const *p1 = *pp;
wchar_t wc;
@@ -3142,7 +3175,7 @@ transit_state (struct dfa *d, int s, unsigned char const **pp)
We check whether each of them can match or not. */
{
/* Note: caller must free the return value of this function. */
- match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+ match_lens = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
for (i = 0; i < nelem; i++)
/* Search the operator which match the longest string,
@@ -3158,48 +3191,48 @@ transit_state (struct dfa *d, int s, unsigned char const **pp)
We need to check only one single byte character. */
{
status_transit_state rs;
- rs = transit_state_singlebyte(d, s, *pp, &s1);
+ rs = transit_state_singlebyte (d, s, *pp, &s1);
/* We must update the pointer if state transition succeeded. */
if (rs == TRANSIT_STATE_DONE)
- ++*pp;
+ ++ * pp;
- free(match_lens);
+ free (match_lens);
return s1;
}
/* This state has some operators which can match a multibyte character. */
- alloc_position_set(&follows, d->nleaves);
+ alloc_position_set (&follows, d->nleaves);
/* `maxlen' may be longer than the length of a character, because it may
not be a character but a (multi character) collating element.
We enumerate all of the positions which `s' can reach by consuming
`maxlen' bytes. */
- transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows);
+ transit_state_consume_1char (d, s, pp, match_lens, &mbclen, &follows);
wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index(d, &follows, wchar_context (wc));
- realloc_trans_if_necessary(d, s1);
+ s1 = state_index (d, &follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
while (*pp - p1 < maxlen)
{
- transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
+ transit_state_consume_1char (d, s1, pp, NULL, &mbclen, &follows);
- for (i = 0; i < nelem ; i++)
+ for (i = 0; i < nelem; i++)
{
if (match_lens[i] == *pp - p1)
for (j = 0;
j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
- insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j],
- &follows);
+ insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
+ &follows);
}
wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index(d, &follows, wchar_context (wc));
- realloc_trans_if_necessary(d, s1);
+ s1 = state_index (d, &follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
}
- free(match_lens);
- free(follows.elems);
+ free (match_lens);
+ free (follows.elems);
return s1;
}
@@ -3221,14 +3254,14 @@ prepare_wc_buf (const char *begin, const char *end)
if (remain_bytes == 0)
{
remain_bytes
- = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
+ = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
if (remain_bytes < 1
|| remain_bytes == (size_t) -1
|| remain_bytes == (size_t) -2
- || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
+ || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
{
remain_bytes = 0;
- inputwcs[i] = (wchar_t)begin[i];
+ inputwcs[i] = (wchar_t) begin[i];
mblen_buf[i] = 0;
if (begin[i] == eol)
break;
@@ -3249,7 +3282,7 @@ prepare_wc_buf (const char *begin, const char *end)
buf_end = (unsigned char *) (begin + i);
mblen_buf[i] = 0;
- inputwcs[i] = 0; /* sentinel */
+ inputwcs[i] = 0; /* sentinel */
#endif /* MBS_SUPPORT */
}
@@ -3267,17 +3300,17 @@ prepare_wc_buf (const char *begin, const char *end)
to decide whether to fall back on a backtracking matcher. */
char *
dfaexec (struct dfa *d, char const *begin, char *end,
- int allow_nl, int *count, int *backref)
+ int allow_nl, size_t *count, int *backref)
{
- int s, s1; /* Current state. */
- unsigned char const *p; /* Current input character. */
- int **trans, *t; /* Copy of d->trans so it can be optimized
+ state_num s, s1; /* Current state. */
+ unsigned char const *p; /* Current input character. */
+ state_num **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
- unsigned char eol = eolbyte; /* Likewise for eolbyte. */
+ unsigned char eol = eolbyte; /* Likewise for eolbyte. */
unsigned char saved_end;
- if (! d->tralloc)
- build_state_zero(d);
+ if (!d->tralloc)
+ build_state_zero (d);
s = s1 = 0;
p = (unsigned char const *) begin;
@@ -3287,9 +3320,9 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (d->mb_cur_max > 1)
{
- MALLOC(mblen_buf, end - begin + 2);
- MALLOC(inputwcs, end - begin + 2);
- memset(&mbs, 0, sizeof(mbstate_t));
+ MALLOC (mblen_buf, end - begin + 2);
+ MALLOC (inputwcs, end - begin + 2);
+ memset (&mbs, 0, sizeof (mbstate_t));
prepare_wc_buf ((const char *) p, end);
}
@@ -3301,7 +3334,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (p > buf_end)
break;
s1 = s;
- SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
+ SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
if (d->states[s].mbps.nelem == 0)
{
@@ -3316,15 +3349,15 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (backref)
{
*backref = 1;
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
*end = saved_end;
return (char *) p;
}
/* Can match with a multibyte character (and multi character
collating element). Transition table might be updated. */
- s = transit_state(d, s, &p);
+ s = transit_state (d, s, &p);
trans = d->trans;
}
else
@@ -3334,7 +3367,9 @@ dfaexec (struct dfa *d, char const *begin, char *end,
s1 = t[*p++];
if ((t = trans[s1]) == NULL)
{
- int tmp = s; s = s1; s1 = tmp; /* swap */
+ state_num tmp = s;
+ s = s1;
+ s1 = tmp; /* swap */
break;
}
s = t[*p++];
@@ -3349,8 +3384,8 @@ dfaexec (struct dfa *d, char const *begin, char *end,
*backref = (d->states[s].backref != 0);
if (d->mb_cur_max > 1)
{
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
}
*end = saved_end;
return (char *) p;
@@ -3361,7 +3396,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
{
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
- s = transit_state(d, s, &p);
+ s = transit_state (d, s, &p);
trans = d->trans;
}
else
@@ -3373,7 +3408,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if ((char *) p <= end && p[-1] == eol)
{
if (count)
- ++*count;
+ ++ * count;
if (d->mb_cur_max > 1)
prepare_wc_buf ((const char *) p, end);
@@ -3384,8 +3419,8 @@ dfaexec (struct dfa *d, char const *begin, char *end,
{
if (d->mb_cur_max > 1)
{
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
}
*end = saved_end;
return NULL;
@@ -3393,7 +3428,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (s >= 0)
{
- build_state(s, d);
+ build_state (s, d);
trans = d->trans;
continue;
}
@@ -3411,30 +3446,30 @@ dfaexec (struct dfa *d, char const *begin, char *end,
static void
free_mbdata (struct dfa *d)
{
- unsigned int i;
+ size_t i;
- free(d->multibyte_prop);
+ free (d->multibyte_prop);
d->multibyte_prop = NULL;
for (i = 0; i < d->nmbcsets; ++i)
{
- unsigned int j;
+ size_t j;
struct mb_char_classes *p = &(d->mbcsets[i]);
- free(p->chars);
- free(p->ch_classes);
- free(p->range_sts);
- free(p->range_ends);
+ free (p->chars);
+ free (p->ch_classes);
+ free (p->range_sts);
+ free (p->range_ends);
for (j = 0; j < p->nequivs; ++j)
- free(p->equivs[j]);
- free(p->equivs);
+ free (p->equivs[j]);
+ free (p->equivs);
for (j = 0; j < p->ncoll_elems; ++j)
- free(p->coll_elems[j]);
- free(p->coll_elems);
+ free (p->coll_elems[j]);
+ free (p->coll_elems);
}
- free(d->mbcsets);
+ free (d->mbcsets);
d->mbcsets = NULL;
d->nmbcsets = 0;
}
@@ -3447,33 +3482,33 @@ dfainit (struct dfa *d)
memset (d, 0, sizeof *d);
d->calloc = 1;
- MALLOC(d->charclasses, d->calloc);
+ MALLOC (d->charclasses, d->calloc);
d->talloc = 1;
- MALLOC(d->tokens, d->talloc);
+ MALLOC (d->tokens, d->talloc);
d->mb_cur_max = MB_CUR_MAX;
if (d->mb_cur_max > 1)
{
d->nmultibyte_prop = 1;
- MALLOC(d->multibyte_prop, d->nmultibyte_prop);
+ MALLOC (d->multibyte_prop, d->nmultibyte_prop);
d->mbcsets_alloc = 1;
- MALLOC(d->mbcsets, d->mbcsets_alloc);
+ MALLOC (d->mbcsets, d->mbcsets_alloc);
}
}
static void
dfaoptimize (struct dfa *d)
{
- unsigned int i;
+ size_t i;
- if (!MBS_SUPPORT || !using_utf8())
+ if (!MBS_SUPPORT || !using_utf8 ())
return;
for (i = 0; i < d->tindex; ++i)
{
- switch(d->tokens[i])
+ switch (d->tokens[i])
{
case ANYCHAR:
/* Lowered. */
@@ -3494,49 +3529,50 @@ dfaoptimize (struct dfa *d)
void
dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
{
- dfainit(d);
- dfaparse(s, len, d);
- dfamust(d);
- dfaoptimize(d);
- dfaanalyze(d, searchflag);
+ dfainit (d);
+ dfaparse (s, len, d);
+ dfamust (d);
+ dfaoptimize (d);
+ dfaanalyze (d, searchflag);
}
/* Free the storage held by the components of a dfa. */
void
dfafree (struct dfa *d)
{
- int i;
+ size_t i;
struct dfamust *dm, *ndm;
- free(d->charclasses);
- free(d->tokens);
+ free (d->charclasses);
+ free (d->tokens);
if (d->mb_cur_max > 1)
- free_mbdata(d);
-
- for (i = 0; i < d->sindex; ++i) {
- free(d->states[i].elems.elems);
- if (MBS_SUPPORT)
- free(d->states[i].mbps.elems);
- }
- free(d->states);
+ free_mbdata (d);
+
+ for (i = 0; i < d->sindex; ++i)
+ {
+ free (d->states[i].elems.elems);
+ if (MBS_SUPPORT)
+ free (d->states[i].mbps.elems);
+ }
+ free (d->states);
for (i = 0; i < d->tindex; ++i)
- free(d->follows[i].elems);
- free(d->follows);
+ free (d->follows[i].elems);
+ free (d->follows);
for (i = 0; i < d->tralloc; ++i)
{
- free(d->trans[i]);
- free(d->fails[i]);
+ free (d->trans[i]);
+ free (d->fails[i]);
}
- free(d->realtrans);
- free(d->fails);
- free(d->newlines);
- free(d->success);
+ free (d->realtrans);
+ free (d->fails);
+ free (d->newlines);
+ free (d->success);
for (dm = d->musts; dm; dm = ndm)
{
ndm = dm->next;
- free(dm->must);
- free(dm);
+ free (dm->must);
+ free (dm);
}
}
@@ -3643,15 +3679,15 @@ icpyalloc (char const *string)
return icatalloc (NULL, string);
}
-static char * _GL_ATTRIBUTE_PURE
+static char *_GL_ATTRIBUTE_PURE
istrstr (char const *lookin, char const *lookfor)
{
char const *cp;
size_t len;
- len = strlen(lookfor);
+ len = strlen (lookfor);
for (cp = lookin; *cp != '\0'; ++cp)
- if (strncmp(cp, lookfor, len) == 0)
+ if (strncmp (cp, lookfor, len) == 0)
return (char *) cp;
return NULL;
}
@@ -3659,13 +3695,13 @@ istrstr (char const *lookin, char const *lookfor)
static void
freelist (char **cpp)
{
- int i;
+ size_t i;
if (cpp == NULL)
return;
for (i = 0; cpp[i] != NULL; ++i)
{
- free(cpp[i]);
+ free (cpp[i]);
cpp[i] = NULL;
}
}
@@ -3673,38 +3709,38 @@ freelist (char **cpp)
static char **
enlist (char **cpp, char *new, size_t len)
{
- int i, j;
+ size_t i, j;
if (cpp == NULL)
return NULL;
- if ((new = icpyalloc(new)) == NULL)
+ if ((new = icpyalloc (new)) == NULL)
{
- freelist(cpp);
+ freelist (cpp);
return NULL;
}
new[len] = '\0';
/* Is there already something in the list that's new (or longer)? */
for (i = 0; cpp[i] != NULL; ++i)
- if (istrstr(cpp[i], new) != NULL)
+ if (istrstr (cpp[i], new) != NULL)
{
- free(new);
+ free (new);
return cpp;
}
/* Eliminate any obsoleted strings. */
j = 0;
while (cpp[j] != NULL)
- if (istrstr(new, cpp[j]) == NULL)
+ if (istrstr (new, cpp[j]) == NULL)
++j;
else
{
- free(cpp[j]);
+ free (cpp[j]);
if (--i == j)
break;
cpp[j] = cpp[i];
cpp[i] = NULL;
}
/* Add the new string. */
- REALLOC(cpp, i + 2);
+ REALLOC (cpp, i + 2);
cpp[i] = new;
cpp[i + 1] = NULL;
return cpp;
@@ -3723,7 +3759,7 @@ comsubs (char *left, char const *right)
if (left == NULL || right == NULL)
return NULL;
- cpp = malloc(sizeof *cpp);
+ cpp = malloc (sizeof *cpp);
if (cpp == NULL)
return NULL;
cpp[0] = NULL;
@@ -3758,13 +3794,13 @@ comsubs (char *left, char const *right)
static char **
addlists (char **old, char **new)
{
- int i;
+ size_t i;
if (old == NULL || new == NULL)
return NULL;
for (i = 0; new[i] != NULL; ++i)
{
- old = enlist(old, new[i], strlen(new[i]));
+ old = enlist (old, new[i], strlen (new[i]));
if (old == NULL)
break;
}
@@ -3778,11 +3814,11 @@ inboth (char **left, char **right)
{
char **both;
char **temp;
- int lnum, rnum;
+ size_t lnum, rnum;
if (left == NULL || right == NULL)
return NULL;
- both = malloc(sizeof *both);
+ both = malloc (sizeof *both);
if (both == NULL)
return NULL;
both[0] = NULL;
@@ -3790,15 +3826,15 @@ inboth (char **left, char **right)
{
for (rnum = 0; right[rnum] != NULL; ++rnum)
{
- temp = comsubs(left[lnum], right[rnum]);
+ temp = comsubs (left[lnum], right[rnum]);
if (temp == NULL)
{
- freelist(both);
+ freelist (both);
return NULL;
}
- both = addlists(both, temp);
- freelist(temp);
- free(temp);
+ both = addlists (both, temp);
+ freelist (temp);
+ free (temp);
if (both == NULL)
return NULL;
}
@@ -3815,10 +3851,10 @@ typedef struct
} must;
static void
-resetmust (must *mp)
+resetmust (must * mp)
{
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
- freelist(mp->in);
+ freelist (mp->in);
}
static void
@@ -3827,8 +3863,8 @@ dfamust (struct dfa *d)
must *musts;
must *mp;
char *result;
- int ri;
- int i;
+ size_t ri;
+ size_t i;
int exact;
token t;
static must must0;
@@ -3843,21 +3879,21 @@ dfamust (struct dfa *d)
mp[i] = must0;
for (i = 0; i <= d->tindex; ++i)
{
- mp[i].in = xmalloc(sizeof *mp[i].in);
- mp[i].left = xmalloc(2);
- mp[i].right = xmalloc(2);
- mp[i].is = xmalloc(2);
+ mp[i].in = xmalloc (sizeof *mp[i].in);
+ mp[i].left = xmalloc (2);
+ mp[i].right = xmalloc (2);
+ mp[i].is = xmalloc (2);
mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
mp[i].in[0] = NULL;
}
#ifdef DEBUG
- fprintf(stderr, "dfamust:\n");
+ fprintf (stderr, "dfamust:\n");
for (i = 0; i < d->tindex; ++i)
{
- fprintf(stderr, " %d:", i);
- prtok(d->tokens[i]);
+ fprintf (stderr, " %zd:", i);
+ prtok (d->tokens[i]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
for (ri = 0; ri < d->tindex; ++ri)
{
@@ -3874,13 +3910,13 @@ dfamust (struct dfa *d)
case LIMWORD:
case NOTLIMWORD:
case BACKREF:
- resetmust(mp);
+ resetmust (mp);
break;
case STAR:
case QMARK:
assert (musts < mp);
--mp;
- resetmust(mp);
+ resetmust (mp);
break;
case OR:
assert (&musts[2] <= mp);
@@ -3888,7 +3924,7 @@ dfamust (struct dfa *d)
char **new;
must *lmp;
must *rmp;
- int j, ln, rn, n;
+ size_t j, ln, rn, n;
rmp = --mp;
lmp = --mp;
@@ -3901,8 +3937,8 @@ dfamust (struct dfa *d)
++i;
lmp->left[i] = '\0';
/* Right side */
- ln = strlen(lmp->right);
- rn = strlen(rmp->right);
+ ln = strlen (lmp->right);
+ rn = strlen (rmp->right);
n = ln;
if (n > rn)
n = rn;
@@ -3912,11 +3948,11 @@ dfamust (struct dfa *d)
for (j = 0; j < i; ++j)
lmp->right[j] = lmp->right[(ln - i) + j];
lmp->right[j] = '\0';
- new = inboth(lmp->in, rmp->in);
+ new = inboth (lmp->in, rmp->in);
if (new == NULL)
goto done;
- freelist(lmp->in);
- free(lmp->in);
+ freelist (lmp->in);
+ free (lmp->in);
lmp->in = new;
}
break;
@@ -3928,7 +3964,7 @@ dfamust (struct dfa *d)
case END:
assert (mp == &musts[1]);
for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen(musts[0].in[i]) > strlen(result))
+ if (strlen (musts[0].in[i]) > strlen (result))
result = musts[0].in[i];
if (STREQ (result, musts[0].is))
exact = 1;
@@ -3942,41 +3978,39 @@ dfamust (struct dfa *d)
rmp = --mp;
lmp = --mp;
/* In. Everything in left, plus everything in
- right, plus catenation of
+ right, plus concatenation of
left's right and right's left. */
- lmp->in = addlists(lmp->in, rmp->in);
+ lmp->in = addlists (lmp->in, rmp->in);
if (lmp->in == NULL)
goto done;
- if (lmp->right[0] != '\0' &&
- rmp->left[0] != '\0')
+ if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
{
char *tp;
- tp = icpyalloc(lmp->right);
- tp = icatalloc(tp, rmp->left);
- lmp->in = enlist(lmp->in, tp, strlen(tp));
- free(tp);
+ tp = icpyalloc (lmp->right);
+ tp = icatalloc (tp, rmp->left);
+ lmp->in = enlist (lmp->in, tp, strlen (tp));
+ free (tp);
if (lmp->in == NULL)
goto done;
}
/* Left-hand */
if (lmp->is[0] != '\0')
{
- lmp->left = icatalloc(lmp->left,
- rmp->left);
+ lmp->left = icatalloc (lmp->left, rmp->left);
if (lmp->left == NULL)
goto done;
}
/* Right-hand */
if (rmp->is[0] == '\0')
lmp->right[0] = '\0';
- lmp->right = icatalloc(lmp->right, rmp->right);
+ lmp->right = icatalloc (lmp->right, rmp->right);
if (lmp->right == NULL)
goto done;
/* Guaranteed to be */
if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
{
- lmp->is = icatalloc(lmp->is, rmp->is);
+ lmp->is = icatalloc (lmp->is, rmp->is);
if (lmp->is == NULL)
goto done;
}
@@ -3994,59 +4028,55 @@ dfamust (struct dfa *d)
/* not on *my* shift */
goto done;
}
- else if (t >= CSET
- || !MBS_SUPPORT
- || t == ANYCHAR
- || t == MBCSET
- )
+ else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
{
/* easy enough */
- resetmust(mp);
+ resetmust (mp);
}
else
{
/* plain character */
- resetmust(mp);
+ resetmust (mp);
mp->is[0] = mp->left[0] = mp->right[0] = t;
mp->is[1] = mp->left[1] = mp->right[1] = '\0';
- mp->in = enlist(mp->in, mp->is, (size_t)1);
+ mp->in = enlist (mp->in, mp->is, (size_t) 1);
if (mp->in == NULL)
goto done;
}
break;
}
#ifdef DEBUG
- fprintf(stderr, " node: %d:", ri);
- prtok(d->tokens[ri]);
- fprintf(stderr, "\n in:");
+ fprintf (stderr, " node: %zd:", ri);
+ prtok (d->tokens[ri]);
+ fprintf (stderr, "\n in:");
for (i = 0; mp->in[i]; ++i)
- fprintf(stderr, " \"%s\"", mp->in[i]);
- fprintf(stderr, "\n is: \"%s\"\n", mp->is);
- fprintf(stderr, " left: \"%s\"\n", mp->left);
- fprintf(stderr, " right: \"%s\"\n", mp->right);
+ fprintf (stderr, " \"%s\"", mp->in[i]);
+ fprintf (stderr, "\n is: \"%s\"\n", mp->is);
+ fprintf (stderr, " left: \"%s\"\n", mp->left);
+ fprintf (stderr, " right: \"%s\"\n", mp->right);
#endif
++mp;
}
- done:
- if (strlen(result))
+done:
+ if (strlen (result))
{
- MALLOC(dm, 1);
+ MALLOC (dm, 1);
dm->exact = exact;
- MALLOC(dm->must, strlen(result) + 1);
- strcpy(dm->must, result);
+ MALLOC (dm->must, strlen (result) + 1);
+ strcpy (dm->must, result);
dm->next = d->musts;
d->musts = dm;
}
mp = musts;
for (i = 0; i <= d->tindex; ++i)
{
- freelist(mp[i].in);
- free(mp[i].in);
- free(mp[i].left);
- free(mp[i].right);
- free(mp[i].is);
+ freelist (mp[i].in);
+ free (mp[i].in);
+ free (mp[i].left);
+ free (mp[i].right);
+ free (mp[i].is);
}
- free(mp);
+ free (mp);
}
struct dfa *
@@ -4055,7 +4085,7 @@ dfaalloc (void)
return xmalloc (sizeof (struct dfa));
}
-struct dfamust * _GL_ATTRIBUTE_PURE
+struct dfamust *_GL_ATTRIBUTE_PURE
dfamusts (struct dfa const *d)
{
return d->musts;
diff --git a/dfa.h b/dfa.h
index bffa519..96dd4b8 100644
--- a/dfa.h
+++ b/dfa.h
@@ -1,5 +1,5 @@
/* dfa.h - declarations for GNU deterministic regexp compiler
- Copyright (C) 1988, 1998, 2007, 2009-2011 Free Software Foundation, Inc.
+ Copyright (C) 1988, 1998, 2007, 2009-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,17 +18,6 @@
/* Written June, 1988 by Mike Haertel */
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 6) || __STRICT_ANSI__
-# define __attribute__(x)
-#endif
-
-/* The __pure__ attribute was added in gcc 2.96. */
-#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
-# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
-#else
-# define _GL_ATTRIBUTE_PURE /* empty */
-#endif
-
/* Element of a list of strings, at least one of which is known to
appear in any R.E. matching the DFA. */
struct dfamust
@@ -74,7 +63,7 @@ extern void dfacomp (char const *, size_t, struct dfa *, int);
encountered a back-reference (1) or not (0). The caller may use this
to decide whether to fall back on a backtracking matcher. */
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
- int newline, int *count, int *backref);
+ int newline, size_t *count, int *backref);
/* Free the storage held by the components of a struct dfa. */
extern void dfafree (struct dfa *);
@@ -93,7 +82,7 @@ extern void dfaanalyze (struct dfa *, int);
/* Compute, for each possible character, the transitions out of a given
state, storing them in an array of integers. */
-extern void dfastate (int, struct dfa *, int []);
+extern void dfastate (ptrdiff_t, struct dfa *, ptrdiff_t []);
/* Error handling. */
@@ -106,4 +95,4 @@ extern void dfawarn (const char *);
/* dfaerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
The user must supply a dfaerror. */
-extern void dfaerror (const char *) __attribute__ ((noreturn));
+extern _Noreturn void dfaerror (const char *);
diff --git a/pc/ChangeLog b/pc/ChangeLog
index 837c5b5..41cc25b 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2012-03-29 Arnold D. Robbins <address@hidden>
+
+ * config.h: Add definition for _Noreturn.
+
2012-03-28 Arnold D. Robbins <address@hidden>
* 4.0.1: Release tar ball made.
diff --git a/pc/config.h b/pc/config.h
index 849b216..6b50eef 100644
--- a/pc/config.h
+++ b/pc/config.h
@@ -507,6 +507,18 @@
/* Define to 1 if on MINIX. */
#undef _MINIX
+/* The _Noreturn keyword of C11. */
+#ifndef _Noreturn
+# if (3 <= __GNUC__ || (__GNUC__ == 2 && 8 <= __GNUC_MINOR__) \
+ || 0x5110 <= __SUNPRO_C)
+# define _Noreturn __attribute__ ((__noreturn__))
+# elif defined _MSC_VER && 1200 <= _MSC_VER
+# define _Noreturn __declspec (noreturn)
+# else
+# define _Noreturn
+# endif
+#endif
+
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
#undef _POSIX_1_SOURCE
diff --git a/re.c b/re.c
index 1144ec7..9be46d9 100644
--- a/re.c
+++ b/re.c
@@ -268,7 +268,7 @@ research(Regexp *rp, char *str, int start,
*/
if (rp->dfa && ! no_bol && ! need_start) {
char save;
- int count = 0;
+ size_t count = 0;
/*
* dfa likes to stick a '\n' right after the matched
* text. So we just save and restore the character.
diff --git a/version.c b/version.c
index 695a1e3..c1ecb4f 100644
--- a/version.c
+++ b/version.c
@@ -1,3 +1,3 @@
#include "config.h"
-const char *version_string = "GNU Awk 4.0.1";
+const char *version_string = "GNU Awk 4.0.1a";
diff --git a/vms/ChangeLog b/vms/ChangeLog
index 3a8279e..fa1fbd1 100644
--- a/vms/ChangeLog
+++ b/vms/ChangeLog
@@ -1,3 +1,7 @@
+2012-03-29 Arnold D. Robbins <address@hidden>
+
+ * config.h: Add definition for _Noreturn.
+
2012-03-28 Arnold D. Robbins <address@hidden>
* 4.0.1: Release tar ball made.
diff --git a/vms/vms-conf.h b/vms/vms-conf.h
index b81c20a..59abd5d 100644
--- a/vms/vms-conf.h
+++ b/vms/vms-conf.h
@@ -461,6 +461,18 @@
/* Define to 1 if on MINIX. */
#undef _MINIX
+/* The _Noreturn keyword of C11. */
+#ifndef _Noreturn
+# if (3 <= __GNUC__ || (__GNUC__ == 2 && 8 <= __GNUC_MINOR__) \
+ || 0x5110 <= __SUNPRO_C)
+# define _Noreturn __attribute__ ((__noreturn__))
+# elif defined _MSC_VER && 1200 <= _MSC_VER
+# define _Noreturn __declspec (noreturn)
+# else
+# define _Noreturn
+# endif
+#endif
+
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
#undef _POSIX_1_SOURCE
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 10 +
configh.in | 13 +
configure | 23 +-
configure.ac | 40 ++-
dfa.c | 1982 ++++++++++++++++++++++++++++----------------------------
dfa.h | 19 +-
pc/ChangeLog | 4 +
pc/config.h | 12 +
re.c | 2 +-
version.c | 2 +-
vms/ChangeLog | 4 +
vms/vms-conf.h | 12 +
12 files changed, 1119 insertions(+), 1004 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gawk-diffs] [SCM] gawk branch, gawk-4.0-stable, updated. 19a22c41661c3b032c9cdcfd6913373526c025c8,
Arnold Robbins <=