gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-970


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-970-g86423cd
Date: Tue, 23 Aug 2016 02:54:32 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable has been updated
       via  86423cdaa93430b6389c7280e6cca621ecda182c (commit)
       via  20634ad7c920344eaff684419816ad523b2f4029 (commit)
       via  9b2cc2ba3a51f012d9006596811a867eb7846265 (commit)
      from  9346a8f73056487f614d81532c50b8703e3a3cf0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=86423cdaa93430b6389c7280e6cca621ecda182c

commit 86423cdaa93430b6389c7280e6cca621ecda182c
Author: Arnold D. Robbins <address@hidden>
Date:   Tue Aug 23 05:54:13 2016 +0300

    Bump version for one last pre-release tarball.

diff --git a/configure b/configure
index 967b7e1..4b8feff 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for GNU Awk 4.1.3i.
+# Generated by GNU Autoconf 2.69 for GNU Awk 4.1.3j.
 #
 # Report bugs to <address@hidden>.
 #
@@ -580,8 +580,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='GNU Awk'
 PACKAGE_TARNAME='gawk'
-PACKAGE_VERSION='4.1.3i'
-PACKAGE_STRING='GNU Awk 4.1.3i'
+PACKAGE_VERSION='4.1.3j'
+PACKAGE_STRING='GNU Awk 4.1.3j'
 PACKAGE_BUGREPORT='address@hidden'
 PACKAGE_URL='http://www.gnu.org/software/gawk/'
 
@@ -1328,7 +1328,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures GNU Awk 4.1.3i to adapt to many kinds of systems.
+\`configure' configures GNU Awk 4.1.3j to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1398,7 +1398,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of GNU Awk 4.1.3i:";;
+     short | recursive ) echo "Configuration of GNU Awk 4.1.3j:";;
    esac
   cat <<\_ACEOF
 
@@ -1517,7 +1517,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-GNU Awk configure 4.1.3i
+GNU Awk configure 4.1.3j
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2226,7 +2226,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by GNU Awk $as_me 4.1.3i, which was
+It was created by GNU Awk $as_me 4.1.3j, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3109,7 +3109,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gawk'
- VERSION='4.1.3i'
+ VERSION='4.1.3j'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -12047,7 +12047,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by GNU Awk $as_me 4.1.3i, which was
+This file was extended by GNU Awk $as_me 4.1.3j, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -12115,7 +12115,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-GNU Awk config.status 4.1.3i
+GNU Awk config.status 4.1.3j
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 60cb04c..3e415c1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@ dnl
 
 dnl Process this file with autoconf to produce a configure script.
 
-AC_INIT([GNU Awk], 4.1.3i, address@hidden, gawk)
+AC_INIT([GNU Awk], 4.1.3j, address@hidden, gawk)
 
 # This is a hack. Different versions of install on different systems
 # are just too different. Chuck it and use install-sh.

http://git.sv.gnu.org/cgit/gawk.git/commit/?id=20634ad7c920344eaff684419816ad523b2f4029

commit 20634ad7c920344eaff684419816ad523b2f4029
Author: Arnold D. Robbins <address@hidden>
Date:   Tue Aug 23 05:53:44 2016 +0300

    Doc updates, add 'spell' target for make.

diff --git a/ChangeLog b/ChangeLog
index 356722d..c7dac31 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -7,6 +7,11 @@
        (resetup): Call dfa_init.
        * node.c (str2wstr): using_utf8 is now called dfa_using_utf8.
 
+       Unrelated:
+
+       * Makefile.am: Quote all uses of $(srcdir) and $(distdir).
+       (spell): New target.
+
 2016-08-18         Arnold D. Robbins     <address@hidden>
 
        * dfa.c: Sync with grep.
diff --git a/Makefile.am b/Makefile.am
index 4ddd8ff..8660c11 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -185,16 +185,16 @@ check-local: gawk$(EXEEXT)
 # A little extra clean up when making distributions.
 # And additional set up for the pc directory.
 dist-hook:
-       cd $(distdir)/extension ; rm -f *.o *.so
-       cd $(srcdir)/pc ; \
+       cd "$(distdir)"/extension ; rm -f *.o *.so
+       cd "$(srcdir)"/pc ; \
        chmod u+w config.h ; \
        sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \
        sed -f config.sed < ../configh.in > /tmp/config.tmp ; \
        sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \
        $(RM) /tmp/tmp.sed /tmp/config.tmp
        pwd
-       chmod u+w $(distdir)/pc/config.h
-       cp $(srcdir)/pc/config.h $(distdir)/pc/config.h
+       chmod u+w "$(distdir)"/pc/config.h
+       cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h
 
 # Special rules for individual files
 # Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build
@@ -202,14 +202,14 @@ dist-hook:
 
 awkgram.c: awkgram.y
        $(YACC) $(AM_YFLAGS) $(YFLAGS) $<
-       sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c
+       sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c
        if test -f y.tab.h; then \
        if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \
        else :; fi
 
 command.c: command.y
        $(YACC) -p zz $<
-       sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c
+       sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c
 
 # This is for my development & testing.
 efence: gawk
@@ -227,3 +227,6 @@ valgrind-noleak:
        cd test; rm -f log.[0-9]*; \
        make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \
        make valgrind-scan
+
+spell:
+       cd "$(srcdir)"/doc ; $(MAKE) spell
diff --git a/Makefile.in b/Makefile.in
index 7a2ccd3..afb9bdc 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1197,16 +1197,16 @@ check-local: gawk$(EXEEXT)
 # A little extra clean up when making distributions.
 # And additional set up for the pc directory.
 dist-hook:
-       cd $(distdir)/extension ; rm -f *.o *.so
-       cd $(srcdir)/pc ; \
+       cd "$(distdir)"/extension ; rm -f *.o *.so
+       cd "$(srcdir)"/pc ; \
        chmod u+w config.h ; \
        sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \
        sed -f config.sed < ../configh.in > /tmp/config.tmp ; \
        sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \
        $(RM) /tmp/tmp.sed /tmp/config.tmp
        pwd
-       chmod u+w $(distdir)/pc/config.h
-       cp $(srcdir)/pc/config.h $(distdir)/pc/config.h
+       chmod u+w "$(distdir)"/pc/config.h
+       cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h
 
 # Special rules for individual files
 # Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build
@@ -1214,14 +1214,14 @@ dist-hook:
 
 awkgram.c: awkgram.y
        $(YACC) $(AM_YFLAGS) $(YFLAGS) $<
-       sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c
+       sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c
        if test -f y.tab.h; then \
        if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \
        else :; fi
 
 command.c: command.y
        $(YACC) -p zz $<
-       sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c
+       sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c
 
 # This is for my development & testing.
 efence: gawk
@@ -1240,6 +1240,9 @@ valgrind-noleak:
        make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \
        make valgrind-scan
 
+spell:
+       cd "$(srcdir)"/doc ; $(MAKE) spell
+
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/doc/ChangeLog b/doc/ChangeLog
index deb45a4..f518835 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,11 @@
+2016-08-23         Arnold D. Robbins     <address@hidden>
+
+       * Makefile.am (EXTRA_DIST): Add new file, wordlist.
+       (spell): New target.
+       * wordlist: New file.
+       * gawktexi.in: Fix typos, adjust update date.
+       * awkcard.in: Update copyright years.
+
 2016-08-01         Arnold D. Robbins     <address@hidden>
 
        * gawktexi.in: Mark DJGPP port as unsupported.
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 5c585f1..bda97de 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -47,6 +47,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
        lflashlight-small.xpic lflashlight.eps lflashlight.pdf \
        rflashlight-small.xpic rflashlight.eps rflashlight.pdf \
        statist.jpg statist.eps statist.pdf \
+       wordlist \
        bc_notes
 
 # Get rid of generated files when cleaning
@@ -109,3 +110,7 @@ awkcard.nc: $(CARDFILES)
 
 awkcard.pdf: awkcard.ps
        ps2pdf awkcard.ps awkcard.pdf
+
+spell:
+       export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \
+       sort -u | comm -23 - "$(srcdir)"/wordlist
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 577b477..96103d7 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -373,6 +373,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
        lflashlight-small.xpic lflashlight.eps lflashlight.pdf \
        rflashlight-small.xpic rflashlight.eps rflashlight.pdf \
        statist.jpg statist.eps statist.pdf \
+       wordlist \
        bc_notes
 
 
@@ -915,6 +916,10 @@ awkcard.nc: $(CARDFILES)
 awkcard.pdf: awkcard.ps
        ps2pdf awkcard.ps awkcard.pdf
 
+spell:
+       export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \
+       sort -u | comm -23 - "$(srcdir)"/wordlist
+
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/doc/awkcard.in b/doc/awkcard.in
index e64f394..34648be 100644
--- a/doc/awkcard.in
+++ b/doc/awkcard.in
@@ -1,7 +1,7 @@
 .\" AWK Reference Card --- Arnold Robbins, address@hidden
 .\"
 .\" Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015
+.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016
 .\" Free Software Foundation, Inc.
 .\" 
 .\" Permission is granted to make and distribute verbatim copies of
@@ -100,7 +100,7 @@ Brian Kernighan and Michael Brennan who reviewed it.
 \*(CD
 .SL
 .nf
-\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2014
+\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2016
 Free Software Foundation, Inc.
 .nf
 .BT
@@ -1970,7 +1970,7 @@ maintains it.\*(CX
 .ES
 .fi
 \*(CDCopyright \(co 1996\(en2005,
-2007, 2009\(en2014 Free Software Foundation, Inc.
+2007, 2009\(en2016 Free Software Foundation, Inc.
 .sp .5   
 Permission is granted to make and distribute verbatim copies of this
 reference card provided the copyright notice and this permission notice
diff --git a/doc/gawk.info b/doc/gawk.info
index da03f15..6dcbfc3 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -12896,7 +12896,7 @@ would see the latter (undesirable) output.
 terminal device.  On modern systems, this means your keyboard and
 screen.
 
-   (2) In private correspondance, Dr. Kernighan has indicated to me that
+   (2) In private correspondence, Dr. Kernighan has indicated to me that
 the way this was done was probably a mistake.
 
 
@@ -15313,7 +15313,7 @@ anyway, because 'gawk' goes to the next file as soon as an 'ENDFILE'
 rule finishes!)
 
    You need to be careful calling 'rewind()'.  You can end up causing
-infinite recursion if you don't pay attenion.  Here is an example use:
+infinite recursion if you don't pay attention.  Here is an example use:
 
      $ cat data
      -| a
@@ -30245,7 +30245,7 @@ Reference Counts
      assumed by a variable is used in more than one place, only one copy
      of the value itself is kept, and the associated reference count is
      increased when the same value is used by an additional variable,
-     and decresed when the related variable is no longer in use.  When
+     and decreased when the related variable is no longer in use.  When
      the reference count goes to zero, the memory space used to store
      the value of the variable is freed.
 
@@ -32786,13 +32786,13 @@ Index
 * elements of arrays:                    Reference to Elements.
                                                               (line   6)
 * email address for bug reports, address@hidden: Bugs.      (line  30)
-* EMISTERED:                             TCP/IP Networking.   (line   6)
 * empty array elements:                  Reference to Elements.
                                                               (line  18)
 * empty pattern:                         Empty.               (line   6)
 * empty strings:                         awk split records.   (line 114)
 * empty strings, See null strings:       Regexp Field Splitting.
                                                               (line  43)
+* EMRED:                                 TCP/IP Networking.   (line   6)
 * enable breakpoint:                     Breakpoint Control.  (line  73)
 * enable debugger command:               Breakpoint Control.  (line  73)
 * end debugger command:                  Debugger Execution Control.
@@ -35006,270 +35006,270 @@ Node: Shell Quoting631858
 Node: Data File Management633259
 Node: Filetrans Function633891
 Node: Rewind Function637987
-Node: File Checking639892
-Ref: File Checking-Footnote-1641226
-Node: Empty Files641427
-Node: Ignoring Assigns643406
-Node: Getopt Function644956
-Ref: Getopt Function-Footnote-1656425
-Node: Passwd Functions656625
-Ref: Passwd Functions-Footnote-1665464
-Node: Group Functions665552
-Ref: Group Functions-Footnote-1673449
-Node: Walking Arrays673656
-Node: Library Functions Summary676664
-Node: Library Exercises678070
-Node: Sample Programs678535
-Node: Running Examples679305
-Node: Clones680033
-Node: Cut Program681257
-Node: Egrep Program691186
-Ref: Egrep Program-Footnote-1698698
-Node: Id Program698808
-Node: Split Program702488
-Ref: Split Program-Footnote-1705947
-Node: Tee Program706076
-Node: Uniq Program708866
-Node: Wc Program716292
-Ref: Wc Program-Footnote-1720547
-Node: Miscellaneous Programs720641
-Node: Dupword Program721854
-Node: Alarm Program723884
-Node: Translate Program728739
-Ref: Translate Program-Footnote-1733304
-Node: Labels Program733574
-Ref: Labels Program-Footnote-1736925
-Node: Word Sorting737009
-Node: History Sorting741081
-Node: Extract Program742916
-Node: Simple Sed750445
-Node: Igawk Program753519
-Ref: Igawk Program-Footnote-1767850
-Ref: Igawk Program-Footnote-2768052
-Ref: Igawk Program-Footnote-3768174
-Node: Anagram Program768289
-Node: Signature Program771351
-Node: Programs Summary772598
-Node: Programs Exercises773812
-Ref: Programs Exercises-Footnote-1777941
-Node: Advanced Features778032
-Node: Nondecimal Data780022
-Node: Array Sorting781613
-Node: Controlling Array Traversal782313
-Ref: Controlling Array Traversal-Footnote-1790680
-Node: Array Sorting Functions790798
-Ref: Array Sorting Functions-Footnote-1795889
-Node: Two-way I/O796085
-Ref: Two-way I/O-Footnote-1802379
-Ref: Two-way I/O-Footnote-2802566
-Node: TCP/IP Networking802648
-Node: Profiling805766
-Node: Advanced Features Summary813305
-Node: Internationalization815241
-Node: I18N and L10N816721
-Node: Explaining gettext817408
-Ref: Explaining gettext-Footnote-1823300
-Ref: Explaining gettext-Footnote-2823485
-Node: Programmer i18n823650
-Ref: Programmer i18n-Footnote-1828505
-Node: Translator i18n828554
-Node: String Extraction829348
-Ref: String Extraction-Footnote-1830480
-Node: Printf Ordering830566
-Ref: Printf Ordering-Footnote-1833352
-Node: I18N Portability833416
-Ref: I18N Portability-Footnote-1835872
-Node: I18N Example835935
-Ref: I18N Example-Footnote-1838741
-Node: Gawk I18N838814
-Node: I18N Summary839459
-Node: Debugger840800
-Node: Debugging841822
-Node: Debugging Concepts842263
-Node: Debugging Terms844072
-Node: Awk Debugging846647
-Node: Sample Debugging Session847553
-Node: Debugger Invocation848087
-Node: Finding The Bug849473
-Node: List of Debugger Commands855951
-Node: Breakpoint Control857284
-Node: Debugger Execution Control860978
-Node: Viewing And Changing Data864340
-Node: Execution Stack867714
-Node: Debugger Info869351
-Node: Miscellaneous Debugger Commands873422
-Node: Readline Support878510
-Node: Limitations879406
-Node: Debugging Summary881515
-Node: Arbitrary Precision Arithmetic882688
-Node: Computer Arithmetic884104
-Ref: table-numeric-ranges887695
-Ref: Computer Arithmetic-Footnote-1888417
-Node: Math Definitions888474
-Ref: table-ieee-formats891788
-Ref: Math Definitions-Footnote-1892391
-Node: MPFR features892496
-Node: FP Math Caution894213
-Ref: FP Math Caution-Footnote-1895285
-Node: Inexactness of computations895654
-Node: Inexact representation896614
-Node: Comparing FP Values897974
-Node: Errors accumulate899056
-Node: Getting Accuracy900489
-Node: Try To Round903199
-Node: Setting precision904098
-Ref: table-predefined-precision-strings904795
-Node: Setting the rounding mode906625
-Ref: table-gawk-rounding-modes906999
-Ref: Setting the rounding mode-Footnote-1910407
-Node: Arbitrary Precision Integers910586
-Ref: Arbitrary Precision Integers-Footnote-1913570
-Node: POSIX Floating Point Problems913719
-Ref: POSIX Floating Point Problems-Footnote-1917601
-Node: Floating point summary917639
-Node: Dynamic Extensions919829
-Node: Extension Intro921382
-Node: Plugin License922648
-Node: Extension Mechanism Outline923445
-Ref: figure-load-extension923884
-Ref: figure-register-new-function925449
-Ref: figure-call-new-function926541
-Node: Extension API Description928603
-Node: Extension API Functions Introduction930051
-Node: General Data Types934863
-Ref: General Data Types-Footnote-1940818
-Node: Memory Allocation Functions941117
-Ref: Memory Allocation Functions-Footnote-1943962
-Node: Constructor Functions944061
-Node: Registration Functions945806
-Node: Extension Functions946491
-Node: Exit Callback Functions948790
-Node: Extension Version String950040
-Node: Input Parsers950703
-Node: Output Wrappers960585
-Node: Two-way processors965097
-Node: Printing Messages967362
-Ref: Printing Messages-Footnote-1968436
-Node: Updating ERRNO968589
-Node: Requesting Values969328
-Ref: table-value-types-returned970065
-Node: Accessing Parameters970948
-Node: Symbol Table Access972183
-Node: Symbol table by name972695
-Node: Symbol table by cookie974716
-Ref: Symbol table by cookie-Footnote-1978868
-Node: Cached values978932
-Ref: Cached values-Footnote-1982439
-Node: Array Manipulation982530
-Ref: Array Manipulation-Footnote-1983629
-Node: Array Data Types983666
-Ref: Array Data Types-Footnote-1986324
-Node: Array Functions986416
-Node: Flattening Arrays990274
-Node: Creating Arrays997182
-Node: Extension API Variables1001951
-Node: Extension Versioning1002587
-Ref: gawk-api-version1003024
-Node: Extension API Informational Variables1004780
-Node: Extension API Boilerplate1005844
-Node: Finding Extensions1009658
-Node: Extension Example1010217
-Node: Internal File Description1011015
-Node: Internal File Ops1015095
-Ref: Internal File Ops-Footnote-11026857
-Node: Using Internal File Ops1026997
-Ref: Using Internal File Ops-Footnote-11029380
-Node: Extension Samples1029654
-Node: Extension Sample File Functions1031183
-Node: Extension Sample Fnmatch1038832
-Node: Extension Sample Fork1040319
-Node: Extension Sample Inplace1041537
-Node: Extension Sample Ord1044747
-Node: Extension Sample Readdir1045583
-Ref: table-readdir-file-types1046472
-Node: Extension Sample Revout1047277
-Node: Extension Sample Rev2way1047866
-Node: Extension Sample Read write array1048606
-Node: Extension Sample Readfile1050548
-Node: Extension Sample Time1051643
-Node: Extension Sample API Tests1052991
-Node: gawkextlib1053483
-Node: Extension summary1055907
-Node: Extension Exercises1059599
-Node: Language History1061097
-Node: V7/SVR3.11062753
-Node: SVR41064905
-Node: POSIX1066339
-Node: BTL1067718
-Node: POSIX/GNU1068447
-Node: Feature History1073968
-Node: Common Extensions1087297
-Node: Ranges and Locales1088580
-Ref: Ranges and Locales-Footnote-11093196
-Ref: Ranges and Locales-Footnote-21093223
-Ref: Ranges and Locales-Footnote-31093458
-Node: Contributors1093679
-Node: History summary1099239
-Node: Installation1100619
-Node: Gawk Distribution1101563
-Node: Getting1102047
-Node: Extracting1103008
-Node: Distribution contents1104646
-Node: Unix Installation1110397
-Node: Quick Installation1111013
-Node: Additional Configuration Options1113440
-Node: Configuration Philosophy1115244
-Node: Non-Unix Installation1117613
-Node: PC Installation1118071
-Node: PC Binary Installation1119391
-Node: PC Compiling1121243
-Ref: PC Compiling-Footnote-11124037
-Node: PC Testing1124146
-Node: PC Using1125326
-Ref: PC Using-Footnote-11129479
-Node: Cygwin1129552
-Node: MSYS1130322
-Node: VMS Installation1130823
-Node: VMS Compilation1131614
-Ref: VMS Compilation-Footnote-11132843
-Node: VMS Dynamic Extensions1132901
-Node: VMS Installation Details1134586
-Node: VMS Running1136839
-Node: VMS GNV1141118
-Node: VMS Old Gawk1141853
-Node: Bugs1142324
-Node: Other Versions1146639
-Node: Installation summary1153223
-Node: Notes1154274
-Node: Compatibility Mode1155139
-Node: Additions1155921
-Node: Accessing The Source1156846
-Node: Adding Code1158281
-Node: New Ports1164500
-Node: Derived Files1168988
-Ref: Derived Files-Footnote-11174473
-Ref: Derived Files-Footnote-21174508
-Ref: Derived Files-Footnote-31175106
-Node: Future Extensions1175220
-Node: Implementation Limitations1175878
-Node: Extension Design1177061
-Node: Old Extension Problems1178215
-Ref: Old Extension Problems-Footnote-11179733
-Node: Extension New Mechanism Goals1179790
-Ref: Extension New Mechanism Goals-Footnote-11183154
-Node: Extension Other Design Decisions1183343
-Node: Extension Future Growth1185456
-Node: Old Extension Mechanism1186292
-Node: Notes summary1188055
-Node: Basic Concepts1189237
-Node: Basic High Level1189918
-Ref: figure-general-flow1190200
-Ref: figure-process-flow1190885
-Ref: Basic High Level-Footnote-11194186
-Node: Basic Data Typing1194371
-Node: Glossary1197699
-Node: Copying1229645
-Node: GNU Free Documentation License1267184
-Node: Index1292302
+Node: File Checking639893
+Ref: File Checking-Footnote-1641227
+Node: Empty Files641428
+Node: Ignoring Assigns643407
+Node: Getopt Function644957
+Ref: Getopt Function-Footnote-1656426
+Node: Passwd Functions656626
+Ref: Passwd Functions-Footnote-1665465
+Node: Group Functions665553
+Ref: Group Functions-Footnote-1673450
+Node: Walking Arrays673657
+Node: Library Functions Summary676665
+Node: Library Exercises678071
+Node: Sample Programs678536
+Node: Running Examples679306
+Node: Clones680034
+Node: Cut Program681258
+Node: Egrep Program691187
+Ref: Egrep Program-Footnote-1698699
+Node: Id Program698809
+Node: Split Program702489
+Ref: Split Program-Footnote-1705948
+Node: Tee Program706077
+Node: Uniq Program708867
+Node: Wc Program716293
+Ref: Wc Program-Footnote-1720548
+Node: Miscellaneous Programs720642
+Node: Dupword Program721855
+Node: Alarm Program723885
+Node: Translate Program728740
+Ref: Translate Program-Footnote-1733305
+Node: Labels Program733575
+Ref: Labels Program-Footnote-1736926
+Node: Word Sorting737010
+Node: History Sorting741082
+Node: Extract Program742917
+Node: Simple Sed750446
+Node: Igawk Program753520
+Ref: Igawk Program-Footnote-1767851
+Ref: Igawk Program-Footnote-2768053
+Ref: Igawk Program-Footnote-3768175
+Node: Anagram Program768290
+Node: Signature Program771352
+Node: Programs Summary772599
+Node: Programs Exercises773813
+Ref: Programs Exercises-Footnote-1777942
+Node: Advanced Features778033
+Node: Nondecimal Data780023
+Node: Array Sorting781614
+Node: Controlling Array Traversal782314
+Ref: Controlling Array Traversal-Footnote-1790681
+Node: Array Sorting Functions790799
+Ref: Array Sorting Functions-Footnote-1795890
+Node: Two-way I/O796086
+Ref: Two-way I/O-Footnote-1802380
+Ref: Two-way I/O-Footnote-2802567
+Node: TCP/IP Networking802649
+Node: Profiling805767
+Node: Advanced Features Summary813306
+Node: Internationalization815242
+Node: I18N and L10N816722
+Node: Explaining gettext817409
+Ref: Explaining gettext-Footnote-1823301
+Ref: Explaining gettext-Footnote-2823486
+Node: Programmer i18n823651
+Ref: Programmer i18n-Footnote-1828506
+Node: Translator i18n828555
+Node: String Extraction829349
+Ref: String Extraction-Footnote-1830481
+Node: Printf Ordering830567
+Ref: Printf Ordering-Footnote-1833353
+Node: I18N Portability833417
+Ref: I18N Portability-Footnote-1835873
+Node: I18N Example835936
+Ref: I18N Example-Footnote-1838742
+Node: Gawk I18N838815
+Node: I18N Summary839460
+Node: Debugger840801
+Node: Debugging841823
+Node: Debugging Concepts842264
+Node: Debugging Terms844073
+Node: Awk Debugging846648
+Node: Sample Debugging Session847554
+Node: Debugger Invocation848088
+Node: Finding The Bug849474
+Node: List of Debugger Commands855952
+Node: Breakpoint Control857285
+Node: Debugger Execution Control860979
+Node: Viewing And Changing Data864341
+Node: Execution Stack867715
+Node: Debugger Info869352
+Node: Miscellaneous Debugger Commands873423
+Node: Readline Support878511
+Node: Limitations879407
+Node: Debugging Summary881516
+Node: Arbitrary Precision Arithmetic882689
+Node: Computer Arithmetic884105
+Ref: table-numeric-ranges887696
+Ref: Computer Arithmetic-Footnote-1888418
+Node: Math Definitions888475
+Ref: table-ieee-formats891789
+Ref: Math Definitions-Footnote-1892392
+Node: MPFR features892497
+Node: FP Math Caution894214
+Ref: FP Math Caution-Footnote-1895286
+Node: Inexactness of computations895655
+Node: Inexact representation896615
+Node: Comparing FP Values897975
+Node: Errors accumulate899057
+Node: Getting Accuracy900490
+Node: Try To Round903200
+Node: Setting precision904099
+Ref: table-predefined-precision-strings904796
+Node: Setting the rounding mode906626
+Ref: table-gawk-rounding-modes907000
+Ref: Setting the rounding mode-Footnote-1910408
+Node: Arbitrary Precision Integers910587
+Ref: Arbitrary Precision Integers-Footnote-1913571
+Node: POSIX Floating Point Problems913720
+Ref: POSIX Floating Point Problems-Footnote-1917602
+Node: Floating point summary917640
+Node: Dynamic Extensions919830
+Node: Extension Intro921383
+Node: Plugin License922649
+Node: Extension Mechanism Outline923446
+Ref: figure-load-extension923885
+Ref: figure-register-new-function925450
+Ref: figure-call-new-function926542
+Node: Extension API Description928604
+Node: Extension API Functions Introduction930052
+Node: General Data Types934864
+Ref: General Data Types-Footnote-1940819
+Node: Memory Allocation Functions941118
+Ref: Memory Allocation Functions-Footnote-1943963
+Node: Constructor Functions944062
+Node: Registration Functions945807
+Node: Extension Functions946492
+Node: Exit Callback Functions948791
+Node: Extension Version String950041
+Node: Input Parsers950704
+Node: Output Wrappers960586
+Node: Two-way processors965098
+Node: Printing Messages967363
+Ref: Printing Messages-Footnote-1968437
+Node: Updating ERRNO968590
+Node: Requesting Values969329
+Ref: table-value-types-returned970066
+Node: Accessing Parameters970949
+Node: Symbol Table Access972184
+Node: Symbol table by name972696
+Node: Symbol table by cookie974717
+Ref: Symbol table by cookie-Footnote-1978869
+Node: Cached values978933
+Ref: Cached values-Footnote-1982440
+Node: Array Manipulation982531
+Ref: Array Manipulation-Footnote-1983630
+Node: Array Data Types983667
+Ref: Array Data Types-Footnote-1986325
+Node: Array Functions986417
+Node: Flattening Arrays990275
+Node: Creating Arrays997183
+Node: Extension API Variables1001952
+Node: Extension Versioning1002588
+Ref: gawk-api-version1003025
+Node: Extension API Informational Variables1004781
+Node: Extension API Boilerplate1005845
+Node: Finding Extensions1009659
+Node: Extension Example1010218
+Node: Internal File Description1011016
+Node: Internal File Ops1015096
+Ref: Internal File Ops-Footnote-11026858
+Node: Using Internal File Ops1026998
+Ref: Using Internal File Ops-Footnote-11029381
+Node: Extension Samples1029655
+Node: Extension Sample File Functions1031184
+Node: Extension Sample Fnmatch1038833
+Node: Extension Sample Fork1040320
+Node: Extension Sample Inplace1041538
+Node: Extension Sample Ord1044748
+Node: Extension Sample Readdir1045584
+Ref: table-readdir-file-types1046473
+Node: Extension Sample Revout1047278
+Node: Extension Sample Rev2way1047867
+Node: Extension Sample Read write array1048607
+Node: Extension Sample Readfile1050549
+Node: Extension Sample Time1051644
+Node: Extension Sample API Tests1052992
+Node: gawkextlib1053484
+Node: Extension summary1055908
+Node: Extension Exercises1059600
+Node: Language History1061098
+Node: V7/SVR3.11062754
+Node: SVR41064906
+Node: POSIX1066340
+Node: BTL1067719
+Node: POSIX/GNU1068448
+Node: Feature History1073969
+Node: Common Extensions1087298
+Node: Ranges and Locales1088581
+Ref: Ranges and Locales-Footnote-11093197
+Ref: Ranges and Locales-Footnote-21093224
+Ref: Ranges and Locales-Footnote-31093459
+Node: Contributors1093680
+Node: History summary1099240
+Node: Installation1100620
+Node: Gawk Distribution1101564
+Node: Getting1102048
+Node: Extracting1103009
+Node: Distribution contents1104647
+Node: Unix Installation1110398
+Node: Quick Installation1111014
+Node: Additional Configuration Options1113441
+Node: Configuration Philosophy1115245
+Node: Non-Unix Installation1117614
+Node: PC Installation1118072
+Node: PC Binary Installation1119392
+Node: PC Compiling1121244
+Ref: PC Compiling-Footnote-11124038
+Node: PC Testing1124147
+Node: PC Using1125327
+Ref: PC Using-Footnote-11129480
+Node: Cygwin1129553
+Node: MSYS1130323
+Node: VMS Installation1130824
+Node: VMS Compilation1131615
+Ref: VMS Compilation-Footnote-11132844
+Node: VMS Dynamic Extensions1132902
+Node: VMS Installation Details1134587
+Node: VMS Running1136840
+Node: VMS GNV1141119
+Node: VMS Old Gawk1141854
+Node: Bugs1142325
+Node: Other Versions1146640
+Node: Installation summary1153224
+Node: Notes1154275
+Node: Compatibility Mode1155140
+Node: Additions1155922
+Node: Accessing The Source1156847
+Node: Adding Code1158282
+Node: New Ports1164501
+Node: Derived Files1168989
+Ref: Derived Files-Footnote-11174474
+Ref: Derived Files-Footnote-21174509
+Ref: Derived Files-Footnote-31175107
+Node: Future Extensions1175221
+Node: Implementation Limitations1175879
+Node: Extension Design1177062
+Node: Old Extension Problems1178216
+Ref: Old Extension Problems-Footnote-11179734
+Node: Extension New Mechanism Goals1179791
+Ref: Extension New Mechanism Goals-Footnote-11183155
+Node: Extension Other Design Decisions1183344
+Node: Extension Future Growth1185457
+Node: Old Extension Mechanism1186293
+Node: Notes summary1188056
+Node: Basic Concepts1189238
+Node: Basic High Level1189919
+Ref: figure-general-flow1190201
+Ref: figure-process-flow1190886
+Ref: Basic High Level-Footnote-11194187
+Node: Basic Data Typing1194372
+Node: Glossary1197700
+Node: Copying1229647
+Node: GNU Free Documentation License1267186
+Node: Index1292304
 
 End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 99b77fa..acf57e5 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -56,7 +56,7 @@
 @c applies to and all the info about who's publishing this edition
 
 @c These apply across the board.
-@set UPDATE-MONTH June, 2016
+@set UPDATE-MONTH August, 2016
 @set VERSION 4.1
 @set PATCHLEVEL 4
 
@@ -18415,7 +18415,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6).
 Traditionally, @command{awk}'s @code{system()} function has simply
 returned the exit status value divided by 256. In the normal case this
 gives the exit status but in the case of death-by-signal it yields
-a fractional floating-point value.@footnote{In private correspondance,
+a fractional floating-point value.@footnote{In private correspondence,
 Dr.@: Kernighan has indicated to me that the way this was done
 was probably a mistake.} POSIX states that @command{awk}'s
 @code{system()} should return the full 16-bit value.
@@ -21718,7 +21718,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule.
 file as soon as an @code{ENDFILE} rule finishes!)
 
 You need to be careful calling @code{rewind()}.  You can end up
-causing infinite recursion if you don't pay attenion. Here is an
+causing infinite recursion if you don't pay attention. Here is an
 example use:
 
 @example
@@ -27593,7 +27593,7 @@ programming and knowledge of the behavior of the coprocess are required.
 @cindex files, @code{/inet4/@dots{}} (@command{gawk})
 @cindex @code{/inet6/@dots{}} special files (@command{gawk})
 @cindex files, @code{/inet6/@dots{}} (@command{gawk})
-@cindex @code{EMISTERED}
+@cindex @code{EMRED}
 @ifnotdocbook
 @quotation
 @code{EMRED}:@*
@@ -27608,7 +27608,7 @@ programming and knowledge of the behavior of the coprocess are required.
 @docbook
 <blockquote>
 <attribution>Mike O'Brien (aka Mr.&nbsp;Protocol)</attribution>
-<literallayout class="normal"><literal>EMISTERED</literal>:
+<literallayout class="normal"><literal>EMRED</literal>:
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
@@ -40719,7 +40719,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory
 needed to store the value of string variables. If the value assumed by
 a variable is used in more than one place, only one copy of the value
 itself is kept, and the associated reference count is increased when the
-same value is used by an additional variable, and decresed when the related
+same value is used by an additional variable, and decreased when the related
 variable is no longer in use. When the reference count goes to zero,
 the memory space used to store the value of the variable is freed.
 
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 83ee9a1..2c94c30 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -51,7 +51,7 @@
 @c applies to and all the info about who's publishing this edition
 
 @c These apply across the board.
-@set UPDATE-MONTH June, 2016
+@set UPDATE-MONTH August, 2016
 @set VERSION 4.1
 @set PATCHLEVEL 4
 
@@ -17607,7 +17607,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6).
 Traditionally, @command{awk}'s @code{system()} function has simply
 returned the exit status value divided by 256. In the normal case this
 gives the exit status but in the case of death-by-signal it yields
-a fractional floating-point value.@footnote{In private correspondance,
+a fractional floating-point value.@footnote{In private correspondence,
 Dr.@: Kernighan has indicated to me that the way this was done
 was probably a mistake.} POSIX states that @command{awk}'s
 @code{system()} should return the full 16-bit value.
@@ -20809,7 +20809,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule.
 file as soon as an @code{ENDFILE} rule finishes!)
 
 You need to be careful calling @code{rewind()}.  You can end up
-causing infinite recursion if you don't pay attenion. Here is an
+causing infinite recursion if you don't pay attention. Here is an
 example use:
 
 @example
@@ -26684,7 +26684,7 @@ programming and knowledge of the behavior of the coprocess are required.
 @cindex files, @code{/inet4/@dots{}} (@command{gawk})
 @cindex @code{/inet6/@dots{}} special files (@command{gawk})
 @cindex files, @code{/inet6/@dots{}} (@command{gawk})
-@cindex @code{EMISTERED}
+@cindex @code{EMRED}
 @ifnotdocbook
 @quotation
 @code{EMRED}:@*
@@ -26699,7 +26699,7 @@ programming and knowledge of the behavior of the coprocess are required.
 @docbook
 <blockquote>
 <attribution>Mike O'Brien (aka Mr.&nbsp;Protocol)</attribution>
-<literallayout class="normal"><literal>EMISTERED</literal>:
+<literallayout class="normal"><literal>EMRED</literal>:
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
 &nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
@@ -39810,7 +39810,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory
 needed to store the value of string variables. If the value assumed by
 a variable is used in more than one place, only one copy of the value
 itself is kept, and the associated reference count is increased when the
-same value is used by an additional variable, and decresed when the related
+same value is used by an additional variable, and decreased when the related
 variable is no longer in use. When the reference count goes to zero,
 the memory space used to store the value of the variable is freed.
 

http://git.sv.gnu.org/cgit/gawk.git/commit/?id=9b2cc2ba3a51f012d9006596811a867eb7846265

commit 9b2cc2ba3a51f012d9006596811a867eb7846265
Author: Arnold D. Robbins <address@hidden>
Date:   Tue Aug 23 05:48:38 2016 +0300

    Update dfa, including API changes.

diff --git a/ChangeLog b/ChangeLog
index 72e3abe..356722d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2016-08-23         Arnold D. Robbins     <address@hidden>
+
+       * dfa.h: Sync with grep. API changes.
+       * dfa.c: Sync with grep.
+       * re.c (make_regexp): Adjust for API changes, move call to dfasyntax
+       into stanza that compiles the regex.
+       (resetup): Call dfa_init.
+       * node.c (str2wstr): using_utf8 is now called dfa_using_utf8.
+
 2016-08-18         Arnold D. Robbins     <address@hidden>
 
        * dfa.c: Sync with grep.
diff --git a/dfa.c b/dfa.c
index aeb38df..4d1e1ab 100644
--- a/dfa.c
+++ b/dfa.c
@@ -363,14 +363,85 @@ struct mb_char_classes
   size_t nchars;
 };
 
+struct regex_syntax
+{
+  /* Syntax bits controlling the behavior of the lexical analyzer.  */
+  reg_syntax_t syntax_bits;
+  bool syntax_bits_set;
+
+  /* Flag for case-folding letters into sets.  */
+  bool case_fold;
+
+  /* End-of-line byte in data.  */
+  unsigned char eolbyte;
+
+  /* Cache of char-context values.  */
+  int sbit[NOTCHAR];
+
+  /* If never_trail[B], the byte B cannot be a non-initial byte in a
+     multibyte character.  */
+  bool never_trail[NOTCHAR];
+
+  /* Set of characters considered letters.  */
+  charclass letters;
+
+  /* Set of characters that are newline.  */
+  charclass newline;
+};
+
+/* Lexical analyzer.  All the dross that deals with the obnoxious
+   GNU Regex syntax bits is located here.  The poor, suffering
+   reader is referred to the GNU Regex documentation for the
+   meaning of the @#%!@#%^!@ syntax bits.  */
+struct lexer_state
+{
+  char const *lexptr;  /* Pointer to next input character.  */
+  size_t lexleft;      /* Number of characters remaining.  */
+  token lasttok;       /* Previous token returned; initially END.  */
+  size_t parens;       /* Count of outstanding left parens.  */
+  int minrep, maxrep;  /* Repeat counts for {m,n}.  */
+
+  /* Wide character representation of the current multibyte character,
+     or WEOF if there was an encoding error.  Used only if
+     MB_CUR_MAX > 1.  */
+  wint_t wctok;
+
+  /* Length of the multibyte representation of wctok.  */
+  int cur_mb_len;
+
+  /* We're separated from beginning or (, | only by zero-width characters.  */
+  bool laststart;
+};
+
+/* Recursive descent parser for regular expressions.  */
+
+struct parser_state
+{
+  token tok;               /* Lookahead token.  */
+  size_t depth;            /* Current depth of a hypothetical stack
+                              holding deferred productions.  This is
+                              used to determine the depth that will be
+                              required of the real stack later on in
+                              dfaanalyze.  */
+};
+
 /* A compiled regular expression.  */
 struct dfa
 {
+  /* Syntax configuration */
+  struct regex_syntax syntax;
+
   /* Fields filled by the scanner.  */
   charclass *charclasses;       /* Array of character sets for CSET tokens.  */
   size_t cindex;                /* Index for adding new charclasses.  */
   size_t calloc;                /* Number of charclasses allocated.  */
 
+  /* Scanner state */
+  struct lexer_state lexstate;
+
+  /* Parser state */
+  struct parser_state parsestate;
+
   /* Fields filled by the parser.  */
   token *tokens;                /* Postfix parse array.  */
   size_t tindex;                /* Index for adding new tokens.  */
@@ -486,7 +557,7 @@ struct dfa
 #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
   SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
 
-static void regexp (void);
+static void regexp (struct dfa *dfa);
 
 /* A table indexed by byte values that contains the corresponding wide
    character (if any) for that byte.  WEOF means the byte is not a
@@ -705,39 +776,6 @@ dfa_charclass_index (struct dfa *d, charclass const s)
   return i;
 }
 
-/* A pointer to the current dfa is kept here during parsing.  */
-static struct dfa *dfa;
-
-/* Find the index of charclass S in the current DFA, or allocate a new one.  */
-static size_t
-charclass_index (charclass const s)
-{
-  return dfa_charclass_index (dfa, s);
-}
-
-/* Syntax bits controlling the behavior of the lexical analyzer.  */
-static reg_syntax_t syntax_bits;
-static bool syntax_bits_set;
-
-/* Flag for case-folding letters into sets.  */
-static bool case_fold;
-
-/* End-of-line byte in data.  */
-static unsigned char eolbyte;
-
-/* Cache of char-context values.  */
-static int sbit[NOTCHAR];
-
-/* If never_trail[B], the byte B cannot be a non-initial byte in a
-   multibyte character.  */
-static bool never_trail[NOTCHAR];
-
-/* Set of characters considered letters.  */
-static charclass letters;
-
-/* Set of characters that are newline.  */
-static charclass newline;
-
 static bool
 unibyte_word_constituent (unsigned char c)
 {
@@ -745,25 +783,29 @@ unibyte_word_constituent (unsigned char c)
 }
 
 static int
-char_context (unsigned char c)
+char_context (struct dfa const *dfa, unsigned char c)
 {
-  if (c == eolbyte)
+  if (c == dfa->syntax.eolbyte)
     return CTX_NEWLINE;
   if (unibyte_word_constituent (c))
     return CTX_LETTER;
   return CTX_NONE;
 }
 
-/* Entry point to set syntax options.  */
-void
-dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+   assume in a multibyte encoding.  */
+static bool using_utf8;
+
+bool
+dfa_using_utf8 (void)
 {
-  int i;
-  syntax_bits_set = true;
-  syntax_bits = bits;
-  case_fold = fold;
-  eolbyte = eol;
+  return using_utf8;
+}
 
+static void
+init_mbrtowc_cache (void)
+{
+  int i;
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
       char c = i;
@@ -771,23 +813,39 @@ dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
       mbstate_t s = { 0 };
       wchar_t wc;
       mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
+    }
+}
+
+/* Entry point to set syntax options.  */
+void
+dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
+{
+  int i;
+  dfa->syntax.syntax_bits_set = true;
+  dfa->syntax.syntax_bits = bits;
+  dfa->syntax.case_fold = fold;
+  dfa->syntax.eolbyte = eol;
 
-      /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit.  */
-      sbit[uc] = char_context (uc);
-      switch (sbit[uc])
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      unsigned char uc = i;
+
+      /* Use mbrtowc_cache to calculate sbit.  */
+      dfa->syntax.sbit[uc] = char_context (dfa, uc);
+      switch (dfa->syntax.sbit[uc])
         {
         case CTX_LETTER:
-          setbit (uc, letters);
+          setbit (uc, dfa->syntax.letters);
           break;
         case CTX_NEWLINE:
-          setbit (uc, newline);
+          setbit (uc, dfa->syntax.newline);
           break;
         }
 
       /* POSIX requires that the five bytes in "\n\r./" (including the
          terminating NUL) cannot occur inside a multibyte character.  */
-      never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
-                         : strchr ("\n\r./", uc) != NULL);
+      dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80
+                                     : strchr ("\n\r./", uc) != NULL);
     }
 }
 
@@ -819,25 +877,21 @@ setbit_case_fold_c (int b, charclass c)
       setbit (i, c);
 }
 
+static void check_utf8 (void)
+{
+  wchar_t wc;
+  mbstate_t mbs = { 0 };
+  using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
 
+static bool unibyte_c;
 
-/* UTF-8 encoding allows some optimizations that we can't otherwise
-   assume in a multibyte encoding.  */
-bool
-using_utf8 (void)
+static void check_unibyte_c (void)
 {
-  static int utf8 = -1;
-  if (utf8 < 0)
-    {
-      wchar_t wc;
-      mbstate_t mbs = { 0 };
-      utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
-#ifdef LIBC_IS_BORKED
-      if (gawk_mb_cur_max == 1)
-         utf8 = 0;
-#endif
-    }
-  return utf8;
+  char const *locale = setlocale (LC_ALL, NULL);
+  unibyte_c = (!locale
+               || STREQ (locale, "C")
+               || STREQ (locale, "POSIX"));
 }
 
 /* The current locale is known to be a unibyte locale
@@ -846,7 +900,7 @@ using_utf8 (void)
    processed more efficiently.  */
 
 static bool
-using_simple_locale (void)
+using_simple_locale (struct dfa const *dfa)
 {
   /* The native character set is known to be compatible with
      the C locale.  The following test isn't perfect, but it's good
@@ -864,44 +918,9 @@ using_simple_locale (void)
      && '}' == 125 && '~' == 126)
   };
 
-  if (! native_c_charset || dfa->multibyte)
-    return false;
-  else
-    {
-      static int unibyte_c = -1;
-      if (unibyte_c < 0)
-        {
-          char const *locale = setlocale (LC_ALL, NULL);
-          unibyte_c = (!locale
-                       || STREQ (locale, "C")
-                       || STREQ (locale, "POSIX"));
-        }
-      return unibyte_c;
-    }
+  return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
 }
 
-/* Lexical analyzer.  All the dross that deals with the obnoxious
-   GNU Regex syntax bits is located here.  The poor, suffering
-   reader is referred to the GNU Regex documentation for the
-   meaning of the @#%!@#%^!@ syntax bits.  */
-
-static char const *lexptr;      /* Pointer to next input character.  */
-static size_t lexleft;          /* Number of characters remaining.  */
-static token lasttok;           /* Previous token returned; initially END.  */
-static bool laststart;         /* We're separated from beginning or (,
-                                   | only by zero-width characters.  */
-static size_t parens;           /* Count of outstanding left parens.  */
-static int minrep, maxrep;      /* Repeat counts for {m,n}.  */
-
-static int cur_mb_len = 1;      /* Length of the multibyte representation of
-                                   wctok.  */
-
-static wint_t wctok;           /* Wide character representation of the current
-                                   multibyte character, or WEOF if there was
-                                   an encoding error.  Used only if
-                                   MB_CUR_MAX > 1.  */
-
-
 /* Fetch the next lexical input character.  Set C (of type int) to the
    next input byte, except set C to EOF if the input is a multibyte
    character of length greater than 1.  Set WC (of type wint_t) to the
@@ -909,24 +928,25 @@ static wint_t wctok;              /* Wide character representation of the current
    of length 1); otherwise set WC to WEOF.  If there is no more input,
    report EOFERR if EOFERR is not null, and return lasttok = END
    otherwise.  */
-# define FETCH_WC(c, wc, eoferr)               \
+# define FETCH_WC(dfa, c, wc, eoferr)          \
   do {                                         \
-    if (! lexleft)                             \
+    if (! dfa->lexstate.lexleft)               \
       {                                                \
         if ((eoferr) != 0)                     \
           dfaerror (eoferr);                   \
         else                                   \
-          return lasttok = END;                        \
+          return dfa->lexstate.lasttok = END;  \
       }                                                \
     else                                       \
       {                                                \
         wint_t _wc;                            \
-        size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \
-        cur_mb_len = nbytes;                   \
+        size_t nbytes = mbs_to_wchar (&_wc, dfa->lexstate.lexptr, \
+                                      dfa->lexstate.lexleft, dfa); \
+        dfa->lexstate.cur_mb_len = nbytes;     \
         (wc) = _wc;                            \
-        (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF;    \
-        lexptr += nbytes;                      \
-        lexleft -= nbytes;                     \
+        (c) = nbytes == 1 ? to_uchar (*dfa->lexstate.lexptr) : EOF; \
+        dfa->lexstate.lexptr += nbytes;                \
+        dfa->lexstate.lexleft -= nbytes;       \
       }                                                \
   } while (false)
 
@@ -1023,7 +1043,7 @@ find_pred (const char *str)
 /* Multibyte character handling sub-routine for lex.
    Parse a bracket expression and build a struct mb_char_classes.  */
 static token
-parse_bracket_exp (void)
+parse_bracket_exp (struct dfa *dfa)
 {
   bool invert;
   int c, c1, c2;
@@ -1067,12 +1087,12 @@ parse_bracket_exp (void)
     work_mbc = NULL;
 
   memset (ccl, 0, sizeof ccl);
-  FETCH_WC (c, wc, _("unbalanced ["));
+  FETCH_WC (dfa, c, wc, _("unbalanced ["));
   if (c == '^')
     {
-      FETCH_WC (c, wc, _("unbalanced ["));
+      FETCH_WC (dfa, c, wc, _("unbalanced ["));
       invert = true;
-      known_bracket_exp = using_simple_locale ();
+      known_bracket_exp = using_simple_locale (dfa);
     }
   else
     invert = false;
@@ -1089,9 +1109,9 @@ parse_bracket_exp (void)
          dfa is ever called.  */
       if (c == '[')
         {
-          FETCH_WC (c1, wc1, _("unbalanced ["));
+          FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
 
-          if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
+          if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
               || c1 == '.' || c1 == '=')
             {
               enum { MAX_BRACKET_STRING_LEN = 32 };
@@ -1099,8 +1119,9 @@ parse_bracket_exp (void)
               size_t len = 0;
               for (;;)
                 {
-                  FETCH_WC (c, wc, _("unbalanced ["));
-                  if ((c == c1 && *lexptr == ']') || lexleft == 0)
+                  FETCH_WC (dfa, c, wc, _("unbalanced ["));
+                  if ((c == c1 && *dfa->lexstate.lexptr == ']')
+                      || dfa->lexstate.lexleft == 0)
                     break;
                   if (len < MAX_BRACKET_STRING_LEN)
                     str[len++] = c;
@@ -1111,7 +1132,7 @@ parse_bracket_exp (void)
               str[len] = '\0';
 
               /* Fetch bracket.  */
-              FETCH_WC (c, wc, _("unbalanced ["));
+              FETCH_WC (dfa, c, wc, _("unbalanced ["));
               if (c1 == ':')
                 /* Build character class.  POSIX allows character
                    classes to match multicharacter collating elements,
@@ -1119,8 +1140,9 @@ parse_bracket_exp (void)
                    worry about that possibility.  */
                 {
                   char const *class
-                    = (case_fold && (STREQ (str, "upper")
-                                     || STREQ (str, "lower")) ? "alpha" : str);
+                    = (dfa->syntax.case_fold && (STREQ (str, "upper")
+                                                 || STREQ (str, "lower")) ?
+                                                      "alpha" : str);
                   const struct dfa_ctype *pred = find_pred (class);
                   if (!pred)
                     dfaerror (_("invalid character class"));
@@ -1138,7 +1160,7 @@ parse_bracket_exp (void)
               colon_warning_state |= 8;
 
               /* Fetch new lookahead character.  */
-              FETCH_WC (c1, wc1, _("unbalanced ["));
+              FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
               continue;
             }
 
@@ -1146,21 +1168,21 @@ parse_bracket_exp (void)
              are already set up.  */
         }
 
-      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-        FETCH_WC (c, wc, _("unbalanced ["));
+      if (c == '\\' && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+        FETCH_WC (dfa, c, wc, _("unbalanced ["));
 
       if (c1 == NOTCHAR)
-        FETCH_WC (c1, wc1, _("unbalanced ["));
+        FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
 
       if (c1 == '-')
         /* build range characters.  */
         {
-          FETCH_WC (c2, wc2, _("unbalanced ["));
+          FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
 
           /* A bracket expression like [a-[.aa.]] matches an unknown set.
              Treat it like [-a[.aa.]] while parsing it, and
              remember that the set is unknown.  */
-          if (c2 == '[' && *lexptr == '.')
+          if (c2 == '[' && *dfa->lexstate.lexptr == '.')
             {
               known_bracket_exp = false;
               c2 = ']';
@@ -1170,28 +1192,29 @@ parse_bracket_exp (void)
             {
               /* In the case [x-], the - is an ordinary hyphen,
                  which is left in c1, the lookahead character.  */
-              lexptr -= cur_mb_len;
-              lexleft += cur_mb_len;
+              dfa->lexstate.lexptr -= dfa->lexstate.cur_mb_len;
+              dfa->lexstate.lexleft += dfa->lexstate.cur_mb_len;
             }
           else
             {
-              if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-                FETCH_WC (c2, wc2, _("unbalanced ["));
+              if (c2 == '\\' && (dfa->syntax.syntax_bits
+                                 & RE_BACKSLASH_ESCAPE_IN_LISTS))
+                FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
 
               colon_warning_state |= 8;
-              FETCH_WC (c1, wc1, _("unbalanced ["));
+              FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
 
               /* Treat [x-y] as a range if x != y.  */
               if (wc != wc2 || wc == WEOF)
                 {
                   if (dfa->multibyte)
                     known_bracket_exp = false;
-                  else if (using_simple_locale ())
+                  else if (using_simple_locale (dfa))
                     {
                       int ci;
                       for (ci = c; ci <= c2; ci++)
                         setbit (ci, ccl);
-                      if (case_fold)
+                      if (dfa->syntax.case_fold)
                         {
                           int uc = toupper (c);
                           int uc2 = toupper (c2);
@@ -1215,7 +1238,7 @@ parse_bracket_exp (void)
 
       if (!dfa->multibyte)
         {
-          if (case_fold)
+          if (dfa->syntax.case_fold)
             setbit_case_fold_c (c, ccl);
           else
             setbit (c, ccl);
@@ -1228,7 +1251,7 @@ parse_bracket_exp (void)
         {
           wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
           unsigned int i;
-          unsigned int n = (case_fold
+          unsigned int n = (dfa->syntax.case_fold
                             ? case_folded_counterparts (wc, folded + 1) + 1
                             : 1);
           folded[0] = wc;
@@ -1253,7 +1276,7 @@ parse_bracket_exp (void)
   if (dfa->multibyte)
     {
       work_mbc->invert = invert;
-      work_mbc->cset = emptyset (ccl) ? -1 : charclass_index (ccl);
+      work_mbc->cset = emptyset (ccl) ? -1 : dfa_charclass_index (dfa, ccl);
       return MBCSET;
     }
 
@@ -1261,29 +1284,29 @@ parse_bracket_exp (void)
     {
       assert (!dfa->multibyte);
       notset (ccl);
-      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+      if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
         clrbit ('\n', ccl);
     }
 
-  return CSET + charclass_index (ccl);
+  return CSET + dfa_charclass_index (dfa, ccl);
 }
 
 #define PUSH_LEX_STATE(s)                      \
   do                                           \
     {                                          \
-      char const *lexptr_saved = lexptr;       \
-      size_t lexleft_saved = lexleft;          \
-      lexptr = (s);                            \
-      lexleft = strlen (lexptr)
+      char const *lexptr_saved = dfa->lexstate.lexptr; \
+      size_t lexleft_saved = dfa->lexstate.lexleft;            \
+      dfa->lexstate.lexptr = (s);                              \
+      dfa->lexstate.lexleft = strlen (dfa->lexstate.lexptr)
 
 #define POP_LEX_STATE()                                \
-      lexptr = lexptr_saved;                   \
-      lexleft = lexleft_saved;                 \
+      dfa->lexstate.lexptr = lexptr_saved;                     \
+      dfa->lexstate.lexleft = lexleft_saved;                   \
     }                                          \
   while (false)
 
 static token
-lex (void)
+lex (struct dfa *dfa)
 {
   int c, c2;
   bool backslash = false;
@@ -1298,14 +1321,14 @@ lex (void)
      "if (backslash) ...".  */
   for (i = 0; i < 2; ++i)
     {
-      FETCH_WC (c, wctok, NULL);
+      FETCH_WC (dfa, c, dfa->lexstate.wctok, NULL);
 
       switch (c)
         {
         case '\\':
           if (backslash)
             goto normal_char;
-          if (lexleft == 0)
+          if (dfa->lexstate.lexleft == 0)
             dfaerror (_("unfinished \\ escape"));
           backslash = true;
           break;
@@ -1313,25 +1336,29 @@ lex (void)
         case '^':
           if (backslash)
             goto normal_char;
-          if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || lasttok == END || lasttok == LPAREN || lasttok == OR)
-            return lasttok = BEGLINE;
+          if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+              || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN
+              || dfa->lexstate.lasttok == OR)
+            return dfa->lexstate.lasttok = BEGLINE;
           goto normal_char;
 
         case '$':
           if (backslash)
             goto normal_char;
-          if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || lexleft == 0
-              || (syntax_bits & RE_NO_BK_PARENS
-                  ? lexleft > 0 && *lexptr == ')'
-                  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
-              || (syntax_bits & RE_NO_BK_VBAR
-                  ? lexleft > 0 && *lexptr == '|'
-                  : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
-              || ((syntax_bits & RE_NEWLINE_ALT)
-                  && lexleft > 0 && *lexptr == '\n'))
-            return lasttok = ENDLINE;
+          if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+              || dfa->lexstate.lexleft == 0
+              || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
+                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')'
+                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
+                    && dfa->lexstate.lexptr[1] == ')')
+              || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
+                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|'
+                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
+                    && dfa->lexstate.lexptr[1] == '|')
+              || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
+                  && dfa->lexstate.lexleft > 0
+                  && *dfa->lexstate.lexptr == '\n'))
+            return dfa->lexstate.lasttok = ENDLINE;
           goto normal_char;
 
         case '1':
@@ -1343,74 +1370,84 @@ lex (void)
         case '7':
         case '8':
         case '9':
-          if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
             {
-              laststart = false;
-              return lasttok = BACKREF;
+              dfa->lexstate.laststart = false;
+              return dfa->lexstate.lasttok = BACKREF;
             }
           goto normal_char;
 
         case '`':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            {
+              /* FIXME: should be beginning of string */
+              return dfa->lexstate.lasttok = BEGLINE;
+            }
           goto normal_char;
 
         case '\'':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = ENDLINE;   /* FIXME: should be end of string */
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            {
+              /* FIXME: should be end of string */
+              return dfa->lexstate.lasttok = ENDLINE;
+            }
           goto normal_char;
 
         case '<':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = BEGWORD;
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            return dfa->lexstate.lasttok = BEGWORD;
           goto normal_char;
 
         case '>':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = ENDWORD;
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            return dfa->lexstate.lasttok = ENDWORD;
           goto normal_char;
 
         case 'b':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = LIMWORD;
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            return dfa->lexstate.lasttok = LIMWORD;
           goto normal_char;
 
         case 'B':
-          if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
-            return lasttok = NOTLIMWORD;
+          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+            return dfa->lexstate.lasttok = NOTLIMWORD;
           goto normal_char;
 
         case '?':
-          if (syntax_bits & RE_LIMITED_OPS)
+          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
             goto normal_char;
-          if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
-          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+              && dfa->lexstate.laststart)
             goto normal_char;
-          return lasttok = QMARK;
+          return dfa->lexstate.lasttok = QMARK;
 
         case '*':
           if (backslash)
             goto normal_char;
-          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+              && dfa->lexstate.laststart)
             goto normal_char;
-          return lasttok = STAR;
+          return dfa->lexstate.lasttok = STAR;
 
         case '+':
-          if (syntax_bits & RE_LIMITED_OPS)
+          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
             goto normal_char;
-          if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
-          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+              && dfa->lexstate.laststart)
             goto normal_char;
-          return lasttok = PLUS;
+          return dfa->lexstate.lasttok = PLUS;
 
         case '{':
-          if (!(syntax_bits & RE_INTERVALS))
+          if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
             goto normal_char;
-          if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
             goto normal_char;
-          if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+          if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+              && dfa->lexstate.laststart)
             goto normal_char;
 
           /* Cases:
@@ -1420,79 +1457,86 @@ lex (void)
              {,} - 0 to infinity (same as '*')
              {M,N} - M through N */
           {
-            char const *p = lexptr;
-            char const *lim = p + lexleft;
-            minrep = maxrep = -1;
+            char const *p = dfa->lexstate.lexptr;
+            char const *lim = p + dfa->lexstate.lexleft;
+            dfa->lexstate.minrep = dfa->lexstate.maxrep = -1;
             for (; p != lim && ISASCIIDIGIT (*p); p++)
               {
-                if (minrep < 0)
-                  minrep = *p - '0';
+                if (dfa->lexstate.minrep < 0)
+                  dfa->lexstate.minrep = *p - '0';
                 else
-                  minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0');
+                  dfa->lexstate.minrep = MIN (RE_DUP_MAX + 1,
+                                              (dfa->lexstate.minrep
+                                               * 10 + *p - '0'));
               }
             if (p != lim)
               {
                 if (*p != ',')
-                  maxrep = minrep;
+                  dfa->lexstate.maxrep = dfa->lexstate.minrep;
                 else
                   {
-                    if (minrep < 0)
-                      minrep = 0;
+                    if (dfa->lexstate.minrep < 0)
+                      dfa->lexstate.minrep = 0;
                     while (++p != lim && ISASCIIDIGIT (*p))
                       {
-                        if (maxrep < 0)
-                          maxrep = *p - '0';
+                        if (dfa->lexstate.maxrep < 0)
+                          dfa->lexstate.maxrep = *p - '0';
                         else
-                          maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0');
+                          dfa->lexstate.maxrep = MIN (RE_DUP_MAX + 1,
+                                                      (dfa->lexstate.maxrep
+                                                       * 10 + *p - '0'));
                       }
                   }
               }
             if (! ((! backslash || (p != lim && *p++ == '\\'))
                    && p != lim && *p++ == '}'
-                   && 0 <= minrep && (maxrep < 0 || minrep <= maxrep)))
+                   && 0 <= dfa->lexstate.minrep
+                   && (dfa->lexstate.maxrep < 0
+                       || dfa->lexstate.minrep <= dfa->lexstate.maxrep)))
               {
-                if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+                if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
                   goto normal_char;
                 dfaerror (_("invalid content of \\{\\}"));
               }
-            if (RE_DUP_MAX < maxrep)
+            if (RE_DUP_MAX < dfa->lexstate.maxrep)
               dfaerror (_("regular expression too big"));
-            lexptr = p;
-            lexleft = lim - p;
+            dfa->lexstate.lexptr = p;
+            dfa->lexstate.lexleft = lim - p;
           }
-          laststart = false;
-          return lasttok = REPMN;
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok = REPMN;
 
         case '|':
-          if (syntax_bits & RE_LIMITED_OPS)
+          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
             goto normal_char;
-          if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
             goto normal_char;
-          laststart = true;
-          return lasttok = OR;
+          dfa->lexstate.laststart = true;
+          return dfa->lexstate.lasttok = OR;
 
         case '\n':
-          if (syntax_bits & RE_LIMITED_OPS
-              || backslash || !(syntax_bits & RE_NEWLINE_ALT))
+          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
+              || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
             goto normal_char;
-          laststart = true;
-          return lasttok = OR;
+          dfa->lexstate.laststart = true;
+          return dfa->lexstate.lasttok = OR;
 
         case '(':
-          if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          ++parens;
-          laststart = true;
-          return lasttok = LPAREN;
+          ++dfa->lexstate.parens;
+          dfa->lexstate.laststart = true;
+          return dfa->lexstate.lasttok = LPAREN;
 
         case ')':
-          if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+          if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+          if (dfa->lexstate.parens == 0
+              && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
             goto normal_char;
-          --parens;
-          laststart = false;
-          return lasttok = RPAREN;
+          --dfa->lexstate.parens;
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok = RPAREN;
 
         case '.':
           if (backslash)
@@ -1501,21 +1545,21 @@ lex (void)
             {
               /* In multibyte environment period must match with a single
                  character not a byte.  So we use ANYCHAR.  */
-              laststart = false;
-              return lasttok = ANYCHAR;
+              dfa->lexstate.laststart = false;
+              return dfa->lexstate.lasttok = ANYCHAR;
             }
           zeroset (ccl);
           notset (ccl);
-          if (!(syntax_bits & RE_DOT_NEWLINE))
+          if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
             clrbit ('\n', ccl);
-          if (syntax_bits & RE_DOT_NOT_NULL)
+          if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
             clrbit ('\0', ccl);
-          laststart = false;
-          return lasttok = CSET + charclass_index (ccl);
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl);
 
         case 's':
         case 'S':
-          if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             goto normal_char;
           if (!dfa->multibyte)
             {
@@ -1525,8 +1569,9 @@ lex (void)
                   setbit (c2, ccl);
               if (c == 'S')
                 notset (ccl);
-              laststart = false;
-              return lasttok = CSET + charclass_index (ccl);
+              dfa->lexstate.laststart = false;
+              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+                                                                         ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1537,16 +1582,16 @@ lex (void)
              strings, each minus its "already processed" '['.  */
           PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
 
-          lasttok = parse_bracket_exp ();
+          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
 
           POP_LEX_STATE ();
 
-          laststart = false;
-          return lasttok;
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok;
 
         case 'w':
         case 'W':
-          if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             goto normal_char;
 
           if (!dfa->multibyte)
@@ -1557,8 +1602,9 @@ lex (void)
                   setbit (c2, ccl);
               if (c == 'W')
                 notset (ccl);
-              laststart = false;
-              return lasttok = CSET + charclass_index (ccl);
+              dfa->lexstate.laststart = false;
+              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+                                                                         ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1569,35 +1615,36 @@ lex (void)
              strings, each minus its "already processed" '['.  */
           PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
 
-          lasttok = parse_bracket_exp ();
+          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
 
           POP_LEX_STATE ();
 
-          laststart = false;
-          return lasttok;
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok;
 
         case '[':
           if (backslash)
             goto normal_char;
-          laststart = false;
-          return lasttok = parse_bracket_exp ();
+          dfa->lexstate.laststart = false;
+          return dfa->lexstate.lasttok = parse_bracket_exp (dfa);
 
         default:
         normal_char:
-          laststart = false;
+          dfa->lexstate.laststart = false;
           /* For multibyte character sets, folding is done in atom.  Always
              return WCHAR.  */
           if (dfa->multibyte)
-            return lasttok = WCHAR;
+            return dfa->lexstate.lasttok = WCHAR;
 
-          if (case_fold && isalpha (c))
+          if (dfa->syntax.case_fold && isalpha (c))
             {
               zeroset (ccl);
               setbit_case_fold_c (c, ccl);
-              return lasttok = CSET + charclass_index (ccl);
+              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+                                                                         ccl);
             }
 
-          return lasttok = c;
+          return dfa->lexstate.lasttok = c;
         }
     }
 
@@ -1607,17 +1654,8 @@ lex (void)
   return END;                   /* keeps pedantic compilers happy.  */
 }
 
-/* Recursive descent parser for regular expressions.  */
-
-static token tok;               /* Lookahead token.  */
-static size_t depth;            /* Current depth of a hypothetical stack
-                                   holding deferred productions.  This is
-                                   used to determine the depth that will be
-                                   required of the real stack later on in
-                                   dfaanalyze.  */
-
 static void
-addtok_mb (token t, int mbprop)
+addtok_mb (struct dfa *dfa, token t, int mbprop)
 {
   if (dfa->talloc == dfa->tindex)
     {
@@ -1640,7 +1678,7 @@ addtok_mb (token t, int mbprop)
 
     case CAT:
     case OR:
-      --depth;
+      --dfa->parsestate.depth;
       break;
 
     case BACKREF:
@@ -1650,19 +1688,19 @@ addtok_mb (token t, int mbprop)
       ++dfa->nleaves;
       /* fallthrough */
     case EMPTY:
-      ++depth;
+      ++dfa->parsestate.depth;
       break;
     }
-  if (depth > dfa->depth)
-    dfa->depth = depth;
+  if (dfa->parsestate.depth > dfa->depth)
+    dfa->depth = dfa->parsestate.depth;
 }
 
-static void addtok_wc (wint_t wc);
+static void addtok_wc (struct dfa *dfa, wint_t wc);
 
 /* Add the given token to the parse tree, maintaining the depth count and
    updating the maximum depth if necessary.  */
 static void
-addtok (token t)
+addtok (struct dfa *dfa, token t)
 {
   if (dfa->multibyte && t == MBCSET)
     {
@@ -1674,9 +1712,9 @@ addtok (token t)
          This does not require UTF-8.  */
       for (i = 0; i < work_mbc->nchars; i++)
         {
-          addtok_wc (work_mbc->chars[i]);
+          addtok_wc (dfa, work_mbc->chars[i]);
           if (need_or)
-            addtok (OR);
+            addtok (dfa, OR);
           need_or = true;
         }
       work_mbc->nchars = 0;
@@ -1685,14 +1723,14 @@ addtok (token t)
          that the mbcset is empty now.  Do nothing in that case.  */
       if (work_mbc->cset != -1)
         {
-          addtok (CSET + work_mbc->cset);
+          addtok (dfa, CSET + work_mbc->cset);
           if (need_or)
-            addtok (OR);
+            addtok (dfa, OR);
         }
     }
   else
     {
-      addtok_mb (t, 3);
+      addtok_mb (dfa, t, 3);
     }
 }
 
@@ -1703,7 +1741,7 @@ addtok (token t)
    <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
    <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */
 static void
-addtok_wc (wint_t wc)
+addtok_wc (struct dfa *dfa, wint_t wc)
 {
   unsigned char buf[MB_LEN_MAX];
   mbstate_t s = { 0 };
@@ -1711,25 +1749,25 @@ addtok_wc (wint_t wc)
   size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
 
   if (stored_bytes != (size_t) -1)
-    cur_mb_len = stored_bytes;
+    dfa->lexstate.cur_mb_len = stored_bytes;
   else
     {
       /* This is merely stop-gap.  buf[0] is undefined, yet skipping
          the addtok_mb call altogether can corrupt the heap.  */
-      cur_mb_len = 1;
+      dfa->lexstate.cur_mb_len = 1;
       buf[0] = 0;
     }
 
-  addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
-  for (i = 1; i < cur_mb_len; i++)
+  addtok_mb (dfa, buf[0], dfa->lexstate.cur_mb_len == 1 ? 3 : 1);
+  for (i = 1; i < dfa->lexstate.cur_mb_len; i++)
     {
-      addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0);
-      addtok (CAT);
+      addtok_mb (dfa, buf[i], i == dfa->lexstate.cur_mb_len - 1 ? 2 : 0);
+      addtok (dfa, CAT);
     }
 }
 
 static void
-add_utf8_anychar (void)
+add_utf8_anychar (struct dfa *dfa)
 {
   static charclass const utf8_classes[5] = {
     /* 80-bf: non-leading bytes.  */
@@ -1758,12 +1796,12 @@ add_utf8_anychar (void)
         copyset (utf8_classes[i], c);
         if (i == 1)
           {
-            if (!(syntax_bits & RE_DOT_NEWLINE))
+            if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
               clrbit ('\n', c);
-            if (syntax_bits & RE_DOT_NOT_NULL)
+            if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
               clrbit ('\0', c);
           }
-        dfa->utf8_anychar_classes[i] = CSET + charclass_index (c);
+        dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c);
       }
 
   /* A valid UTF-8 character is
@@ -1777,12 +1815,12 @@ add_utf8_anychar (void)
      and you get "B|(C|(D|EA)A)A".  And since the token buffer is in reverse
      Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR".  */
   for (i = 1; i < n; i++)
-    addtok (dfa->utf8_anychar_classes[i]);
+    addtok (dfa, dfa->utf8_anychar_classes[i]);
   while (--i > 1)
     {
-      addtok (dfa->utf8_anychar_classes[0]);
-      addtok (CAT);
-      addtok (OR);
+      addtok (dfa, dfa->utf8_anychar_classes[0]);
+      addtok (dfa, CAT);
+      addtok (dfa, OR);
     }
 }
 
@@ -1822,31 +1860,32 @@ add_utf8_anychar (void)
    The parser builds a parse tree in postfix form in an array of tokens.  */
 
 static void
-atom (void)
+atom (struct dfa *dfa)
 {
-  if (tok == WCHAR)
+  if (dfa->parsestate.tok == WCHAR)
     {
-      if (wctok == WEOF)
-        addtok (BACKREF);
+      if (dfa->lexstate.wctok == WEOF)
+        addtok (dfa, BACKREF);
       else
         {
-          addtok_wc (wctok);
+          addtok_wc (dfa, dfa->lexstate.wctok);
 
-          if (case_fold)
+          if (dfa->syntax.case_fold)
             {
               wchar_t folded[CASE_FOLDED_BUFSIZE];
-              unsigned int i, n = case_folded_counterparts (wctok, folded);
+              unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok,
+                                                            folded);
               for (i = 0; i < n; i++)
                 {
-                  addtok_wc (folded[i]);
-                  addtok (OR);
+                  addtok_wc (dfa, folded[i]);
+                  addtok (dfa, OR);
                 }
             }
         }
 
-      tok = lex ();
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if (tok == ANYCHAR && using_utf8 ())
+  else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -1855,32 +1894,35 @@ atom (void)
          it is done above in add_utf8_anychar.  So, let's start with
          UTF-8: it is the most used, and the structure of the encoding
          makes the correctness more obvious.  */
-      add_utf8_anychar ();
-      tok = lex ();
+      add_utf8_anychar (dfa);
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
-           || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
-           || tok == ANYCHAR || tok == MBCSET
-           || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+  else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
+           || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
+           || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
+           || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
+           || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
+           || dfa->parsestate.tok == LIMWORD
+           || dfa->parsestate.tok == NOTLIMWORD)
     {
-      addtok (tok);
-      tok = lex ();
+      addtok (dfa, dfa->parsestate.tok);
+      dfa->parsestate.tok = lex (dfa);
     }
-  else if (tok == LPAREN)
+  else if (dfa->parsestate.tok == LPAREN)
     {
-      tok = lex ();
-      regexp ();
-      if (tok != RPAREN)
+      dfa->parsestate.tok = lex (dfa);
+      regexp (dfa);
+      if (dfa->parsestate.tok != RPAREN)
         dfaerror (_("unbalanced ("));
-      tok = lex ();
+      dfa->parsestate.tok = lex (dfa);
     }
   else
-    addtok (EMPTY);
+    addtok (dfa, EMPTY);
 }
 
 /* Return the number of tokens in the given subexpression.  */
 static size_t _GL_ATTRIBUTE_PURE
-nsubtoks (size_t tindex)
+nsubtoks (struct dfa const *dfa, size_t tindex)
 {
   size_t ntoks1;
 
@@ -1891,90 +1933,93 @@ nsubtoks (size_t tindex)
     case QMARK:
     case STAR:
     case PLUS:
-      return 1 + nsubtoks (tindex - 1);
+      return 1 + nsubtoks (dfa, tindex - 1);
     case CAT:
     case OR:
-      ntoks1 = nsubtoks (tindex - 1);
-      return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1);
+      ntoks1 = nsubtoks (dfa, tindex - 1);
+      return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
     }
 }
 
 /* Copy the given subexpression to the top of the tree.  */
 static void
-copytoks (size_t tindex, size_t ntokens)
+copytoks (struct dfa *dfa, size_t tindex, size_t ntokens)
 {
   size_t i;
 
   if (dfa->multibyte)
     for (i = 0; i < ntokens; ++i)
-      addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
+      addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
   else
     for (i = 0; i < ntokens; ++i)
-      addtok_mb (dfa->tokens[tindex + i], 3);
+      addtok_mb (dfa, dfa->tokens[tindex + i], 3);
 }
 
 static void
-closure (void)
+closure (struct dfa *dfa)
 {
   int i;
   size_t tindex, ntokens;
 
-  atom ();
-  while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
-    if (tok == REPMN && (minrep || maxrep))
+  atom (dfa);
+  while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
+         || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
+    if (dfa->parsestate.tok == REPMN
+        && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
       {
-        ntokens = nsubtoks (dfa->tindex);
+        ntokens = nsubtoks (dfa, dfa->tindex);
         tindex = dfa->tindex - ntokens;
-        if (maxrep < 0)
-          addtok (PLUS);
-        if (minrep == 0)
-          addtok (QMARK);
-        for (i = 1; i < minrep; ++i)
+        if (dfa->lexstate.maxrep < 0)
+          addtok (dfa, PLUS);
+        if (dfa->lexstate.minrep == 0)
+          addtok (dfa, QMARK);
+        for (i = 1; i < dfa->lexstate.minrep; ++i)
           {
-            copytoks (tindex, ntokens);
-            addtok (CAT);
+            copytoks (dfa, tindex, ntokens);
+            addtok (dfa, CAT);
           }
-        for (; i < maxrep; ++i)
+        for (; i < dfa->lexstate.maxrep; ++i)
           {
-            copytoks (tindex, ntokens);
-            addtok (QMARK);
-            addtok (CAT);
+            copytoks (dfa, tindex, ntokens);
+            addtok (dfa, QMARK);
+            addtok (dfa, CAT);
           }
-        tok = lex ();
+        dfa->parsestate.tok = lex (dfa);
       }
-    else if (tok == REPMN)
+    else if (dfa->parsestate.tok == REPMN)
       {
-        dfa->tindex -= nsubtoks (dfa->tindex);
-        tok = lex ();
-        closure ();
+        dfa->tindex -= nsubtoks (dfa, dfa->tindex);
+        dfa->parsestate.tok = lex (dfa);
+        closure (dfa);
       }
     else
       {
-        addtok (tok);
-        tok = lex ();
+        addtok (dfa, dfa->parsestate.tok);
+        dfa->parsestate.tok = lex (dfa);
       }
 }
 
 static void
-branch (void)
+branch (struct dfa* dfa)
 {
-  closure ();
-  while (tok != RPAREN && tok != OR && tok >= 0)
+  closure (dfa);
+  while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
+         && dfa->parsestate.tok >= 0)
     {
-      closure ();
-      addtok (CAT);
+      closure (dfa);
+      addtok (dfa, CAT);
     }
 }
 
 static void
-regexp (void)
+regexp (struct dfa *dfa)
 {
-  branch ();
-  while (tok == OR)
+  branch (dfa);
+  while (dfa->parsestate.tok == OR)
     {
-      tok = lex ();
-      branch ();
-      addtok (OR);
+      dfa->parsestate.tok = lex (dfa);
+      branch (dfa);
+      addtok (dfa, OR);
     }
 }
 
@@ -1984,34 +2029,33 @@ regexp (void)
 static void
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
-  dfa = d;
-  lexptr = s;
-  lexleft = len;
-  lasttok = END;
-  laststart = true;
-  parens = 0;
-  if (dfa->multibyte)
+  d->lexstate.lexptr = s;
+  d->lexstate.lexleft = len;
+  d->lexstate.lasttok = END;
+  d->lexstate.laststart = true;
+  d->lexstate.parens = 0;
+  if (d->multibyte)
     {
-      cur_mb_len = 0;
+      d->lexstate.cur_mb_len = 0;
       memset (&d->mbs, 0, sizeof d->mbs);
     }
 
-  if (!syntax_bits_set)
+  if (!d->syntax.syntax_bits_set)
     dfaerror (_("no syntax specified"));
 
-  tok = lex ();
-  depth = d->depth;
+  d->parsestate.tok = lex (d);
+  d->parsestate.depth = d->depth;
 
-  regexp ();
+  regexp (d);
 
-  if (tok != END)
+  if (d->parsestate.tok != END)
     dfaerror (_("unbalanced )"));
 
-  addtok (END - d->nregexps);
-  addtok (CAT);
+  addtok (d, END - d->nregexps);
+  addtok (d, CAT);
 
   if (d->nregexps)
-    addtok (OR);
+    addtok (d, OR);
 
   ++d->nregexps;
 }
@@ -2282,19 +2326,19 @@ epsclosure (position_set *s, struct dfa const *d, char *visited)
    character included in C.  */
 
 static int
-charclass_context (charclass c)
+charclass_context (struct dfa const *dfa, charclass c)
 {
   int context = 0;
   unsigned int j;
 
-  if (tstbit (eolbyte, c))
+  if (tstbit (dfa->syntax.eolbyte, c))
     context |= CTX_NEWLINE;
 
   for (j = 0; j < CHARCLASS_WORDS; ++j)
     {
-      if (c[j] & letters[j])
+      if (c[j] & dfa->syntax.letters[j])
         context |= CTX_LETTER;
-      if (c[j] & ~(letters[j] | newline[j]))
+      if (c[j] & ~(dfa->syntax.letters[j] | dfa->syntax.newline[j]))
         context |= CTX_NONE;
     }
 
@@ -2689,15 +2733,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
           if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
                                     d->states[s].context, CTX_NEWLINE))
             for (j = 0; j < CHARCLASS_WORDS; ++j)
-              matches[j] &= ~newline[j];
+              matches[j] &= ~d->syntax.newline[j];
           if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
                                     d->states[s].context, CTX_LETTER))
             for (j = 0; j < CHARCLASS_WORDS; ++j)
-              matches[j] &= ~letters[j];
+              matches[j] &= ~d->syntax.letters[j];
           if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
                                     d->states[s].context, CTX_NONE))
             for (j = 0; j < CHARCLASS_WORDS; ++j)
-              matches[j] &= letters[j] | newline[j];
+              matches[j] &= d->syntax.letters[j] | d->syntax.newline[j];
 
           /* If there are no characters left, there's no point in going on.  */
           for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j)
@@ -2803,7 +2847,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
 
       for (i = 0; i < NOTCHAR; ++i)
         trans[i] = unibyte_word_constituent (i) ? state_letter : state;
-      trans[eolbyte] = state_newline;
+      trans[d->syntax.eolbyte] = state_newline;
     }
   else
     for (i = 0; i < NOTCHAR; ++i)
@@ -2859,7 +2903,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         }
 
       /* Find out if the new state will want any context information.  */
-      possible_contexts = charclass_context (labels[i]);
+      possible_contexts = charclass_context (d, labels[i]);
       separate_contexts = state_separate_contexts (&follows);
 
       /* Find the state(s) corresponding to the union of the follows.  */
@@ -2906,7 +2950,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
             {
               int c = j * CHARCLASS_WORD_BITS + k;
 
-              if (c == eolbyte)
+              if (c == d->syntax.eolbyte)
                 trans[c] = state_newline;
               else if (unibyte_word_constituent (c))
                 trans[c] = state_letter;
@@ -3032,8 +3076,8 @@ build_state (state_num s, struct dfa *d)
 
   /* Keep the newline transition in a special place so we can use it as
      a sentinel.  */
-  d->newlines[s] = trans[eolbyte];
-  trans[eolbyte] = -1;
+  d->newlines[s] = trans[d->syntax.eolbyte];
+  trans[d->syntax.eolbyte] = -1;
 
   if (ACCEPTING (s, *d))
     d->fails[s] = trans;
@@ -3052,7 +3096,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
 {
   state_num *t;
 
-  if (**pp == eolbyte)
+  if (**pp == d->syntax.eolbyte)
     {
       /* S is always an initial state in transit_state, so the
          transition table for the state must have been built already.  */
@@ -3095,7 +3139,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
   size_t i, j;
 
   int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
-  int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE;
+  int context = wc == d->syntax.eolbyte ? CTX_NEWLINE : CTX_NONE;
   bool context_newline = context == CTX_NEWLINE;
 
   /* This state has some operators which can match a multibyte character.  */
@@ -3213,7 +3257,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
                  unsigned char const *mbp, char const *end, wint_t *wcp)
 {
   wint_t wc = WEOF;
-  if (never_trail[*p])
+  if (d->syntax.never_trail[*p])
     return p;
   while (mbp < p)
     mbp += mbs_to_wchar (&wc, (char const *) mbp,
@@ -3251,7 +3295,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
   unsigned char const *p, *mbp; /* Current input character.  */
   state_num **trans, *t;        /* Copy of d->trans so it can be optimized
                                    into a register.  */
-  unsigned char eol = eolbyte;  /* Likewise for eolbyte.  */
+  unsigned char eol = d->syntax.eolbyte;  /* Likewise for eolbyte.  */
   unsigned char saved_end;
   size_t nlcount = 0;
 
@@ -3318,8 +3362,8 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
                 }
 
               if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl)
-                  || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
-                  || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+                  || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+                  || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
                   || (char *) p >= end)
                 {
                   /* If an input character does not match ANYCHAR, do it
@@ -3382,14 +3426,14 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
         }
       else if (d->fails[s])
         {
-          if (d->success[s] & sbit[*p])
+          if (d->success[s] & d->syntax.sbit[*p])
             goto done;
 
           s1 = s;
           if (!multibyte || d->states[s].mbps.nelem == 0
               || (*p == eol && !allow_nl)
-              || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
-              || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+              || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+              || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
               || (char *) p >= end)
             {
               /* If a input character does not match ANYCHAR, do it
@@ -3491,17 +3535,6 @@ free_mbdata (struct dfa *d)
     }
 }
 
-/* Initialize the components of a dfa that the other routines don't
-   initialize for themselves.  */
-static void
-dfainit (struct dfa *d)
-{
-  memset (d, 0, sizeof *d);
-  d->multibyte = MB_CUR_MAX > 1;
-  d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
-  d->fast = !d->multibyte;
-}
-
 /* Return true if every construct in D is supported by this DFA matcher.  */
 static bool _GL_ATTRIBUTE_PURE
 dfa_supported (struct dfa const *d)
@@ -3533,7 +3566,7 @@ dfaoptimize (struct dfa *d)
   size_t i;
   bool have_backref = false;
 
-  if (!using_utf8 ())
+  if (!using_utf8)
     return;
 
   for (i = 0; i < d->tindex; ++i)
@@ -3652,7 +3685,6 @@ dfassbuild (struct dfa *d)
 void
 dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
 {
-  dfainit (d);
   dfaparse (s, len, d);
   dfassbuild (d);
 
@@ -3969,7 +4001,7 @@ dfamust (struct dfa const *d)
   size_t rj;
   bool need_begline = false;
   bool need_endline = false;
-  bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1;
+  bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
   struct dfamust *dm;
 
   for (ri = 0; ri < d->tindex; ++ri)
@@ -4206,7 +4238,20 @@ dfamustfree (struct dfamust *dm)
 struct dfa *
 dfaalloc (void)
 {
-  return xmalloc (sizeof (struct dfa));
+  struct dfa *d = xcalloc (1, sizeof (struct dfa));
+  d->multibyte = MB_CUR_MAX > 1;
+  d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
+  d->fast = !d->multibyte;
+  d->lexstate.cur_mb_len = 1;
+  return d;
+}
+
+void
+dfa_init (void)
+{
+  check_utf8 ();
+  check_unibyte_c ();
+  init_mbrtowc_cache ();
 }
 
 /* vim:set shiftwidth=2: */
diff --git a/dfa.h b/dfa.h
index 242f4cf..02f56f4 100644
--- a/dfa.h
+++ b/dfa.h
@@ -54,10 +54,10 @@ extern struct dfamust *dfamust (struct dfa const *);
 /* Free the storage held by the components of a struct dfamust. */
 extern void dfamustfree (struct dfamust *);
 
-/* dfasyntax() takes three arguments; the first sets the syntax bits described
-   earlier in this file, the second sets the case-folding flag, and the
-   third specifies the line terminator. */
-extern void dfasyntax (reg_syntax_t, bool, unsigned char);
+/* dfasyntax() takes four arguments; the first is the dfa to operate on, the
+   second sets the syntax bits described earlier in this file, the third sets
+   the case-folding flag, and the fourth specifies the line terminator. */
+extern void dfasyntax (struct dfa *, reg_syntax_t, bool, unsigned char);
 
 /* Compile the given string of the given length into the given struct dfa.
    Final argument is a flag specifying whether to build a searching or an
@@ -104,4 +104,7 @@ extern void dfawarn (const char *);
    The user must supply a dfaerror.  */
 extern _Noreturn void dfaerror (const char *);
 
-extern bool using_utf8 (void);
+extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE;
+
+/* This must be called before calling any of the above dfa*() functions. */
+extern void dfa_init (void);
diff --git a/node.c b/node.c
index 24f1493..b63017a 100644
--- a/node.c
+++ b/node.c
@@ -756,7 +756,7 @@ str2wstr(NODE *n, size_t **ptr)
                         * stopping early. This is particularly important
                         * for match() where we need to build the indices.
                         */
-                       if (using_utf8()) {
+                       if (dfa_using_utf8()) {
                                count = 1;
                                wc = 0xFFFD;    /* unicode replacement character */
                                goto set_wc;
diff --git a/re.c b/re.c
index 8a32530..878c884 100644
--- a/re.c
+++ b/re.c
@@ -203,14 +203,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
                syn &= ~RE_ICASE;
        }
 
-       /* only call dfasyntax if we're using dfa; saves time */
-       if (dfa && ! no_dfa) {
-               dfa_syn = syn;
-               /* FIXME: dfa doesn't pay attention RE_ICASE */
-               if (ignorecase)
-                       dfa_syn |= RE_ICASE;
-               dfasyntax(dfa_syn, ignorecase, '\n');
-       }
+       dfa_syn = syn;
+       /* FIXME: dfa doesn't pay attention RE_ICASE */
+       if (ignorecase)
+               dfa_syn |= RE_ICASE;
+
        re_set_syntax(syn);
 
        if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {
@@ -228,6 +225,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
        if (dfa && ! no_dfa) {
                rp->dfa = true;
                rp->dfareg = dfaalloc();
+               dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n');
                dfacomp(buf, len, rp->dfareg, true);
        } else
                rp->dfa = false;
@@ -423,7 +421,8 @@ resetup()
                syn |= RE_INTERVALS | RE_INVALID_INTERVAL_ORD | RE_NO_BK_BRACES;
 
        (void) re_set_syntax(syn);
-       dfasyntax(syn, false, '\n');
+
+       dfa_init();
 }
 
 /* avoid_dfa --- return true if we should not use the DFA matcher */

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog       |   14 +
 Makefile.am     |   15 +-
 Makefile.in     |   15 +-
 configure       |   20 +-
 configure.ac    |    2 +-
 dfa.c           |  859 +++++++++++++++++++++++++++++--------------------------
 dfa.h           |   13 +-
 doc/ChangeLog   |    8 +
 doc/Makefile.am |    5 +
 doc/Makefile.in |    5 +
 doc/awkcard.in  |    6 +-
 doc/gawk.info   |  538 +++++++++++++++++-----------------
 doc/gawk.texi   |   12 +-
 doc/gawktexi.in |   12 +-
 node.c          |    2 +-
 re.c            |   17 +-
 16 files changed, 814 insertions(+), 729 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]