gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5671-g1edcc78c


From: Arnold Robbins
Subject: [SCM] gawk branch, feature/minrx, updated. gawk-4.1.0-5671-g1edcc78c
Date: Sun, 28 Jul 2024 13:54:59 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, feature/minrx has been updated
       via  1edcc78cf777df407fe2f30ca5a28ac756bb2fef (commit)
      from  b9e51f4f82e21b3afd8bf9071474103698fda7cd (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=1edcc78cf777df407fe2f30ca5a28ac756bb2fef

commit 1edcc78cf777df407fe2f30ca5a28ac756bb2fef
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Sun Jul 28 20:54:33 2024 +0300

    Update minrx.h and minrx.cpp.

diff --git a/support/ChangeLog b/support/ChangeLog
index 415829be..77a3a541 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,7 @@
+2024-07-28         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * minrx.h, minrx.cpp: Updated.
+
 2024-07-20         Arnold D. Robbins     <arnold@skeeve.com>
 
        * minrx.h, minrx.cpp: New files.
diff --git a/support/minrx.cpp b/support/minrx.cpp
index ceb4b78b..e970e7e4 100644
--- a/support/minrx.cpp
+++ b/support/minrx.cpp
@@ -246,14 +246,17 @@ struct QVec {
 
 typedef int32_t WChar;                 // because wchar_t may not be 32 bits
 constexpr int32_t WCharMax = 0x10FFFF; // maximum code point: valid for Unicode and (FIXME!) blithely assumed for non-Unicode
-struct WConv {
+class WConv {
+public:
        enum { End = -1 };
+private:
        const char *const bp;
        const char *const ep;
        const char *cp;
        std::mbstate_t mbs;
        WChar wch = End;
        int len = 0;
+public:
        WConv(const WConv &) = default;
         WConv(const char *bp, const char *ep): bp(bp), ep(ep), cp(bp) { std::memset(&mbs, 0, sizeof mbs); }
        auto look() const { return wch; }
@@ -272,6 +275,8 @@ struct WConv {
                }
                return *this;
        }
+       std::size_t off() const { return cp - bp; }
+       auto ptr() const { return cp; }
        auto save() { return std::make_tuple(cp, wch, len); }
         void restore(std::tuple<const char *, WChar, int> t) { std::tie(cp, wch, len) = t; }
 };
@@ -650,21 +655,20 @@ struct Compile {
                                                                return {{}, 0, 
MINRX_REG_ECOLLATE};
                                                } else if (wc == L':') {
                                                        wconv.nextchr();
-                                                       // FIXME: search for 
the matching :] using wconv.nextchr() rather than memchr()
-                                                       const char *colon = 
(const char *) std::memchr(wconv.cp, ':', wconv.ep - wconv.cp);
-                                                       if (colon && wconv.ep - 
colon >= 2 && colon[1] == ']') {
-                                                               auto cclname = 
std::string(wconv.cp, colon);
-                                                               if (cclass(cs, 
cclname)) {
-                                                                       
wconv.cp = colon + 2;
-                                                                       
wconv.len = 0;
-                                                                       wc = 
wconv.nextchr().look();
-                                                                       
continue; // can't be range endpoint
-                                                               } else {
-                                                                       return 
{{}, 0, MINRX_REG_ECTYPE};
-                                                               }
-                                                       } else {
+                                                       auto bp = wconv.ptr();
+                                                       while (wconv.look() != 
WConv::End && wconv.look() != L':')
+                                                               wconv.nextchr();
+                                                       if (wconv.look() != 
L':')
                                                                return {{}, 0, 
MINRX_REG_ECTYPE};
-                                                       }
+                                                       auto ep = wconv.ptr();
+                                                       wconv.nextchr();
+                                                       if (wconv.look() != 
L']')
+                                                               return {{}, 0, 
MINRX_REG_ECTYPE};
+                                                       wc = 
wconv.nextchr().look();
+                                                       auto cclname = 
std::string(bp, ep);
+                                                       if (cclass(cs, cclname))
+                                                               continue;
+                                                       return {{}, 0, 
MINRX_REG_ECTYPE};
                                                } else if (wc == L'=') {
                                                        // FIXME: recognize 
some equivalence classes.
                                                        return {{}, 0, 
MINRX_REG_ECOLLATE};
@@ -864,12 +868,12 @@ std::mutex Compile::cclmutex;
 struct Execute {
        typedef COWVec<std::size_t, (std::size_t) -1> Vec;
        struct NState {
-               const char *bp = nullptr;
+               std::size_t boff;
                Vec substack;
                NState() {}
                NState(Vec::Allocator &allocator): substack(allocator) {}
                bool cmpgt(const NState &ns, std::size_t nstk) const {
-                       return bp != ns.bp ? bp < ns.bp : 
substack.cmpgt(ns.substack, nstk);
+                       return boff != ns.boff ? boff < ns.boff : 
substack.cmpgt(ns.substack, nstk);
                }
        };
        const Regexp &r;
@@ -898,13 +902,13 @@ struct Execute {
                do {
                        NInt k = epsq.remove();
                        NState &ns = epsv.lookup(k);
-                       if (best.has_value() && (std::size_t) (ns.bp - 
wconv.bp) > best->get(r.nstk + 0))
+                       if (best.has_value() && ns.boff > best->get(r.nstk + 0))
                                continue;
                        const auto &n = nodes[k];
                        switch (n.type) {
                        case Node::Exit:
                                {
-                                       std::size_t b = ns.bp - wconv.bp, e = 
wconv.cp - wconv.bp;
+                                       auto b = ns.boff, e = wconv.off();
                                        if (!best.has_value()
                                            || b < best->get(r.nstk + 0)
                                            || (b == best->get(r.nstk + 0) && e 
>= best->get(r.nstk + 1)))
@@ -940,9 +944,9 @@ struct Execute {
                        case Node::Loop:
                                {
                                        NState nscopy = ns;
-                                       nscopy.substack.put(n.nstk, wconv.cp - 
wconv.bp);
+                                       nscopy.substack.put(n.nstk, 
wconv.off());
                                        nscopy.substack.put(n.nstk + 1, -1);
-                                       nscopy.substack.put(n.nstk + 2, 
wconv.cp - wconv.bp);
+                                       nscopy.substack.put(n.nstk + 2, 
wconv.off());
                                        add(ncsv, k + 1, nscopy);
                                        if (n.args[1]) {
                                                nscopy.substack.put(n.nstk + 1, 
0);
@@ -953,10 +957,10 @@ struct Execute {
                        case Node::Next:
                                {
                                        add(ncsv, k + 1, ns);
-                                       if (n.args[1] && (std::size_t) 
(wconv.cp - wconv.bp) > ns.substack.get(n.nstk - 1)) {
+                                       if (n.args[1] && wconv.off() > 
ns.substack.get(n.nstk - 1)) {
                                                NState nscopy = ns;
                                                nscopy.substack.sub(n.nstk - 2, 
1);
-                                               nscopy.substack.put(n.nstk - 1, 
wconv.cp - wconv.bp);
+                                               nscopy.substack.put(n.nstk - 1, 
wconv.off());
                                                add(ncsv, k - n.args[0], 
nscopy);
                                        }
                                }
@@ -964,7 +968,7 @@ struct Execute {
                        case Node::SubL:
                                {
                                        NState nscopy = ns;
-                                       nscopy.substack.put(n.nstk, wconv.cp - 
wconv.bp);
+                                       nscopy.substack.put(n.nstk, 
wconv.off());
                                        if (n.args[0] != (NInt) -1)
                                                for (auto i = n.args[0]; i <= 
n.args[1]; ++i) {
                                                        
nscopy.substack.put(r.nstk + i * 2, -1);
@@ -977,46 +981,46 @@ struct Execute {
                                if (n.args[0] != (NInt) -1) {
                                        NState nscopy = ns;
                                        nscopy.substack.put(r.nstk + n.args[0] 
* 2 + 0, ns.substack.get(n.nstk - 1));
-                                       nscopy.substack.put(r.nstk + n.args[0] 
* 2 + 1, wconv.cp - wconv.bp);
+                                       nscopy.substack.put(r.nstk + n.args[0] 
* 2 + 1, wconv.off());
                                        add(ncsv, k + 1, nscopy);
                                } else {
                                        add(ncsv, k + 1, ns);
                                }
                                break;
                        case Node::ZBOB:
-                               if (wconv.cp == wconv.bp && (flags & 
MINRX_REG_NOTBOL) == 0)
+                               if (wconv.off() == 0 && (flags & 
MINRX_REG_NOTBOL) == 0)
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZEOB:
-                               if (wconv.cp == wconv.ep && (flags & 
MINRX_REG_NOTEOL) == 0)
+                               if (wconv.look() == WConv::End && (flags & 
MINRX_REG_NOTEOL) == 0)
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZBOL:
-                               if (((wconv.cp == wconv.bp && (flags & 
MINRX_REG_NOTBOL) == 0)) || lookback == L'\n')
+                               if (((wconv.off() == 0 && (flags & 
MINRX_REG_NOTBOL) == 0)) || lookback == L'\n')
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZEOL:
-                               if (((wconv.cp == wconv.ep && (flags & 
MINRX_REG_NOTEOL) == 0)) || wconv.look() == L'\n')
+                               if (((wconv.look() == WConv::End && (flags & 
MINRX_REG_NOTEOL) == 0)) || wconv.look() == L'\n')
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZBOW:
-                               if ((wconv.cp == wconv.bp || 
!is_word(lookback)) && (wconv.cp != wconv.ep && is_word(wconv.look())))
+                               if ((wconv.off() == 0 || !is_word(lookback)) && 
(wconv.look() != WConv::End && is_word(wconv.look())))
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZEOW:
-                               if ((wconv.cp != wconv.bp && is_word(lookback)) 
&& (wconv.cp == wconv.ep || !is_word(wconv.look())))
+                               if ((wconv.off() != 0 && is_word(lookback)) && 
(wconv.look() == WConv::End || !is_word(wconv.look())))
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZXOW:
-                               if (   ((wconv.cp == wconv.bp || 
!is_word(lookback)) && (wconv.cp != wconv.ep && is_word(wconv.look())))
-                                   || ((wconv.cp != wconv.bp && 
is_word(lookback)) && (wconv.cp == wconv.ep || !is_word(wconv.look()))))
+                               if (   ((wconv.off() == 0 || 
!is_word(lookback)) && (wconv.look() != WConv::End && is_word(wconv.look())))
+                                   || ((wconv.off() != 0 && is_word(lookback)) 
&& (wconv.look() == WConv::End || !is_word(wconv.look()))))
                                        add(ncsv, k + 1, ns);
                                break;
                        case Node::ZNWB:
-                               if (   (wconv.cp == wconv.bp && wconv.cp == 
wconv.ep)
-                                   || (wconv.cp == wconv.bp && wconv.cp != 
wconv.ep && !is_word(wconv.look()))
-                                   || (wconv.cp != wconv.bp && wconv.cp == 
wconv.ep && !is_word(lookback))
-                                   || (wconv.cp != wconv.bp && wconv.cp != 
wconv.ep && is_word(lookback) == is_word(wconv.look())))
+                               if (   (wconv.off() == 0 && wconv.look() == 
WConv::End)
+                                   || (wconv.off() == 0 && wconv.look() != 
WConv::End && !is_word(wconv.look()))
+                                   || (wconv.off() != 0 && !is_word(lookback) 
&& wconv.look() == WConv::End)
+                                   || (wconv.off() != 0 && wconv.look() != 
WConv::End && is_word(lookback) == is_word(wconv.look())))
                                        add(ncsv, k + 1, ns);
                                break;
                        default:
@@ -1030,10 +1034,10 @@ struct Execute {
                auto nodes = &r.nodes[0];
                wconv.nextchr();
                if ((flags & MINRX_REG_RESUME) != 0 && rm && rm[0].rm_eo > 0)
-                       while (wconv.cp != wconv.ep && wconv.cp - wconv.bp != 
rm[0].rm_eo)
+                       while (wconv.look() != WConv::End && wconv.off() < 
rm[0].rm_eo)
                                lookback = wconv.look(), wconv.nextchr();
                NState nsinit(allocator);
-               nsinit.bp = wconv.cp;
+               nsinit.boff = wconv.off();
                add(mcsvs[0], 0, nsinit);
                if (!epsq.empty())
                        epsclosure(mcsvs[0]);
@@ -1056,7 +1060,7 @@ struct Execute {
                        }
                        wconv.nextchr(), lookback = wc, wc = wconv.look();
                        if (!best.has_value()) {
-                               nsinit.bp = wconv.cp;
+                               nsinit.boff = wconv.off();
                                add(mcsvs[1], 0, nsinit);
                        }
                        if (!epsq.empty())
@@ -1080,7 +1084,7 @@ struct Execute {
                        }
                        wconv.nextchr(), lookback = wc, wc = wconv.look();
                        if (!best.has_value()) {
-                               nsinit.bp = wconv.cp;
+                               nsinit.boff = wconv.off();
                                add(mcsvs[0], 0, nsinit);
                        }
                        if (!epsq.empty())
@@ -1128,7 +1132,7 @@ minrx_regncomp(minrx_regex_t *rx, std::size_t ns, const char *s, int flags)
 {
         auto r = MinRX::Compile(s, s + ns, (minrx_regcomp_flags_t) flags).compile();
        rx->re_regexp = r;
-       rx->re_nsub = r->nsub;
+       rx->re_nsub = r->nsub - 1;
        rx->re_compflags = (minrx_regcomp_flags_t) flags;
        return r->err;
 }
diff --git a/support/minrx.h b/support/minrx.h
index ecae246e..ddb955aa 100644
--- a/support/minrx.h
+++ b/support/minrx.h
@@ -39,7 +39,7 @@ typedef enum {                                /* Flags for minrx_reg*comp() */
         MINRX_REG_BRACE_COMPAT = 16,    /* { begins interval expression only when followed by digit */
         MINRX_REG_BRACK_ESCAPE = 32,    /* bracket expressions [...] allow backslash escapes */
         MINRX_REG_EXTENSIONS_BSD = 64,  /* enable BSD extensions \< and \> */
-       MINRX_REG_EXTENSIONS_GNU = 128  /* enable GNU extensions \b \B \s \S \w \W \y */
+       MINRX_REG_EXTENSIONS_GNU = 128  /* enable GNU extensions \b \B \s \S \w \W */
 } minrx_regcomp_flags_t;
 
 typedef enum {                         /* Flags for minrx_reg*exec() */

-----------------------------------------------------------------------

Summary of changes:
 support/ChangeLog |  4 +++
 support/minrx.cpp | 88 +++++++++++++++++++++++++++++--------------------------
 support/minrx.h   |  2 +-
 3 files changed, 51 insertions(+), 43 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]