gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, gawk-5.2-stable, updated. gawk-4.1.0-5076-gc85749da


From: Arnold Robbins
Subject: [SCM] gawk branch, gawk-5.2-stable, updated. gawk-4.1.0-5076-gc85749da
Date: Fri, 1 Sep 2023 19:28:55 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-5.2-stable has been updated
       via  c85749daba596ba2b827bcea239db74fc5321665 (commit)
      from  2874b94d73ff766b0f41ff7f259e7c8a0dab458c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=c85749daba596ba2b827bcea239db74fc5321665

commit c85749daba596ba2b827bcea239db74fc5321665
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Fri Sep 1 16:28:03 2023 -0700

    Fix for match with multibyte chars and new tests.

diff --git a/ChangeLog b/ChangeLog
index 15a32135..bbdc869b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2023-09-01         Miguel Pineiro Jr     <mpj@pineiro.cc>
+
+       Fix the handling of zero-length matches in multibyte locales.
+       Thanks to Ed Morton <mortoneccc@comcast.net> for the report.
+
+       * builtin.c (do_match): Translate rstart (byte idx to char idx)
+       even when rlength is zero. For this we tweak the conversion of
+       rlength to keep it in bounds when rstart and rlength are both 0.
+       * node.c (str2wstr): Add an entry to the indices array for the
+       terminating null. It facilitates the tweak above and is needed
+       to translate the idx of a zero-width match at the end of the
+       string.
+
 2023-07-09         Arnold D. Robbins     <arnold@skeeve.com>
 
        * re.c (make_regexp): In error message, use the original text
diff --git a/builtin.c b/builtin.c
index e394cc34..2bc0aaa3 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2791,9 +2791,9 @@ do_match(int nargs)
                size_t *wc_indices = NULL;
 
                rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);        /* byte length */
-               if (rlength > 0 && gawk_mb_cur_max > 1) {
+               if (gawk_mb_cur_max > 1) {
                        t1 = str2wstr(t1, & wc_indices);
-                       rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
+                       rlength = wc_indices[rstart + rlength] - wc_indices[rstart];
                        rstart = wc_indices[rstart];
                }
 
@@ -2816,9 +2816,9 @@ do_match(int nargs)
                                        start = t1->stptr + s;
                                        subpat_start = s;
                                        subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
-                                       if (len > 0 && gawk_mb_cur_max > 1) {
+                                       if (gawk_mb_cur_max > 1) {
                                                subpat_start = wc_indices[s];
-                                               subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
+                                               subpat_len = wc_indices[s + len] - subpat_start;
                                        }
 
                                        it = make_string(start, len);
diff --git a/node.c b/node.c
index 6c9a7306..fa120b10 100644
--- a/node.c
+++ b/node.c
@@ -757,7 +757,7 @@ str2wstr(NODE *n, size_t **ptr)
         * Create the array.
         */
        if (ptr != NULL) {
-               ezalloc(*ptr, size_t *, sizeof(size_t) * n->stlen, "str2wstr");
+               ezalloc(*ptr, size_t *, sizeof(size_t) * (n->stlen + 1), "str2wstr");
        }
 
        sp = n->stptr;
@@ -829,6 +829,11 @@ str2wstr(NODE *n, size_t **ptr)
                }
        }
 
+       /* Needed for zero-length matches at the end of a string */
+       assert(sp - n->stptr == n->stlen);
+       if (ptr != NULL)
+               (*ptr)[sp - n->stptr] = i;
+
        *wsp = L'\0';
        n->wstlen = wsp - n->wstptr;
        n->flags |= WSTRCUR;
diff --git a/pc/ChangeLog b/pc/ChangeLog
index 6f5620f3..2eea265d 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2023-09-01         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * Makefile.tst: Regenerated.
+
 2023-07-09         Arnold D. Robbins     <arnold@skeeve.com>
 
        * Makefile.tst: Regenerated.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index c21c5fe1..d1d7c856 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -234,7 +234,7 @@ LOCALE_CHARSET_TESTS = \
        asort asorti backbigs1 backsmalls1 backsmalls2 \
        fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
        mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
-       mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+       mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
 
 SHLIB_TESTS = \
        apiterm \
@@ -311,7 +311,8 @@ NEED_LOCALE_C = \
 
 NEED_LOCALE_EN = \
        backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
-       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+       mtchi18n2 posix_compare \
        printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
        widesub widesub2 widesub3 widesub4
 
@@ -3700,6 +3701,12 @@ mtchi18n:
        AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+mtchi18n2:
+       @echo $@
+       @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=ENU_USA.1252; export GAWKLOCALE; \
+       AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 rebt8b2:
        @echo $@ $(ZOS_FAIL)
        @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index 9608f5de..7ed0832b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+2023-09-01         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * Makefile.am (EXTRA_DIST): New test, mtchi18n2.
+       * mtchi18n2.awk, mtchi18n2.ok: New files.
+       Thanks to Miguel Pineiro Jr <mpj@pineiro.cc> for the tests.
+
 2023-07-09         Arnold D. Robbins     <arnold@skeeve.com>
 
        * Makefile.am (EXTRA_DIST): New test, regexpbad.
diff --git a/test/Makefile.am b/test/Makefile.am
index 36b302eb..fe37d58f 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -820,6 +820,8 @@ EXTRA_DIST = \
        mtchi18n.awk \
        mtchi18n.in \
        mtchi18n.ok \
+       mtchi18n2.awk \
+       mtchi18n2.ok \
        nasty.awk \
        nasty.ok \
        nasty2.awk \
@@ -1560,7 +1562,7 @@ LOCALE_CHARSET_TESTS = \
        asort asorti backbigs1 backsmalls1 backsmalls2 \
        fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
        mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
-       mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+       mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
 
 SHLIB_TESTS = \
        apiterm \
@@ -1632,7 +1634,8 @@ NEED_LOCALE_C = \
 
 NEED_LOCALE_EN = \
        backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
-       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+       mtchi18n2 posix_compare \
        printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
        widesub widesub2 widesub3 widesub4
 
diff --git a/test/Makefile.in b/test/Makefile.in
index a5981131..7fe4e7a1 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1088,6 +1088,8 @@ EXTRA_DIST = \
        mtchi18n.awk \
        mtchi18n.in \
        mtchi18n.ok \
+       mtchi18n2.awk \
+       mtchi18n2.ok \
        nasty.awk \
        nasty.ok \
        nasty2.awk \
@@ -1824,7 +1826,7 @@ LOCALE_CHARSET_TESTS = \
        asort asorti backbigs1 backsmalls1 backsmalls2 \
        fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
        mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
-       mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+       mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
 
 SHLIB_TESTS = \
        apiterm \
@@ -1901,7 +1903,8 @@ NEED_LOCALE_C = \
 
 NEED_LOCALE_EN = \
        backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
-       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+       mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+       mtchi18n2 posix_compare \
        printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
        widesub widesub2 widesub3 widesub4
 
@@ -5463,6 +5466,12 @@ mtchi18n:
        AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+mtchi18n2:
+       @echo $@
+       @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; export GAWKLOCALE; \
+       AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 rebt8b2:
        @echo $@ $(ZOS_FAIL)
        @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index f3d4c494..8284e165 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -2376,6 +2376,12 @@ mtchi18n:
        AWKPATH="$(srcdir)" $(AWK) -f $@.awk  < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+mtchi18n2:
+       @echo $@
+       @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; export GAWKLOCALE; \
+       AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 rebt8b2:
        @echo $@ $(ZOS_FAIL)
        @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/mtchi18n2.awk b/test/mtchi18n2.awk
new file mode 100755
index 00000000..70433862
--- /dev/null
+++ b/test/mtchi18n2.awk
@@ -0,0 +1,14 @@
+BEGIN {
+       match("\342\200\257", /^/, m)
+       print RSTART, RLENGTH
+
+       #match("\342\200\257", /^(a?)\u202F(b?)$/, m)
+       match("\342\200\257", /^(a?)\342\200\257(b?)$/, m)
+       print RSTART, RLENGTH, m[1,"start"], m[1,"length"], m[2, "start"], m[2, "length"]
+
+       match("\342\200\257", /$/, m)
+       print RSTART, RLENGTH
+
+       match("\342\200\257ac", /a(b?)c/, m)
+       print RSTART, RLENGTH, m[1,"start"], m[1,"length"]
+}
diff --git a/test/mtchi18n2.ok b/test/mtchi18n2.ok
new file mode 100755
index 00000000..10648642
--- /dev/null
+++ b/test/mtchi18n2.ok
@@ -0,0 +1,4 @@
+1 0
+1 1 1 0 2 0
+2 0
+2 2 3 0

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog          | 13 +++++++++++++
 builtin.c          |  8 ++++----
 node.c             |  7 ++++++-
 pc/ChangeLog       |  4 ++++
 pc/Makefile.tst    | 11 +++++++++--
 test/ChangeLog     |  6 ++++++
 test/Makefile.am   |  7 +++++--
 test/Makefile.in   | 13 +++++++++++--
 test/Maketests     |  6 ++++++
 test/mtchi18n2.awk | 14 ++++++++++++++
 test/mtchi18n2.ok  |  4 ++++
 11 files changed, 82 insertions(+), 11 deletions(-)
 create mode 100755 test/mtchi18n2.awk
 create mode 100755 test/mtchi18n2.ok


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]