[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4365-gff3c50b
From: |
Arnold Robbins |
Subject: |
[SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4365-gff3c50b |
Date: |
Fri, 10 Dec 2021 05:20:52 -0500 (EST) |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, gawk-5.1-stable has been updated
via ff3c50b94116b5a2a819883a4e9e0ba9ef83fdf7 (commit)
via 6e872ca782a6fa480aedbef8205bba6b7ccaf307 (commit)
from cfa3ac1c6a0fcbe794df853f25c757e4acd5fcc0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=ff3c50b94116b5a2a819883a4e9e0ba9ef83fdf7
commit ff3c50b94116b5a2a819883a4e9e0ba9ef83fdf7
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Fri Dec 10 12:20:42 2021 +0200
Update texinfo.tex.
diff --git a/build-aux/ChangeLog b/build-aux/ChangeLog
index 67d6035..e6a2c41 100644
--- a/build-aux/ChangeLog
+++ b/build-aux/ChangeLog
@@ -1,3 +1,7 @@
+2021-12-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * texinfo.tex: Updated from GNULIB.
+
2021-10-27 Arnold D. Robbins <arnold@skeeve.com>
* 5.1.1: Release tar ball made.
diff --git a/build-aux/texinfo.tex b/build-aux/texinfo.tex
index e48383d..6e19429 100644
--- a/build-aux/texinfo.tex
+++ b/build-aux/texinfo.tex
@@ -3,7 +3,7 @@
% Load plain if necessary, i.e., if running under initex.
\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
%
-\def\texinfoversion{2021-04-25.21}
+\def\texinfoversion{2021-11-01.16}
%
% Copyright 1985, 1986, 1988, 1990-2021 Free Software Foundation, Inc.
%
@@ -3614,6 +3614,9 @@ $$%
\def\quotedblbase{{\ecfont \char"12}}
\def\quotesinglbase{{\ecfont \char"0D}}
%
+\def\L{{\ecfont \char"8A}} % L with stroke
+\def\l{{\ecfont \char"AA}} % l with stroke
+%
% This positioning is not perfect (see the ogonek LaTeX package), but
% we have the precomposed glyphs for the most common cases. We put the
% tests to use those glyphs in the single \ogonek macro so we have fewer
@@ -7592,6 +7595,7 @@ might help (with 'rm \jobname.?? \jobname.??s')%
%
\def\printdefunline#1#2{%
\begingroup
+ \plainfrenchspacing
% call \deffnheader:
#1#2 \endheader
% common ending:
@@ -9402,7 +9406,7 @@ might help (with 'rm \jobname.?? \jobname.??s')%
\fi\fi
%
\ifimagevmode
- \nobreak\medskip
+ \medskip
% Usually we'll have text after the image which will insert
% \parskip glue, so insert it here too to equalize the space
% above and below.
@@ -11599,11 +11603,9 @@ directory should work if nowhere else does.}
@setregularquotes
@c Local variables:
-@c eval: (add-hook 'before-save-hook 'time-stamp)
+@c eval: (add-hook 'before-save-hook 'time-stamp nil t)
+@c time-stamp-pattern: "texinfoversion{%Y-%02m-%02d.%02H}"
@c page-delimiter: "^\\\\message\\|emacs-page"
-@c time-stamp-start: "def\\\\texinfoversion{"
-@c time-stamp-format: "%:y-%02m-%02d.%02H"
-@c time-stamp-end: "}"
@c End:
@c vim:sw=2:
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 21ae9ba..28a596c 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2021-12-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * texinfo.tex: Updated from GNULIB.
+
2021-11-25 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Add missing @item for AWKgo. Thanks to Antonio
diff --git a/doc/texinfo.tex b/doc/texinfo.tex
index e48383d..6e19429 100644
--- a/doc/texinfo.tex
+++ b/doc/texinfo.tex
@@ -3,7 +3,7 @@
% Load plain if necessary, i.e., if running under initex.
\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
%
-\def\texinfoversion{2021-04-25.21}
+\def\texinfoversion{2021-11-01.16}
%
% Copyright 1985, 1986, 1988, 1990-2021 Free Software Foundation, Inc.
%
@@ -3614,6 +3614,9 @@ $$%
\def\quotedblbase{{\ecfont \char"12}}
\def\quotesinglbase{{\ecfont \char"0D}}
%
+\def\L{{\ecfont \char"8A}} % L with stroke
+\def\l{{\ecfont \char"AA}} % l with stroke
+%
% This positioning is not perfect (see the ogonek LaTeX package), but
% we have the precomposed glyphs for the most common cases. We put the
% tests to use those glyphs in the single \ogonek macro so we have fewer
@@ -7592,6 +7595,7 @@ might help (with 'rm \jobname.?? \jobname.??s')%
%
\def\printdefunline#1#2{%
\begingroup
+ \plainfrenchspacing
% call \deffnheader:
#1#2 \endheader
% common ending:
@@ -9402,7 +9406,7 @@ might help (with 'rm \jobname.?? \jobname.??s')%
\fi\fi
%
\ifimagevmode
- \nobreak\medskip
+ \medskip
% Usually we'll have text after the image which will insert
% \parskip glue, so insert it here too to equalize the space
% above and below.
@@ -11599,11 +11603,9 @@ directory should work if nowhere else does.}
@setregularquotes
@c Local variables:
-@c eval: (add-hook 'before-save-hook 'time-stamp)
+@c eval: (add-hook 'before-save-hook 'time-stamp nil t)
+@c time-stamp-pattern: "texinfoversion{%Y-%02m-%02d.%02H}"
@c page-delimiter: "^\\\\message\\|emacs-page"
-@c time-stamp-start: "def\\\\texinfoversion{"
-@c time-stamp-format: "%:y-%02m-%02d.%02H"
-@c time-stamp-end: "}"
@c End:
@c vim:sw=2:
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=6e872ca782a6fa480aedbef8205bba6b7ccaf307
commit 6e872ca782a6fa480aedbef8205bba6b7ccaf307
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Fri Dec 10 12:19:30 2021 +0200
Update support files from GNULIB.
diff --git a/support/ChangeLog b/support/ChangeLog
index 22e2d69..3655593 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,8 @@
+2021-12-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * intprops.h, regcomp.c, regex_internal.c, regex_internal.h,
+ regexec.c: Sync with GNULIB.
+
2021-10-27 Arnold D. Robbins <arnold@skeeve.com>
* 5.1.1: Release tar ball made.
diff --git a/support/intprops.h b/support/intprops.h
index 3fe64e8..7f20f09 100644
--- a/support/intprops.h
+++ b/support/intprops.h
@@ -229,18 +229,18 @@
/* True if __builtin_add_overflow (A, B, P) and __builtin_sub_overflow
(A, B, P) work when P is non-null. */
+#if defined __has_builtin
+# define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
/* __builtin_{add,sub}_overflow exists but is not reliable in GCC 5.x and 6.x,
see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98269>. */
-#if 7 <= __GNUC__ && !defined __ICC
+#elif 7 <= __GNUC__ && !defined __EDG__
# define _GL_HAS_BUILTIN_ADD_OVERFLOW 1
-#elif defined __has_builtin
-# define _GL_HAS_BUILTIN_ADD_OVERFLOW __has_builtin (__builtin_add_overflow)
#else
# define _GL_HAS_BUILTIN_ADD_OVERFLOW 0
#endif
/* True if __builtin_mul_overflow (A, B, P) works when P is non-null. */
-#ifdef __clang__
+#if defined __clang_major_ && __clang_major__ < 14
/* Work around Clang bug <https://bugs.llvm.org/show_bug.cgi?id=16404>. */
# define _GL_HAS_BUILTIN_MUL_OVERFLOW 0
#else
@@ -249,9 +249,8 @@
/* True if __builtin_add_overflow_p (A, B, C) works, and similarly for
__builtin_sub_overflow_p and __builtin_mul_overflow_p. */
-#if defined __clang__ || defined __ICC
-/* Clang 11 lacks __builtin_mul_overflow_p, and even if it did it
- would presumably run afoul of Clang bug 16404. ICC 2021.1's
+#ifdef __EDG__
+/* In EDG-based compilers like ICC 2021.3 and earlier,
__builtin_add_overflow_p etc. are not treated as integral constant
expressions even when all arguments are. */
# define _GL_HAS_BUILTIN_OVERFLOW_P 0
@@ -400,7 +399,7 @@
#if _GL_HAS_BUILTIN_MUL_OVERFLOW
# if ((9 < __GNUC__ + (3 <= __GNUC_MINOR__) \
|| (__GNUC__ == 8 && 4 <= __GNUC_MINOR__)) \
- && !defined __ICC)
+ && !defined __EDG__)
# define INT_MULTIPLY_WRAPV(a, b, r) __builtin_mul_overflow (a, b, r)
# else
/* Work around GCC bug 91450. */
diff --git a/support/regcomp.c b/support/regcomp.c
index 887e5b5..6a97fde 100644
--- a/support/regcomp.c
+++ b/support/regcomp.c
@@ -27,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
-#ifdef RE_ENABLE_I18N
static void free_charset (re_charset_t *cset);
-#endif /* RE_ENABLE_I18N */
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-#ifdef RE_ENABLE_I18N
static void optimize_utf8 (re_dfa_t *dfa);
-#endif
static reg_errcode_t analyze (regex_t *preg);
static reg_errcode_t preorder (bin_tree_t *root,
reg_errcode_t (fn (void *, bin_tree_t *)),
@@ -89,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t
*elem,
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
re_string_t *regexp,
re_token_t *token);
-#ifdef RE_ENABLE_I18N
static reg_errcode_t build_equiv_class (bitset_t sbcset,
re_charset_t *mbcset,
Idx *equiv_class_alloc,
@@ -100,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE
trans,
Idx *char_class_alloc,
const char *class_name,
reg_syntax_t syntax);
-#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (bitset_t sbcset,
- const unsigned char *name);
-static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
- bitset_t sbcset,
- const char *class_name,
- reg_syntax_t syntax);
-#endif /* not RE_ENABLE_I18N */
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
RE_TRANSLATE_TYPE trans,
const char *class_name,
@@ -279,8 +266,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
}
weak_alias (__re_compile_fastmap, re_compile_fastmap)
-static inline void
-__attribute__ ((always_inline))
+static __always_inline void
re_set_fastmap (char *fastmap, bool icase, int ch)
{
fastmap[ch] = 1;
@@ -306,7 +292,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t
*init_state,
if (type == CHARACTER)
{
re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
-#ifdef RE_ENABLE_I18N
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
unsigned char buf[MB_LEN_MAX];
@@ -327,7 +312,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t
*init_state,
!= (size_t) -1))
re_set_fastmap (fastmap, false, buf[0]);
}
-#endif
}
else if (type == SIMPLE_BRACKET)
{
@@ -341,13 +325,12 @@ re_compile_fastmap_iter (regex_t *bufp, const
re_dfastate_t *init_state,
re_set_fastmap (fastmap, icase, ch);
}
}
-#ifdef RE_ENABLE_I18N
else if (type == COMPLEX_BRACKET)
{
re_charset_t *cset = dfa->nodes[node].opr.mbcset;
Idx i;
-# ifdef _LIBC
+#ifdef _LIBC
/* See if we have to try all bytes which start multiple collation
elements.
e.g. In da_DK, we want to catch 'a' since "aa" is a valid
@@ -363,7 +346,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t
*init_state,
if (table[i] < 0)
re_set_fastmap (fastmap, icase, i);
}
-# endif /* _LIBC */
+#endif /* _LIBC */
/* See if we have to start the match at all multibyte characters,
i.e. where we would not find an invalid sequence. This only
@@ -371,9 +354,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t
*init_state,
sets, the SIMPLE_BRACKET again suffices. */
if (dfa->mb_cur_max > 1
&& (cset->nchar_classes || cset->non_match || cset->nranges
-# ifdef _LIBC
+#ifdef _LIBC
|| cset->nequiv_classes
-# endif /* _LIBC */
+#endif /* _LIBC */
))
{
unsigned char c = 0;
@@ -406,12 +389,7 @@ re_compile_fastmap_iter (regex_t *bufp, const
re_dfastate_t *init_state,
}
}
}
-#endif /* RE_ENABLE_I18N */
- else if (type == OP_PERIOD
-#ifdef RE_ENABLE_I18N
- || type == OP_UTF8_PERIOD
-#endif /* RE_ENABLE_I18N */
- || type == END_OF_RE)
+ else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type ==
END_OF_RE)
{
memset (fastmap, '\1', sizeof (char) * SBC_MAX);
if (type == END_OF_RE)
@@ -550,7 +528,6 @@ regerror (int errcode, const regex_t *__restrict preg, char
*__restrict errbuf,
weak_alias (__regerror, regerror)
-#ifdef RE_ENABLE_I18N
/* This static array is used for the map to single-byte characters when
UTF-8 is used. Otherwise we would allocate memory just to initialize
it the same all the time. UTF-8 is the preferred encoding so this is
@@ -558,25 +535,24 @@ weak_alias (__regerror, regerror)
static const bitset_t utf8_sb_map =
{
/* Set the first 128 bits. */
-# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
+#if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
-# else
-# if 4 * BITSET_WORD_BITS < ASCII_CHARS
-# error "bitset_word_t is narrower than 32 bits"
-# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
+#else
+# if 4 * BITSET_WORD_BITS < ASCII_CHARS
+# error "bitset_word_t is narrower than 32 bits"
+# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
-# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX,
-# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX,
-# endif
+# endif
(BITSET_WORD_MAX
>> (SBC_MAX % BITSET_WORD_BITS == 0
? 0
: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
-# endif
-};
#endif
+};
static void
@@ -614,10 +590,8 @@ free_dfa_content (re_dfa_t *dfa)
re_free (entry->array);
}
re_free (dfa->state_table);
-#ifdef RE_ENABLE_I18N
if (dfa->sb_char != utf8_sb_map)
re_free (dfa->sb_char);
-#endif
re_free (dfa->subexp_map);
#ifdef DEBUG
re_free (dfa->re_str);
@@ -796,11 +770,9 @@ re_compile_internal (regex_t *preg, const char * pattern,
size_t length,
if (__glibc_unlikely (err != REG_NOERROR))
goto re_compile_internal_free_return;
-#ifdef RE_ENABLE_I18N
/* If possible, do searching in single byte encoding to speed things up. */
if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
optimize_utf8 (dfa);
-#endif
/* Then create the initial state of the dfa. */
err = create_initial_state (dfa);
@@ -830,11 +802,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
#ifndef _LIBC
const char *codeset_name;
#endif
-#ifdef RE_ENABLE_I18N
size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
-#else
- size_t max_i18n_object_size = 0;
-#endif
size_t max_object_size =
MAX (sizeof (struct re_state_table_entry),
MAX (sizeof (re_token_t),
@@ -886,7 +854,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
dfa->map_notascii = 0;
#endif
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
if (dfa->is_utf8)
@@ -906,14 +873,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
wint_t wch = __btowc (ch);
if (wch != WEOF)
dfa->sb_char[i] |= (bitset_word_t) 1 << j;
-# ifndef _LIBC
+#ifndef _LIBC
if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
-# endif
+#endif
}
}
}
-#endif
if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
return REG_ESPACE;
@@ -933,8 +899,6 @@ init_word_char (re_dfa_t *dfa)
dfa->word_ops_used = 1;
if (__glibc_likely (dfa->map_notascii == 0))
{
- /* Avoid uint32_t and uint64_t as some non-GCC platforms lack
- them, an issue when this code is used in Gnulib. */
bitset_word_t bits0 = 0x00000000;
bitset_word_t bits1 = 0x03ff0000;
bitset_word_t bits2 = 0x87fffffe;
@@ -1074,7 +1038,6 @@ create_initial_state (re_dfa_t *dfa)
return REG_NOERROR;
}
-#ifdef RE_ENABLE_I18N
/* If it is possible to do searching in single byte encoding instead of UTF-8
to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
DFA nodes where needed. */
@@ -1154,7 +1117,6 @@ optimize_utf8 (re_dfa_t *dfa)
dfa->is_utf8 = 0;
dfa->has_mb_node = dfa->nbackref > 0 || has_period;
}
-#endif
/* Analyze the structure tree, and calculate "first", "next", "edest",
"eclosure", and "inveclosure". */
@@ -1792,7 +1754,6 @@ peek_token (re_token_t *token, re_string_t *input,
reg_syntax_t syntax)
token->opr.c = c;
token->word_char = 0;
-#ifdef RE_ENABLE_I18N
token->mb_partial = 0;
if (input->mb_cur_max > 1
&& !re_string_first_byte (input, re_string_cur_idx (input)))
@@ -1801,7 +1762,6 @@ peek_token (re_token_t *token, re_string_t *input,
reg_syntax_t syntax)
token->mb_partial = 1;
return 1;
}
-#endif
if (c == '\\')
{
unsigned char c2;
@@ -1814,7 +1774,6 @@ peek_token (re_token_t *token, re_string_t *input,
reg_syntax_t syntax)
c2 = re_string_peek_byte_case (input, 1);
token->opr.c = c2;
token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc = re_string_wchar_at (input,
@@ -1822,7 +1781,6 @@ peek_token (re_token_t *token, re_string_t *input,
reg_syntax_t syntax)
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
}
else
-#endif
token->word_char = IS_WORD_CHAR (c2) != 0;
switch (c2)
@@ -1928,14 +1886,12 @@ peek_token (re_token_t *token, re_string_t *input,
reg_syntax_t syntax)
}
token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
}
else
-#endif
token->word_char = IS_WORD_CHAR (token->opr.c);
switch (c)
@@ -2027,14 +1983,12 @@ peek_token_bracket (re_token_t *token, re_string_t
*input, reg_syntax_t syntax)
c = re_string_peek_byte (input, 0);
token->opr.c = c;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1
&& !re_string_first_byte (input, re_string_cur_idx (input)))
{
token->type = CHARACTER;
return 1;
}
-#endif /* RE_ENABLE_I18N */
if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
&& re_string_cur_idx (input) + 1 < re_string_length (input))
@@ -2256,7 +2210,6 @@ parse_expression (re_string_t *regexp, regex_t *preg,
re_token_t *token,
*err = REG_ESPACE;
return NULL;
}
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
while (!re_string_eoi (regexp)
@@ -2273,7 +2226,6 @@ parse_expression (re_string_t *regexp, regex_t *preg,
re_token_t *token,
}
}
}
-#endif
break;
case OP_OPEN_SUBEXP:
@@ -2666,40 +2618,30 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp,
re_dfa_t *dfa,
#ifndef _LIBC
-# ifdef RE_ENABLE_I18N
/* Convert the byte B to the corresponding wide character. In a
unibyte locale, treat B as itself. In a multibyte locale, return
WEOF if B is an encoding error. */
static wint_t
-parse_byte (unsigned char b, re_charset_t *mbcset)
+parse_byte (unsigned char b, re_dfa_t const *dfa)
{
- return mbcset == NULL ? b : __btowc (b);
+ return dfa->mb_cur_max > 1 ? __btowc (b) : b;
}
-# endif
- /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument since we may
- update it. */
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument since we may
+ update it. */
static reg_errcode_t
-# ifdef RE_ENABLE_I18N
-build_range_exp (const reg_syntax_t syntax,
- bitset_t sbcset,
- re_charset_t *mbcset,
- Idx *range_alloc,
- const bracket_elem_t *start_elem,
- const bracket_elem_t *end_elem)
-# else /* not RE_ENABLE_I18N */
-build_range_exp (const reg_syntax_t syntax,
- bitset_t sbcset,
- const bracket_elem_t *start_elem,
- const bracket_elem_t *end_elem)
-# endif /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules,
+ const unsigned char *collseqmb, const char *collseqwc,
+ int_fast32_t table_size, const void *symb_table,
+ const unsigned char *extra)
{
- unsigned int start_ch, end_ch;
/* Equivalence Classes and Character Classes can't be a range start/end. */
if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
|| start_elem->type == CHAR_CLASS
@@ -2715,110 +2657,88 @@ build_range_exp (const reg_syntax_t syntax,
&& strlen ((char *) end_elem->opr.name) > 1)))
return REG_ECOLLATE;
-# ifdef RE_ENABLE_I18N
- {
- wchar_t wc;
- wint_t start_wc;
- wint_t end_wc;
-
+ unsigned int
start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
+ : 0)),
end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
: 0));
+ wint_t
start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
- ? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
+ ? parse_byte (start_ch, dfa) : start_elem->opr.wch),
end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
- ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
- if (start_wc == WEOF || end_wc == WEOF)
- return REG_ECOLLATE;
- else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
- && start_wc > end_wc))
- return REG_ERANGE;
-
- /* Got valid collation sequence values, add them as a new entry.
- However, for !_LIBC we have no collation elements: if the
- character set is single byte, the single byte character set
- that we build below suffices. parse_bracket_exp passes
- no MBCSET if dfa->mb_cur_max == 1. */
- if (mbcset)
- {
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*range_alloc == mbcset->nranges))
- {
- /* There is not enough space, need realloc. */
- wchar_t *new_array_start, *new_array_end;
- Idx new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, wchar_t,
- new_nranges);
+ ? parse_byte (end_ch, dfa) : end_elem->opr.wch);
- if (__glibc_unlikely (new_array_start == NULL
- || new_array_end == NULL))
- {
- re_free (new_array_start);
- re_free (new_array_end);
- return REG_ESPACE;
- }
+ if (start_wc == WEOF || end_wc == WEOF)
+ return REG_ECOLLATE;
+ else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+ && start_wc > end_wc))
+ return REG_ERANGE;
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
+ /* Got valid collation sequence values, add them as a new entry.
+ However, for !_LIBC we have no collation elements: if the
+ character set is single byte, the single byte character set
+ that we build below suffices. parse_bracket_exp passes
+ no MBCSET if dfa->mb_cur_max == 1. */
+ if (dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+ {
+ /* There is not enough space, need realloc. */
+ wchar_t *new_array_start, *new_array_end;
+ Idx new_nranges;
- mbcset->range_starts[mbcset->nranges] = start_wc;
- mbcset->range_ends[mbcset->nranges++] = end_wc;
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+ new_nranges);
+
+ if (__glibc_unlikely (new_array_start == NULL
+ || new_array_end == NULL))
+ {
+ re_free (new_array_start);
+ re_free (new_array_end);
+ return REG_ESPACE;
+ }
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_wc;
+ mbcset->range_ends[mbcset->nranges++] = end_wc;
+ }
+
+ /* Build the table for single byte characters. */
+ for (wchar_t wc = 0; wc < SBC_MAX; ++wc)
+ {
+ if (start_wc <= wc && wc <= end_wc)
+ bitset_set (sbcset, wc);
+ }
- /* Build the table for single byte characters. */
- for (wc = 0; wc < SBC_MAX; ++wc)
- {
- if (start_wc <= wc && wc <= end_wc)
- bitset_set (sbcset, wc);
- }
- }
-# else /* not RE_ENABLE_I18N */
- {
- unsigned int ch;
- start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
- end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
- : 0));
- if (start_ch > end_ch)
- return REG_ERANGE;
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ++ch)
- if (start_ch <= ch && ch <= end_ch)
- bitset_set (sbcset, ch);
- }
-# endif /* not RE_ENABLE_I18N */
return REG_NOERROR;
}
#endif /* not _LIBC */
#ifndef _LIBC
-/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
+/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.
Build the collating element which is represented by NAME.
The result are written to MBCSET and SBCSET.
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
pointer argument since we may update it. */
static reg_errcode_t
-# ifdef RE_ENABLE_I18N
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- Idx *coll_sym_alloc, const unsigned char *name)
-# else /* not RE_ENABLE_I18N */
-build_collating_symbol (bitset_t sbcset, const unsigned char *name)
-# endif /* not RE_ENABLE_I18N */
+ Idx *coll_sym_alloc, const unsigned char *name,
+ uint_fast32_t nrules, int_fast32_t table_size,
+ const void *symb_table, const unsigned char *extra)
{
size_t name_len = strlen ((const char *) name);
if (__glibc_unlikely (name_len != 1))
@@ -2831,271 +2751,280 @@ build_collating_symbol (bitset_t sbcset, const
unsigned char *name)
}
#endif /* not _LIBC */
-/* This function parse bracket expression like "[abc]", "[a-c]",
- "[[.a-a.]]" etc. */
-
-static bin_tree_t *
-parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
- reg_syntax_t syntax, reg_errcode_t *err)
-{
#ifdef _LIBC
- const unsigned char *collseqmb;
- const char *collseqwc;
- uint32_t nrules;
- int32_t table_size;
- const int32_t *symb_table;
- const unsigned char *extra;
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Seek the collating symbol entry corresponding to NAME.
- Return the index of the symbol in the SYMB_TABLE,
- or -1 if not found. */
-
- auto inline int32_t
- __attribute__ ((always_inline))
- seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
- {
- int32_t elem;
-
- for (elem = 0; elem < table_size; elem++)
- if (symb_table[2 * elem] != 0)
- {
- int32_t idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- if (/* Compare the length of the name. */
- name_len == extra[idx]
- /* Compare the name. */
- && memcmp (name, &extra[idx + 1], name_len) == 0)
- /* Yep, this is the entry. */
- return elem;
- }
- return -1;
- }
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Seek the collating symbol entry corresponding to NAME.
+ Return the index of the symbol in the SYMB_TABLE,
+ or -1 if not found. */
+
+static __always_inline int32_t
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+ const int32_t *symb_table,
+ int_fast32_t table_size,
+ const unsigned char *extra)
+{
+ int_fast32_t elem;
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Look up the collation sequence value of BR_ELEM.
- Return the value if succeeded, UINT_MAX otherwise. */
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ if (/* Compare the length of the name. */
+ name_len == extra[idx]
+ /* Compare the name. */
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
+ /* Yep, this is the entry. */
+ return elem;
+ }
+ return -1;
+}
- auto inline unsigned int
- __attribute__ ((always_inline))
- lookup_collation_sequence_value (bracket_elem_t *br_elem)
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
+
+static __always_inline unsigned int
+lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
+ const unsigned char *collseqmb,
+ const char *collseqwc,
+ int_fast32_t table_size,
+ const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ if (br_elem->type == SB_CHAR)
{
- if (br_elem->type == SB_CHAR)
- {
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- return collseqmb[br_elem->opr.ch];
- else
- {
- wint_t wc = __btowc (br_elem->opr.ch);
- return __collseq_table_lookup (collseqwc, wc);
- }
- }
- else if (br_elem->type == MB_CHAR)
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
{
- if (nrules != 0)
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return __collseq_table_lookup (collseqwc, wc);
}
- else if (br_elem->type == COLL_SYM)
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
{
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
- if (nrules != 0)
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len,
+ symb_table, table_size,
+ extra);
+ if (elem != -1)
{
- int32_t elem, idx;
- elem = seek_collating_symbol_entry (br_elem->opr.name,
- sym_name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- /* Skip the byte sequence of the collating element. */
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the multibyte collation sequence value. */
- idx += sizeof (unsigned int);
- /* Skip the wide char sequence of the collating element. */
- idx += sizeof (unsigned int) *
- (1 + *(unsigned int *) (extra + idx));
- /* Return the collation sequence value. */
- return *(unsigned int *) (extra + idx);
- }
- else if (sym_name_len == 1)
- {
- /* No valid character. Match it as a single byte
- character. */
- return collseqmb[br_elem->opr.name[0]];
- }
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
}
else if (sym_name_len == 1)
- return collseqmb[br_elem->opr.name[0]];
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
}
- return UINT_MAX;
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
}
+ return UINT_MAX;
+}
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument since we may
- update it. */
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument since we may
+ update it. */
+
+static __always_inline reg_errcode_t
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
+ const unsigned char *collseqmb, const char *collseqwc,
+ int_fast32_t table_size, const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem, bracket_elem_t *end_elem)
- {
- unsigned int ch;
- uint32_t start_collseq;
- uint32_t end_collseq;
-
- /* Equivalence Classes and Character Classes can't be a range
- start/end. */
- if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
- || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS
- || end_elem->type == CHAR_CLASS))
- return REG_ERANGE;
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+ || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS
+ || end_elem->type == CHAR_CLASS))
+ return REG_ERANGE;
- /* FIXME: Implement rational ranges here, too. */
- start_collseq = lookup_collation_sequence_value (start_elem);
- end_collseq = lookup_collation_sequence_value (end_elem);
- /* Check start/end collation sequence values. */
- if (__glibc_unlikely (start_collseq == UINT_MAX
- || end_collseq == UINT_MAX))
- return REG_ECOLLATE;
- if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
- && start_collseq > end_collseq))
- return REG_ERANGE;
+ /* FIXME: Implement rational ranges here, too. */
+ start_collseq = lookup_collation_sequence_value (start_elem, nrules,
collseqmb, collseqwc,
+ table_size, symb_table,
extra);
+ end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb,
collseqwc,
+ table_size, symb_table, extra);
+ /* Check start/end collation sequence values. */
+ if (__glibc_unlikely (start_collseq == UINT_MAX
+ || end_collseq == UINT_MAX))
+ return REG_ECOLLATE;
+ if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+ && start_collseq > end_collseq))
+ return REG_ERANGE;
- /* Got valid collation sequence values, add them as a new entry.
- However, if we have no collation elements, and the character set
- is single byte, the single byte character set that we
- build below suffices. */
- if (nrules > 0 || dfa->mb_cur_max > 1)
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*range_alloc == mbcset->nranges))
{
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*range_alloc == mbcset->nranges))
- {
- /* There is not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- Idx new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
-
- if (__glibc_unlikely (new_array_start == NULL
- || new_array_end == NULL))
- return REG_ESPACE;
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
- }
+ if (__glibc_unlikely (new_array_start == NULL
+ || new_array_end == NULL))
+ return REG_ESPACE;
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ch++)
- {
- uint32_t ch_collseq;
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- ch_collseq = collseqmb[ch];
- else
- ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
- bitset_set (sbcset, ch);
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
}
- return REG_NOERROR;
+
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
}
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument since we may update it. */
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+}
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- Idx *coll_sym_alloc, const unsigned char *name)
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument since we may update it. */
+
+static __always_inline reg_errcode_t
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+ Idx *coll_sym_alloc, const unsigned char *name,
+ uint_fast32_t nrules, int_fast32_t table_size,
+ const int32_t *symb_table, const unsigned char *extra)
+{
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
{
- int32_t elem, idx;
- size_t name_len = strlen ((const char *) name);
- if (nrules != 0)
+ elem = seek_collating_symbol_entry (name, name_len, symb_table,
+ table_size, extra);
+ if (elem != -1)
{
- elem = seek_collating_symbol_entry (name, name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- }
- else if (name_len == 1)
- {
- /* No valid character, treat it as a normal
- character. */
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- else
- return REG_ECOLLATE;
-
- /* Got valid collation sequence, add it as a new entry. */
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
- /* Use realloc since mbcset->coll_syms is NULL
- if *alloc == 0. */
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- new_coll_sym_alloc);
- if (__glibc_unlikely (new_coll_syms == NULL))
- return REG_ESPACE;
- mbcset->coll_syms = new_coll_syms;
- *coll_sym_alloc = new_coll_sym_alloc;
- }
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
return REG_NOERROR;
}
else
+ return REG_ECOLLATE;
+
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
{
- if (__glibc_unlikely (name_len != 1))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ new_coll_sym_alloc);
+ if (__glibc_unlikely (new_coll_syms == NULL))
+ return REG_ESPACE;
+ mbcset->coll_syms = new_coll_syms;
+ *coll_sym_alloc = new_coll_sym_alloc;
}
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ return REG_NOERROR;
}
-#endif
+ else
+ {
+ if (__glibc_unlikely (name_len != 1))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ }
+}
+#endif /* _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+ "[[.a-a.]]" etc. */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err)
+{
+ const unsigned char *collseqmb = NULL;
+ const char *collseqwc = NULL;
+ uint_fast32_t nrules = 0;
+ int_fast32_t table_size = 0;
+ const void *symb_table = NULL;
+ const unsigned char *extra = NULL;
re_token_t br_token;
re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
re_charset_t *mbcset;
Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
Idx equiv_class_alloc = 0, char_class_alloc = 0;
-#endif /* not RE_ENABLE_I18N */
bool non_match = false;
bin_tree_t *work_tree;
int token_len;
@@ -3111,26 +3040,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
*/
collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
- symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_SYMB_TABLEMB);
+ symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB);
extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
}
#endif
sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
-#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
-#endif /* RE_ENABLE_I18N */
-#ifdef RE_ENABLE_I18N
if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
-#else
- if (__glibc_unlikely (sbcset == NULL))
-#endif /* RE_ENABLE_I18N */
{
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
re_free (mbcset);
-#endif
*err = REG_ESPACE;
return NULL;
}
@@ -3143,9 +3063,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
}
if (token->type == OP_NON_MATCH_LIST)
{
-#ifdef RE_ENABLE_I18N
mbcset->non_match = 1;
-#endif /* not RE_ENABLE_I18N */
non_match = true;
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
bitset_set (sbcset, '\n');
@@ -3228,18 +3146,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
token_len = peek_token_bracket (token, regexp, syntax);
-#ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc,
- &start_elem, &end_elem);
-#else
-# ifdef RE_ENABLE_I18N
- *err = build_range_exp (syntax, sbcset,
- dfa->mb_cur_max > 1 ? mbcset : NULL,
- &range_alloc, &start_elem, &end_elem);
-# else
- *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
-# endif
-#endif /* RE_ENABLE_I18N */
+ &start_elem, &end_elem,
+ dfa, syntax, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
}
@@ -3250,7 +3160,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
case SB_CHAR:
bitset_set (sbcset, start_elem.opr.ch);
break;
-#ifdef RE_ENABLE_I18N
case MB_CHAR:
/* Check whether the array has enough space. */
if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
@@ -3268,30 +3177,24 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
}
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break;
-#endif /* RE_ENABLE_I18N */
case EQUIV_CLASS:
*err = build_equiv_class (sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &equiv_class_alloc,
-#endif /* RE_ENABLE_I18N */
start_elem.opr.name);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;
case COLL_SYM:
*err = build_collating_symbol (sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc,
-#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
+ start_elem.opr.name,
+ nrules, table_size, symb_table,
extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;
case CHAR_CLASS:
*err = build_charclass (regexp->trans, sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &char_class_alloc,
-#endif /* RE_ENABLE_I18N */
(const char *) start_elem.opr.name,
syntax);
if (__glibc_unlikely (*err != REG_NOERROR))
@@ -3317,7 +3220,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
if (non_match)
bitset_not (sbcset);
-#ifdef RE_ENABLE_I18N
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
@@ -3361,11 +3263,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
}
}
else
-#endif /* not RE_ENABLE_I18N */
{
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
br_token.opr.sbcset = sbcset;
@@ -3379,9 +3278,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
re_token_t *token,
*err = REG_ESPACE;
parse_bracket_exp_free_return:
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
return NULL;
}
@@ -3392,7 +3289,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t
*regexp,
re_token_t *token, int token_len, re_dfa_t *dfa,
reg_syntax_t syntax, bool accept_hyphen)
{
-#ifdef RE_ENABLE_I18N
int cur_char_size;
cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
if (cur_char_size > 1)
@@ -3402,7 +3298,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t
*regexp,
re_string_skip_bytes (regexp, cur_char_size);
return REG_NOERROR;
}
-#endif /* RE_ENABLE_I18N */
re_string_skip_bytes (regexp, token_len); /* Skip a token. */
if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
|| token->type == OP_OPEN_EQUIV_CLASS)
@@ -3475,12 +3370,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t
*regexp,
is a pointer argument since we may update it. */
static reg_errcode_t
-#ifdef RE_ENABLE_I18N
build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
Idx *equiv_class_alloc, const unsigned char *name)
-#else /* not RE_ENABLE_I18N */
-build_equiv_class (bitset_t sbcset, const unsigned char *name)
-#endif /* not RE_ENABLE_I18N */
{
#ifdef _LIBC
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3560,14 +3451,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char
*name)
is a pointer argument since we may update it. */
static reg_errcode_t
-#ifdef RE_ENABLE_I18N
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
re_charset_t *mbcset, Idx *char_class_alloc,
const char *class_name, reg_syntax_t syntax)
-#else /* not RE_ENABLE_I18N */
-build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
- const char *class_name, reg_syntax_t syntax)
-#endif /* not RE_ENABLE_I18N */
{
int i;
const char *name = class_name;
@@ -3578,7 +3464,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
&& (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
name = "alpha";
-#ifdef RE_ENABLE_I18N
/* Check the space of the arrays. */
if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
{
@@ -3594,7 +3479,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
*char_class_alloc = new_char_class_alloc;
}
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
-#endif /* RE_ENABLE_I18N */
#define BUILD_CHARCLASS_LOOP(ctype_func) \
do { \
@@ -3649,10 +3533,8 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
reg_errcode_t *err)
{
re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
re_charset_t *mbcset;
Idx alloc = 0;
-#endif /* not RE_ENABLE_I18N */
reg_errcode_t ret;
bin_tree_t *tree;
@@ -3662,7 +3544,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
*err = REG_ESPACE;
return NULL;
}
-#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
if (__glibc_unlikely (mbcset == NULL))
{
@@ -3671,21 +3552,14 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
return NULL;
}
mbcset->non_match = non_match;
-#endif /* RE_ENABLE_I18N */
/* We don't care the syntax in this case. */
- ret = build_charclass (trans, sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &alloc,
-#endif /* RE_ENABLE_I18N */
- class_name, 0);
+ ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0);
if (__glibc_unlikely (ret != REG_NOERROR))
{
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
*err = ret;
return NULL;
}
@@ -3697,11 +3571,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
if (non_match)
bitset_not (sbcset);
-#ifdef RE_ENABLE_I18N
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
-#endif
/* Build a tree for simple bracket. */
re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
@@ -3709,7 +3581,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
if (__glibc_unlikely (tree == NULL))
goto build_word_op_espace;
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
bin_tree_t *mbc_tree;
@@ -3730,15 +3601,10 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE
trans,
free_charset (mbcset);
return tree;
}
-#else /* not RE_ENABLE_I18N */
- return tree;
-#endif /* not RE_ENABLE_I18N */
build_word_op_espace:
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
*err = REG_ESPACE;
return NULL;
}
@@ -3771,21 +3637,19 @@ fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax)
return num;
}
-#ifdef RE_ENABLE_I18N
static void
free_charset (re_charset_t *cset)
{
re_free (cset->mbchars);
-# ifdef _LIBC
+#ifdef _LIBC
re_free (cset->coll_syms);
re_free (cset->equiv_classes);
-# endif
+#endif
re_free (cset->range_starts);
re_free (cset->range_ends);
re_free (cset->char_classes);
re_free (cset);
}
-#endif /* RE_ENABLE_I18N */
/* Functions for binary tree operation. */
@@ -3851,13 +3715,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node)
static void
free_token (re_token_t *node)
{
-#ifdef RE_ENABLE_I18N
if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
free_charset (node->opr.mbcset);
- else
-#endif /* RE_ENABLE_I18N */
- if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
- re_free (node->opr.sbcset);
+ else if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
}
/* Worker function for tree walking. Free the allocated memory inside NODE
diff --git a/support/regex_internal.c b/support/regex_internal.c
index aefcfa2..9767cd0 100644
--- a/support/regex_internal.c
+++ b/support/regex_internal.c
@@ -30,10 +30,8 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t
*dfa,
re_hashval_t hash);
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
Idx new_buf_len);
-#ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr);
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
-#endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr);
static void re_string_translate_buffer (re_string_t *pstr);
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
@@ -91,7 +89,6 @@ re_string_construct (re_string_t *pstr, const char *str, Idx
len,
if (icase)
{
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
while (1)
@@ -109,16 +106,13 @@ re_string_construct (re_string_t *pstr, const char *str,
Idx len,
}
}
else
-#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);
}
else
{
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
build_wcs_buffer (pstr);
else
-#endif /* RE_ENABLE_I18N */
{
if (trans != NULL)
re_string_translate_buffer (pstr);
@@ -139,7 +133,6 @@ static reg_errcode_t
__attribute_warn_unused_result__
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
{
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
wint_t *new_wcs;
@@ -162,7 +155,6 @@ re_string_realloc_buffers (re_string_t *pstr, Idx
new_buf_len)
pstr->offsets = new_offsets;
}
}
-#endif /* RE_ENABLE_I18N */
if (pstr->mbs_allocated)
{
unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
@@ -194,7 +186,6 @@ re_string_construct_common (const char *str, Idx len,
re_string_t *pstr,
pstr->raw_stop = pstr->stop;
}
-#ifdef RE_ENABLE_I18N
/* Build wide character buffer PSTR->WCS.
If the byte sequence of the string are:
@@ -530,7 +521,6 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx,
wint_t *last_wc)
*last_wc = wc;
return rawbuf_idx;
}
-#endif /* RE_ENABLE_I18N */
/* Build the buffer PSTR->MBS, and apply the translation if we need.
This function is used in case of REG_ICASE. */
@@ -585,10 +575,8 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
else
{
/* Reset buffer. */
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
-#endif /* RE_ENABLE_I18N */
pstr->len = pstr->raw_len;
pstr->stop = pstr->raw_stop;
pstr->valid_len = 0;
@@ -608,7 +596,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
if (__glibc_likely (offset < pstr->valid_raw_len))
{
/* Yes, move them to the front of the buffer. */
-#ifdef RE_ENABLE_I18N
if (__glibc_unlikely (pstr->offsets_needed))
{
Idx low = 0, high = pstr->valid_len, mid;
@@ -672,15 +659,12 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
}
}
else
-#endif
{
pstr->tip_context = re_string_context_at (pstr, offset - 1,
eflags);
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
-#endif /* RE_ENABLE_I18N */
if (__glibc_unlikely (pstr->mbs_allocated))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
@@ -691,7 +675,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
}
else
{
-#ifdef RE_ENABLE_I18N
/* No, skip all characters until IDX. */
Idx prev_valid_len = pstr->valid_len;
@@ -701,9 +684,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
pstr->stop = pstr->raw_stop - idx + offset;
pstr->offsets_needed = 0;
}
-#endif
pstr->valid_len = 0;
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
Idx wcs_idx;
@@ -787,7 +768,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
pstr->valid_raw_len = pstr->valid_len;
}
else
-#endif /* RE_ENABLE_I18N */
{
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
pstr->valid_raw_len = 0;
@@ -807,7 +787,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
pstr->stop -= offset;
/* Then build the buffers. */
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
if (pstr->icase)
@@ -820,7 +799,6 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int
eflags)
build_wcs_buffer (pstr);
}
else
-#endif /* RE_ENABLE_I18N */
if (__glibc_unlikely (pstr->mbs_allocated))
{
if (pstr->icase)
@@ -846,28 +824,22 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx
idx)
if (__glibc_likely (!pstr->mbs_allocated))
return re_string_peek_byte (pstr, idx);
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
return re_string_peek_byte (pstr, idx);
-#endif
off = pstr->cur_idx + idx;
-#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
off = pstr->offsets[off];
-#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
-#ifdef RE_ENABLE_I18N
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
this function returns CAPITAL LETTER I instead of first byte of
DOTLESS SMALL LETTER I. The latter would confuse the parser,
since peek_byte_case doesn't advance cur_idx in any way. */
if (pstr->offsets_needed && !isascii (ch))
return re_string_peek_byte (pstr, idx);
-#endif
return ch;
}
@@ -878,7 +850,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
if (__glibc_likely (!pstr->mbs_allocated))
return re_string_fetch_byte (pstr);
-#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
{
Idx off;
@@ -904,7 +875,6 @@ re_string_fetch_byte_case (re_string_t *pstr)
re_string_char_size_at (pstr, pstr->cur_idx));
return ch;
}
-#endif
return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
}
@@ -912,10 +882,8 @@ re_string_fetch_byte_case (re_string_t *pstr)
static void
re_string_destruct (re_string_t *pstr)
{
-#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
re_free (pstr->offsets);
-#endif /* RE_ENABLE_I18N */
if (pstr->mbs_allocated)
re_free (pstr->mbs);
}
@@ -933,7 +901,6 @@ re_string_context_at (const re_string_t *input, Idx idx,
int eflags)
if (__glibc_unlikely (idx == input->len))
return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc;
@@ -953,7 +920,6 @@ re_string_context_at (const re_string_t *input, Idx idx,
int eflags)
? CONTEXT_NEWLINE : 0);
}
else
-#endif
{
c = re_string_byte_at (input, idx);
if (bitset_contain (input->word_char, c))
@@ -1451,11 +1417,9 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
}
dfa->nodes[dfa->nodes_len] = token;
dfa->nodes[dfa->nodes_len].constraint = 0;
-#ifdef RE_ENABLE_I18N
dfa->nodes[dfa->nodes_len].accept_mb =
((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
|| token.type == COMPLEX_BRACKET);
-#endif
dfa->nexts[dfa->nodes_len] = -1;
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
@@ -1651,9 +1615,7 @@ create_ci_newstate (const re_dfa_t *dfa, const
re_node_set *nodes,
re_token_type_t type = node->type;
if (type == CHARACTER && !node->constraint)
continue;
-#ifdef RE_ENABLE_I18N
newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
if (type == END_OF_RE)
@@ -1705,9 +1667,7 @@ create_cd_newstate (const re_dfa_t *dfa, const
re_node_set *nodes,
if (type == CHARACTER && !constraint)
continue;
-#ifdef RE_ENABLE_I18N
newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
if (type == END_OF_RE)
diff --git a/support/regex_internal.h b/support/regex_internal.h
index 1245e78..8493db2 100644
--- a/support/regex_internal.h
+++ b/support/regex_internal.h
@@ -116,10 +116,6 @@
# define gettext_noop(String) String
#endif
-#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
-# define RE_ENABLE_I18N
-#endif
-
/* Number of ASCII characters. */
#define ASCII_CHARS 0x80
@@ -150,6 +146,11 @@
# define __regfree regfree
#endif /* not _LIBC */
+/* Types related to integers. Unless protected by #ifdef _LIBC, the
+ regex code should avoid exact-width types like int32_t and uint64_t
+ as some non-GCC platforms lack them, an issue when this code is
+ used in Gnulib. */
+
#ifndef SSIZE_MAX
# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
#endif
@@ -246,10 +247,8 @@ typedef enum
SIMPLE_BRACKET = 3,
OP_BACK_REF = 4,
OP_PERIOD = 5,
-#ifdef RE_ENABLE_I18N
COMPLEX_BRACKET = 6,
OP_UTF8_PERIOD = 7,
-#endif /* RE_ENABLE_I18N */
/* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
when the debugger shows values of this enum type. */
@@ -287,30 +286,29 @@ typedef enum
} re_token_type_t;
-#ifdef RE_ENABLE_I18N
typedef struct
{
/* Multibyte characters. */
wchar_t *mbchars;
+#ifdef _LIBC
/* Collating symbols. */
-# ifdef _LIBC
int32_t *coll_syms;
-# endif
+#endif
+#ifdef _LIBC
/* Equivalence classes. */
-# ifdef _LIBC
int32_t *equiv_classes;
-# endif
+#endif
/* Range expressions. */
-# ifdef _LIBC
+#ifdef _LIBC
uint32_t *range_starts;
uint32_t *range_ends;
-# else /* not _LIBC */
+#else
wchar_t *range_starts;
wchar_t *range_ends;
-# endif /* not _LIBC */
+#endif
/* Character classes. */
wctype_t *char_classes;
@@ -333,7 +331,6 @@ typedef struct
/* # of character classes. */
Idx nchar_classes;
} re_charset_t;
-#endif /* RE_ENABLE_I18N */
typedef struct
{
@@ -341,9 +338,7 @@ typedef struct
{
unsigned char c; /* for CHARACTER */
re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
-#ifdef RE_ENABLE_I18N
re_charset_t *mbcset; /* for COMPLEX_BRACKET */
-#endif /* RE_ENABLE_I18N */
Idx idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
} opr;
@@ -355,12 +350,10 @@ typedef struct
unsigned int constraint : 10; /* context constraint */
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
-#ifdef RE_ENABLE_I18N
unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
-#endif
unsigned int word_char : 1;
} re_token_t;
@@ -375,12 +368,10 @@ struct re_string_t
REG_ICASE, upper cases of the string are stored, otherwise MBS points
the same address that RAW_MBS points. */
unsigned char *mbs;
-#ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */
wint_t *wcs;
Idx *offsets;
mbstate_t cur_state;
-#endif
/* Index in RAW_MBS. Each character mbs[i] corresponds to
raw_mbs[raw_mbs_idx + i]. */
Idx raw_mbs_idx;
@@ -779,7 +770,6 @@ bitset_mask (bitset_t dest, const bitset_t src)
dest[bitset_i] &= src[bitset_i];
}
-#ifdef RE_ENABLE_I18N
/* Functions for re_string. */
static int
__attribute__ ((pure, unused))
@@ -803,15 +793,15 @@ re_string_wchar_at (const re_string_t *pstr, Idx idx)
return (wint_t) pstr->wcs[idx];
}
-# ifdef _LIBC
-# include <locale/weight.h>
-# endif
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
static int
__attribute__ ((pure, unused))
re_string_elem_size_at (const re_string_t *pstr, Idx idx)
{
-# ifdef _LIBC
+#ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -827,11 +817,10 @@ re_string_elem_size_at (const re_string_t *pstr, Idx idx)
findidx (table, indirect, extra, &p, pstr->len - idx);
return p - pstr->mbs - idx;
}
- else
-# endif /* _LIBC */
- return 1;
+#endif /* _LIBC */
+
+ return 1;
}
-#endif /* RE_ENABLE_I18N */
#ifdef _LIBC
# if __GNUC__ >= 7
diff --git a/support/regexec.c b/support/regexec.c
index 6aeba3c..3196708 100644
--- a/support/regexec.c
+++ b/support/regexec.c
@@ -67,11 +67,9 @@ static reg_errcode_t set_regs (const regex_t *preg,
bool fl_backtrack);
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
-#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const re_match_context_t *mctx,
re_sift_context_t *sctx,
Idx node_idx, Idx str_idx, Idx max_str_idx);
-#endif /* RE_ENABLE_I18N */
static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
re_sift_context_t *sctx);
static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
@@ -123,10 +121,8 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
re_match_context_t *mctx,
re_dfastate_t *pstate);
#endif
-#ifdef RE_ENABLE_I18N
static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
re_dfastate_t *pstate);
-#endif /* RE_ENABLE_I18N */
static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
const re_node_set *nodes);
static reg_errcode_t get_subexp (re_match_context_t *mctx,
@@ -156,14 +152,12 @@ static reg_errcode_t expand_bkref_cache
(re_match_context_t *mctx,
re_node_set *cur_nodes, Idx cur_str,
Idx subexp_num, int type);
static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
-#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
const re_string_t *input, Idx idx);
-# ifdef _LIBC
+#ifdef _LIBC
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
size_t name_len);
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
+#endif
static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
const re_dfastate_t *state,
re_node_set *states_node,
@@ -779,12 +773,10 @@ re_search_internal (const regex_t *preg, const char
*string, Idx length,
if (__glibc_unlikely (err != REG_NOERROR))
goto free_return;
-#ifdef RE_ENABLE_I18N
- /* Don't consider this char as a possible match start if it part,
- yet isn't the head, of a multibyte character. */
+ /* Don't consider this char as a possible match start if it part,
+ yet isn't the head, of a multibyte character. */
if (!sb && !re_string_first_byte (&mctx.input, 0))
continue;
-#endif
/* It seems to be appropriate one, then use the matcher. */
/* We assume that the matching starts from 0. */
@@ -858,7 +850,6 @@ re_search_internal (const regex_t *preg, const char
*string, Idx length,
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so != -1)
{
-#ifdef RE_ENABLE_I18N
if (__glibc_unlikely (mctx.input.offsets_needed != 0))
{
pmatch[reg_idx].rm_so =
@@ -870,9 +861,6 @@ re_search_internal (const regex_t *preg, const char
*string, Idx length,
? mctx.input.valid_raw_len
: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
}
-#else
- DEBUG_ASSERT (mctx.input.offsets_needed == 0);
-#endif
pmatch[reg_idx].rm_so += match_first;
pmatch[reg_idx].rm_eo += match_first;
}
@@ -996,8 +984,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
We must select appropriate initial state depending on the context,
since initial states may have constraints like "\<", "^", etc.. */
-static inline re_dfastate_t *
-__attribute__ ((always_inline))
+static __always_inline re_dfastate_t *
acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
Idx idx)
{
@@ -1261,12 +1248,9 @@ proceed_next_node (const re_match_context_t *mctx, Idx
nregs, regmatch_t *regs,
Idx naccepted = 0;
re_token_type_t type = dfa->nodes[node].type;
-#ifdef RE_ENABLE_I18N
if (dfa->nodes[node].accept_mb)
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
- else
-#endif /* RE_ENABLE_I18N */
- if (type == OP_BACK_REF)
+ else if (type == OP_BACK_REF)
{
Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
if (subexp_idx < nregs)
@@ -1634,12 +1618,10 @@ build_sifted_states (const re_match_context_t *mctx,
re_sift_context_t *sctx,
bool ok;
DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
-#ifdef RE_ENABLE_I18N
/* If the node may accept "multi byte". */
if (dfa->nodes[prev_node].accept_mb)
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
str_idx, sctx->last_str_idx);
-#endif /* RE_ENABLE_I18N */
/* We don't check backreferences here.
See update_cur_sifted_state(). */
@@ -1688,6 +1670,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx
next_state_log_idx)
if (top < next_state_log_idx)
{
+ DEBUG_ASSERT (mctx->state_log != NULL);
memset (mctx->state_log + top + 1, '\0',
sizeof (re_dfastate_t *) * (next_state_log_idx - top));
mctx->state_log_top = next_state_log_idx;
@@ -2176,7 +2159,6 @@ sift_states_bkref (const re_match_context_t *mctx,
re_sift_context_t *sctx,
}
-#ifdef RE_ENABLE_I18N
static int
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
Idx node_idx, Idx str_idx, Idx max_str_idx)
@@ -2196,8 +2178,6 @@ sift_states_iter_mb (const re_match_context_t *mctx,
re_sift_context_t *sctx,
'naccepted' bytes input. */
return naccepted;
}
-#endif /* RE_ENABLE_I18N */
-
/* Functions for state transition. */
@@ -2215,7 +2195,6 @@ transit_state (reg_errcode_t *err, re_match_context_t
*mctx,
re_dfastate_t **trtable;
unsigned char ch;
-#ifdef RE_ENABLE_I18N
/* If the current state can accept multibyte. */
if (__glibc_unlikely (state->accept_mb))
{
@@ -2223,7 +2202,6 @@ transit_state (reg_errcode_t *err, re_match_context_t
*mctx,
if (__glibc_unlikely (*err != REG_NOERROR))
return NULL;
}
-#endif /* RE_ENABLE_I18N */
/* Then decide the next state with the single byte. */
#if 0
@@ -2444,7 +2422,6 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t
*mctx,
}
#endif
-#ifdef RE_ENABLE_I18N
static reg_errcode_t
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
{
@@ -2512,7 +2489,6 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t
*pstate)
}
return REG_NOERROR;
}
-#endif /* RE_ENABLE_I18N */
static reg_errcode_t
transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
@@ -3002,9 +2978,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx,
Idx str_idx,
const re_dfa_t *const dfa = mctx->dfa;
bool ok;
Idx cur_idx;
-#ifdef RE_ENABLE_I18N
reg_errcode_t err = REG_NOERROR;
-#endif
re_node_set union_set;
re_node_set_init_empty (&union_set);
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
@@ -3013,7 +2987,6 @@ check_arrival_add_next_nodes (re_match_context_t *mctx,
Idx str_idx,
Idx cur_node = cur_nodes->elems[cur_idx];
DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
-#ifdef RE_ENABLE_I18N
/* If the node may accept "multi byte". */
if (dfa->nodes[cur_node].accept_mb)
{
@@ -3051,7 +3024,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx,
Idx str_idx,
}
}
}
-#endif /* RE_ENABLE_I18N */
+
if (naccepted
|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
{
@@ -3475,18 +3448,15 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
}
else if (type == OP_PERIOD)
{
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
bitset_merge (accepts, dfa->sb_char);
else
-#endif
bitset_set_all (accepts);
if (!(dfa->syntax & RE_DOT_NEWLINE))
bitset_clear (accepts, '\n');
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
}
-#ifdef RE_ENABLE_I18N
else if (type == OP_UTF8_PERIOD)
{
if (ASCII_CHARS % BITSET_WORD_BITS == 0)
@@ -3498,7 +3468,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
if (dfa->syntax & RE_DOT_NOT_NULL)
bitset_clear (accepts, '\0');
}
-#endif
else
continue;
@@ -3529,12 +3498,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
bitset_empty (accepts);
continue;
}
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= (dfa->word_char[j] |
~dfa->sb_char[j]));
else
-#endif
for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= dfa->word_char[j]);
if (!any_set)
@@ -3548,12 +3515,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
bitset_empty (accepts);
continue;
}
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~(dfa->word_char[j] &
dfa->sb_char[j]));
else
-#endif
for (j = 0; j < BITSET_WORDS; ++j)
any_set |= (accepts[j] &= ~dfa->word_char[j]);
if (!any_set)
@@ -3630,7 +3595,6 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
return -1;
}
-#ifdef RE_ENABLE_I18N
/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
Return the number of the bytes the node accepts.
STR_IDX is the current index of the input string.
@@ -3639,9 +3603,9 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const
re_dfastate_t *state,
one collating element like '.', '[a-z]', opposite to the other nodes
can only accept one byte. */
-# ifdef _LIBC
-# include <locale/weight.h>
-# endif
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
static int
check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
@@ -3725,12 +3689,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx
node_idx,
if (node->type == COMPLEX_BRACKET)
{
const re_charset_t *cset = node->opr.mbcset;
-# ifdef _LIBC
+#ifdef _LIBC
const unsigned char *pin
= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
Idx j;
uint32_t nrules;
-# endif /* _LIBC */
+#endif
int match_len = 0;
wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
? re_string_wchar_at (input, str_idx) : 0);
@@ -3753,7 +3717,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx
node_idx,
}
}
-# ifdef _LIBC
+#ifdef _LIBC
nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
{
@@ -3842,7 +3806,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx
node_idx,
}
}
else
-# endif /* _LIBC */
+#endif /* _LIBC */
{
/* match with range expression? */
for (i = 0; i < cset->nranges; ++i)
@@ -3868,7 +3832,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx
node_idx,
return 0;
}
-# ifdef _LIBC
+#ifdef _LIBC
static unsigned int
find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
{
@@ -3926,8 +3890,7 @@ find_collation_sequence_value (const unsigned char *mbs,
size_t mbs_len)
return UINT_MAX;
}
}
-# endif /* _LIBC */
-#endif /* RE_ENABLE_I18N */
+#endif /* _LIBC */
/* Check whether the node accepts the byte which is IDX-th
byte of the INPUT. */
@@ -3950,12 +3913,10 @@ check_node_accept (const re_match_context_t *mctx,
const re_token_t *node,
return false;
break;
-#ifdef RE_ENABLE_I18N
case OP_UTF8_PERIOD:
if (ch >= ASCII_CHARS)
return false;
FALLTHROUGH;
-#endif
case OP_PERIOD:
if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
|| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
@@ -4016,7 +3977,6 @@ extend_buffers (re_match_context_t *mctx, int min_len)
/* Then reconstruct the buffers. */
if (pstr->icase)
{
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
ret = build_wcs_upper_buffer (pstr);
@@ -4024,16 +3984,13 @@ extend_buffers (re_match_context_t *mctx, int min_len)
return ret;
}
else
-#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);
}
else
{
-#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
build_wcs_buffer (pstr);
else
-#endif /* RE_ENABLE_I18N */
{
if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
-----------------------------------------------------------------------
Summary of changes:
build-aux/ChangeLog | 4 +
build-aux/texinfo.tex | 14 +-
doc/ChangeLog | 4 +
doc/texinfo.tex | 14 +-
support/ChangeLog | 5 +
support/intprops.h | 15 +-
support/regcomp.c | 813 ++++++++++++++++++++---------------------------
support/regex_internal.c | 40 ---
support/regex_internal.h | 49 ++-
support/regexec.c | 77 +----
10 files changed, 409 insertions(+), 626 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4365-gff3c50b,
Arnold Robbins <=