[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.18-31-gbc0732d
From: |
Paul Eggert |
Subject: |
grep branch, master, updated. v2.18-31-gbc0732d |
Date: |
Mon, 24 Mar 2014 01:13:10 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".
The branch, master has been updated
via bc0732da3a2c636b87a0173c2129162dbd806037 (commit)
via 671698472dea3b5ba4bb71074aa01b8c48bdb6c0 (commit)
from b639643840ef506594b6c46e5b24d9980a33e78e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=bc0732da3a2c636b87a0173c2129162dbd806037
commit bc0732da3a2c636b87a0173c2129162dbd806037
Author: Paul Eggert <address@hidden>
Date: Sun Mar 23 17:38:17 2014 -0700
grep: fix and simplify grep -iF optimization
* src/grep.c (check_any_alphabets): Remove.
(fgrep_to_grep_pattern): Fix problems when mbrtowc returns -1 or -2.
Simplify a bit.
(main): Don't bother optimizing 'grep -iF PAT' when PAT contains no
alphabetics; it's so rare it's not worth the complexity.
diff --git a/src/grep.c b/src/grep.c
index 709e7b5..a1bccdb 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1904,74 +1904,40 @@ parse_grep_colors (void)
? (*(s) = wctob ((wint_t) (wc)), 1) \
: wcrtomb (s, wc, ps))
-/* Check wether a string has any alphabets. */
-static bool
-check_any_alphabets (size_t len, char const *keys)
-{
- while (len)
- {
- mbstate_t mb_state = { 0 };
- wchar_t wc;
- size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
- if ((size_t) -2 <= n)
- {
- keys++;
- len--;
- continue;
- }
-
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- int nfolded = case_folded_counterparts (wc, folded);
- if (nfolded > 0)
- return true;
- keys += n;
- len -= n;
- }
-
- return false;
-}
-
/* Change a pattern for fgrep into grep. */
static void
fgrep_to_grep_pattern (size_t len, char const *keys,
size_t *new_len, char **new_keys)
{
- *new_keys = xnmalloc (len + 1, 2);
- char *p = *new_keys;
+ char *p = *new_keys = xnmalloc (len + 1, 2);
+ mbstate_t mb_state = { 0 };
+ size_t n;
- while (len)
+ for (; len; keys += n, len -= n)
{
- mbstate_t mb_state = { 0 };
wchar_t wc;
- size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
- if ((size_t) -2 <= n)
- *p++ = *keys++;
- else if (n == 1)
+ n = MBRTOWC (&wc, keys, len, &mb_state);
+ switch (n)
{
- switch (*keys)
- {
- case '\\':
- case '[':
- case ']':
- case '^':
- case '$':
- case '.':
- case '*':
- *p++ = '\\';
- default:
- *p++ = *keys;
- }
- }
- else
- {
- memcpy (p, keys, n);
- p += n;
- }
+ case (size_t) -2:
+ n = len;
+ /* Fall through. */
+ default:
+ p = mempcpy (p, keys, n);
+ break;
- keys += n;
- len -= n;
+ case (size_t) -1:
+ memset (&mb_state, 0, sizeof mb_state);
+ /* Fall through. */
+ case 1:
+ *p = '\\';
+ p += strchr ("$*.[\\^", *keys) != NULL;
+ /* Fall through. */
+ case 0:
+ *p++ = *keys;
+ n = 1;
+ break;
+ }
}
*new_len = p - *new_keys;
@@ -2452,25 +2418,19 @@ main (int argc, char **argv)
else
usage (EXIT_TROUBLE);
- /* If the matcher is fgrep and case-insensitive and keys including any
- alphabets, change it into grep matcher by escape of keys. If keys
- include no alphabet, can turn off match_icase flag. */
- if (match_icase && MB_CUR_MAX > 1 && keycc
- && compile == Fcompile)
+ /* If case-insensitive fgrep in a multibyte locale, improve
+ performance by using grep instead. */
+ if (match_icase && compile == Fcompile && MB_CUR_MAX > 1)
{
- if (check_any_alphabets (keycc, keys))
- {
- size_t new_keycc;
- char *new_keys;
- fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
- free (keys);
- keys = new_keys;
- keycc = new_keycc;
- matcher = NULL;
- setmatcher ("grep");
- }
- else
- match_icase = 0;
+ size_t new_keycc;
+ char *new_keys;
+ fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ matcher = "grep";
+ compile = Gcompile;
+ execute = EGexecute;
}
/* Case-insensitive matching is expensive in multibyte locales
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=671698472dea3b5ba4bb71074aa01b8c48bdb6c0
commit bc0732da3a2c636b87a0173c2129162dbd806037
Author: Paul Eggert <address@hidden>
Date: Sun Mar 23 17:38:17 2014 -0700
grep: fix and simplify grep -iF optimization
* src/grep.c (check_any_alphabets): Remove.
(fgrep_to_grep_pattern): Fix problems when mbrtowc returns -1 or -2.
Simplify a bit.
(main): Don't bother optimizing 'grep -iF PAT' when PAT contains no
alphabetics; it's so rare it's not worth the complexity.
diff --git a/src/grep.c b/src/grep.c
index 709e7b5..a1bccdb 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1904,74 +1904,40 @@ parse_grep_colors (void)
? (*(s) = wctob ((wint_t) (wc)), 1) \
: wcrtomb (s, wc, ps))
-/* Check wether a string has any alphabets. */
-static bool
-check_any_alphabets (size_t len, char const *keys)
-{
- while (len)
- {
- mbstate_t mb_state = { 0 };
- wchar_t wc;
- size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
- if ((size_t) -2 <= n)
- {
- keys++;
- len--;
- continue;
- }
-
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- int nfolded = case_folded_counterparts (wc, folded);
- if (nfolded > 0)
- return true;
- keys += n;
- len -= n;
- }
-
- return false;
-}
-
/* Change a pattern for fgrep into grep. */
static void
fgrep_to_grep_pattern (size_t len, char const *keys,
size_t *new_len, char **new_keys)
{
- *new_keys = xnmalloc (len + 1, 2);
- char *p = *new_keys;
+ char *p = *new_keys = xnmalloc (len + 1, 2);
+ mbstate_t mb_state = { 0 };
+ size_t n;
- while (len)
+ for (; len; keys += n, len -= n)
{
- mbstate_t mb_state = { 0 };
wchar_t wc;
- size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
- if ((size_t) -2 <= n)
- *p++ = *keys++;
- else if (n == 1)
+ n = MBRTOWC (&wc, keys, len, &mb_state);
+ switch (n)
{
- switch (*keys)
- {
- case '\\':
- case '[':
- case ']':
- case '^':
- case '$':
- case '.':
- case '*':
- *p++ = '\\';
- default:
- *p++ = *keys;
- }
- }
- else
- {
- memcpy (p, keys, n);
- p += n;
- }
+ case (size_t) -2:
+ n = len;
+ /* Fall through. */
+ default:
+ p = mempcpy (p, keys, n);
+ break;
- keys += n;
- len -= n;
+ case (size_t) -1:
+ memset (&mb_state, 0, sizeof mb_state);
+ /* Fall through. */
+ case 1:
+ *p = '\\';
+ p += strchr ("$*.[\\^", *keys) != NULL;
+ /* Fall through. */
+ case 0:
+ *p++ = *keys;
+ n = 1;
+ break;
+ }
}
*new_len = p - *new_keys;
@@ -2452,25 +2418,19 @@ main (int argc, char **argv)
else
usage (EXIT_TROUBLE);
- /* If the matcher is fgrep and case-insensitive and keys including any
- alphabets, change it into grep matcher by escape of keys. If keys
- include no alphabet, can turn off match_icase flag. */
- if (match_icase && MB_CUR_MAX > 1 && keycc
- && compile == Fcompile)
+ /* If case-insensitive fgrep in a multibyte locale, improve
+ performance by using grep instead. */
+ if (match_icase && compile == Fcompile && MB_CUR_MAX > 1)
{
- if (check_any_alphabets (keycc, keys))
- {
- size_t new_keycc;
- char *new_keys;
- fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
- free (keys);
- keys = new_keys;
- keycc = new_keycc;
- matcher = NULL;
- setmatcher ("grep");
- }
- else
- match_icase = 0;
+ size_t new_keycc;
+ char *new_keys;
+ fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ matcher = "grep";
+ compile = Gcompile;
+ execute = EGexecute;
}
/* Case-insensitive matching is expensive in multibyte locales
-----------------------------------------------------------------------
Summary of changes:
src/grep.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 54 insertions(+), 0 deletions(-)
hooks/post-receive
--
grep
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- grep branch, master, updated. v2.18-31-gbc0732d,
Paul Eggert <=