[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Optimize away some mbsinit calls
From: |
Bruno Haible |
Subject: |
Optimize away some mbsinit calls |
Date: |
Tue, 11 Jul 2023 21:39:23 +0200 |
When the module 'mbrtoc32-regular' is in use, we can make use of its
guarantees in the caller code.
On glibc systems in particular, the mbsinit() call is already made inside
rpl_mbrtoc32. We can therefore optimize away the mbsinit() call outside of it, in the callers.
According to my profiling (of code that uses 'mbuiter'), this reduces
the time spent in mbsinit() from 4% to 2%. That is, it provides a 2%
speedup. I'm attaching the profiling data (before and after).
2023-07-11 Bruno Haible <bruno@clisp.org>
Optimize away some mbsinit calls.
* lib/mbiter.h (mbiter_multi_next): When the module 'mbrtoc32-regular'
is in use, don't invoke mbsinit and don't compare the mbrtoc32 result
against (size_t)(-3).
* lib/mbuiter.h (mbuiter_multi_next): Likewise.
* lib/mbfile.h (mbfile_multi_getc): Likewise.
* lib/mbswidth.c (mbsnwidth): Likewise.
* lib/mbmemcasecoll.c (apply_c32tolower): Likewise.
* lib/quotearg.c (quotearg_buffer_restyled): Likewise.
diff --git a/lib/mbfile.h b/lib/mbfile.h
index 6c971e64ab..74d4986577 100644
--- a/lib/mbfile.h
+++ b/lib/mbfile.h
@@ -97,7 +97,11 @@ mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi
*mbf)
/* If mbf->state is not in an initial state, some more 32-bit wide character
may be hiding in the state. We need to call mbrtoc32 again. */
+ #if GNULIB_MBRTOC32_REGULAR
+ assert (mbsinit (&mbf->state));
+ #else
if (mbsinit (&mbf->state))
+ #endif
{
/* Before using mbrtoc32, we need at least one byte. */
if (new_bufcount == 0)
@@ -185,10 +189,12 @@ mbfile_multi_getc (struct mbchar *mbc, struct
mbfile_multi *mbf)
assert (mbf->buf[0] == '\0');
assert (mbc->wc == 0);
}
+ #if !GNULIB_MBRTOC32_REGULAR
else if (bytes == (size_t) -3)
/* The previous multibyte sequence produced an additional 32-bit
wide character. */
bytes = 0;
+ #endif
mbc->wc_valid = true;
break;
}
diff --git a/lib/mbiter.h b/lib/mbiter.h
index 963ccff1f7..179338a856 100644
--- a/lib/mbiter.h
+++ b/lib/mbiter.h
@@ -169,15 +169,19 @@ mbiter_multi_next (struct mbiter_multi *iter)
assert (*iter->cur.ptr == '\0');
assert (iter->cur.wc == 0);
}
+ #if !GNULIB_MBRTOC32_REGULAR
else if (iter->cur.bytes == (size_t) -3)
/* The previous multibyte sequence produced an additional 32-bit
wide character. */
iter->cur.bytes = 0;
+ #endif
iter->cur.wc_valid = true;
/* When in an initial state, we can go back treating ASCII
characters more quickly. */
+ #if !GNULIB_MBRTOC32_REGULAR
if (mbsinit (&iter->state))
+ #endif
iter->in_shift = false;
}
}
diff --git a/lib/mbmemcasecoll.c b/lib/mbmemcasecoll.c
index f79f262dc0..0b765ff05f 100644
--- a/lib/mbmemcasecoll.c
+++ b/lib/mbmemcasecoll.c
@@ -54,7 +54,7 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
mbstate_t state;
memset (&state, '\0', sizeof (mbstate_t));
- do
+ for (;;)
{
char32_t wc1;
size_t n1;
@@ -86,8 +86,10 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
if (n1 == 0) /* NUL character? */
n1 = 1;
+ #if !GNULIB_MBRTOC32_REGULAR
else if (n1 == (size_t)(-3))
n1 = 0;
+ #endif
wc2 = c32tolower (wc1);
if (wc2 != wc1)
@@ -112,8 +114,11 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
inbuf += n1;
remaining -= n1;
}
+ #if !GNULIB_MBRTOC32_REGULAR
+ if (mbsinit (&state))
+ #endif
+ break;
}
- while (! mbsinit (&state));
}
/* Verify the output buffer was large enough. */
diff --git a/lib/mbswidth.c b/lib/mbswidth.c
index 6b26c6a599..a1613dcad6 100644
--- a/lib/mbswidth.c
+++ b/lib/mbswidth.c
@@ -95,7 +95,7 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
{
mbstate_t mbstate;
memset (&mbstate, 0, sizeof mbstate);
- do
+ for (;;)
{
char32_t wc;
size_t bytes;
@@ -132,8 +132,10 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
if (bytes == 0)
/* A null wide character was encountered. */
bytes = 1;
+ #if !GNULIB_MBRTOC32_REGULAR
else if (bytes == (size_t) -3)
bytes = 0;
+ #endif
w = c32width (wc);
if (w >= 0)
@@ -158,8 +160,11 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
return -1;
p += bytes;
+ #if !GNULIB_MBRTOC32_REGULAR
+ if (mbsinit (&mbstate))
+ #endif
+ break;
}
- while (! mbsinit (&mbstate));
}
break;
}
diff --git a/lib/mbuiter.h b/lib/mbuiter.h
index 7900a48715..7cadb8b402 100644
--- a/lib/mbuiter.h
+++ b/lib/mbuiter.h
@@ -176,15 +176,19 @@ mbuiter_multi_next (struct mbuiter_multi *iter)
assert (*iter->cur.ptr == '\0');
assert (iter->cur.wc == 0);
}
+ #if !GNULIB_MBRTOC32_REGULAR
else if (iter->cur.bytes == (size_t) -3)
/* The previous multibyte sequence produced an additional 32-bit
wide character. */
iter->cur.bytes = 0;
+ #endif
iter->cur.wc_valid = true;
/* When in an initial state, we can go back treating ASCII
characters more quickly. */
+ #if !GNULIB_MBRTOC32_REGULAR
if (mbsinit (&iter->state))
+ #endif
iter->in_shift = false;
}
}
diff --git a/lib/quotearg.c b/lib/quotearg.c
index db915efa67..5b26055b2e 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -614,7 +614,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
if (argsize == SIZE_MAX)
argsize = strlen (arg);
- do
+ for (;;)
{
char32_t w;
size_t bytes = mbrtoc32 (&w, &arg[i + m],
@@ -635,8 +635,10 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
}
else
{
+ #if !GNULIB_MBRTOC32_REGULAR
if (bytes == (size_t) -3)
bytes = 0;
+ #endif
/* Work around a bug with older shells that "see" a '\'
that is really the 2nd byte of a multibyte
character.
In practice the problem is limited to ASCII
@@ -661,8 +663,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
printable = false;
m += bytes;
}
+ #if !GNULIB_MBRTOC32_REGULAR
+ if (mbsinit (&mbstate))
+ #endif
+ break;
}
- while (! mbsinit (&mbstate));
}
c_and_shell_quote_compat = printable;
callgrind.out.before
Description: application/kcachegrind
callgrind.out.after
Description: application/kcachegrind
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Optimize away some mbsinit calls,
Bruno Haible <=