bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Optimize away some mbsinit calls


From: Bruno Haible
Subject: Optimize away some mbsinit calls
Date: Tue, 11 Jul 2023 21:39:23 +0200

When the module 'mbrtoc32-regular' is in use, we can make use of its
guarantees in the caller code.

On glibc systems in particular, rpl_mbrtoc32 already calls mbsinit()
internally, so the additional mbsinit() call in the caller can be
optimized away.

According to my profiling (of code that uses 'mbuiter'), this reduces
the time spent in mbsinit() from 4% to 2%. That is, it provides a 2%
speedup. I'm attaching the profiling data (before and after).


2023-07-11  Bruno Haible  <bruno@clisp.org>

        Optimize away some mbsinit calls.
        * lib/mbiter.h (mbiter_multi_next): When the module 'mbrtoc32-regular'
        is in use, don't invoke mbsinit and don't compare the mbrtoc32 result
        against (size_t)(-3).
        * lib/mbuiter.h (mbuiter_multi_next): Likewise.
        * lib/mbfile.h (mbfile_multi_getc): Likewise.
        * lib/mbswidth.c (mbsnwidth): Likewise.
        * lib/mbmemcasecoll.c (apply_c32tolower): Likewise.
        * lib/quotearg.c (quotearg_buffer_restyled): Likewise.

diff --git a/lib/mbfile.h b/lib/mbfile.h
index 6c971e64ab..74d4986577 100644
--- a/lib/mbfile.h
+++ b/lib/mbfile.h
@@ -97,7 +97,11 @@ mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf)
 
   /* If mbf->state is not in an initial state, some more 32-bit wide character
      may be hiding in the state.  We need to call mbrtoc32 again.  */
+  #if GNULIB_MBRTOC32_REGULAR
+  assert (mbsinit (&mbf->state));
+  #else
   if (mbsinit (&mbf->state))
+  #endif
     {
       /* Before using mbrtoc32, we need at least one byte.  */
       if (new_bufcount == 0)
@@ -185,10 +189,12 @@ mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf)
               assert (mbf->buf[0] == '\0');
               assert (mbc->wc == 0);
             }
+          #if !GNULIB_MBRTOC32_REGULAR
           else if (bytes == (size_t) -3)
             /* The previous multibyte sequence produced an additional 32-bit
                wide character.  */
             bytes = 0;
+          #endif
           mbc->wc_valid = true;
           break;
         }
diff --git a/lib/mbiter.h b/lib/mbiter.h
index 963ccff1f7..179338a856 100644
--- a/lib/mbiter.h
+++ b/lib/mbiter.h
@@ -169,15 +169,19 @@ mbiter_multi_next (struct mbiter_multi *iter)
               assert (*iter->cur.ptr == '\0');
               assert (iter->cur.wc == 0);
             }
+          #if !GNULIB_MBRTOC32_REGULAR
           else if (iter->cur.bytes == (size_t) -3)
             /* The previous multibyte sequence produced an additional 32-bit
                wide character.  */
             iter->cur.bytes = 0;
+          #endif
           iter->cur.wc_valid = true;
 
           /* When in an initial state, we can go back treating ASCII
              characters more quickly.  */
+          #if !GNULIB_MBRTOC32_REGULAR
           if (mbsinit (&iter->state))
+          #endif
             iter->in_shift = false;
         }
     }
diff --git a/lib/mbmemcasecoll.c b/lib/mbmemcasecoll.c
index f79f262dc0..0b765ff05f 100644
--- a/lib/mbmemcasecoll.c
+++ b/lib/mbmemcasecoll.c
@@ -54,7 +54,7 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
       mbstate_t state;
 
       memset (&state, '\0', sizeof (mbstate_t));
-      do
+      for (;;)
         {
           char32_t wc1;
           size_t n1;
@@ -86,8 +86,10 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
 
               if (n1 == 0) /* NUL character? */
                 n1 = 1;
+              #if !GNULIB_MBRTOC32_REGULAR
               else if (n1 == (size_t)(-3))
                 n1 = 0;
+              #endif
 
               wc2 = c32tolower (wc1);
               if (wc2 != wc1)
@@ -112,8 +114,11 @@ apply_c32tolower (const char *inbuf, size_t inbufsize,
               inbuf += n1;
               remaining -= n1;
             }
+          #if !GNULIB_MBRTOC32_REGULAR
+          if (mbsinit (&state))
+          #endif
+            break;
         }
-      while (! mbsinit (&state));
     }
 
   /* Verify the output buffer was large enough.  */
diff --git a/lib/mbswidth.c b/lib/mbswidth.c
index 6b26c6a599..a1613dcad6 100644
--- a/lib/mbswidth.c
+++ b/lib/mbswidth.c
@@ -95,7 +95,7 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
               {
                 mbstate_t mbstate;
                 memset (&mbstate, 0, sizeof mbstate);
-                do
+                for (;;)
                   {
                     char32_t wc;
                     size_t bytes;
@@ -132,8 +132,10 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
                     if (bytes == 0)
                       /* A null wide character was encountered.  */
                       bytes = 1;
+                    #if !GNULIB_MBRTOC32_REGULAR
                     else if (bytes == (size_t) -3)
                       bytes = 0;
+                    #endif
 
                     w = c32width (wc);
                     if (w >= 0)
@@ -158,8 +160,11 @@ mbsnwidth (const char *string, size_t nbytes, int flags)
                         return -1;
 
                     p += bytes;
+                    #if !GNULIB_MBRTOC32_REGULAR
+                    if (mbsinit (&mbstate))
+                    #endif
+                      break;
                   }
-                while (! mbsinit (&mbstate));
               }
               break;
           }
diff --git a/lib/mbuiter.h b/lib/mbuiter.h
index 7900a48715..7cadb8b402 100644
--- a/lib/mbuiter.h
+++ b/lib/mbuiter.h
@@ -176,15 +176,19 @@ mbuiter_multi_next (struct mbuiter_multi *iter)
               assert (*iter->cur.ptr == '\0');
               assert (iter->cur.wc == 0);
             }
+          #if !GNULIB_MBRTOC32_REGULAR
           else if (iter->cur.bytes == (size_t) -3)
             /* The previous multibyte sequence produced an additional 32-bit
                wide character.  */
             iter->cur.bytes = 0;
+          #endif
           iter->cur.wc_valid = true;
 
           /* When in an initial state, we can go back treating ASCII
              characters more quickly.  */
+          #if !GNULIB_MBRTOC32_REGULAR
           if (mbsinit (&iter->state))
+          #endif
             iter->in_shift = false;
         }
     }
diff --git a/lib/quotearg.c b/lib/quotearg.c
index db915efa67..5b26055b2e 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -614,7 +614,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                 if (argsize == SIZE_MAX)
                   argsize = strlen (arg);
 
-                do
+                for (;;)
                   {
                     char32_t w;
                     size_t bytes = mbrtoc32 (&w, &arg[i + m],
@@ -635,8 +635,10 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                       }
                     else
                       {
+                        #if !GNULIB_MBRTOC32_REGULAR
                         if (bytes == (size_t) -3)
                           bytes = 0;
+                        #endif
                         /* Work around a bug with older shells that "see" a '\'
                            that is really the 2nd byte of a multibyte character.
                            In practice the problem is limited to ASCII
@@ -661,8 +663,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                           printable = false;
                         m += bytes;
                       }
+                    #if !GNULIB_MBRTOC32_REGULAR
+                    if (mbsinit (&mbstate))
+                    #endif
+                      break;
                   }
-                while (! mbsinit (&mbstate));
               }
 
             c_and_shell_quote_compat = printable;

Attachment: callgrind.out.before
Description: application/kcachegrind

Attachment: callgrind.out.after
Description: application/kcachegrind

PNG image

PNG image


reply via email to

[Prev in Thread] Current Thread [Next in Thread]