texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[6892] Fix conversion of text with libiconv when conversion is stateful.


From: Eli Zaretskii
Subject: [6892] Fix conversion of text with libiconv when conversion is stateful.
Date: Fri, 25 Dec 2015 14:41:01 +0000

Revision: 6892
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=6892
Author:   eliz
Date:     2015-12-25 14:40:58 +0000 (Fri, 25 Dec 2015)
Log Message:
-----------
Fix conversion of text with libiconv when conversion is stateful.

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/info/info-utils.c
    trunk/info/info-utils.h

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog     2015-12-23 15:08:30 UTC (rev 6891)
+++ trunk/ChangeLog     2015-12-25 14:40:58 UTC (rev 6892)
@@ -1,3 +1,19 @@
+2015-12-25  Eli Zaretskii  <address@hidden>
+
+       * info/info-utils.c (copy_converting): When 'iconv' finishes
+       conversion, call it with 2nd and 3rd arguments NULL to make sure
+       it flushes the last converted character to the output buffer.
+       This avoids losing the last character when libiconv conversion is
+       stateful.  Restore the input pointer before calling copy_direct if
+       'iconv' failed to convert a character.
+       (text_buffer_iconv): Use ICONV_CONST to avoid compiler warnings on
+       platforms where the 2nd argument to 'iconv' should have the
+       'const' qualifier.  Use the passed 'iconv_state' argument rather
+       than the global 'iconv_to_output'.
+
+       * info/info-utils.h (text_buffer_iconv): Adjust the prototype to
+       use ICONV_CONST.
+
 2015-12-23  Gavin Smith  <address@hidden>
 
        * info/search.c (looking_at_line): Check line case-insensitively 

Modified: trunk/info/info-utils.c
===================================================================
--- trunk/info/info-utils.c     2015-12-23 15:08:30 UTC (rev 6891)
+++ trunk/info/info-utils.c     2015-12-25 14:40:58 UTC (rev 6892)
@@ -830,14 +830,14 @@
 #if !HAVE_ICONV
   return 0;
 #else
-  size_t bytes_left;
+  size_t bytes_left, orig_bytes_left;
   int extra_at_end;
   size_t iconv_ret;
   long output_start;
 
   size_t utf8_char_free; 
   char utf8_char[4]; /* Maximum 4 bytes in a UTF-8 character */
-  char *utf8_char_ptr;
+  char *utf8_char_ptr, *orig_inptr;
   size_t i;
   
   /* Use n as an estimate of how many bytes will be required
@@ -850,9 +850,15 @@
   while (bytes_left >= 0)
     {
       iconv_ret = text_buffer_iconv (&output_buf, iconv_to_output,
-                                     &inptr, &bytes_left);
+                                     (ICONV_CONST char **)&inptr, &bytes_left);
 
-      if (iconv_ret != (size_t) -1)
+      /* Make sure libiconv flushes out the last converted character.
+        This is required when the conversion is stateful, in which
+        case libiconv might not output the last character, waiting to
+        see whether it should be combined with the next one.  */
+      if (iconv_ret != (size_t) -1
+         && text_buffer_iconv (&output_buf, iconv_to_output,
+                               NULL, NULL) != (size_t) -1)
         /* Success: all of input converted. */
         break;
 
@@ -912,25 +918,47 @@
       /* We want to read exactly one character.  Do this by
          restricting size of output buffer. */
       utf8_char_ptr = utf8_char;
+      orig_inptr = inptr;
+      orig_bytes_left = bytes_left;
       for (i = 1; i <= 4; i++)
         {
           utf8_char_free = i;
-          iconv_ret = iconv (iconv_to_utf8, &inptr, &bytes_left,
-                             &utf8_char_ptr, &utf8_char_free);
-          /* If we managed to write a character: */
-          if (utf8_char_ptr > utf8_char) break;
+          errno = 0;
+          iconv_ret = iconv (iconv_to_utf8, (ICONV_CONST char **)&inptr,
+                             &bytes_left, &utf8_char_ptr, &utf8_char_free);
+          if ((iconv_ret == (size_t) -1 && errno != E2BIG)
+              /* If we managed to convert a character: */
+              || utf8_char_ptr > utf8_char)
+            break;
         }
 
       /* errno == E2BIG if iconv ran out of output buffer,
          which is expected. */
       if (iconv_ret == (size_t) -1 && errno != E2BIG)
-        /* Character is not recognized.  Copy a single byte. */
-        copy_direct (1);
+       {
+         /* Character is not recognized.  Copy a single byte.  */
+         inptr = orig_inptr;   /* iconv might have incremented inptr  */
+         copy_direct (1);
+         bytes_left = orig_bytes_left - 1;
+       }
       else
         {
           utf8_char_ptr = utf8_char;
           /* i is width of UTF-8 character */
           degrade_utf8 (&utf8_char_ptr, &i);
+         /* If we are done, make sure iconv flushes the last character.  */
+         if (bytes_left <= 0)
+           {
+             utf8_char_ptr = utf8_char;
+             i = 4;
+             iconv (iconv_to_utf8, NULL, NULL,
+                    &utf8_char_ptr, &utf8_char_free);
+             if (utf8_char_ptr > utf8_char)
+               {
+                 utf8_char_ptr = utf8_char;
+                 degrade_utf8 (&utf8_char_ptr, &i);
+               }
+           }
         }
     }
 
@@ -1925,7 +1953,7 @@
 /* Run iconv using text buffer as output buffer. */
 size_t
 text_buffer_iconv (struct text_buffer *buf, iconv_t iconv_state,
-                   char **inbuf, size_t *inbytesleft)
+                   ICONV_CONST char **inbuf, size_t *inbytesleft)
 {
   size_t out_bytes_left;
   char *outptr;
@@ -1933,7 +1961,7 @@
 
   outptr = text_buffer_base (buf) + text_buffer_off (buf);
   out_bytes_left = text_buffer_space_left (buf);
-  iconv_ret = iconv (iconv_to_output, inbuf, inbytesleft,
+  iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
                      &outptr, &out_bytes_left);
 
   text_buffer_off (buf) = outptr - text_buffer_base (buf);    

Modified: trunk/info/info-utils.h
===================================================================
--- trunk/info/info-utils.h     2015-12-23 15:08:30 UTC (rev 6891)
+++ trunk/info/info-utils.h     2015-12-25 14:40:58 UTC (rev 6892)
@@ -123,7 +123,7 @@
 size_t text_buffer_space_left (struct text_buffer *buf);
 #if HAVE_ICONV
 size_t text_buffer_iconv (struct text_buffer *buf, iconv_t iconv_state,
-                          char **inbuf, size_t *inbytesleft);
+                          ICONV_CONST char **inbuf, size_t *inbytesleft);
 #endif
 size_t text_buffer_add_string (struct text_buffer *buf, const char *str,
                               size_t len);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]