texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Thu, 27 Jul 2023 17:18:10 -0400 (EDT)

branch: master
commit 2dbf62ceaaabd243c9cf3dad9af90050aab6b76e
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Thu Jul 27 23:16:19 2023 +0200

    * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_piece)
    (parse_texi_text, parse_texi_line),
    tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_piece, parse_string)
    (parse_text): encode strings passed to the C parser to UTF-8
    explicitly to make those strings byte strings.  In the C parser, use
    the macro corresponding to bytes to get the strings.
---
 ChangeLog                            |  9 +++++++++
 tp/Texinfo/XS/parsetexi/Parsetexi.pm | 22 ++++++++++------------
 tp/Texinfo/XS/parsetexi/Parsetexi.xs |  6 +++---
 tp/Texinfo/XS/parsetexi/labels.c     |  2 +-
 4 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 1b7b893357..767f57dbfc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2023-07-27  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_piece)
+       (parse_texi_text, parse_texi_line),
+       tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_piece, parse_string)
+       (parse_text): encode strings passed to the C parser to UTF-8
+       explicitly to make those strings byte strings.  In the C parser, use
+       the macro corresponding to bytes to get the strings.
+
 2023-07-27  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/parsetexi/api.c (store_additional_info)
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 63f9dad186..5677c0cdd3 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -337,11 +337,9 @@ sub parse_texi_piece($$;$)
 
   $self = parser() if (!defined($self));
 
-  # make sure that internal byte buffer is in UTF-8 before we pass
-  # it in to the XS code.
-  utf8::upgrade($text);
-
-  parse_piece($text, $line_nr);
+  # pass a binary UTF-8 encoded string to C code
+  my $utf8_bytes = Encode::encode('utf-8', $text);
+  parse_piece($utf8_bytes, $line_nr);
   my $tree = build_texinfo_tree ();
 
   get_parser_info($self);
@@ -361,11 +359,9 @@ sub parse_texi_text($$;$)
 
   $self = parser() if (!defined($self));
 
-  # make sure that internal byte buffer is in UTF-8 before we pass
-  # it in to the XS code.
-  utf8::upgrade($text);
-
-  parse_text($text, $line_nr);
+  # pass a binary UTF-8 encoded string to C code
+  my $utf8_bytes = Encode::encode('utf-8', $text);
+  parse_text($utf8_bytes, $line_nr);
   my $tree = build_texinfo_tree ();
 
   get_parser_info($self);
@@ -387,8 +383,10 @@ sub parse_texi_line($$;$)
   $line_nr = 1 if (not defined($line_nr));
 
   $self = parser() if (!defined($self));
-  utf8::upgrade($text);
-  parse_string($text, $line_nr);
+
+  # pass a binary UTF-8 encoded string to C code
+  my $utf8_bytes = Encode::encode('utf-8', $text);
+  parse_string($utf8_bytes, $line_nr);
   my $tree = build_texinfo_tree ();
 
   _set_errors_node_lists_labels_indices($self);
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.xs b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
index a02649c573..58a00def3b 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.xs
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
@@ -53,17 +53,17 @@ parse_file(filename)
 
 void
 parse_piece(string, line_nr)
-        char * string
+        char *string = (char *)SvPVbyte_nolen($arg);
         int line_nr
 
 void
 parse_string(string, line_nr)
-        char * string
+        char *string = (char *)SvPVbyte_nolen($arg);
         int line_nr
 
 void
 parse_text(string, line_nr)
-        char * string
+        char *string = (char *)SvPVbyte_nolen($arg);
         int line_nr
 
 void
diff --git a/tp/Texinfo/XS/parsetexi/labels.c b/tp/Texinfo/XS/parsetexi/labels.c
index bda9965b6e..3c9eaa81f2 100644
--- a/tp/Texinfo/XS/parsetexi/labels.c
+++ b/tp/Texinfo/XS/parsetexi/labels.c
@@ -91,7 +91,7 @@ check_register_target_element_label (ELEMENT *label_element,
    @anchor first argument, float second argument) mainly to check that
    the syntax for an external node is not used.  In that case modify_node
    is set to 0 and the node is not modified, and added elements are
-   collected in a thirs field of the returned object,
+   collected in a third field of the returned object,
      out_of_tree_elements - elements collected in manual_content or
                             node_content and not in the node
  */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]