[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: Patrice Dumas
Date: Thu, 27 Jul 2023 17:18:10 -0400 (EDT)
branch: master
commit 2dbf62ceaaabd243c9cf3dad9af90050aab6b76e
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Thu Jul 27 23:16:19 2023 +0200
* tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_piece)
(parse_texi_text, parse_texi_line),
tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_piece, parse_string)
(parse_text): encode strings passed to the C parser to UTF-8
explicitly to make those strings byte strings. In the C parser, use
the macro corresponding to bytes to get the strings.
---
ChangeLog | 9 +++++++++
tp/Texinfo/XS/parsetexi/Parsetexi.pm | 22 ++++++++++------------
tp/Texinfo/XS/parsetexi/Parsetexi.xs | 6 +++---
tp/Texinfo/XS/parsetexi/labels.c | 2 +-
4 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1b7b893357..767f57dbfc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2023-07-27 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_piece)
+ (parse_texi_text, parse_texi_line),
+ tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_piece, parse_string)
+ (parse_text): encode strings passed to the C parser to UTF-8
+ explicitly to make those strings byte strings. In the C parser, use
+ the macro corresponding to bytes to get the strings.
+
2023-07-27 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/parsetexi/api.c (store_additional_info)
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 63f9dad186..5677c0cdd3 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -337,11 +337,9 @@ sub parse_texi_piece($$;$)
$self = parser() if (!defined($self));
- # make sure that internal byte buffer is in UTF-8 before we pass
- # it in to the XS code.
- utf8::upgrade($text);
-
- parse_piece($text, $line_nr);
+ # pass a binary UTF-8 encoded string to C code
+ my $utf8_bytes = Encode::encode('utf-8', $text);
+ parse_piece($utf8_bytes, $line_nr);
my $tree = build_texinfo_tree ();
get_parser_info($self);
@@ -361,11 +359,9 @@ sub parse_texi_text($$;$)
$self = parser() if (!defined($self));
- # make sure that internal byte buffer is in UTF-8 before we pass
- # it in to the XS code.
- utf8::upgrade($text);
-
- parse_text($text, $line_nr);
+ # pass a binary UTF-8 encoded string to C code
+ my $utf8_bytes = Encode::encode('utf-8', $text);
+ parse_text($utf8_bytes, $line_nr);
my $tree = build_texinfo_tree ();
get_parser_info($self);
@@ -387,8 +383,10 @@ sub parse_texi_line($$;$)
$line_nr = 1 if (not defined($line_nr));
$self = parser() if (!defined($self));
- utf8::upgrade($text);
- parse_string($text, $line_nr);
+
+ # pass a binary UTF-8 encoded string to C code
+ my $utf8_bytes = Encode::encode('utf-8', $text);
+ parse_string($utf8_bytes, $line_nr);
my $tree = build_texinfo_tree ();
_set_errors_node_lists_labels_indices($self);
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.xs b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
index a02649c573..58a00def3b 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.xs
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
@@ -53,17 +53,17 @@ parse_file(filename)
void
parse_piece(string, line_nr)
- char * string
+ char *string = (char *)SvPVbyte_nolen($arg);
int line_nr
void
parse_string(string, line_nr)
- char * string
+ char *string = (char *)SvPVbyte_nolen($arg);
int line_nr
void
parse_text(string, line_nr)
- char * string
+ char *string = (char *)SvPVbyte_nolen($arg);
int line_nr
void
diff --git a/tp/Texinfo/XS/parsetexi/labels.c b/tp/Texinfo/XS/parsetexi/labels.c
index bda9965b6e..3c9eaa81f2 100644
--- a/tp/Texinfo/XS/parsetexi/labels.c
+++ b/tp/Texinfo/XS/parsetexi/labels.c
@@ -91,7 +91,7 @@ check_register_target_element_label (ELEMENT *label_element,
@anchor first argument, float second argument) mainly to check that
the syntax for an external node is not used. In that case modify_node
is set to 0 and the node is not modified, and added elements are
- collected in a thirs field of the returned object,
+ collected in a third field of the returned object,
out_of_tree_elements - elements collected in manual_content or
node_content and not in the node
*/