[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: Decode input data to perl internal and encode messages
From: |
Patrice Dumas |
Subject: |
branch master updated: Decode input data to perl internal and encode messages |
Date: |
Sun, 20 Feb 2022 16:22:00 -0500 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new fd69ad4620 Decode input data to perl internal and encode messages
fd69ad4620 is described below
commit fd69ad4620dc4b7aac4aec5022a8f531b92f7d2c
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Feb 20 22:21:48 2022 +0100
Decode input data to perl internal and encode messages
* tp/Texinfo/Common.pm (@variable_string_settables),
tp/Texinfo/Config.pm (_GNUT_encode_message, _GNUT_document_warn),
tp/texi2any.pl ($main_program_set_options, _decode_i18n_string)
(_encode_message, document_warn, handle_errors, _decode_input)
(Getopt::Long::GetOptions): add DATA_INPUT_ENCODING_NAME and
MESSAGE_OUTPUT_ENCODING_NAME set in the default case to the
locale encoding, to decode from environment, command line and
translated messages and encode messages. Add FILE_NAMES_ENCODING_NAME
for file names encoding but do not use it yet.
---
ChangeLog | 14 +++++++
tp/Texinfo/Common.pm | 3 ++
tp/Texinfo/Config.pm | 20 ++++++++-
tp/texi2any.pl | 114 ++++++++++++++++++++++++++++++++++++++++-----------
4 files changed, 126 insertions(+), 25 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e51f333601..30dd2ed88b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2022-02-20 Patrice Dumas <pertusus@free.fr>
+
+ Decode input data to perl internal and encode messages
+
+ * tp/Texinfo/Common.pm (@variable_string_settables),
+ tp/Texinfo/Config.pm (_GNUT_encode_message, _GNUT_document_warn),
+ tp/texi2any.pl ($main_program_set_options, _decode_i18n_string)
+ (_encode_message, document_warn, handle_errors, _decode_input)
+ (Getopt::Long::GetOptions): add DATA_INPUT_ENCODING_NAME and
+ MESSAGE_OUTPUT_ENCODING_NAME set in the default case to the
+ locale encoding, to decode from environment, command line and
+ translated messages and encode messages. Add FILE_NAMES_ENCODING_NAME
+ for file names encoding but do not use it yet.
+
2022-02-20 Gavin Smith <gavinsmith0123@gmail.com>
@include file name encoding
diff --git a/tp/Texinfo/Common.pm b/tp/Texinfo/Common.pm
index 00d15aa693..5df9685190 100644
--- a/tp/Texinfo/Common.pm
+++ b/tp/Texinfo/Common.pm
@@ -286,6 +286,7 @@ my @variable_string_settables = (
'EXTERNAL_CROSSREF_SPLIT',
'EXTERNAL_DIR',
'EXTRA_HEAD',
+'FILE_NAMES_ENCODING_NAME',
'FOOTNOTE_END_HEADER_LEVEL',
'FOOTNOTE_SEPARATE_HEADER_LEVEL',
'FRAMES',
@@ -311,11 +312,13 @@ my @variable_string_settables = (
'L2H_L2H',
'L2H_SKIP',
'L2H_TMP',
+'DATA_INPUT_ENCODING_NAME',
'MATHJAX_SCRIPT',
'MATHJAX_SOURCE',
'MAX_HEADER_LEVEL',
'MENU_ENTRY_COLON',
'MENU_SYMBOL',
+'MESSAGE_OUTPUT_ENCODING_NAME',
'MONOLITHIC',
'NO_CSS',
'NO_NUMBER_FOOTNOTE_SYMBOL',
diff --git a/tp/Texinfo/Config.pm b/tp/Texinfo/Config.pm
index 97b1a4c89e..a2573aa2f9 100644
--- a/tp/Texinfo/Config.pm
+++ b/tp/Texinfo/Config.pm
@@ -36,6 +36,9 @@ use Carp;
# for __( and p__( and some functions
use Texinfo::Common;
+# for Encode::encode
+use Encode;
+
# for error messages, passed from main program through initialization
# function.
@@ -80,13 +83,26 @@ sub GNUT_initialize_config($$$) {
return $init_files_options;
}
+# duplicated from texi2any.pl
+sub _GNUT_encode_message($)
+{
+ my $text = shift;
+ my $encoding = texinfo_get_conf('MESSAGE_OUTPUT_ENCODING_NAME');
+ if (defined($encoding)) {
+ return Encode::encode($encoding, $text);
+ } else {
+ return $text;
+ }
+}
+
# duplicated from texi2any.pl
sub _GNUT_document_warn($) {
return if (texinfo_get_conf('NO_WARN'));
my $text = shift;
chomp ($text);
- warn(sprintf(__p("program name: warning: warning_message",
- "%s: warning: %s\n"), $real_command_name, $text));
+ warn(_GNUT_encode_message(
+ sprintf(__p("program name: warning: warning_message",
+ "%s: warning: %s\n"), $real_command_name, $text)));
}
# called from texi2any.pl main program.
diff --git a/tp/texi2any.pl b/tp/texi2any.pl
index c7262bb47e..f828772a5f 100755
--- a/tp/texi2any.pl
+++ b/tp/texi2any.pl
@@ -25,6 +25,10 @@ require 5.00405;
use strict;
+# to determine the locale encoding
+use I18N::Langinfo qw(langinfo CODESET);
+# to decode command line arguments
+use Encode;
# for file names portability
use File::Spec;
# to determine the path separator and null file
@@ -273,6 +277,15 @@ if ($texinfo_dtd_version eq '@' . 'TEXINFO_DTD_VERSION@') {
}
}
}
+
+# the encoding used to decode command line arguments, and also for
+# file names encoding, perl is expecting sequences of bytes, not unicode
+# code points.
+my $locale_encoding = langinfo(CODESET);
+$locale_encoding = undef if ($locale_encoding eq '');
+my $file_name_encoding = $locale_encoding;
+$file_name_encoding = 'utf-8' if (not defined($file_name_encoding));
+
# Used in case it is not hardcoded in configure and for standalone perl module
$texinfo_dtd_version = $configured_version
if (!defined($texinfo_dtd_version));
@@ -286,6 +299,9 @@ my $main_program_set_options = {
'PACKAGE_URL' => $configured_url,
'PROGRAM' => $real_command_name,
'TEXINFO_DTD_VERSION' => $texinfo_dtd_version,
+ 'DATA_INPUT_ENCODING_NAME' => $locale_encoding,
+ 'MESSAGE_OUTPUT_ENCODING_NAME' => $locale_encoding,
+ 'FILE_NAMES_ENCODING_NAME' => $file_name_encoding,
};
# defaults for options relevant in the main program. Also used as
@@ -326,6 +342,47 @@ foreach my $texinfo_config_dir (@language_config_dirs) {
push @program_init_dirs, File::Spec->catdir($texinfo_config_dir, 'init');
}
+sub _decode_i18n_string($$)
+{
+ my $string = shift;
+ my $encoding = shift;
+ return Encode::decode($encoding, $string);
+}
+
+# FIXME should we reset the messages encoding if 'DATA_INPUT_ENCODING_NAME'
+# is reset?
+my $messages_encoding = get_conf('DATA_INPUT_ENCODING_NAME');
+if (defined($messages_encoding) and $messages_encoding ne 'us-ascii') {
+ my $Encode_encoding_object = find_encoding($messages_encoding);
+ my $perl_messages_encoding = $Encode_encoding_object->name();
+ Locale::Messages::bind_textdomain_codeset($messages_textdomain,
+ $messages_encoding);
+ if ($perl_messages_encoding) {
+ Locale::Messages::bind_textdomain_filter($messages_textdomain,
+ \&_decode_i18n_string, $perl_messages_encoding);
+ }
+}
+
+sub _encode_message($)
+{
+ my $text = shift;
+ my $encoding = get_conf('MESSAGE_OUTPUT_ENCODING_NAME');
+ if (defined($encoding)) {
+ return Encode::encode($encoding, $text);
+ } else {
+ return $text;
+ }
+}
+
+sub document_warn($) {
+ return if (get_conf('NO_WARN'));
+ my $text = shift;
+ chomp ($text);
+ warn(_encode_message(
+ sprintf(__p("program name: warning: warning_message",
+ "%s: warning: %s\n"), $real_command_name, $text)));
+}
+
sub locate_and_load_init_file($$)
{
my $filename = shift;
@@ -556,14 +613,6 @@ sub set_format($;$$)
return $new_format;
}
-sub document_warn($) {
- return if (get_conf('NO_WARN'));
- my $text = shift;
- chomp ($text);
- warn(sprintf(__p("program name: warning: warning_message",
- "%s: warning: %s\n"), $real_command_name, $text));
-}
-
sub _exit($$)
{
my $error_count = shift;
@@ -588,8 +637,8 @@ sub handle_errors($$$)
my ($errors, $new_error_count) = $self->errors();
$error_count += $new_error_count if ($new_error_count);
foreach my $error_message (@$errors) {
- warn $error_message->{'error_line'} if ($error_message->{'type'} eq 'error'
- or !get_conf('NO_WARN'));
+ warn _encode_message($error_message->{'error_line'})
+ if ($error_message->{'type'} eq 'error' or !get_conf('NO_WARN'));
}
_exit($error_count, $opened_files);
@@ -610,6 +659,18 @@ sub _get_converter_default($)
return undef;
}
+sub _decode_input($)
+{
+ my $text = shift;
+
+ my $encoding = get_conf('DATA_INPUT_ENCODING_NAME');
+ if (defined($encoding)) {
+ return Encode::decode($encoding, $text);
+ } else {
+ return $text;
+ }
+}
+
# translation related todo to be done when the string change anyway to
# avoid requiring translation
sub makeinfo_help()
@@ -778,12 +839,13 @@ Texinfo home page: http://www.gnu.org/software/texinfo/")
."\n";
my $Xopt_arg_nr = 0;
my $result_options = Getopt::Long::GetOptions (
- 'help|h' => sub { print makeinfo_help(); exit 0; },
- 'version|V' => sub {print "$program_name (GNU texinfo) $configured_version\n\n";
- printf __("Copyright (C) %s Free Software Foundation, Inc.
+ 'help|h' => sub { print _encode_message(makeinfo_help()); exit 0; },
+ 'version|V' => sub {
+ print _encode_message("$program_name (GNU texinfo) $configured_version\n\n");
+ print _encode_message(sprintf __("Copyright (C) %s Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
-There is NO WARRANTY, to the extent permitted by law.\n"), "2021";
+There is NO WARRANTY, to the extent permitted by law.\n"), "2021");
exit 0;},
'macro-expand|E=s' => sub { set_from_cmdline('MACRO_EXPAND', $_[1]); },
'ifhtml!' => sub { set_expansion('html', $_[1]); },
@@ -803,7 +865,9 @@ There is NO WARRANTY, to the extent permitted by law.\n"), "2021";
if ($_[1] eq 'end' or $_[1] eq 'separate') {
set_from_cmdline('footnotestyle', $_[1]);
} else {
- die sprintf(__("%s: --footnote-style arg must be `separate' or `end', not `%s'.\n"), $real_command_name, $_[1]);
+ # FIXME decode/encode?
+ die sprintf(__("%s: --footnote-style arg must be `separate' or `end', not `%s'.\n"),
+ $real_command_name, $_[1]);
}
},
'split=s' => sub { my $split = $_[1];
@@ -830,11 +894,12 @@ There is NO WARRANTY, to the extent permitted by law.\n"), "2021";
$format = 'plaintext' if (!$_[1] and $format eq 'info');
},
'output|out|o=s' => sub {
my $var = 'OUTFILE';
+ # do not decode before calling -d as -d expects bytes
if ($_[1] =~ m:/$: or -d $_[1]) {
set_from_cmdline($var, undef);
$var = 'SUBDIR';
}
- set_from_cmdline($var, $_[1]);
+ set_from_cmdline($var, _decode_input($_[1]));
push @texi2dvi_args, '-o', $_[1];
},
'no-validate|no-pointer-validate' => sub {
@@ -844,9 +909,10 @@ There is NO WARRANTY, to the extent permitted by law.\n"), "2021";
'verbose|v!' => sub {set_from_cmdline('VERBOSE', $_[1]);
push @texi2dvi_args, '--verbose'; },
'document-language=s' => sub {
- set_from_cmdline('documentlanguage', $_[1]);
+ my $documentlanguage = _decode_input($_[1]);
+ set_from_cmdline('documentlanguage', $documentlanguage);
my @messages
- = Texinfo::Common::warn_unknown_language($_[1]);
+ = Texinfo::Common::warn_unknown_language($documentlanguage);
foreach my $message (@messages) {
document_warn($message);
}
@@ -855,22 +921,23 @@ There is NO WARRANTY, to the extent permitted by law.\n"), "2021";
my $var = $_[1];
my @field = split (/\s+/, $var, 2);
if (@field == 1) {
- $parser_options->{'values'}->{$var} = 1;
+ $parser_options->{'values'}->{_decode_input($var)} = 1;
push @texi2dvi_args, "--command=\@set $var 1";
} else {
- $parser_options->{'values'}->{$field[0]} = $field[1];
+ $parser_options->{'values'}->{_decode_input($field[0])}
+ = _decode_input($field[1]);
push @texi2dvi_args, "--command=\@set $field[0] $field[1]";
}
},
'U=s' => sub {
- delete $parser_options->{'values'}->{$_[1]};
+ delete $parser_options->{'values'}->{_decode_input($_[1])};
push @texi2dvi_args, "--command=\@clear $_[1]";
},
'init-file=s' => sub {
locate_and_load_init_file($_[1], [ @conf_dirs, @program_init_dirs ]);
},
'set-customization-variable|c=s' => sub {
- my $var_val = $_[1];
+ my $var_val = _decode_input($_[1]);
if ($var_val =~ s/^(\w+)\s*=?\s*//) {
my $var = $1;
my $value = $var_val;
@@ -1000,7 +1067,8 @@ if (defined($init_file_format)) {
if (defined($ENV{'TEXINFO_OUTPUT_FORMAT'})
and $ENV{'TEXINFO_OUTPUT_FORMAT'} ne '') {
- $format = set_format($ENV{'TEXINFO_OUTPUT_FORMAT'}, $format, 1);
+ $format = set_format(_decode_input($ENV{'TEXINFO_OUTPUT_FORMAT'}),
+ $format, 1);
}
if ($call_texi2dvi) {
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: Decode input data to perl internal and encode messages,
Patrice Dumas <=