[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: Decode inputs for encoded characters
From: |
Patrice Dumas |
Subject: |
branch master updated: Decode inputs for encoded characters |
Date: |
Tue, 22 Feb 2022 16:36:56 -0500 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 1837347ed7 Decode inputs for encoded characters
1837347ed7 is described below
commit 1837347ed7398115e9defc0ee439c946e87866e7
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Feb 22 22:36:38 2022 +0100
Decode inputs for encoded characters
* doc/texinfo.texi (HTML CSS), tp/Texinfo/Convert/HTML.pm
(_process_css_file): process @charset to get the encoding, and use
utf-8 as default as described.
* tp/texi2any.pl: decode more inputs from the command line that are
combined with messages or strings from the document.
Import Encode symbols explicitly.
---
ChangeLog | 12 ++
doc/texinfo.texi | 14 +-
tp/Texinfo/Common.pm | 1 +
tp/Texinfo/Convert/HTML.pm | 23 +++
tp/Texinfo/ParserNonXS.pm | 2 +-
tp/tests/formatting/Makefile.am | 3 +-
"tp/tests/formatting/c\303\252ss.css" | 4 +
tp/tests/formatting/list-of-tests | 8 +
"tp/tests/formatting/os\303\251.texi" | 15 ++
.../non_ascii_command_line/Chapteur.html | 72 +++++++++
.../res_parser/non_ascii_command_line/index.html | 71 +++++++++
.../non_ascii_command_line/int\303\251rnal.txt" | 2 +
.../os\303\251-texinfo.texi" | 13 ++
.../non_ascii_command_line/os\303\251.1" | 0
.../non_ascii_command_line/os\303\251.2" | 3 +
.../non_ascii_command_line/os\303\251_abt.html" | 164 +++++++++++++++++++++
"tp/tests/formatting/\303\247ss.css" | 10 ++
tp/tests/run_parser_all.sh | 8 +-
.../formatting_non_ascii_command_line.sh | 19 +++
tp/texi2any.pl | 39 ++---
20 files changed, 455 insertions(+), 28 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 05fd7fea35..eed1307152 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2022-02-22 Patrice Dumas <pertusus@free.fr>
+
+ Decode inputs for encoded characters
+
+ * doc/texinfo.texi (HTML CSS), tp/Texinfo/Convert/HTML.pm
+ (_process_css_file): process @charset to get the encoding, and use
+ utf-8 as default as described.
+
+ * tp/texi2any.pl: decode more input from command line that are
+ combined with messages or strings from document.
+ Import Encode symbols explicitly.
+
2022-02-22 Gavin Smith <gavinsmith0123@gmail.com>
Avoid double encoding error messages with XS parser
diff --git a/doc/texinfo.texi b/doc/texinfo.texi
index 5bf77482f1..d70e7ec36f 100644
--- a/doc/texinfo.texi
+++ b/doc/texinfo.texi
@@ -18369,6 +18369,11 @@ The option @option{--css-include=@var{file}} includes
the contents
details are somewhat tricky, as described in the following, to provide
maximum flexibility.
+@cindex @samp{@@charset} specification, in CSS files
+The CSS file first line may be a @samp{@@charset} directive. If present,
+this directive is used to determine the encoding of the CSS file. The
+line is not copied into the output.
+
@cindex @samp{@@import} specifications, in CSS files
The CSS file may begin with so-called @samp{@@import} directives,
which link to external CSS specifications for browsers to use when
@@ -18378,12 +18383,9 @@ explain how @command{makeinfo} handles them.
@cindex Comments, in CSS files
There can be more than one @samp{@@import}, but they have to come
-first in the file, with only whitespace and comments interspersed, no
-normal definitions. (Technical exception: a @samp{@@charset}
-directive may precede the @samp{@@import}'s. This does not alter
-@command{makeinfo}'s behavior, it just copies the @samp{@@charset} if
-present.) Comments in CSS files are delimited by @samp{/* ... */}, as
-in C@. An @samp{@@import} directive must be in one of these two forms:
+first in the file, with only whitespace and comments interspersed, no normal
+definitions. Comments in CSS files are delimited by @samp{/* ... */}, as in
+C@. An @samp{@@import} directive must be in one of these two forms:
@example
@@import url(http://example.org/foo.css);
diff --git a/tp/Texinfo/Common.pm b/tp/Texinfo/Common.pm
index 5df9685190..d3f69efd87 100644
--- a/tp/Texinfo/Common.pm
+++ b/tp/Texinfo/Common.pm
@@ -28,6 +28,7 @@ use 5.006;
# to determine the null file
use Config;
use File::Spec;
+# for find_encoding, resolve_alias and maybe utf8 related functions
use Encode;
use Texinfo::Documentlanguages;
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index e331a51e28..f29f3f832e 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -48,6 +48,8 @@ use File::Copy qw(copy);
use Storable;
+use Encode qw(find_encoding);
+
use Texinfo::Common;
use Texinfo::Config;
use Texinfo::Convert::Unicode;
@@ -7089,6 +7091,27 @@ sub _process_css_file($$$)
my $line_nr = 0;
while (my $line = <$fh>) {
$line_nr++;
+ if ($line_nr == 1) {
+ # the rule is to assume utf-8. There could also be a BOM, and
+ # the Content-Type: HTTP header but it is not relevant here.
+ # https://developer.mozilla.org/en-US/docs/Web/CSS/@charset
+ my $charset = 'utf-8';
+ my $charset_line;
+ if ($line =~ /^\@charset *"([^"]+)" *; *$/) {
+ $charset = $1;
+ $charset_line = 1;
+ }
+ my $Encode_encoding_object = find_encoding($charset);
+ if (defined($Encode_encoding_object)) {
+ my $input_perl_encoding = $Encode_encoding_object->name();
+ if ($input_perl_encoding eq 'utf-8') {
+ binmode($fh, ":utf8");
+ } else {
+ binmode($fh, ":encoding($input_perl_encoding)");
+ }
+ }
+ next if ($charset_line);
+ }
#print STDERR "Line: $line";
if ($in_rules) {
push @$rules, $line;
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index 0b8a3c1e0c..14fbc5cf14 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -37,7 +37,7 @@ use Carp qw(cluck);
use Data::Dumper;
# to detect if an encoding may be used to open the files
-use Encode;
+use Encode qw(find_encoding);
# for fileparse
use File::Basename;
diff --git a/tp/tests/formatting/Makefile.am b/tp/tests/formatting/Makefile.am
index 9c23f259db..ff32fec9d7 100644
--- a/tp/tests/formatting/Makefile.am
+++ b/tp/tests/formatting/Makefile.am
@@ -3,7 +3,8 @@ EXTRA_DIST = \
ignore_and_comments.texi split_nocopying.texi \
inc_file.texi test_need.texi \
lightweight_markups.texi japanese_long_name.texi \
- file.css list-of-tests res_parser
+ osé.texi \
+ çss.css cêss.css file.css list-of-tests res_parser
DISTCLEANFILES = tests.log tests.out
diff --git "a/tp/tests/formatting/c\303\252ss.css"
"b/tp/tests/formatting/c\303\252ss.css"
new file mode 100644
index 0000000000..e50300e009
--- /dev/null
+++ "b/tp/tests/formatting/c\303\252ss.css"
@@ -0,0 +1,4 @@
+@charset "iso-8859-15";
+
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
diff --git a/tp/tests/formatting/list-of-tests
b/tp/tests/formatting/list-of-tests
index 1812a91573..42b635c039 100644
--- a/tp/tests/formatting/list-of-tests
+++ b/tp/tests/formatting/list-of-tests
@@ -9,3 +9,11 @@ simplest_test_css simplest.texi --css-include file.css
# check that command line overrides document
documentlanguage_cmdline documentlanguage.texi --document-language=fr
+
+# some command-line arguments when incorrect cause texi2any to die.
+# easily tested by calling directly ./texi2any.pl and checking visually:
+# ./texi2any.pl --footnote-style=bâd
+# ./texi2any.pl --paragraph-indent=ïndent
+# check non ascii command line arguments
+non_ascii_command_line osé.texi --html --split=Mekanïk
--document-language=Destruktïw -c 'Kommandöh vâl' -D TÛT -D 'vùr ké' -U ôndef
-c 'FORMAT_MENU mînù' --macro-expand=@OUT_DIR@osé-texinfo.texi
--internal-links=@OUT_DIR@intérnal.txt --css-include çss.css --css-include
cêss.css --css-ref=rëf --css-ref=öref
+
diff --git "a/tp/tests/formatting/os\303\251.texi"
"b/tp/tests/formatting/os\303\251.texi"
new file mode 100644
index 0000000000..c273904f72
--- /dev/null
+++ "b/tp/tests/formatting/os\303\251.texi"
@@ -0,0 +1,15 @@
+\input texinfo.tex
+
+@setfilename osé.info
+
+@node Top
+@top Tôp
+
+@node Chaptêur
+@chapter Chapteùr
+
+@ifset TÛT
+isset TÛT
+@end ifset
+
+value vùr @value{vùr}.
diff --git
a/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html
b/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html
new file mode 100644
index 0000000000..78818eed59
--- /dev/null
+++ b/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html
@@ -0,0 +1,72 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>1 Chapteùr (Tôp)</title>
+
+<meta name="description" content="1 Chapteùr (Tôp)">
+<meta name="keywords" content="1 Chapteùr (Tôp)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+<div class="chapter-level-extent" id="Chapt_00eaur">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Beginning of
this chapter or previous chapter"> << </a>]</td>
+<td valign="middle" align="left">[<a href="index.html" title="Previous section
in reading order"> < </a>]</td>
+<td valign="middle" align="left">[<a href="index.html" title="Up section"> Up
</a>]</td>
+<td valign="middle" align="left">[ > ]</td>
+<td valign="middle" align="left">[ >> ]</td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left">[<a href="index.html" title="Cover (top) of
document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About"
title="About (help)"> ? </a>]</td>
+</tr></table>
+<hr>
+<h1 class="chapter" id="Chapteur">1 Chapteùr</h1>
+
+<p>isset TÛT
+</p>
+<p>value vùr ké.
+</p></div>
+<hr>
+<p>
+ <span class="program-in-footer">This document was generated on <em
class="emph">a sunny day</em> using <a class="uref"
href="http://www.gnu.org/software/texinfo/"><em
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git a/tp/tests/formatting/res_parser/non_ascii_command_line/index.html
b/tp/tests/formatting/res_parser/non_ascii_command_line/index.html
new file mode 100644
index 0000000000..d6a7955d49
--- /dev/null
+++ b/tp/tests/formatting/res_parser/non_ascii_command_line/index.html
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Tôp</title>
+
+<meta name="description" content="Tôp">
+<meta name="keywords" content="Tôp">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+
+<div class="top-level-extent" id="Top">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[ < ]</td>
+<td valign="middle" align="left">[<a href="Chapteur.html" title="Next section
in reading order"> > </a>]</td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About"
title="About (help)"> ? </a>]</td>
+</tr></table>
+<hr>
+<h1 class="top" id="Top-1">Tôp</h1>
+
+</div>
+<hr>
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[ < ]</td>
+<td valign="middle" align="left">[<a href="Chapteur.html" title="Next section
in reading order"> > </a>]</td>
+<td valign="middle" align="left"> </td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About"
title="About (help)"> ? </a>]</td>
+</tr></table>
+<p>
+ <span class="program-in-footer">This document was generated on <em
class="emph">a sunny day</em> using <a class="uref"
href="http://www.gnu.org/software/texinfo/"><em
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git
"a/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt"
"b/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt"
new file mode 100644
index 0000000000..9b0c060cc5
--- /dev/null
+++
"b/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt"
@@ -0,0 +1,2 @@
+index.html toc Tôp
+Chapteur.html toc 1 Chapteùr
diff --git
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
new file mode 100644
index 0000000000..587bad7166
--- /dev/null
+++
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
@@ -0,0 +1,13 @@
+\input texinfo.tex
+
+@setfilename osé.info
+
+@node Top
+@top Tôp
+
+@node Chaptêur
+@chapter Chapteùr
+
+isset TÛT
+
+value vùr ké.
diff --git
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.1"
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.1"
new file mode 100644
index 0000000000..e69de29bb2
diff --git
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2"
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2"
new file mode 100644
index 0000000000..3d62935143
--- /dev/null
+++ "b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2"
@@ -0,0 +1,3 @@
+texi2any: warning: Mekanïk is not a valid split possibility
+texi2any: warning: Destruktïw is not a valid language code
+texi2any: warning: unknown variable from command line: Kommandöh
diff --git
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html"
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html"
new file mode 100644
index 0000000000..8c1656506c
--- /dev/null
+++
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html"
@@ -0,0 +1,164 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>About This Document (Tôp)</title>
+
+<meta name="description" content="About This Document (Tôp)">
+<meta name="keywords" content="About This Document (Tôp)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+<div class="element-about" id="SEC_About">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Cover (top)
of document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ?
</a>]</td>
+</tr></table>
+<hr>
+<h1 class="about-heading">About This Document</h1>
+
+<p>
+ This document was generated on <em class="emph">a sunny day</em> using <a
class="uref" href="http://www.gnu.org/software/texinfo/"><em
class="emph">texi2any</em></a>.
+</p>
+<p>
+ The buttons in the navigation panels have the following meaning:
+</p>
+<table border="1">
+ <tr>
+ <th> Button </th>
+ <th> Name </th>
+ <th> Go to </th>
+ <th> From 1.2.3 go to</th>
+ </tr>
+ <tr>
+ <td align="center"> [ << ] </td>
+ <td align="center">FastBack</td>
+ <td>Beginning of this chapter or previous chapter</td>
+ <td>1</td>
+ </tr>
+ <tr>
+ <td align="center"> [ < ] </td>
+ <td align="center">Back</td>
+ <td>Previous section in reading order</td>
+ <td>1.2.2</td>
+ </tr>
+ <tr>
+ <td align="center"> [ Up ] </td>
+ <td align="center">Up</td>
+ <td>Up section</td>
+ <td>1.2</td>
+ </tr>
+ <tr>
+ <td align="center"> [ > ] </td>
+ <td align="center">Forward</td>
+ <td>Next section in reading order</td>
+ <td>1.2.4</td>
+ </tr>
+ <tr>
+ <td align="center"> [ >> ] </td>
+ <td align="center">FastForward</td>
+ <td>Next chapter</td>
+ <td>2</td>
+ </tr>
+ <tr>
+ <td align="center"> [Top] </td>
+ <td align="center">Top</td>
+ <td>Cover (top) of document</td>
+ <td> </td>
+ </tr>
+ <tr>
+ <td align="center"> [Contents] </td>
+ <td align="center">Contents</td>
+ <td>Table of contents</td>
+ <td> </td>
+ </tr>
+ <tr>
+ <td align="center"> [Index] </td>
+ <td align="center">Index</td>
+ <td>Index</td>
+ <td> </td>
+ </tr>
+ <tr>
+ <td align="center"> [ ? ] </td>
+ <td align="center">About</td>
+ <td>About (help)</td>
+ <td> </td>
+ </tr>
+</table>
+
+<p>
+ where the <strong class="strong"> Example </strong> assumes that the current
position is at <strong class="strong"> Subsubsection One-Two-Three </strong> of
a document of the following structure:
+</p>
+
+<ul>
+ <li> 1. Section One
+ <ul>
+ <li>1.1 Subsection One-One
+ <ul>
+ <li>...</li>
+ </ul>
+ </li>
+ <li>1.2 Subsection One-Two
+ <ul>
+ <li>1.2.1 Subsubsection One-Two-One</li>
+ <li>1.2.2 Subsubsection One-Two-Two</li>
+ <li>1.2.3 Subsubsection One-Two-Three
+ <strong><== Current Position </strong></li>
+ <li>1.2.4 Subsubsection One-Two-Four</li>
+ </ul>
+ </li>
+ <li>1.3 Subsection One-Three
+ <ul>
+ <li>...</li>
+ </ul>
+ </li>
+ <li>1.4 Subsection One-Four</li>
+ </ul>
+ </li>
+</ul>
+</div><hr>
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Cover (top)
of document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ?
</a>]</td>
+</tr></table>
+<p>
+ <span class="program-in-footer">This document was generated on <em
class="emph">a sunny day</em> using <a class="uref"
href="http://www.gnu.org/software/texinfo/"><em
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git "a/tp/tests/formatting/\303\247ss.css"
"b/tp/tests/formatting/\303\247ss.css"
new file mode 100644
index 0000000000..bf9d7c42c5
--- /dev/null
+++ "b/tp/tests/formatting/\303\247ss.css"
@@ -0,0 +1,10 @@
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import
+*/
+
+@import ("strânge\" ;file") ;
+@media tv { h3 {text-align: left} }
diff --git a/tp/tests/run_parser_all.sh b/tp/tests/run_parser_all.sh
index 5c475d4c46..932dcc6d7b 100755
--- a/tp/tests/run_parser_all.sh
+++ b/tp/tests/run_parser_all.sh
@@ -116,7 +116,7 @@ post_process_output ()
fi
}
-LC_ALL=C; export LC_ALL
+LC_ALL=C.UTF-8; export LC_ALL
prepended_command=
#prepended_command=time
@@ -228,7 +228,8 @@ if [ "z$clean" = 'zyes' -o "z$copy" = 'zyes' ]; then
# there are better ways
dir=`echo $line | awk '{print $1}'`
file=`echo $line | awk '{print $2}'`
- remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]* *[a-zA-Z0-9_./-]* *//'`
+ #remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]* *[a-zA-Z0-9_./-]* *//'`
+ remaining=`echo $line | sed 's/[a-zA-Z0-9_é./-]* *[a-zA-Z0-9_é./-]* *//'`
[ "z$dir" = 'z' -o "z$file" = 'z' ] && continue
if [ "z$clean" = 'zyes' ]; then
for command_dir in $commands; do
@@ -284,7 +285,8 @@ while read line; do
fi
basename=`basename $file .texi`
- remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]* *[a-zA-Z0-9_./-]* *//'`
+ #remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]* *[a-zA-Z0-9_./-]* *//'`
+ remaining=`echo $line | sed 's/[a-zA-Z0-9_é./-]* *[a-zA-Z0-9_é./-]* *//'`
src_file="$srcdir/$testdir/$file"
for command_dir in $commands; do
diff --git a/tp/tests/test_scripts/formatting_non_ascii_command_line.sh
b/tp/tests/test_scripts/formatting_non_ascii_command_line.sh
new file mode 100755
index 0000000000..c83a211a26
--- /dev/null
+++ b/tp/tests/test_scripts/formatting_non_ascii_command_line.sh
@@ -0,0 +1,19 @@
+#! /bin/sh
+# This file generated by maintain/regenerate_cmd_tests.sh
+
+if test z"$srcdir" = "z"; then
+ srcdir=.
+fi
+
+one_test_logs_dir=test_log
+
+
+dir=formatting
+name='non_ascii_command_line'
+mkdir -p $dir
+
+"$srcdir"/run_parser_all.sh -dir $dir $name
+exit_status=$?
+cat $dir/$one_test_logs_dir/$name.log
+exit $exit_status
+
diff --git a/tp/texi2any.pl b/tp/texi2any.pl
index 67a7dca142..8be0945de9 100755
--- a/tp/texi2any.pl
+++ b/tp/texi2any.pl
@@ -28,7 +28,7 @@ use strict;
# to determine the locale encoding
use I18N::Langinfo qw(langinfo CODESET);
# to decode command line arguments
-use Encode;
+use Encode qw(decode encode find_encoding);
# for file names portability
use File::Spec;
# to determine the path separator and null file
@@ -345,7 +345,7 @@ sub _decode_i18n_string($$)
{
my $string = shift;
my $encoding = shift;
- return Encode::decode($encoding, $string);
+ return decode($encoding, $string);
}
sub _encode_message($)
@@ -353,7 +353,7 @@ sub _encode_message($)
my $text = shift;
my $encoding = get_conf('MESSAGE_OUTPUT_ENCODING_NAME');
if (defined($encoding)) {
- return Encode::encode($encoding, $text);
+ return encode($encoding, $text);
} else {
return $text;
}
@@ -682,7 +682,7 @@ sub _decode_input($)
my $encoding = get_conf('DATA_INPUT_ENCODING_NAME');
if (defined($encoding)) {
- return Encode::decode($encoding, $text);
+ return decode($encoding, $text);
} else {
return $text;
}
@@ -879,17 +879,18 @@ There is NO WARRANTY, to the extent permitted by
law.\n"), "2021");
'number-footnotes!' => sub { set_from_cmdline('NUMBER_FOOTNOTES', $_[1]); },
'node-files!' => sub { set_from_cmdline('NODE_FILES', $_[1]); },
'footnote-style=s' => sub {
- if ($_[1] eq 'end' or $_[1] eq 'separate') {
- set_from_cmdline('footnotestyle', $_[1]);
+ my $value = _decode_input($_[1]);
+ if ($value eq 'end' or $value eq 'separate') {
+ set_from_cmdline('footnotestyle', $value);
} else {
- # FIXME decode/encode?
- die sprintf(__("%s: --footnote-style arg must be `separate' or `end',
not `%s'.\n"),
- $real_command_name, $_[1]);
+ die _encode_message(
+ sprintf(__("%s: --footnote-style arg must be `separate' or `end',
not `%s'.\n"),
+ $real_command_name, $value));
}
},
- 'split=s' => sub { my $split = $_[1];
+ 'split=s' => sub { my $split = _decode_input($_[1]);
my @messages
- = Texinfo::Common::warn_unknown_split($_[1]);
+ = Texinfo::Common::warn_unknown_split($split);
if (@messages) {
foreach my $message (@messages) {
document_warn($message);
@@ -911,12 +912,10 @@ There is NO WARRANTY, to the extent permitted by
law.\n"), "2021");
$format = 'plaintext' if (!$_[1] and $format eq 'info');
},
'output|out|o=s' => sub {
my $var = 'OUTFILE';
- # do not decode before calling -d as -d expects bytes
if ($_[1] =~ m:/$: or -d $_[1]) {
set_from_cmdline($var, undef);
$var = 'SUBDIR';
}
- #set_from_cmdline($var, _decode_input($_[1]));
set_from_cmdline($var, $_[1]);
push @texi2dvi_args, '-o', $_[1];
},
@@ -972,12 +971,13 @@ There is NO WARRANTY, to the extent permitted by
law.\n"), "2021");
'error-limit|e=i' => sub { set_from_cmdline('ERROR_LIMIT', $_[1]); },
'split-size=s' => sub {set_from_cmdline('SPLIT_SIZE', $_[1])},
'paragraph-indent|p=s' => sub {
- my $value = $_[1];
+ my $value = _decode_input($_[1]);
if ($value =~ /^([0-9]+)$/ or $value eq 'none' or $value eq 'asis') {
- set_from_cmdline('paragraphindent', $_[1]);
+ set_from_cmdline('paragraphindent', $value);
} else {
- die sprintf(__("%s: --paragraph-indent arg must be
numeric/`none'/`asis', not `%s'.\n"),
- $real_command_name, $value);
+ die _encode_message(sprintf(
+ __("%s: --paragraph-indent arg must be numeric/`none'/`asis', not
`%s'.\n"),
+ $real_command_name, $value));
}
},
'fill-column|f=i' => sub {set_from_cmdline('FILLCOLUMN',$_[1]);},
@@ -1010,6 +1010,11 @@ There is NO WARRANTY, to the extent permitted by
law.\n"), "2021");
exit 1 if (!$result_options);
+# those are strings combined with output so decode
+my $ref_css_refs = get_conf('CSS_REFS');
+my @input_css_refs = @{$ref_css_refs};
+@$ref_css_refs = map {_decode_input($_)} @input_css_refs;
+
# Change some options depending on the settings of other ones set formats
sub process_config {
my $conf = shift;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: Decode inputs for encoded characters,
Patrice Dumas <=