[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
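branch master updated: * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding, both for the perl name, and, with mime_name() lower cased for the output/HTML/input encoding name. Remove %encoding_aliases and %perl_charset_to_html.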
From: |
Patrice Dumas |
Subject: |
branch master updated: * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding, both for the perl name, and, with mime_name() lower cased for the output/HTML/input encoding name. Remove %encoding_aliases and %perl_charset_to_html. |
Date: |
Fri, 18 Feb 2022 16:51:13 -0500 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 0986f8eeab * tp/Texinfo/Encoding.pm (encoding_alias): use
Encode::find_encoding, both for the perl name, and, with mime_name() lower
cased for the output/HTML/input encoding name. Remove %encoding_aliases and
%perl_charset_to_html.
0986f8eeab is described below
commit 0986f8eeab7fe4cd0cf24ef596987d78c5458645
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Feb 18 22:51:04 2022 +0100
* tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding,
both for the perl name, and, with mime_name() lower cased for the
output/HTML/input encoding name. Remove %encoding_aliases and
%perl_charset_to_html.
---
ChangeLog | 7 +++++++
tp/Texinfo/Encoding.pm | 32 +++++++-------------------------
tp/Texinfo/XS/parsetexi/end_line.c | 12 +++++++-----
3 files changed, 21 insertions(+), 30 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index b10e737064..77ab5557b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2022-02-18 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding,
+ both for the perl name, and, with mime_name() lower cased for the
+ output/HTML/input encoding name. Remove %encoding_aliases and
+ %perl_charset_to_html.
+
2022-02-18 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Convert/Unicode.pm (%unicode_to_eight_bit)
diff --git a/tp/Texinfo/Encoding.pm b/tp/Texinfo/Encoding.pm
index f70e4da229..6195737c3d 100644
--- a/tp/Texinfo/Encoding.pm
+++ b/tp/Texinfo/Encoding.pm
@@ -22,7 +22,7 @@ package Texinfo::Encoding;
use strict;
-use Encode;
+use Encode qw(find_encoding);
require Exporter;
use vars qw(@ISA @EXPORT_OK);
@@ -30,40 +30,22 @@ use vars qw(@ISA @EXPORT_OK);
@EXPORT_OK = qw( encoding_alias );
-
-# charset related definitions.
-
-my %perl_charset_to_html = (
- 'utf8' => 'utf-8',
- 'utf-8-strict' => 'utf-8',
- 'ascii' => 'us-ascii',
- 'shiftjis' => 'shift_jis',
-);
-
-# encoding name normalization to html-compatible encoding names
-my %encoding_aliases;
-
-foreach my $perl_charset (keys(%perl_charset_to_html)) {
- $encoding_aliases{$perl_charset} = $perl_charset_to_html{$perl_charset};
- $encoding_aliases{$perl_charset_to_html{$perl_charset}}
- = $perl_charset_to_html{$perl_charset};
-}
-
my %canonical_texinfo_encodings;
# These are the encodings from the texinfo manual
foreach my $canonical_encoding ('us-ascii', 'utf-8', 'iso-8859-1',
'iso-8859-15', 'iso-8859-2', 'koi8-r', 'koi8-u') {
$canonical_texinfo_encodings{$canonical_encoding} = 1;
- $encoding_aliases{$canonical_encoding} = $canonical_encoding;
}
sub encoding_alias($)
{
my $encoding = shift;
- my $perl_encoding = Encode::resolve_alias($encoding);
- my $canonical_output_encoding;
- if ($perl_encoding) {
- $canonical_output_encoding = $encoding_aliases{$perl_encoding};
+ my $enc = find_encoding($encoding);
+ my ($perl_encoding, $canonical_output_encoding);
+ if (defined($enc)) {
+ $perl_encoding = $enc->name();
+ # mime_name() is upper-case, our keys are lower case, set to lower case
+ $canonical_output_encoding = lc($enc->mime_name());
}
my $canonical_texinfo_encoding;
foreach my $possible_encoding ($encoding, $canonical_output_encoding,
diff --git a/tp/Texinfo/XS/parsetexi/end_line.c b/tp/Texinfo/XS/parsetexi/end_line.c
index 625d51dc99..cd05a004c1 100644
--- a/tp/Texinfo/XS/parsetexi/end_line.c
+++ b/tp/Texinfo/XS/parsetexi/end_line.c
@@ -1523,9 +1523,10 @@ end_line_misc_line (ELEMENT *current)
struct encoding_map {
char *from; char *to;
};
- /* The map mimics Encode::resolve_alias() result. Even when
- the alias is not good, such as 'utf-8-strict' for 'utf-8'
- use the same mapping for consistency with the perl Parser */
+ /* The map mimics Encode::find_encoding()->name() result.
+ Even when the alias is not good, such as 'utf-8-strict'
+ for 'utf-8', use the same mapping for consistency with the
+ perl Parser */
static struct encoding_map map[] = {
"utf-8", "utf-8-strict",
"us-ascii", "ascii",
@@ -1550,7 +1551,7 @@ end_line_misc_line (ELEMENT *current)
{
command_warn (current, "unrecognized encoding name `%s'",
text);
- /* Texinfo::Encoding calls Encode::resolve_alias, so knows
+ /* Texinfo::Encoding calls Encode::find_encoding, so knows
about more encodings than what we know about here.
TODO: Check when perl_encoding could be defined when
texinfo_encoding isn't.
@@ -1559,7 +1560,8 @@ end_line_misc_line (ELEMENT *current)
}
- /* Set input_encoding from perl_encoding */
+ /* Set input_encoding from perl_encoding. In the perl parser,
+ lc(Encode::find_encoding()->mime_name()) is used */
input_encoding = 0;
if (perl_encoding)
{
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding, both for the perl name, and, with mime_name() lower cased for the output/HTML/input encoding name. Remove %encoding_aliases and %perl_charset_to_html.,
Patrice Dumas <=