branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line_misc

texinfo-commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line)

From:	Patrice Dumas
Subject:	branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line): inline and remove _encoding_alias, for simpler code more similar to XS parser.
Date:	Sun, 16 Jul 2023 17:00:25 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 14ca81e499 * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line): inline and remove _encoding_alias, for simpler code more similar to XS parser.
14ca81e499 is described below

commit 14ca81e499cae4383fee5a46ea124684c50ed365
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Jul 16 23:00:15 2023 +0200

    * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line): inline and remove
    _encoding_alias, for simpler code more similar to XS parser.
    
    * tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line): update
    comments and move a variable declaration.
---
 ChangeLog                          |  8 ++++++
 tp/Texinfo/ParserNonXS.pm          | 53 ++++++++++++++------------------------
 tp/Texinfo/XS/parsetexi/end_line.c | 25 +++++++-----------
 3 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 40da51f356..400e52f309 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2023-07-16  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line): inline and remove
+       _encoding_alias, for simpler code more similar to XS parser.
+
+       * tp/Texinfo/XS/parsetexi/end_line.c (end_line_misc_line): update
+       comments and move a variable declaration.
+
 2023-07-16  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/IXIN.pm (output_ixin): take encoding information
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index ccfb223fb4..085fe528d0 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -987,34 +987,6 @@ sub _setup_conf($$)
 # _next_text:                 present the next text fragment, from
 #                             pending text or line.
 
-# Taking an encoding name $ENCODING as argument, the function returns
-# $canonical_texinfo_encoding: the corresponding canonical Texinfo encoding,
-#                              as described in the Texinfo manual (or undef);
-# $perl_encoding:              an encoding name suitable for perl;
-# $canonical_output_encoding:  an encoding name suitable for most
-#                              output formats, especially HTML.
-sub _encoding_alias($)
-{
-  my $encoding = shift;
-  my $Encode_encoding_object = find_encoding($encoding);
-  my ($perl_encoding, $canonical_output_encoding);
-  if (defined($Encode_encoding_object)) {
-    $perl_encoding = $Encode_encoding_object->name();
-    # mime_name() is upper-case, our keys are lower case, set to lower case
-    $canonical_output_encoding = lc($Encode_encoding_object->mime_name());
-  }
-  my $canonical_texinfo_encoding;
-  foreach my $possible_encoding ($encoding, $canonical_output_encoding,
-                                            $perl_encoding) {
-    if (defined($possible_encoding)
-        and $canonical_texinfo_encodings{lc($possible_encoding)}) {
-      $canonical_texinfo_encoding = $possible_encoding;
-    }
-  }
-  return ($canonical_texinfo_encoding, $perl_encoding,
-          $canonical_output_encoding);
-}
-
 # context stack functions
 sub _init_context_stack($)
 {
@@ -3428,12 +3400,25 @@ sub _end_line_misc_line($$$)
                         = $self->{'info'}->{'input_encoding_name'}
           if defined $self->{'info'}->{'input_encoding_name'};
       } elsif ($command eq 'documentencoding') {
-        my ($texinfo_encoding, $perl_encoding, $input_encoding)
-           = _encoding_alias($text);
-        $self->_command_warn($current, $source_info,
-               __("encoding `%s' is not a canonical texinfo encoding"),
-                             $text)
-          if (!$texinfo_encoding or $texinfo_encoding ne lc($text));
+
+        # Warn if the encoding is not one of the encodings supported as an
+        # argument to @documentencoding, documented in Texinfo manual
+        unless ($canonical_texinfo_encodings{lc($text)}) {
+          $self->_command_warn($current, $source_info,
+                   __("encoding `%s' is not a canonical texinfo encoding"),
+                               $text)
+        }
+
+        # Set $perl_encoding  -- an encoding name suitable for perl;
+        #     $input_encoding -- for output within an HTML file, used
+        #                        in most output formats
+        my ($perl_encoding, $input_encoding);
+        my $Encode_encoding_object = find_encoding($text);
+        if (defined($Encode_encoding_object)) {
+          $perl_encoding = $Encode_encoding_object->name();
+          # mime_name() is upper-case, our keys are lower case, set to lower case
+          $input_encoding = lc($Encode_encoding_object->mime_name());
+        }
 
         if ($input_encoding) {
           $current->{'extra'}->{'input_encoding_name'} = $input_encoding;
diff --git a/tp/Texinfo/XS/parsetexi/end_line.c b/tp/Texinfo/XS/parsetexi/end_line.c
index 434c41d96e..972addc860 100644
--- a/tp/Texinfo/XS/parsetexi/end_line.c
+++ b/tp/Texinfo/XS/parsetexi/end_line.c
@@ -1458,23 +1458,16 @@ end_line_misc_line (ELEMENT *current)
           else if (current->cmd == CM_documentencoding)
             {
               int i; char *p, *text2;
-              char *texinfo_encoding = 0;
               char *input_encoding = 0;
-              /* See tp/Texinfo/Encoding.pm (whole file) */
-
-              /* Two concepts of encoding:
-                 texinfo_encoding -- one of the encodings supported as an
-                                     argument to @documentencoding, documented 
-                                     in Texinfo manual
-                 input_encoding -- for output within an HTML file, used
-                                   in most output formats */
 
               text2 = strdup (text);
               for (p = text2; *p; p++)
                 *p = tolower (*p);
 
-              /* Get texinfo_encoding from what was in the document */
+            /* Warn if the encoding is not one of the encodings supported as an
+               argument to @documentencoding, documented in Texinfo manual */
               {
+                char *texinfo_encoding = 0;
                 static char *canonical_encodings[] = {
                   "us-ascii", "utf-8", "iso-8859-1",
                   "iso-8859-15","iso-8859-2","koi8-r", "koi8-u",
@@ -1496,12 +1489,14 @@ end_line_misc_line (ELEMENT *current)
                   }
               }
 
+              /* Set input_encoding -- for output within an HTML file, used
+                                       in most output formats */
               {
                 struct encoding_map {
                     char *from; char *to;
                 };
 
-              /* Set input_encoding.  In the perl parser,
+              /* In the perl parser,
                  lc(Encode::find_encoding()->mime_name()) is used */
                 static struct encoding_map map[] = {
                       "utf-8", "utf-8",
@@ -1545,10 +1540,10 @@ end_line_misc_line (ELEMENT *current)
 
                   /* the Perl Parser calls Encode::find_encoding, so knows
                      about more encodings than what we know about here.
-                     TODO: Check when perl_encoding could be defined when
-                     texinfo_encoding isn't.
-                     Maybe we should check if an iconv conversion is possible
-                     from this encoding to UTF-8. */
+                     TODO: accept encoding not in encoding_map as long as
+                     an iconv conversion to UTF-8 is possible?
+                     Maybe we should check if an iconv conversion is
+                     possible from this encoding to UTF-8. */
                 }
             }
           else if (current->cmd == CM_documentlanguage)

[Prev in Thread]

Current Thread

[Next in Thread]

branch master updated: * tp/Texinfo/ParserNonXS.pm (_end_line_misc_line): inline and remove _encoding_alias, for simpler code more similar to XS parser., Patrice Dumas <=

Prev by Date: branch master updated: tp/Texinfo/XS/parsetexi/Parsetexi.pm: minor changes
Next by Date: branch master updated: tp/TODO: add changing the implementation of encoding in XS parser
Previous by thread: branch master updated: tp/Texinfo/XS/parsetexi/Parsetexi.pm: minor changes
Next by thread: branch master updated: tp/TODO: add changing the implementation of encoding in XS parser
Index(es):
- Date
- Thread