texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/Convert/Unicode.pm (%unicode_charact


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/Convert/Unicode.pm (%unicode_character_brace_no_arg_commands): add a comment to describe the failure with charnames::vianame.
Date: Fri, 28 Jul 2023 11:05:50 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new a34a79f0a9 * tp/Texinfo/Convert/Unicode.pm 
(%unicode_character_brace_no_arg_commands): add a comment to describe the 
failure with charnames::vianame.
a34a79f0a9 is described below

commit a34a79f0a9f494539274cfc412ab03043ad2f249
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Jul 28 17:05:39 2023 +0200

    * tp/Texinfo/Convert/Unicode.pm
    (%unicode_character_brace_no_arg_commands): add a comment to describe
    the failure with charnames::vianame.
    
    * tp/Texinfo/Convert/Unicode.pm (_eight_bit_and_unicode_point)
    (unicode_point_decoded_in_encoding): exclude 127, \x{7F}, DEL from the
    characters considered to be decoded.  It is probably actually decoded,
    but should not represent an accented character or a specific character.
---
 ChangeLog                     | 11 +++++++++++
 tp/Texinfo/Convert/Unicode.pm | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0bdf3db308..5dbefd7cc0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2023-07-28  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/Convert/Unicode.pm
+       (%unicode_character_brace_no_arg_commands): add a comment to describe
+       the failure with charnames::vianame.
+
+       * tp/Texinfo/Convert/Unicode.pm (_eight_bit_and_unicode_point)
+       (unicode_point_decoded_in_encoding): exclude 127, \x{7F}, DEL from the
+       characters considered to be decoded.  It is probably actually decoded,
+       but should not represent an accented character or a specific character.
+
 2023-07-28  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/Unicode.pm
diff --git a/tp/Texinfo/Convert/Unicode.pm b/tp/Texinfo/Convert/Unicode.pm
index f1e80bda7e..f8b0926b62 100644
--- a/tp/Texinfo/Convert/Unicode.pm
+++ b/tp/Texinfo/Convert/Unicode.pm
@@ -569,6 +569,13 @@ our %extra_unicode_map = (
 our %unicode_character_brace_no_arg_commands;
 foreach my $command (keys(%unicode_map)) {
   if ($unicode_map{$command} ne '') {
+# FIXME Using charnames::vianame as in the following is the clean documented
+# way to create a Unicode character at runtime.  However, in tests of perl
+# 5.10.1 (on solaris), if charnames::vianame is used for @aa{} '00E5', uc()
+# on the resulting character does not lead to \x{00C5} (@AA{}) (when
+# formatting @sc{@aa{}} or @var{@aa{}} in plaintext).
+#    $unicode_character_brace_no_arg_commands{$command}
+#      = charnames::vianame("U+$unicode_map{$command}");
     my $char_nr = hex($unicode_map{$command});
     if ($char_nr > 126 and $char_nr < 255) {
       # this is very strange, indeed.  The reason lies certainly in the
@@ -578,8 +585,6 @@ foreach my $command (keys(%unicode_map)) {
     } else {
       $unicode_character_brace_no_arg_commands{$command} = chr($char_nr);
     }
-#    $unicode_character_brace_no_arg_commands{$command}
-#      = charnames::vianame("U+$unicode_map{$command}");
   }
 }
 
@@ -1349,8 +1354,9 @@ sub _eight_bit_and_unicode_point($$)
   my $encoding = shift;
 
   my ($eight_bit, $codepoint);
-  if (ord($char) <= 128) {
-    # 7bit ascii characters, the same in every 8bit encodings
+  if (ord($char) < 127) {
+    # 7bit ascii characters (excluding 127, \x{7F}, DEL), the same in every
+    # 8bit encoding
     $eight_bit = uc(sprintf("%02x",ord($char)));
     $codepoint = uc(sprintf("%04x",ord($char)));
   } elsif (ord($char) <= hex(0xFFFF)) {
@@ -1556,7 +1562,8 @@ sub unicode_point_decoded_in_encoding($$) {
     return 1 if ($encoding eq 'utf-8'
                     or ($unicode_to_eight_bit{$encoding}
                         and ($unicode_to_eight_bit{$encoding}->{$unicode_point}
-                             or hex($unicode_point) < 128)));
+                             # excludes 127 \x{7F} DEL
+                             or hex($unicode_point) < 127)));
   }
   return 0;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]