texinfo/tp Texinfo/Convert/Converter.pm Texinfo...

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Converter.pm Texinfo...

From:	Patrice Dumas
Subject:	texinfo/tp Texinfo/Convert/Converter.pm Texinfo...
Date:	Sun, 04 Sep 2011 18:13:17 +0000
CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        11/09/04 18:13:17

Modified files:
        tp/Texinfo/Convert: Converter.pm Text.pm 
        tp/t           : accents.t 

Log message:
        Format stacks of accent commands and not accent command names, to
        still have the parent information.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Converter.pm?cvsroot=texinfo&r1=1.32&r2=1.33
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.56&r2=1.57
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/accents.t?cvsroot=texinfo&r1=1.11&r2=1.12

Patches:
Index: Texinfo/Convert/Converter.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Converter.pm,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -b -r1.32 -r1.33
--- Texinfo/Convert/Converter.pm        31 Aug 2011 22:54:51 -0000      1.32
+++ Texinfo/Convert/Converter.pm        4 Sep 2011 18:13:16 -0000       1.33
@@ -619,7 +619,7 @@
   my $result = $self->_convert({'contents' => $contents});
   
   foreach my $accent_command (reverse(@$stack)) {
-    $result = &$format_accents ($result, {'cmdname' => $accent_command}, 
+    $result = &$format_accents ($result, $accent_command, 
                                 $in_upper_case);
   }
   return $result;

Index: Texinfo/Convert/Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.56
retrieving revision 1.57
diff -u -b -r1.56 -r1.57
--- Texinfo/Convert/Text.pm     20 Aug 2011 14:38:47 -0000      1.56
+++ Texinfo/Convert/Text.pm     4 Sep 2011 18:13:16 -0000       1.57
@@ -25,7 +25,7 @@
 # accent commands list.
 use Texinfo::Common;
 use Texinfo::Convert::Unicode;
-#Â for debugging
+# for debugging
 use Texinfo::Convert::Texinfo;
 use Data::Dumper;
 use Carp qw(cluck);
@@ -545,9 +545,9 @@
 }
 
 # find the innermost accent and the correspponding text contents
-#Â FIXME This is not output dependent, so could be in 
-#Â Texinfo::Convert::Converter.  However this would create a 
-#Â dependency loop.
+# FIXME This is not output dependent, so could be in 
+# Texinfo::Convert::Converter.  However this would create a 
+# dependency loop.
 sub _find_innermost_accent_contents($;$)
 {
   my $current = shift;
@@ -556,7 +556,7 @@
   my $debug = 0;
  ACCENT:
   while (1) {
-    #Â the following can happen if called with a bad tree
+    # the following can happen if called with a bad tree
     if (!$current->{'cmdname'} 
         or !$accent_commands{$current->{'cmdname'}}) {
       #print STDERR "BUG: Not an accent command in accent\n";
@@ -565,7 +565,7 @@
       print STDERR Data::Dumper->Dump([$current]);
       last;
     }
-    push @accent_commands, $current->{'cmdname'};
+    push @accent_commands, $current;
     # A bogus accent
     if (!$current->{'args'}) {
       return ([], $current, \@accent_commands);
@@ -595,7 +595,7 @@
         }
       }
     }
-    #Â we go here if there was no nested accent
+    # we go here if there was no nested accent
     return ($text_contents, $current, \@accent_commands);
   }
 }
@@ -646,7 +646,7 @@
   #$debug = 1;
 
   # FIXME shouldn't it be better to format the innermost conntents with 
-  #Â a converter, if present?
+  # a converter, if present?
   my ($text, $innermost_accent, $stack) 
     = _find_innermost_accent($current, $encoding, $in_upper_case);
 
@@ -683,7 +683,8 @@
   }
 
   if ($debug) {
-    print STDERR "stack: ".join('|',@$stack)."\nPARTIAL_RESULTS_STACK:\n";
+    print STDERR "stack: ".join('|', map {$_->{'cmdname'}} @$stack)
+     ."\nPARTIAL_RESULTS_STACK:\n";
     foreach my $partial_result (@results_stack) {
       if (defined($partial_result->[0])) {
         print STDERR "   -> ".Encode::encode('utf8', $partial_result->[0])
@@ -712,7 +713,8 @@
     if ($debug) {
       my $eight_bit_txt = 'undef';
       $eight_bit_txt = $eight_bit if (defined($eight_bit));
-      print STDERR "" . Encode::encode('utf8', $char) . " ($partial_result->[1]->{'cmdname'}), new_codepoint: $new_codepoint 8bit: $new_eight_bit old:$eight_bit_txt\n";
+      print STDERR "" . Encode::encode('utf8', $char) 
+        . " ($partial_result->[1]->{'cmdname'}), new_codepoint: $new_codepoint 8bit: $new_eight_bit old:$eight_bit_txt\n";
     }
 
     # no corresponding eight bit character found for a composed character
@@ -788,7 +790,7 @@
 
   $result = uc($result) if ($in_upper_case and $result =~ /^\w$/);
   foreach my $accent_command (reverse(@$stack)) {
-    $result = ascii_accent ($result, {'cmdname' => $accent_command});
+    $result = ascii_accent ($result, $accent_command);
   }
   return $result;
 }
@@ -806,10 +808,9 @@
   while (@stack_accent_commands) {
     my $accent_command = shift @stack_accent_commands;
     my $formatted_result
-     = Texinfo::Convert::Unicode::unicode_accent($result, 
-       {'cmdname' => $accent_command});
+     = Texinfo::Convert::Unicode::unicode_accent($result, $accent_command);
     if (!defined($formatted_result)) {
-      push @stack_accent_commands, $accent_command;
+      unshift @stack_accent_commands, $accent_command;
     } else {
       $result = $formatted_result;
     }
@@ -817,8 +818,7 @@
   $result = uc ($result) if ($in_upper_case);
   while (@stack_accent_commands) {
     my $accent_command = shift @stack_accent_commands;
-    $result = &$format_accent($result, 
-       {'cmdname' => $accent_command}, $in_upper_case);
+    $result = &$format_accent($result, $accent_command, $in_upper_case);
   }
   return $result;
 }

Index: t/accents.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/accents.t,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- t/accents.t 1 Jun 2011 21:30:11 -0000       1.11
+++ t/accents.t 4 Sep 2011 18:13:17 -0000       1.12
@@ -1,7 +1,7 @@
 use strict;
 
 use Test::More;
-BEGIN { plan tests => 50 };
+BEGIN { plan tests => 63 };
 
 use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
 use lib '../texi2html/lib/libintl-perl/lib/';
@@ -22,12 +22,13 @@
   my $reference = $test->[2]; 
   my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
   my $tree = $parser->parse_texi_text($texi);
-  my ($text, $innermost_accent, $stack) = 
+  my ($text, $innermost_accent, $commands_stack) = 
     Texinfo::Convert::Text::_find_innermost_accent($tree->{'contents'}->[0]);
+  my @stack = map {$_->{'cmdname'}} @$commands_stack;
   if (defined($reference)) {
-    ok ($reference eq join('|',($text, @$stack)), 'innermost '.$name);
+    ok ($reference eq join('|',($text, @stack)), 'innermost '.$name);
   } else {
-    print STDERR join('|',($text, @$stack))."\n";
+    print STDERR join('|',($text, @stack))."\n";
   }
 }
 
@@ -43,6 +44,20 @@
   test_accent_stack($test);
 }
 
+sub ord_hex_string($)
+{
+  my $result = shift;
+  my $ord = '';
+  my $hex = '';
+  foreach my $char (split '', $result) {
+    $ord .= ord($char).'-';
+    $hex .= sprintf("%04x-", ord($char));
+  }
+  $ord =~ s/-$//;
+  $hex =~ s/-$//;
+  return ($ord, $hex);
+}
+
 sub test_enable_encoding ($)
 {
   my $test = shift;
@@ -51,6 +66,7 @@
   my $reference = $test->[2];
   my $reference_xml = $test->[3];
   my $reference_xml_entity = $test->[4];
+  my $reference_unicode = $test->[5];
   my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
   my $text_root = $parser->parse_texi_text($texi);
   my $tree = $text_root->{'contents'}->[0];
@@ -64,16 +80,14 @@
   $html_converter->{'conf'}->{'USE_NUMERIC_ENTITY'} = 1;
   my $result_xml_entity 
       = Texinfo::Convert::Converter::xml_accents($html_converter, $tree);
+  my $result_unicode = Texinfo::Convert::Text::unicode_accents($tree,
+                      \&Texinfo::Convert::Text::ascii_accent);
   if (defined($reference)) {
     #ok (Encode::decode('iso-8859-1', $reference) eq $result, $name);
     #ok ($reference eq Encode::encode('iso-8859-1', $result), $name);
     is (Encode::encode('iso-8859-1', $result), $reference, $name);
   } else {
-    my $ord = '';
-    foreach my $char (split '', $result) {
-      $ord .= ord($char).'-';
-    }
-    $ord =~ s/-$//;
+    my ($ord, $hex) = ord_hex_string($result);
     print STDERR "$name ($ord)--> utf8: ".Encode::encode('utf8', $result).
         " latin1: ".Encode::encode('iso-8859-1', $result)."\n";
   }
@@ -89,30 +103,55 @@
     print STDERR "$name xml entity: $result_xml_entity\n";
     #print STDERR "<p>$texi $name xml entity: $result_xml_entity\n</p>";
   }
+  if (defined($reference_unicode)) {
+    is ($result_unicode, $reference_unicode, "$name unicode");
+  } else {
+    my ($ord, $hex) = ord_hex_string($result);
+    my ($ord_unicode, $hex_unicode) = ord_hex_string($result_unicode);
+    print STDERR "$name ($ord/$hex)--> result utf8: ".Encode::encode('utf8', $result).
+         " ($ord_unicode/$hex_unicode)--> unicode: ".Encode::encode('utf8', $result_unicode)."\n";
+  }
+}
+
+sub chrx(@)
+{
+  my $result = '';
+  foreach my $hex_string(@_) {
+    $result .= chr(hex($hex_string));
+  }
+  return $result;
 }
 
 # some come from encodings/weird_accents.texi
 foreach my $test (
-  ['@~e',                   'no 8bit encoding',    "e~", 'e~', '&#7869;'],
-  ['@~n',                   'simple encoding',     chr(241), 
-                                                   '&ntilde;', '&ntilde;'],
-  ['@~{n}' ,                'brace encoding',      chr(241), 
-                                                   '&ntilde;', '&ntilde;'],
+  ['@~e',                   'no 8bit encoding',    "e~", 'e~', '&#7869;', 
+                                                   chrx('1ebd')],
+  ['@~n',                   'simple encoding',     chr(241), '&ntilde;', 
+                                                   '&ntilde;', chrx('00f1')],
+  ['@~{n}' ,                'brace encoding',      chr(241), '&ntilde;', 
+                                                   '&ntilde;', chrx('00f1')],
   ['@address@hidden',       'dotless',             chr(238), '&icirc;', 
-                                                             '&icirc;'],
-  ['@address@hidden',       'no 8bit dotless',     'i~', 'i~', '&#297;'],
-  ['@address@hidden@dotless{i}}}',   'no 8 cplx dotless',   'i~=', 'i~=', 
-                                                          '&#297;='],
-  ['@address@hidden@dotless{i}}}',   'complex dotless',     chr(238).'=', 
-                                                   '&icirc;=', '&icirc;='],
+                                                   '&icirc;', chrx('00ee')],
+  ['@address@hidden',       'no 8bit dotless',     'i~', 'i~', '&#297;',
+                                                   chrx('0129')],
+  ['@address@hidden@dotless{i}}}',   'no 8 cplx dotless',   'i~=', 'i~=', '&#297;=',
+                                                   chrx('0129','0304')],
+  ['@address@hidden@dotless{i}}}',   'complex dotless',     chr(238).'=', '&icirc;=', 
+                                                   '&icirc;=',
+                                                   chrx('00ee','0304')],
   ['@={@,address@hidden',         'complex encoding',    chr(241).',=', '&ntilde;,=',
-                                                                  '&ntilde;,='],
-  ['@udotaccent{r}',        'udotaccent',          '.r', '.r', '&#7771;'],
-  ['@address@hidden',    'complex ubaraccent',  'a_=', 'a_=', 'a_='],
-  ['@address@hidden@`r}}',  'complex udotaccent',  '.r`^', '.r`^', '.r`^'],
+                                                   '&ntilde;,=', 
+                                                   chrx('0146','0303','0304')],
+  ['@udotaccent{r}',        'udotaccent',          '.r', '.r', '&#7771;',
+                                                   chrx('1e5b')],
+  ['@address@hidden',    'complex ubaraccent',  'a_=', 'a_=', 'a_=',
+                                                   chrx('0101','0332')],
+  ['@address@hidden@`r}}',  'complex udotaccent',  '.r`^', '.r`^', '.r`^',
+                                                   chrx('1e5b','0300','0302')],
   ['@address@hidden'{r}}',            'utf8 possible inside', 'r\'<', 'r\'&lt;', 
-                                                    '&#341;&lt;'],
-  ['@address@hidden@\'address@hidden', 'command in accent',   '=', '=', '=']
+                                                    '&#341;&lt;', 
+                                                    chrx('0155','030c')],
+  ['@address@hidden@\'address@hidden', 'command in accent',   '=', '=', '=', chrx('0304')]
             ) {
   test_enable_encoding($test);
 }
[Prev in Thread]
Current Thread
[Next in Thread]
texinfo/tp Texinfo/Convert/Converter.pm Texinfo..., Patrice Dumas <=
- texinfo/tp Texinfo/Convert/Converter.pm Texinfo..., Patrice Dumas, 2011/09/17
- texinfo/tp Texinfo/Convert/Converter.pm Texinfo..., Patrice Dumas, 2011/09/29
Prev by Date: texinfo/tp Texinfo/Convert/HTML.pm t/init/icons...
Next by Date: texinfo/tp Texinfo/Convert/HTML.pm t/30sectioni...
Previous by thread: texinfo/tp Texinfo/Convert/HTML.pm t/init/icons...
Next by thread: texinfo/tp Texinfo/Convert/Converter.pm Texinfo...
Index(es):
- Date
- Thread