[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Converter.pm Texinfo...
From: |
Patrice Dumas |
Subject: |
texinfo/tp Texinfo/Convert/Converter.pm Texinfo... |
Date: |
Sun, 04 Sep 2011 18:13:17 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 11/09/04 18:13:17
Modified files:
tp/Texinfo/Convert: Converter.pm Text.pm
tp/t : accents.t
Log message:
Format stacks of accent commands and not accent command names, to
still have the parent information.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Converter.pm?cvsroot=texinfo&r1=1.32&r2=1.33
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.56&r2=1.57
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/accents.t?cvsroot=texinfo&r1=1.11&r2=1.12
Patches:
Index: Texinfo/Convert/Converter.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Converter.pm,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -b -r1.32 -r1.33
--- Texinfo/Convert/Converter.pm 31 Aug 2011 22:54:51 -0000 1.32
+++ Texinfo/Convert/Converter.pm 4 Sep 2011 18:13:16 -0000 1.33
@@ -619,7 +619,7 @@
my $result = $self->_convert({'contents' => $contents});
foreach my $accent_command (reverse(@$stack)) {
- $result = &$format_accents ($result, {'cmdname' => $accent_command},
+ $result = &$format_accents ($result, $accent_command,
$in_upper_case);
}
return $result;
Index: Texinfo/Convert/Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.56
retrieving revision 1.57
diff -u -b -r1.56 -r1.57
--- Texinfo/Convert/Text.pm 20 Aug 2011 14:38:47 -0000 1.56
+++ Texinfo/Convert/Text.pm 4 Sep 2011 18:13:16 -0000 1.57
@@ -25,7 +25,7 @@
# accent commands list.
use Texinfo::Common;
use Texinfo::Convert::Unicode;
-#Â for debugging
+# for debugging
use Texinfo::Convert::Texinfo;
use Data::Dumper;
use Carp qw(cluck);
@@ -545,9 +545,9 @@
}
# find the innermost accent and the correspponding text contents
-#Â FIXME This is not output dependent, so could be in
-#Â Texinfo::Convert::Converter. However this would create a
-#Â dependency loop.
+# FIXME This is not output dependent, so could be in
+# Texinfo::Convert::Converter. However this would create a
+# dependency loop.
sub _find_innermost_accent_contents($;$)
{
my $current = shift;
@@ -556,7 +556,7 @@
my $debug = 0;
ACCENT:
while (1) {
- #Â the following can happen if called with a bad tree
+ # the following can happen if called with a bad tree
if (!$current->{'cmdname'}
or !$accent_commands{$current->{'cmdname'}}) {
#print STDERR "BUG: Not an accent command in accent\n";
@@ -565,7 +565,7 @@
print STDERR Data::Dumper->Dump([$current]);
last;
}
- push @accent_commands, $current->{'cmdname'};
+ push @accent_commands, $current;
# A bogus accent
if (!$current->{'args'}) {
return ([], $current, \@accent_commands);
@@ -595,7 +595,7 @@
}
}
}
- #Â we go here if there was no nested accent
+ # we go here if there was no nested accent
return ($text_contents, $current, \@accent_commands);
}
}
@@ -646,7 +646,7 @@
#$debug = 1;
# FIXME shouldn't it be better to format the innermost conntents with
- #Â a converter, if present?
+ # a converter, if present?
my ($text, $innermost_accent, $stack)
= _find_innermost_accent($current, $encoding, $in_upper_case);
@@ -683,7 +683,8 @@
}
if ($debug) {
- print STDERR "stack: ".join('|',@$stack)."\nPARTIAL_RESULTS_STACK:\n";
+ print STDERR "stack: ".join('|', map {$_->{'cmdname'}} @$stack)
+ ."\nPARTIAL_RESULTS_STACK:\n";
foreach my $partial_result (@results_stack) {
if (defined($partial_result->[0])) {
print STDERR " -> ".Encode::encode('utf8', $partial_result->[0])
@@ -712,7 +713,8 @@
if ($debug) {
my $eight_bit_txt = 'undef';
$eight_bit_txt = $eight_bit if (defined($eight_bit));
- print STDERR "" . Encode::encode('utf8', $char) . " ($partial_result->[1]->{'cmdname'}), new_codepoint: $new_codepoint 8bit: $new_eight_bit old:$eight_bit_txt\n";
+ print STDERR "" . Encode::encode('utf8', $char)
+ . " ($partial_result->[1]->{'cmdname'}), new_codepoint: $new_codepoint 8bit: $new_eight_bit old:$eight_bit_txt\n";
}
# no corresponding eight bit character found for a composed character
@@ -788,7 +790,7 @@
$result = uc($result) if ($in_upper_case and $result =~ /^\w$/);
foreach my $accent_command (reverse(@$stack)) {
- $result = ascii_accent ($result, {'cmdname' => $accent_command});
+ $result = ascii_accent ($result, $accent_command);
}
return $result;
}
@@ -806,10 +808,9 @@
while (@stack_accent_commands) {
my $accent_command = shift @stack_accent_commands;
my $formatted_result
- = Texinfo::Convert::Unicode::unicode_accent($result,
- {'cmdname' => $accent_command});
+ = Texinfo::Convert::Unicode::unicode_accent($result, $accent_command);
if (!defined($formatted_result)) {
- push @stack_accent_commands, $accent_command;
+ unshift @stack_accent_commands, $accent_command;
} else {
$result = $formatted_result;
}
@@ -817,8 +818,7 @@
$result = uc ($result) if ($in_upper_case);
while (@stack_accent_commands) {
my $accent_command = shift @stack_accent_commands;
- $result = &$format_accent($result,
- {'cmdname' => $accent_command}, $in_upper_case);
+ $result = &$format_accent($result, $accent_command, $in_upper_case);
}
return $result;
}
Index: t/accents.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/accents.t,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- t/accents.t 1 Jun 2011 21:30:11 -0000 1.11
+++ t/accents.t 4 Sep 2011 18:13:17 -0000 1.12
@@ -1,7 +1,7 @@
use strict;
use Test::More;
-BEGIN { plan tests => 50 };
+BEGIN { plan tests => 63 };
use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
use lib '../texi2html/lib/libintl-perl/lib/';
@@ -22,12 +22,13 @@
my $reference = $test->[2];
my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
my $tree = $parser->parse_texi_text($texi);
- my ($text, $innermost_accent, $stack) =
+ my ($text, $innermost_accent, $commands_stack) =
Texinfo::Convert::Text::_find_innermost_accent($tree->{'contents'}->[0]);
+ my @stack = map {$_->{'cmdname'}} @$commands_stack;
if (defined($reference)) {
- ok ($reference eq join('|',($text, @$stack)), 'innermost '.$name);
+ ok ($reference eq join('|',($text, @stack)), 'innermost '.$name);
} else {
- print STDERR join('|',($text, @$stack))."\n";
+ print STDERR join('|',($text, @stack))."\n";
}
}
@@ -43,6 +44,20 @@
test_accent_stack($test);
}
+sub ord_hex_string($)
+{
+ my $result = shift;
+ my $ord = '';
+ my $hex = '';
+ foreach my $char (split '', $result) {
+ $ord .= ord($char).'-';
+ $hex .= sprintf("%04x-", ord($char));
+ }
+ $ord =~ s/-$//;
+ $hex =~ s/-$//;
+ return ($ord, $hex);
+}
+
sub test_enable_encoding ($)
{
my $test = shift;
@@ -51,6 +66,7 @@
my $reference = $test->[2];
my $reference_xml = $test->[3];
my $reference_xml_entity = $test->[4];
+ my $reference_unicode = $test->[5];
my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
my $text_root = $parser->parse_texi_text($texi);
my $tree = $text_root->{'contents'}->[0];
@@ -64,16 +80,14 @@
$html_converter->{'conf'}->{'USE_NUMERIC_ENTITY'} = 1;
my $result_xml_entity
= Texinfo::Convert::Converter::xml_accents($html_converter, $tree);
+ my $result_unicode = Texinfo::Convert::Text::unicode_accents($tree,
+ \&Texinfo::Convert::Text::ascii_accent);
if (defined($reference)) {
#ok (Encode::decode('iso-8859-1', $reference) eq $result, $name);
#ok ($reference eq Encode::encode('iso-8859-1', $result), $name);
is (Encode::encode('iso-8859-1', $result), $reference, $name);
} else {
- my $ord = '';
- foreach my $char (split '', $result) {
- $ord .= ord($char).'-';
- }
- $ord =~ s/-$//;
+ my ($ord, $hex) = ord_hex_string($result);
print STDERR "$name ($ord)--> utf8: ".Encode::encode('utf8', $result).
" latin1: ".Encode::encode('iso-8859-1', $result)."\n";
}
@@ -89,30 +103,55 @@
print STDERR "$name xml entity: $result_xml_entity\n";
#print STDERR "<p>$texi $name xml entity: $result_xml_entity\n</p>";
}
+ if (defined($reference_unicode)) {
+ is ($result_unicode, $reference_unicode, "$name unicode");
+ } else {
+ my ($ord, $hex) = ord_hex_string($result);
+ my ($ord_unicode, $hex_unicode) = ord_hex_string($result_unicode);
+ print STDERR "$name ($ord/$hex)--> result utf8: ".Encode::encode('utf8', $result).
+ " ($ord_unicode/$hex_unicode)--> unicode: ".Encode::encode('utf8', $result_unicode)."\n";
+ }
+}
+
+sub chrx(@)
+{
+ my $result = '';
+ foreach my $hex_string(@_) {
+ $result .= chr(hex($hex_string));
+ }
+ return $result;
}
# some come from encodings/weird_accents.texi
foreach my $test (
- ['@~e', 'no 8bit encoding', "e~", 'e~', 'ẽ'],
- ['@~n', 'simple encoding', chr(241),
- 'ñ', 'ñ'],
- ['@~{n}' , 'brace encoding', chr(241),
- 'ñ', 'ñ'],
+ ['@~e', 'no 8bit encoding', "e~", 'e~', 'ẽ',
+ chrx('1ebd')],
+ ['@~n', 'simple encoding', chr(241), 'ñ',
+ 'ñ', chrx('00f1')],
+ ['@~{n}' , 'brace encoding', chr(241), 'ñ',
+ 'ñ', chrx('00f1')],
['@address@hidden', 'dotless', chr(238), 'î',
- 'î'],
- ['@address@hidden', 'no 8bit dotless', 'i~', 'i~', 'ĩ'],
- ['@address@hidden@dotless{i}}}', 'no 8 cplx dotless', 'i~=', 'i~=',
- 'ĩ='],
- ['@address@hidden@dotless{i}}}', 'complex dotless', chr(238).'=',
- 'î=', 'î='],
+ 'î', chrx('00ee')],
+ ['@address@hidden', 'no 8bit dotless', 'i~', 'i~', 'ĩ',
+ chrx('0129')],
+ ['@address@hidden@dotless{i}}}', 'no 8 cplx dotless', 'i~=', 'i~=',
'ĩ=',
+ chrx('0129','0304')],
+ ['@address@hidden@dotless{i}}}', 'complex dotless', chr(238).'=',
'î=',
+ 'î=',
+ chrx('00ee','0304')],
['@={@,address@hidden', 'complex encoding', chr(241).',=',
'ñ,=',
-
'ñ,='],
- ['@udotaccent{r}', 'udotaccent', '.r', '.r', 'ṛ'],
- ['@address@hidden', 'complex ubaraccent', 'a_=', 'a_=', 'a_='],
- ['@address@hidden@`r}}', 'complex udotaccent', '.r`^', '.r`^', '.r`^'],
+ 'ñ,=',
+ chrx('0146','0303','0304')],
+ ['@udotaccent{r}', 'udotaccent', '.r', '.r', 'ṛ',
+ chrx('1e5b')],
+ ['@address@hidden', 'complex ubaraccent', 'a_=', 'a_=', 'a_=',
+ chrx('0101','0332')],
+ ['@address@hidden@`r}}', 'complex udotaccent', '.r`^', '.r`^', '.r`^',
+ chrx('1e5b','0300','0302')],
['@address@hidden'{r}}', 'utf8 possible inside', 'r\'<',
'r\'<',
- 'ŕ<'],
- ['@address@hidden@\'address@hidden', 'command in accent', '=', '=', '=']
+ 'ŕ<',
+ chrx('0155','030c')],
+ ['@address@hidden@\'address@hidden', 'command in accent', '=', '=', '=',
chrx('0304')]
) {
test_enable_encoding($test);
}
- texinfo/tp Texinfo/Convert/Converter.pm Texinfo...,
Patrice Dumas <=