[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Paragraph.pm Texinfo...
From: |
Patrice Dumas |
Subject: |
texinfo/tp Texinfo/Convert/Paragraph.pm Texinfo... |
Date: |
Mon, 08 Nov 2010 00:04:37 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 10/11/08 00:04:37
Modified files:
tp/Texinfo/Convert: Paragraph.pm Text.pm Unicode.pm
tp/t : accents.t paragraph.t
tp/t/results/invalid_nestings: accents.pl
Log message:
Handle inhibiting end sentence.
Convert a stack of accents.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.15&r2=1.16
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Unicode.pm?cvsroot=texinfo&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/accents.t?cvsroot=texinfo&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/results/invalid_nestings/accents.pl?cvsroot=texinfo&r1=1.11&r2=1.12
Patches:
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Paragraph.pm,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- Texinfo/Convert/Paragraph.pm 7 Nov 2010 14:10:14 -0000 1.2
+++ Texinfo/Convert/Paragraph.pm 8 Nov 2010 00:04:37 -0000 1.3
@@ -118,7 +118,7 @@
return $result;
}
-# add a word and/or spaces.
+# add a word and/or spaces and end of sentence.
sub add_next($;$$$)
{
my $paragraph = shift;
@@ -147,12 +147,18 @@
$result .= $paragraph->end_line();
}
}
- if ($end_sentence) {
- $paragraph->{'end_sentence'} = 1;
+ if (defined($end_sentence)) {
+ $paragraph->{'end_sentence'} = $end_sentence;
}
return $result;
}
+# Inhibit end of sentence: store 0 (defined, but false) in the
+# 'end_sentence' entry of the paragraph hash.
+sub inhibit_end_sentence($)
+{
+  my ($paragraph) = @_;
+  $paragraph->{'end_sentence'} = 0;
+}
+
my $end_sentence_character = quotemeta('.?!');
my $after_punctuation_characters = quotemeta('"\')]');
@@ -202,14 +208,13 @@
my $added_word = $1;
$result .= $paragraph->add_next($added_word);
# now check if it is considered as an end of sentence
- if (!$paragraph->{'end_sentence'}) {
- if ($paragraph->{'word'} =~
/[$end_sentence_character][$after_punctuation_characters]*$/
+ if (defined($paragraph->{'end_sentence'}) and
+ $added_word =~ /^[$after_punctuation_characters]*$/) {
+ # do nothing in the case of a continuation of after_punctuation_characters
+ } elsif ($paragraph->{'word'} =~
/[$end_sentence_character][$after_punctuation_characters]*$/
and $paragraph->{'word'} !~
/[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
$paragraph->{'end_sentence'} = 1;
}
- } elsif ($added_word !~ /^[$after_punctuation_characters]*$/) {
- delete $paragraph->{'end_sentence'};
- }
}
}
return $result;
Index: Texinfo/Convert/Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -b -r1.15 -r1.16
--- Texinfo/Convert/Text.pm 6 Nov 2010 12:34:59 -0000 1.15
+++ Texinfo/Convert/Text.pm 8 Nov 2010 00:04:37 -0000 1.16
@@ -630,7 +630,7 @@
return ($eight_bit, $codepoint);
}
-sub eight_bit_accent($$$)
+sub eight_bit_accents($$$)
{
my $current = shift;
my $encoding = shift;
@@ -748,6 +748,32 @@
return $text . $accent;
}
+# Format a stack of accents as ASCII.
+# _find_innermost_accent() returns the initial result, the innermost accent
+# (not needed here) and a reference to the stack of accent command names.
+# ascii_accent() is applied once per command name, going through the stack
+# in reverse order.
+sub ascii_accents ($)
+{
+  my $current = shift;
+  my ($formatted, undef, $accent_stack) = _find_innermost_accent($current);
+
+  for my $cmdname (reverse @$accent_stack) {
+    $formatted = ascii_accent ($formatted, {'cmdname' => $cmdname});
+  }
+  return $formatted;
+}
+
+# Format a stack of accents as unicode.
+# $current is passed to _find_innermost_accent(), which returns the initial
+# result and a reference to the stack of accent command names.
+# $format_accents is a code reference handed to
+# Texinfo::Convert::Unicode::unicode_accent as its third argument
+# (presumably the fallback used when there is no unicode form -- confirm
+# against unicode_accent).  It was previously ignored in favour of a
+# hard-coded \&ascii_accent; \&ascii_accent is still used when it is
+# undefined.
+sub unicode_accents ($$)
+{
+  my $current = shift;
+  my $format_accents = shift;
+  $format_accents = \&ascii_accent if (!defined($format_accents));
+  my ($result, undef, $stack) = _find_innermost_accent($current);
+
+  # go through the stack of accent commands in reverse order
+  foreach my $accent_command (reverse(@$stack)) {
+    $result = Texinfo::Convert::Unicode::unicode_accent($result,
+        {'cmdname' => $accent_command}, $format_accents);
+  }
+  return $result;
+}
+
sub _normalise_space($)
{
return undef unless (defined ($_[0]));
@@ -803,14 +829,16 @@
} elsif ($accent_commands{$root->{'cmdname'}}) {
return '' if (!$root->{'args'});
if ($options->{'enable_encoding'} and $options->{'enable_encoding'} eq
'utf-8') {
- return
Texinfo::Convert::Unicode::unicode_accent(convert($root->{'args'}->[0],
$options),
- $root->{'cmdname'},
\&ascii_accent);
+ #return
Texinfo::Convert::Unicode::unicode_accent(convert($root->{'args'}->[0],
$options),
+ # $root, \&ascii_accent);
+ return unicode_accents($root, \&ascii_accent);
} elsif ($options->{'enable_encoding'}
and
$Texinfo::Commands::eight_bit_encoding_aliases{$options->{'enable_encoding'}}) {
- return eight_bit_accent($root, $options->{'enable_encoding'},
+ return eight_bit_accents($root, $options->{'enable_encoding'},
\&ascii_accent);
} else {
- return ascii_accent(convert($root->{'args'}->[0], $options), $root);
+ #return ascii_accent(convert($root->{'args'}->[0], $options), $root);
+ return ascii_accents($root);
}
} elsif ($root->{'cmdname'} eq 'image') {
return convert($root->{'args'}->[0], $options);
Index: Texinfo/Convert/Unicode.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Unicode.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- Texinfo/Convert/Unicode.pm 6 Nov 2010 12:34:59 -0000 1.4
+++ Texinfo/Convert/Unicode.pm 8 Nov 2010 00:04:37 -0000 1.5
@@ -557,4 +557,19 @@
return &$fallback_convert_accent($text, $command);
}
+# Return the 'text' entry of $command with ASCII dash and quote sequences
+# replaced by Unicode characters, normalized to NFC.
+# $self is a hash reference: no replacement is done when its 'code' entry
+# is true or when the last element of its 'context' array is 'preformatted'.
+sub unicode_text($$)
+{
+  my $self = shift;
+  my $command = shift;
+  my $text = $command->{'text'};
+
+  # "!" binds tighter than "eq": the former test
+  #   !$self->{'context'}->[-1] eq 'preformatted'
+  # compared the negated value with the string and was never true, so no
+  # replacement ever happened.  Compare the string explicitly instead.
+  # A missing or empty context stack counts as not preformatted.
+  my $context = $self->{'context'} ? $self->{'context'}->[-1] : undef;
+  my $preformatted = (defined($context) and $context eq 'preformatted');
+
+  if (!$self->{'code'} and !$preformatted) {
+    # three dashes have to be handled before two dashes
+    $text =~ s/---/\x{2014}/g;
+    $text =~ s/--/\x{2013}/g;
+    $text =~ s/``/\x{201C}/g;
+    $text =~ s/''/\x{201D}/g;
+  }
+  return Unicode::Normalize::NFC($text);
+}
+
1;
Index: t/accents.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/accents.t,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- t/accents.t 6 Nov 2010 12:34:59 -0000 1.2
+++ t/accents.t 8 Nov 2010 00:04:37 -0000 1.3
@@ -45,7 +45,7 @@
my $parser = Texinfo::Parser::parser({'context' => 'preformatted'});
my $tree = $parser->parse_texi_text($texi);
my $result =
- Texinfo::Convert::Text::eight_bit_accent($tree->{'contents'}->[0],
+ Texinfo::Convert::Text::eight_bit_accents($tree->{'contents'}->[0],
'iso-8859-1', \&Texinfo::Convert::Text::ascii_accent);
if (defined($reference)) {
#ok (Encode::decode('iso-8859-1', $reference) eq $result, $name);
Index: t/paragraph.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/paragraph.t,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- t/paragraph.t 7 Nov 2010 14:10:14 -0000 1.2
+++ t/paragraph.t 8 Nov 2010 00:04:37 -0000 1.3
@@ -9,7 +9,7 @@
#use Test;
use Test::More;
-BEGIN { plan tests => 33 };
+BEGIN { plan tests => 38 };
use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
#push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
use Texinfo::Convert::Paragraph;
@@ -70,25 +70,25 @@
$result .= $para->add_next('_');
$result .= $para->wrap_next(' after');
$result .= $para->end();
-is ("aa.)_ after\n", $result, 'add char after end sentence');
+is ($result, "aa.)_ after\n", 'add char after end sentence');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
$result .= $para->end_line();
$result .= $para->wrap_next(' after');
$result .= $para->end();
-is ("\nafter\n", $result, 'space after end_line');
+is ($result, "\nafter\n", 'space after end_line');
#print STDERR "$result";
$para = Texinfo::Convert::Paragraph->new();
$result = '';
$result .= $para->wrap_next('aa.)');
$result .= $para->add_pending_word();
-is ('aa.)', $result, 'call add_pending_word');
+is ($result, 'aa.)', 'call add_pending_word');
$result = $para->end_line();
-is ("\n", $result, 'call end_line after add_pending_word');
+is ($result, "\n", 'call end_line after add_pending_word');
$result = $para->end();
-is ('', $result, 'call end after end_line');
+is ($result, '', 'call end after end_line');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
@@ -96,7 +96,7 @@
$result .= $para->add_pending_word();
$result .= $para->wrap_next(' after');
$result .= $para->end();
-is ("aa.) after\n", $result, 'space after sentence and add_pending_word');
+is ($result, "aa.) after\n", 'space after sentence and add_pending_word');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
@@ -104,7 +104,7 @@
$result .= $para->add_next('.', undef, 1);
$result .= $para->wrap_next(' after');
$result .= $para->end();
-is ("aA. after\n", $result, 'force end sentence after upper case');
+is ($result, "aA. after\n", 'force end sentence after upper case');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
@@ -112,7 +112,51 @@
$result .= $para->wrap_next('.');
$result .= $para->wrap_next(' after');
$result .= $para->end();
-is ("aA. after\n", $result, 'end sentence after upper case');
+is ($result, "aA. after\n", 'end sentence after upper case');
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->wrap_next('))');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ($result, "aa.))) after\n", 'continue with after_punctuation_characters');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$para->inhibit_end_sentence();
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ($result, "aa.) after\n", 'inhibit end sentence');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$para->inhibit_end_sentence();
+$result .= $para->add_next('_');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ($result, "aa.)_ after\n", 'inhibit end sentence then add next');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$para->inhibit_end_sentence();
+$result .= $para->wrap_next('aa.)');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ($result, "aa.)aa.) after\n", 'cancel inhibit end sentence');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$para->inhibit_end_sentence();
+$result .= $para->wrap_next('))');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ($result, "aa.))) after\n", 'inhibit end sentence and ))');
+
+$para = Texinfo::Convert::Paragraph->new();
1;
Index: t/results/invalid_nestings/accents.pl
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/results/invalid_nestings/accents.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- t/results/invalid_nestings/accents.pl 1 Nov 2010 11:40:45 -0000 1.11
+++ t/results/invalid_nestings/accents.pl 8 Nov 2010 00:04:37 -0000 1.12
@@ -210,7 +210,7 @@
TeX~. @~.
Invalid
-a~. ^.
+~. ^.
';
$result_errors{'accents'} = [
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- texinfo/tp Texinfo/Convert/Paragraph.pm Texinfo...,
Patrice Dumas <=