[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
From: |
Patrice Dumas |
Subject: |
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag... |
Date: |
Sun, 07 Nov 2010 14:10:14 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 10/11/07 14:10:14
Modified files:
tp/Texinfo/Convert: Paragraph.pm
tp/t : paragraph.t
Log message:
Fix leading space handling, and correct end sentence detection.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&r1=1.1&r2=1.2
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&r1=1.1&r2=1.2
Patches:
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Paragraph.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- Texinfo/Convert/Paragraph.pm 6 Nov 2010 13:19:37 -0000 1.1
+++ Texinfo/Convert/Paragraph.pm 7 Nov 2010 14:10:14 -0000 1.2
@@ -65,7 +65,7 @@
}
# end a line.
-sub _end_line($)
+sub end_line($)
{
my $paragraph = shift;
$paragraph->{'counter'} = 0;
@@ -119,11 +119,12 @@
}
# add a word and/or spaces.
-sub add_next($;$$)
+sub add_next($;$$$)
{
my $paragraph = shift;
my $word = shift;
my $space = shift;
+ my $end_sentence = shift;
my $result = '';
if (defined($word)) {
@@ -135,7 +136,7 @@
if ($paragraph->{'counter'} != 0 and
$paragraph->{'counter'} + _string_width($paragraph->{'word'}) +
_string_width($paragraph->{'space'}) > $paragraph->{'max'}) {
- $result .= $paragraph->_end_line();
+ $result .= $paragraph->end_line();
}
}
if (defined($space)) {
@@ -143,9 +144,12 @@
$paragraph->{'space'} = $space;
if ($paragraph->{'counter'} + _string_width($paragraph->{'space'})
> $paragraph->{'max'}) {
- $result .= $paragraph->_end_line();
+ $result .= $paragraph->end_line();
}
}
+ if ($end_sentence) {
+ $paragraph->{'end_sentence'} = 1;
+ }
return $result;
}
@@ -169,22 +173,17 @@
print STDERR "SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
my $added_word = $paragraph->{'word'};
$result .= $paragraph->add_pending_word();
- if (defined($added_word)) {
- if (!$paragraph->{'frenchspacing'}
- and $added_word =~ /[$end_sentence_character][$after_punctuation_characters]*$/
- and $added_word !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+ if ($paragraph->{'counter'} != 0) {
+ if (!$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}) {
$paragraph->{'space'} = ' ';
- print STDERR "NEW_SPACE_2\n" if ($paragraph->{'debug'});
} else {
$paragraph->{'space'} = ' ';
- print STDERR "NEW_SPACE_1\n" if ($paragraph->{'debug'});
}
- } else {
- $paragraph->{'space'} = ' ';
}
+ delete $paragraph->{'end_sentence'};
if ($paragraph->{'counter'} + _string_width($paragraph->{'space'})
> $paragraph->{'max'}) {
- $result .= $paragraph->_end_line();
+ $result .= $paragraph->end_line();
}
} elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
my $added = $1;
@@ -194,12 +193,23 @@
if ($paragraph->{'counter'} != 0 and
$paragraph->{'counter'} + _string_width($paragraph->{'word'})
> $paragraph->{'max'}) {
- $result .= $paragraph->_end_line();
+ $result .= $paragraph->end_line();
}
$result .= $paragraph->add_pending_word();
+ delete $paragraph->{'end_sentence'};
$paragraph->{'space'} = '';
} elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
- $result .= $paragraph->add_next($1);
+ my $added_word = $1;
+ $result .= $paragraph->add_next($added_word);
+ # now check if it is considered as an end of sentence
+ if (!$paragraph->{'end_sentence'}) {
+ if ($paragraph->{'word'} =~ /[$end_sentence_character][$after_punctuation_characters]*$/
+ and $paragraph->{'word'} !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+ $paragraph->{'end_sentence'} = 1;
+ }
+ } elsif ($added_word !~ /^[$after_punctuation_characters]*$/) {
+ delete $paragraph->{'end_sentence'};
+ }
}
}
return $result;
Index: t/paragraph.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/paragraph.t,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- t/paragraph.t 6 Nov 2010 13:19:37 -0000 1.1
+++ t/paragraph.t 7 Nov 2010 14:10:14 -0000 1.2
@@ -9,7 +9,7 @@
#use Test;
use Test::More;
-BEGIN { plan tests => 19 };
+BEGIN { plan tests => 33 };
use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
#push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
use Texinfo::Convert::Paragraph;
@@ -41,6 +41,9 @@
test_para(['word'], "word\n", 'word');
test_para(['word other'], "word other\n", 'two_words');
+test_para(['word '], "word\n", 'trailing spaces');
+test_para([' word'], "word\n", 'leading spaces');
+test_para([' ', ' word'], "word\n", 'double leading spaces');
test_para(['word other'], "word\nother\n", 'two_words_max', {'max' => 2});
test_para(['word other'], "word\nother\n", 'two_words_max_less_one', {'max' => 3});
test_para(['word other'], "word\nother\n", 'two_words_max_exact', {'max' => 4});
@@ -51,11 +54,65 @@
test_para(['word other'], "word other\n", 'two_words_two_spaces');
test_para(['word. other'], "word. other\n", 'two_words_dot');
test_para(['word. other'], "word. other\n", 'two_words_dot_one_space');
+test_para(['word.) other'], "word.) other\n", 'two_words_dot_paren_one_space');
test_para(['worD. other'], "worD. other\n", 'two_words_dot_upper');
test_para(['word','other'], "wordother\n", 'concatenate');
test_para(['word','other'], "wordother\n", 'concatenate_max', {'max' => 2});
test_para(['word ','other'], "word\nother\n", 'two_elements_max', {'max' => 2});
+test_para(['word',' other'], "word\nother\n", 'two_elements_space_max', {'max' => 2});
test_para(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\n\x{4e00}\nother\n", 'east_asian', {'max' => 2});
-test_para(['word. other'], "word. other\n", 'two_words_dot_frenshspacing', {'frenchspacing' => 1});
+test_para(['word. other'], "word. other\n", 'two_words_dot_frenchspacing', {'frenchspacing' => 1});
+test_para(["aa.)\x{7b2c} b"], "aa.)\x{7b2c} b\n", 'end_sentence_east_asian');
+
+my $para = Texinfo::Convert::Paragraph->new();
+my $result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_next('_');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aa.)_ after\n", $result, 'add char after end sentence');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->end_line();
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("\nafter\n", $result, 'space after end_line');
+#print STDERR "$result";
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_pending_word();
+is ('aa.)', $result, 'call add_pending_word');
+$result = $para->end_line();
+is ("\n", $result, 'call end_line after add_pending_word');
+$result = $para->end();
+is ('', $result, 'call end after end_line');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_pending_word();
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aa.) after\n", $result, 'space after sentence and add_pending_word');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aA');
+$result .= $para->add_next('.', undef, 1);
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aA. after\n", $result, 'force end sentence after upper case');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aA');
+$result .= $para->wrap_next('.');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aA. after\n", $result, 'end sentence after upper case');
+
1;