texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...

From:	Patrice Dumas
Subject:	texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
Date:	Sun, 07 Nov 2010 14:10:14 +0000
CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        10/11/07 14:10:14

Modified files:
        tp/Texinfo/Convert: Paragraph.pm 
        tp/t           : paragraph.t 

Log message:
        Fix leading space handling, and correct end sentence detection.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&r1=1.1&r2=1.2
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&r1=1.1&r2=1.2

Patches:
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Paragraph.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- Texinfo/Convert/Paragraph.pm        6 Nov 2010 13:19:37 -0000       1.1
+++ Texinfo/Convert/Paragraph.pm        7 Nov 2010 14:10:14 -0000       1.2
@@ -65,7 +65,7 @@
 }
 
 # end a line.
-sub _end_line($)
+sub end_line($)
 {
   my $paragraph = shift;
   $paragraph->{'counter'} = 0;
@@ -119,11 +119,12 @@
 }
 
 # add a word and/or spaces.
-sub add_next($;$$)
+sub add_next($;$$$)
 {
   my $paragraph = shift;
   my $word = shift;
   my $space = shift;
+  my $end_sentence = shift;
   my $result = '';
 
   if (defined($word)) {
@@ -135,7 +136,7 @@
     if ($paragraph->{'counter'} != 0 and 
         $paragraph->{'counter'} + _string_width($paragraph->{'word'}) + 
            _string_width($paragraph->{'space'}) > $paragraph->{'max'}) {
-      $result .= $paragraph->_end_line();
+      $result .= $paragraph->end_line();
     }
   }
   if (defined($space)) {
@@ -143,9 +144,12 @@
     $paragraph->{'space'} = $space;
     if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
                     > $paragraph->{'max'}) {
-      $result .= $paragraph->_end_line();
+      $result .= $paragraph->end_line();
     }
   }
+  if ($end_sentence) {
+    $paragraph->{'end_sentence'} = 1;
+  }
   return $result;
 }
 
@@ -169,22 +173,17 @@
       print STDERR "SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
       my $added_word = $paragraph->{'word'};
       $result .= $paragraph->add_pending_word();
-      if (defined($added_word)) {
-        if (!$paragraph->{'frenchspacing'}
-             and $added_word =~ /[$end_sentence_character][$after_punctuation_characters]*$/
-             and $added_word !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+      if ($paragraph->{'counter'} != 0) {
+        if (!$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}) {
           $paragraph->{'space'} = '  ';
-          print STDERR "NEW_SPACE_2\n" if ($paragraph->{'debug'});
         } else {
           $paragraph->{'space'} = ' ';
-          print STDERR "NEW_SPACE_1\n" if ($paragraph->{'debug'});
         }
-      } else {
-        $paragraph->{'space'} = ' ';
       }
+      delete $paragraph->{'end_sentence'};
       if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
                       > $paragraph->{'max'}) {
-        $result .= $paragraph->_end_line();
+        $result .= $paragraph->end_line();
       }
     } elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
       my $added = $1;
@@ -194,12 +193,23 @@
       if ($paragraph->{'counter'} != 0 and
           $paragraph->{'counter'} + _string_width($paragraph->{'word'}) 
                                > $paragraph->{'max'}) {
-        $result .= $paragraph->_end_line();
+        $result .= $paragraph->end_line();
       }
       $result .= $paragraph->add_pending_word();
+      delete $paragraph->{'end_sentence'};
       $paragraph->{'space'} = '';
     } elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
-      $result .= $paragraph->add_next($1);
+      my $added_word = $1;
+      $result .= $paragraph->add_next($added_word);
+      # now check if it is considered as an end of sentence
+      if (!$paragraph->{'end_sentence'}) {
+        if ($paragraph->{'word'} =~ /[$end_sentence_character][$after_punctuation_characters]*$/
+         and $paragraph->{'word'} !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+          $paragraph->{'end_sentence'} = 1;
+        }
+      } elsif ($added_word !~ /^[$after_punctuation_characters]*$/) {
+        delete $paragraph->{'end_sentence'};
+      }
     }
   }
   return $result;

Index: t/paragraph.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/paragraph.t,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- t/paragraph.t       6 Nov 2010 13:19:37 -0000       1.1
+++ t/paragraph.t       7 Nov 2010 14:10:14 -0000       1.2
@@ -9,7 +9,7 @@
 
 #use Test;
 use Test::More;
-BEGIN { plan tests => 19 };
+BEGIN { plan tests => 33 };
 use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
 #push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
 use Texinfo::Convert::Paragraph;
@@ -41,6 +41,9 @@
 
 test_para(['word'], "word\n", 'word');
 test_para(['word other'], "word other\n", 'two_words');
+test_para(['word '], "word\n", 'trailing spaces');
+test_para([' word'], "word\n", 'leading spaces');
+test_para([' ', ' word'], "word\n", 'double leading spaces');
 test_para(['word other'], "word\nother\n", 'two_words_max', {'max' => 2});
 test_para(['word other'], "word\nother\n", 'two_words_max_less_one', {'max' => 3});
 test_para(['word other'], "word\nother\n", 'two_words_max_exact', {'max' => 4});
@@ -51,11 +54,65 @@
 test_para(['word  other'], "word other\n", 'two_words_two_spaces');
 test_para(['word.  other'], "word.  other\n", 'two_words_dot');
 test_para(['word. other'], "word.  other\n", 'two_words_dot_one_space');
+test_para(['word.) other'], "word.)  other\n", 'two_words_dot_paren_one_space');
 test_para(['worD.  other'], "worD. other\n", 'two_words_dot_upper');
 test_para(['word','other'], "wordother\n", 'concatenate');
 test_para(['word','other'], "wordother\n", 'concatenate_max', {'max' => 2});
 test_para(['word ','other'], "word\nother\n", 'two_elements_max', {'max' => 2});
+test_para(['word',' other'], "word\nother\n", 'two_elements_space_max', {'max' => 2});
 test_para(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\n\x{4e00}\nother\n", 'east_asian', {'max' => 2});
-test_para(['word.  other'], "word. other\n", 'two_words_dot_frenshspacing', {'frenchspacing' => 1});
+test_para(['word.  other'], "word. other\n", 'two_words_dot_frenchspacing', {'frenchspacing' => 1});
+test_para(["aa.)\x{7b2c} b"], "aa.)\x{7b2c} b\n", 'end_sentence_east_asian');
+
+my $para = Texinfo::Convert::Paragraph->new();
+my $result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_next('_');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aa.)_  after\n", $result, 'add char after end sentence');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->end_line();
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("\nafter\n", $result, 'space after end_line');
+#print STDERR "$result";
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_pending_word();
+is ('aa.)', $result, 'call add_pending_word');
+$result = $para->end_line();
+is ("\n", $result, 'call end_line after add_pending_word');
+$result = $para->end();
+is ('', $result, 'call end after end_line');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aa.)');
+$result .= $para->add_pending_word();
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aa.)  after\n", $result, 'space after sentence and add_pending_word');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aA');
+$result .= $para->add_next('.', undef, 1);
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aA.  after\n", $result, 'force end sentence after upper case');
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->wrap_next('aA');
+$result .= $para->wrap_next('.');
+$result .= $para->wrap_next(' after');
+$result .= $para->end();
+is ("aA. after\n", $result, 'end sentence after upper case');
+
 
 1;
[Prev in Thread]
Current Thread
[Next in Thread]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag..., Patrice Dumas, 2010/11/06
- texinfo/tp Texinfo/Convert/Paragraph.pm t/parag..., Patrice Dumas <=
Prev by Date: texinfo ChangeLog doc/texinfo.txi util/htmlxref...
Next by Date: texinfo ChangeLog doc/texinfo.txi util/htmlxref...
Previous by thread: texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
Next by thread: texinfo/tp Texinfo/Convert/Paragraph.pm Texinfo...
Index(es):
- Date
- Thread