texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...

From:	Patrice Dumas
Subject:	texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...
Date:	Mon, 08 Nov 2010 23:50:47 +0000
CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        10/11/08 23:50:47

Modified files:
        tp             : TODO 
        tp/Texinfo     : Parser.pm 
        tp/Texinfo/Convert: Paragraph.pm 
        tp/t           : 01use.t paragraph.t 
Added files:
        tp/Texinfo/Convert: Line.pm 

Log message:
        New code using the same interface as paragraph for line text.
        Prepare for situations of spaces protection, like in @w.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/TODO?cvsroot=texinfo&r1=1.52&r2=1.53
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Parser.pm?cvsroot=texinfo&r1=1.143&r2=1.144
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&r1=1.3&r2=1.4
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Line.pm?cvsroot=texinfo&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/01use.t?cvsroot=texinfo&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&r1=1.3&r2=1.4

Patches:
Index: TODO
===================================================================
RCS file: /sources/texinfo/texinfo/tp/TODO,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -b -r1.52 -r1.53
--- TODO        1 Nov 2010 19:18:04 -0000       1.52
+++ TODO        8 Nov 2010 23:50:46 -0000       1.53
@@ -41,6 +41,10 @@
 
 use definfoenclose information in Convert::Text?
 
+
+Incorporation of texi2html tests
+--------------------------------
+
 tests in test/macros, not incorporated
 glossary.texi
 bib-example.texi (big test)

Index: Texinfo/Parser.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Parser.pm,v
retrieving revision 1.143
retrieving revision 1.144
diff -u -b -r1.143 -r1.144
--- Texinfo/Parser.pm   6 Nov 2010 00:41:28 -0000       1.143
+++ Texinfo/Parser.pm   8 Nov 2010 23:50:47 -0000       1.144
@@ -2046,7 +2046,7 @@
         # not def line
         and $self->{'context_stack'}->[-1] ne 'def') {
       print STDERR "BEGIN LINE\n" if ($self->{'debug'});
-      $line =~ s/([^\S\n]*)//;
+      $line =~ s/^([^\S\n]*)//;
       push @{$current->{'contents'}}, { 'type' => 'empty_line', 
                                         'text' => $1,
                                         'parent' => $current };

Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Paragraph.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -r1.3 -r1.4
--- Texinfo/Convert/Paragraph.pm        8 Nov 2010 00:04:37 -0000       1.3
+++ Texinfo/Convert/Paragraph.pm        8 Nov 2010 23:50:47 -0000       1.4
@@ -35,7 +35,7 @@
   my $class = shift;
   my $conf = shift;
   my $self = {'max' => 72, 'indent_length' => 0, 'counter' => 0,
-              'space' => '', 'frenchspacing' => 0};
+              'word_counter' => 0, 'space' => '', 'frenchspacing' => 0};
   if (defined($conf)) {
     foreach my $key (keys(%$conf)) {
       $self->{$key} = $conf->{$key};
@@ -46,28 +46,11 @@
 
 # string fixed length size takeing into account that east asian characters
 # may take 2 spaces.
-sub _string_width($)
-{
-  my $string = shift;
-
-  if (! defined($string)) {
-    Carp::cluck();
-  } 
-  my $width = 0;
-  foreach my $character(split '', $string) {
-    if ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/) {
-      $width += 2;
-    } else {
-      $width += 1;
-    }
-  }
-  return $width;
-}
-
 # end a line.
 sub end_line($)
 {
   my $paragraph = shift;
+  return '' if ($paragraph->{'protected_spaces'});
   $paragraph->{'counter'} = 0;
   $paragraph->{'space'} = '';
   if ($paragraph->{'indent_length_next'}) {
@@ -88,18 +71,22 @@
     if ($paragraph->{'indent_length'} > $paragraph->{'counter'}) {
       $result .= ' ' x ($paragraph->{'indent_length'} - 
$paragraph->{'counter'});
       $paragraph->{'counter'} = $paragraph->{'indent_length'};
-      print STDERR "INDENT($paragraph->{'counter'})\n" if 
($paragraph->{'debug'});
+      print STDERR 
"INDENT($paragraph->{'counter'}+$paragraph->{'word_counter'})\n" 
+                   if ($paragraph->{'debug'});
     } elsif ($paragraph->{'space'}) {
       $result .= $paragraph->{'space'};
-      $paragraph->{'counter'} += _string_width($paragraph->{'space'});
-      print STDERR "ADD_SPACES($paragraph->{'counter'})\n" if 
($paragraph->{'debug'});
+      $paragraph->{'counter'} += length($paragraph->{'space'});
+      print STDERR 
"ADD_SPACES($paragraph->{'counter'}+$paragraph->{'word_counter'})\n" 
+         if ($paragraph->{'debug'});
       
     }
     $result .= $paragraph->{'word'};
-    $paragraph->{'counter'} += _string_width($paragraph->{'word'});
-    print STDERR "ADD_WORD[$paragraph->{'word'}]($paragraph->{'counter'})\n"
+    $paragraph->{'counter'} += $paragraph->{'word_counter'};
+    print STDERR "ADD_WORD[$paragraph->{'word'}]+$paragraph->{'word_counter'} 
($paragraph->{'counter'})\n"
       if ($paragraph->{'debug'});
     $paragraph->{'word'} = undef;
+    $paragraph->{'word_counter'} = 0;
+    $paragraph->{'space'} = '';
   }
   return $result;
 }
@@ -115,6 +102,7 @@
   $paragraph->{'counter'} = 0;
   $paragraph->{'space'} = '';
   $paragraph->{'word'} = undef;
+  $paragraph->{'word_counter'} = 0;
   return $result;
 }
 
@@ -130,19 +118,20 @@
   if (defined($word)) {
     $paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
     $paragraph->{'word'} .= $word;
+    $paragraph->{'word_counter'} += length($word);
     print STDERR "WORD+ $word -> $paragraph->{'word'}\n" if 
($paragraph->{'debug'});
     # The $paragraph->{'counter'} != 0 is here to avoid having an
     # additional line output when the text is longer than the max.
     if ($paragraph->{'counter'} != 0 and 
-        $paragraph->{'counter'} + _string_width($paragraph->{'word'}) + 
-           _string_width($paragraph->{'space'}) > $paragraph->{'max'}) {
+        $paragraph->{'counter'} + $paragraph->{'word_counter'} + 
+           length($paragraph->{'space'}) > $paragraph->{'max'}) {
       $result .= $paragraph->end_line();
     }
   }
   if (defined($space)) {
     $result .= $paragraph->add_pending_word();
     $paragraph->{'space'} = $space;
-    if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
+    if ($paragraph->{'counter'} + length($paragraph->{'space'}) 
                     > $paragraph->{'max'}) {
       $result .= $paragraph->end_line();
     }
@@ -159,11 +148,26 @@
   $paragraph->{'end_sentence'} = 0;
 }
 
+sub set_space_protected($$)
+{
+  my $paragraph = shift;
+  my $space_protection = shift;
+  $paragraph->{'protected_spaces'} = $space_protection;
+  # flush the spaces already existing
+  if ($space_protection) {
+    my $new_space = $paragraph->{'space'};
+    $paragraph->{'counter'} += length($new_space);
+    $paragraph->{'space'} = '';
+    return $new_space;
+  }
+  return '';
+}
+
 my $end_sentence_character = quotemeta('.?!');
 my $after_punctuation_characters = quotemeta('"\')]');
 
 # wrap a text.
-sub wrap_next($$)
+sub add_text($$)
 {
   my $paragraph = shift;
   my $text = shift;
@@ -173,12 +177,23 @@
     if ($paragraph->{'debug'}) {
       my $word = 'UNDEF';
       $word = $paragraph->{'word'} if (defined($paragraph->{'word'}));
-      print STDERR "($paragraph->{'counter'}) s `$paragraph->{'space'}', w 
`$word'\n";
+      print STDERR "($paragraph->{'counter'}+$paragraph->{'word_counter'}) s 
`$paragraph->{'space'}', w `$word'\n";
     }
-    if ($text =~ s/^\s+//) {
+    if ($text =~ s/^(\s+)//) {
       print STDERR "SPACES($paragraph->{'counter'})\n" if 
($paragraph->{'debug'});
       my $added_word = $paragraph->{'word'};
       $result .= $paragraph->add_pending_word();
+      if ($paragraph->{'protected_spaces'}) {
+        $paragraph->{'space'} .= $1;
+        if ($paragraph->{'space'} =~ s/\n/ /g 
+           and !$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}
+           and length($paragraph->{'space'}) < 2) {
+          $paragraph->{'space'} = '  ';
+        }
+        $result .= $paragraph->{'space'};
+        $paragraph->{'counter'} += length($paragraph->{'space'});
+        $paragraph->{'space'} = '';
+      } else {
       if ($paragraph->{'counter'} != 0) {
         if (!$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}) {
           $paragraph->{'space'} = '  ';
@@ -186,8 +201,9 @@
           $paragraph->{'space'} = ' ';
         }
       }
+      }
       delete $paragraph->{'end_sentence'};
-      if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
+      if ($paragraph->{'counter'} + length($paragraph->{'space'}) 
                       > $paragraph->{'max'}) {
         $result .= $paragraph->end_line();
       }
@@ -196,8 +212,9 @@
       print STDERR "EAST_ASIAN\n" if ($paragraph->{'debug'});
       $paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
       $paragraph->{'word'} .= $added;
+      $paragraph->{'word_counter'} += 2;
       if ($paragraph->{'counter'} != 0 and
-          $paragraph->{'counter'} + _string_width($paragraph->{'word'}) 
+          $paragraph->{'counter'} + $paragraph->{'word_counter'} 
                                > $paragraph->{'max'}) {
         $result .= $paragraph->end_line();
       }

Index: t/01use.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/01use.t,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- t/01use.t   6 Nov 2010 00:41:28 -0000       1.2
+++ t/01use.t   8 Nov 2010 23:50:47 -0000       1.3
@@ -1,5 +1,5 @@
 # Before `make install' is performed this script should be runnable with
-# `make test'. After `make install' it should work as `perl Texinfo-Parser.t'
+# `make test'. After `make install' it should work as `perl 01use.t'
 
 #########################
 

Index: t/paragraph.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/paragraph.t,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -r1.3 -r1.4
--- t/paragraph.t       8 Nov 2010 00:04:37 -0000       1.3
+++ t/paragraph.t       8 Nov 2010 23:50:47 -0000       1.4
@@ -9,10 +9,11 @@
 
 #use Test;
 use Test::More;
-BEGIN { plan tests => 38 };
+BEGIN { plan tests => 82 };
 use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
 #push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
 use Texinfo::Convert::Paragraph;
+use Texinfo::Convert::Line;
 ok(1, "modules loading"); # If we made it this far, we're ok.
 
 #########################
@@ -32,7 +33,7 @@
   $conf = {} if (!defined($conf));
   my $para = Texinfo::Convert::Paragraph->new($conf);
   foreach my $arg (@$args) {
-    $result .= $para->wrap_next($arg);
+    $result .= $para->add_text($arg);
   }
   $result .= $para->end();
   is ($result, $reference, $name);
@@ -41,6 +42,8 @@
 
 test_para(['word'], "word\n", 'word');
 test_para(['word other'], "word other\n", 'two_words');
+test_para(['word  other'], "word other\n", 'two_words two spaces');
+test_para(['word ', ' other'], "word other\n", 'two_words feed, space inside');
 test_para(['word '], "word\n", 'trailing spaces');
 test_para([' word'], "word\n", 'leading spaces');
 test_para([' ', ' word'], "word\n", 'double leading spaces');
@@ -66,23 +69,23 @@
 
 my $para = Texinfo::Convert::Paragraph->new();
 my $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $result .= $para->add_next('_');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.)_  after\n", 'add char after end sentence');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
 $result .= $para->end_line();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "\nafter\n", 'space after end_line');
 #print STDERR "$result";
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $result .= $para->add_pending_word();
 is ($result, 'aa.)', 'call add_pending_word');
 $result = $para->end_line();
@@ -92,71 +95,301 @@
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $result .= $para->add_pending_word();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.)  after\n", 'space after sentence and add_pending_word');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aA');
+$result .= $para->add_text('aA');
 $result .= $para->add_next('.', undef, 1);
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aA.  after\n", 'force end sentence after upper case');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aA');
-$result .= $para->wrap_next('.');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aA');
+$result .= $para->add_text('.');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aA. after\n", 'end sentence after upper case');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
-$result .= $para->wrap_next('))');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aa.)');
+$result .= $para->add_text('))');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.)))  after\n", 'continue with after_punctuation_characters');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $para->inhibit_end_sentence();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.) after\n", 'inhibit end sentence');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $para->inhibit_end_sentence();
 $result .= $para->add_next('_');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.)_ after\n", 'inhibit end sentence then add next');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $para->inhibit_end_sentence();
-$result .= $para->wrap_next('aa.)');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aa.)');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.)aa.)  after\n", 'cancel inhibit end sentence');
 
 $para = Texinfo::Convert::Paragraph->new();
 $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
 $para->inhibit_end_sentence();
-$result .= $para->wrap_next('))');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('))');
+$result .= $para->add_text(' after');
 $result .= $para->end();
 is ($result, "aa.))) after\n", 'inhibit end sentence and ))');
 
+$para = Texinfo::Convert::Paragraph->new({'max' => 2});
+$result = '';
+$para->set_space_protected(1);
+$result .= $para->add_text('aa.)    bb ');
+is ($result, 'aa.)    bb ', 'space protected 2 words');
+$result = $para->add_text(' eee ');
+is ($result, ' eee ', 'space protected more word');
+$result = $para->add_text('   .)');
+$result .= $para->add_next('_');
+$result .= $para->add_text("\n");
+is ($result, '   .)_  ', 'space protected and end of sentence and line added');
+$result = $para->add_text("aa\n");
+is ($result, 'aa ', 'protected space after end of line');
+$result = $para->add_text(' . gg');
+is ($result, ' . ', 'protected space with dot inside');
+$result = $para->add_text(". \n");
+$result .= $para->add_text("a");
+is ($result, 'gg.  ', 'protected space end line with already a space');
+$result = $para->add_text('  ');
+$para->set_space_protected(0);
+$result .= $para->add_text("c ");
+is ($result, "a  \nc", 'end protected space end of line');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->add_text('aa');
+$para->set_space_protected(1);
+$result .= $para->add_text('  f  f');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa  f  fggg', 'protected space within words');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa');
+$para->set_space_protected(1);
+$result .= $para->add_text('  f  f ');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa  f  f ggg', 'protected space and space within words');
+$para->end();
+
 $para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text('  f  f ');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa   f  f ggg', 'text space protected space and space within 
words');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text('  f  f ');
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa   f  f  ggg', 'text space protected space and space after');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa  ggg', 'empty protected 2 space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa ggg', 'empty protected 1 before space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa ggg', 'empty protected 1 after space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text(' ');
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa   ggg', 'space protected space');
+$para->end();
+
+
+#print STDERR "$result";
+#exit;
+
+sub test_line($$$;$)
+{
+  my $args = shift;
+  my $reference = shift;
+  my $name = shift;
+  my $conf = shift;
+
+  my $result = '';
+  #$conf = {'debug' => 1} if (!defined($conf));
+  $conf = {} if (!defined($conf));
+  my $line = Texinfo::Convert::Line->new($conf);
+  foreach my $arg (@$args) {
+    $result .= $line->add_text($arg);
+  }
+  $result .= $line->end();
+  is ($result, $reference, "line $name");
+  #print STDERR "$result\n";
+}
+
+test_line(['word'], "word\n", 'word');
+test_line(['word other'], "word other\n", 'two_words');
+test_line(['word '], "word\n", 'trailing spaces');
+test_line([' word'], "word\n", 'leading spaces');
+test_line([' ', ' word'], "word\n", 'double leading spaces');
+test_line(['word  other'], "word other\n", 'two_words_two_spaces');
+test_line(['word.  other'], "word.  other\n", 'two_words_dot');
+test_line(['word. other'], "word.  other\n", 'two_words_dot_one_space');
+test_line(['word.) other'], "word.)  other\n", 
'two_words_dot_paren_one_space');
+test_line(['worD.  other'], "worD. other\n", 'two_words_dot_upper');
+test_line(['word','other'], "wordother\n", 'concatenate');
+test_line(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\x{4e00} other\n", 
'east_asian');
+test_line(['word.  other'], "word. other\n", 'two_words_dot_frenchspacing', 
{'frenchspacing' => 1});
+test_line(["aa.)\x{7b2c} b"], "aa.)\x{7b2c} b\n", 'end_sentence_east_asian');
+
+my $line = Texinfo::Convert::Line->new();
+my $result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_next('_');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)_  after\n", 'line add char after end sentence');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->end_line();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "\nafter\n", 'line space after end_line');
+#print STDERR "$result";
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_pending_word();
+is ($result, 'aa.)', 'line call add_pending_word');
+$result = $line->end_line();
+is ($result, "\n", 'line call end_line after add_pending_word');
+$result = $line->end();
+is ($result, "\n", 'line call end after end_line');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_pending_word();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)  after\n", 'line space after sentence and 
add_pending_word');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aA');
+$result .= $line->add_next('.', undef, 1);
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aA.  after\n", 'line force end sentence after upper case');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aA');
+$result .= $line->add_text('.');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aA. after\n", 'line end sentence after upper case');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_text('))');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)))  after\n", 'line continue with 
after_punctuation_characters');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.) after\n", 'line inhibit end sentence');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_next('_');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)_ after\n", 'line inhibit end sentence then add next');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text('aa.)');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)aa.)  after\n", 'line cancel inhibit end sentence');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text('))');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.))) after\n", 'line inhibit end sentence and ))');
 
 1;

Index: Texinfo/Convert/Line.pm
===================================================================
RCS file: Texinfo/Convert/Line.pm
diff -N Texinfo/Convert/Line.pm
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ Texinfo/Convert/Line.pm     8 Nov 2010 23:50:47 -0000       1.1
@@ -0,0 +1,196 @@
+# Line.pm: handle line of text.
+#
+# Copyright 2010 Free Software Foundation, Inc.
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# 
+# Original author: Patrice Dumas <address@hidden>
+
+# this module has nothing Texinfo specific.  It is similar to
+# Texinfo::Convert::Paragraph, but simpler.
+# It could be even simpler: there is no need to delay outputting a word.
+
+package Texinfo::Convert::Line;
+
+use 5.006;
+use strict;
+
+use Unicode::EastAsianWidth;
+use Carp qw(cluck);
+
+# initialize a line object.
+sub new($;$)
+{
+  my $class = shift;
+  my $conf = shift;
+  my $self = {'max' => 72, 'indent_length' => 0, 'counter' => 0,
+              'space' => '', 'frenchspacing' => 0, 'line_beginning' => 1};
+  if (defined($conf)) {
+    foreach my $key (keys(%$conf)) {
+      if ($key eq 'text') {
+        $self->{'counter'} = _string_width($conf->{$key});
+        $self->{'line_beginning'} = 0 if ($self->{'counter'});
+      } else {
+        $self->{$key} = $conf->{$key};
+      }
+    }
+  }
+  bless $self, $class;
+}
+
+# fixed-width length of a string, taking into account that East Asian
+# fullwidth characters take 2 columns.
+sub _string_width($)
+{
+  my $string = shift;
+
+  if (! defined($string)) {
+    Carp::cluck();
+  } 
+  my $width = 0;
+  foreach my $character(split '', $string) {
+    if ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/) {
+      $width += 2;
+    } else {
+      $width += 1;
+    }
+  }
+  return $width;
+}
+
+# end a line.
+sub end_line($)
+{
+  my $line = shift;
+  my $result = $line->add_pending_word();
+  $line->{'line_beginning'} = 1;
+  $line->{'space'} = '';
+  print STDERR "END_LINE\n" if ($line->{'debug'});
+  return "$result\n";
+}
+
+# put a pending word and spaces in the result string.
+sub add_pending_word($)
+{
+  my $line = shift;
+  my $result = '';
+
+  if (defined($line->{'word'})) {
+    if ($line->{'line_beginning'}) {
+      if ($line->{'indent_length'}) {
+        $result .= ' ' x ($line->{'indent_length'} - $line->{'counter'});
+        print STDERR "INDENT($line->{'counter'})\n" if ($line->{'debug'});
+      }
+      $line->{'line_beginning'} = 0;
+    } elsif ($line->{'space'}) {
+      $result .= $line->{'space'};
+      print STDERR "ADD_SPACES\n" if ($line->{'debug'});
+    }
+    $result .= $line->{'word'};
+    print STDERR "ADD_WORD[$line->{'word'}]\n" if ($line->{'debug'});
+    $line->{'word'} = undef;
+  }
+  return $result;
+}
+
+# end the text; this simply calls end_line.
+sub end($)
+{
+  my $line = shift;
+  return $line->end_line();
+}
+
+# add a word and/or spaces and end of sentence.
+sub add_next($;$$$)
+{
+  my $line = shift;
+  my $word = shift;
+  my $space = shift;
+  my $end_sentence = shift;
+  my $result = '';
+
+  if (defined($word)) {
+    $line->{'word'} = '' if (!defined($line->{'word'}));
+    $line->{'word'} .= $word;
+    print STDERR "WORD+ $word -> $line->{'word'}\n" if ($line->{'debug'});
+  }
+  if (defined($space)) {
+    $result .= $line->add_pending_word();
+    $line->{'space'} = $space;
+  }
+  if (defined($end_sentence)) {
+    $line->{'end_sentence'} = $end_sentence;
+  }
+  return $result;
+}
+
+sub inhibit_end_sentence($)
+{
+  my $line = shift;
+  $line->{'end_sentence'} = 0;
+}
+
+my $end_sentence_character = quotemeta('.?!');
+my $after_punctuation_characters = quotemeta('"\')]');
+
+# add a text, collapsing runs of spaces; no wrapping is done here.
+sub add_text($$)
+{
+  my $line = shift;
+  my $text = shift;
+  my $result = '';
+
+  while ($text ne '') {
+    if ($line->{'debug'}) {
+      my $word = 'UNDEF';
+      $word = $line->{'word'} if (defined($line->{'word'}));
+      print STDERR "s `$line->{'space'}', w `$word'\n";
+    }
+    if ($text =~ s/^\s+//) {
+      print STDERR "SPACES\n" if ($line->{'debug'});
+      my $added_word = $line->{'word'};
+      $result .= $line->add_pending_word();
+      if (!$line->{'begin'}) {
+        if (!$line->{'frenchspacing'} and $line->{'end_sentence'}) {
+          $line->{'space'} = '  ';
+        } else {
+          $line->{'space'} = ' ';
+        }
+      }
+      delete $line->{'end_sentence'};
+    } elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
+      my $added_word = $1;
+      $result .= $line->add_next($added_word);
+      # now check if it is considered as an end of sentence
+      if (defined($line->{'end_sentence'}) and 
+        $added_word =~ /^[$after_punctuation_characters]*$/) {
+        # do nothing in the case of a continuation of 
after_punctuation_characters
+      } elsif ($line->{'word'} =~ 
/[$end_sentence_character][$after_punctuation_characters]*$/
+           and $line->{'word'} !~ 
/[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+        $line->{'end_sentence'} = 1;
+      }
+    } elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
+      my $added = $1;
+      print STDERR "EAST_ASIAN\n" if ($line->{'debug'});
+      $line->{'word'} = '' if (!defined($line->{'word'}));
+      $line->{'word'} .= $added;
+      $result .= $line->add_pending_word();
+      delete $line->{'end_sentence'};
+      $line->{'space'} = '';
+    }
+  }
+  return $result;
+}
+
+1;
[Prev in Thread]
Current Thread
[Next in Thread]
texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve..., Patrice Dumas <=
- texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve..., Patrice Dumas, 2010/11/26
Prev by Date: texinfo ChangeLog doc/texinfo.txi util/htmlxref...
Next by Date: texinfo ChangeLog doc/texinfo.txi util/htmlxref...
Previous by thread: texinfo/doc texinfo.txi
Next by thread: texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...
Index(es):
- Date
- Thread