texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...

texinfo-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...

From:	Patrice Dumas
Subject:	texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
Date:	Sat, 06 Nov 2010 13:19:37 +0000
CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        10/11/06 13:19:37

Added files:
        tp/Texinfo/Convert: Paragraph.pm 
        tp/t           : paragraph.t 

Log message:
        Add a paragraph class to handle paragraph text.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&rev=1.1

Patches:
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: Texinfo/Convert/Paragraph.pm
diff -N Texinfo/Convert/Paragraph.pm
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ Texinfo/Convert/Paragraph.pm        6 Nov 2010 13:19:37 -0000       1.1
@@ -0,0 +1,208 @@
+# Paragraph.pm: handle paragraph text.
+#
+# Copyright 2010 Free Software Foundation, Inc.
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# 
+# Original author: Patrice Dumas <address@hidden>
+
+# This module has nothing Texinfo specific.  In contrast with the existing
+# modules Text::Wrap and Text::Format, it keeps the state of the paragraph
+# and waits for text to be fed.
+
+package Texinfo::Convert::Paragraph;
+
+use 5.006;
+use strict;
+
+use Unicode::EastAsianWidth;
+use Carp qw(cluck);
+
+# initialize a paragraph object.
+sub new($;$)
+{
+  my $class = shift;
+  my $conf = shift;
+  my $self = {'max' => 72, 'indent_length' => 0, 'counter' => 0,
+              'space' => '', 'frenchspacing' => 0};
+  if (defined($conf)) {
+    foreach my $key (keys(%$conf)) {
+      $self->{$key} = $conf->{$key};
+    }
+  }
+  bless $self, $class;
+}
+
+# Display width of a string, taking into account that East Asian
+# fullwidth characters take 2 columns.
+sub _string_width($)
+{
+  my $string = shift;
+
+  if (! defined($string)) {
+    Carp::cluck();
+  } 
+  my $width = 0;
+  foreach my $character(split '', $string) {
+    if ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/) {
+      $width += 2;
+    } else {
+      $width += 1;
+    }
+  }
+  return $width;
+}
+
+# end a line.
+sub _end_line($)
+{
+  my $paragraph = shift;
+  $paragraph->{'counter'} = 0;
+  $paragraph->{'space'} = '';
+  if ($paragraph->{'indent_length_next'}) {
+    $paragraph->{'indent_length'} = $paragraph->{'indent_length_next'};
+    delete $paragraph->{'indent_length_next'};        
+  }
+  print STDERR "END_LINE\n" if ($paragraph->{'debug'});
+  return "\n";
+}
+
+# put a pending word and spaces in the result string.
+sub add_pending_word($)
+{
+  my $paragraph = shift;
+  my $result = '';
+
+  if (defined($paragraph->{'word'})) {
+    if ($paragraph->{'indent_length'} > $paragraph->{'counter'}) {
+      $result .= ' ' x ($paragraph->{'indent_length'} - $paragraph->{'counter'});
+      $paragraph->{'counter'} = $paragraph->{'indent_length'};
+      print STDERR "INDENT($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
+    } elsif ($paragraph->{'space'}) {
+      $result .= $paragraph->{'space'};
+      $paragraph->{'counter'} += _string_width($paragraph->{'space'});
+      print STDERR "ADD_SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
+      
+    }
+    $result .= $paragraph->{'word'};
+    $paragraph->{'counter'} += _string_width($paragraph->{'word'});
+    print STDERR "ADD_WORD[$paragraph->{'word'}]($paragraph->{'counter'})\n"
+      if ($paragraph->{'debug'});
+    $paragraph->{'word'} = undef;
+  }
+  return $result;
+}
+
+# end a paragraph
+sub end($)
+{
+  my $paragraph = shift;
+  print STDERR "PARA END\n" if ($paragraph->{'debug'});
+  my $result = $paragraph->add_pending_word();
+  $result .= "\n" if ($paragraph->{'counter'} != 0);
+  # This is only useful if the paragraph is reused.
+  $paragraph->{'counter'} = 0;
+  $paragraph->{'space'} = '';
+  $paragraph->{'word'} = undef;
+  return $result;
+}
+
+# add a word and/or spaces.
+sub add_next($;$$)
+{
+  my $paragraph = shift;
+  my $word = shift;
+  my $space = shift;
+  my $result = '';
+
+  if (defined($word)) {
+    $paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
+    $paragraph->{'word'} .= $word;
+    print STDERR "WORD+ $word -> $paragraph->{'word'}\n" if ($paragraph->{'debug'});
+    # The $paragraph->{'counter'} != 0 is here to avoid having an
+    # additional line output when the text is longer than the max.
+    if ($paragraph->{'counter'} != 0 and 
+        $paragraph->{'counter'} + _string_width($paragraph->{'word'}) + 
+           _string_width($paragraph->{'space'}) > $paragraph->{'max'}) {
+      $result .= $paragraph->_end_line();
+    }
+  }
+  if (defined($space)) {
+    $result .= $paragraph->add_pending_word();
+    $paragraph->{'space'} = $space;
+    if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
+                    > $paragraph->{'max'}) {
+      $result .= $paragraph->_end_line();
+    }
+  }
+  return $result;
+}
+
+my $end_sentence_character = quotemeta('.?!');
+my $after_punctuation_characters = quotemeta('"\')]');
+
+# wrap a text.
+sub wrap_next($$)
+{
+  my $paragraph = shift;
+  my $text = shift;
+  my $result = '';
+
+  while ($text ne '') {
+    if ($paragraph->{'debug'}) {
+      my $word = 'UNDEF';
+      $word = $paragraph->{'word'} if (defined($paragraph->{'word'}));
+      print STDERR "($paragraph->{'counter'}) s `$paragraph->{'space'}', w `$word'\n";
+    }
+    if ($text =~ s/^\s+//) {
+      print STDERR "SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
+      my $added_word = $paragraph->{'word'};
+      $result .= $paragraph->add_pending_word();
+      if (defined($added_word)) {
+        if (!$paragraph->{'frenchspacing'}
+             and $added_word =~ /[$end_sentence_character][$after_punctuation_characters]*$/
+             and $added_word !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+          $paragraph->{'space'} = '  ';
+          print STDERR "NEW_SPACE_2\n" if ($paragraph->{'debug'});
+        } else {
+          $paragraph->{'space'} = ' ';
+          print STDERR "NEW_SPACE_1\n" if ($paragraph->{'debug'});
+        }
+      } else {
+        $paragraph->{'space'} = ' ';
+      }
+      if ($paragraph->{'counter'} + _string_width($paragraph->{'space'}) 
+                      > $paragraph->{'max'}) {
+        $result .= $paragraph->_end_line();
+      }
+    } elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
+      my $added = $1;
+      print STDERR "EAST_ASIAN\n" if ($paragraph->{'debug'});
+      $paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
+      $paragraph->{'word'} .= $added;
+      if ($paragraph->{'counter'} != 0 and
+          $paragraph->{'counter'} + _string_width($paragraph->{'word'}) 
+                               > $paragraph->{'max'}) {
+        $result .= $paragraph->_end_line();
+      }
+      $result .= $paragraph->add_pending_word();
+      $paragraph->{'space'} = '';
+    } elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
+      $result .= $paragraph->add_next($1);
+    }
+  }
+  return $result;
+}
+
+1;

Index: t/paragraph.t
===================================================================
RCS file: t/paragraph.t
diff -N t/paragraph.t
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ t/paragraph.t       6 Nov 2010 13:19:37 -0000       1.1
@@ -0,0 +1,61 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl paragraph.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use strict;
+
+#use Test;
+use Test::More;
+BEGIN { plan tests => 19 };
+use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
+#push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
+use Texinfo::Convert::Paragraph;
+ok(1, "modules loading"); # If we made it this far, we're ok.
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+sub test_para($$$;$)
+{
+  my $args = shift;
+  my $reference = shift;
+  my $name = shift;
+  my $conf = shift;
+
+  my $result = '';
+  #$conf = {'debug' => 1} if (!defined($conf));
+  $conf = {} if (!defined($conf));
+  my $para = Texinfo::Convert::Paragraph->new($conf);
+  foreach my $arg (@$args) {
+    $result .= $para->wrap_next($arg);
+  }
+  $result .= $para->end();
+  is ($result, $reference, $name);
+  #print STDERR "$result\n";
+}
+
+test_para(['word'], "word\n", 'word');
+test_para(['word other'], "word other\n", 'two_words');
+test_para(['word other'], "word\nother\n", 'two_words_max', {'max' => 2});
+test_para(['word other'], "word\nother\n", 'two_words_max_less_one', {'max' => 3});
+test_para(['word other'], "word\nother\n", 'two_words_max_exact', {'max' => 4});
+test_para(['word other'], "word\nother\n", 'two_words_max_plus_one', {'max' => 5});
+test_para(['word other'], "word\nother\n", 'two_words_max_plus_two', {'max' => 6});
+test_para(['word o'], "word\no\n", 'word_letter_max_exact', {'max' => 5});
+test_para(['word o'], "word o\n", 'word_letter_max_plus_two', {'max' => 6});
+test_para(['word  other'], "word other\n", 'two_words_two_spaces');
+test_para(['word.  other'], "word.  other\n", 'two_words_dot');
+test_para(['word. other'], "word.  other\n", 'two_words_dot_one_space');
+test_para(['worD.  other'], "worD. other\n", 'two_words_dot_upper');
+test_para(['word','other'], "wordother\n", 'concatenate');
+test_para(['word','other'], "wordother\n", 'concatenate_max', {'max' => 2});
+test_para(['word ','other'], "word\nother\n", 'two_elements_max', {'max' => 2});
+test_para(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\n\x{4e00}\nother\n", 'east_asian', {'max' => 2});
+test_para(['word.  other'], "word. other\n", 'two_words_dot_frenshspacing', {'frenchspacing' => 1});
+
+1;
[Prev in Thread]
Current Thread
[Next in Thread]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag..., Patrice Dumas <=
- texinfo/tp Texinfo/Convert/Paragraph.pm t/parag..., Patrice Dumas, 2010/11/07
Prev by Date: texinfo/tp Texinfo/Convert/NodeNameNormalizatio...
Next by Date: texinfo ChangeLog doc/texinfo.txi util/htmlxref...
Previous by thread: texinfo/tp Texinfo/Convert/NodeNameNormalizatio...
Next by thread: texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
Index(es):
- Date
- Thread