[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...
From: |
Patrice Dumas |
Subject: |
texinfo/tp Texinfo/Convert/Paragraph.pm t/parag... |
Date: |
Sat, 06 Nov 2010 13:19:37 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 10/11/06 13:19:37
Added files:
tp/Texinfo/Convert: Paragraph.pm
tp/t : paragraph.t
Log message:
Add a paragraph class to handle paragraph text.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&rev=1.1
Patches:
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: Texinfo/Convert/Paragraph.pm
diff -N Texinfo/Convert/Paragraph.pm
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ Texinfo/Convert/Paragraph.pm 6 Nov 2010 13:19:37 -0000 1.1
@@ -0,0 +1,208 @@
+# Paragraph.pm: handle paragraph text.
+#
+# Copyright 2010 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Original author: Patrice Dumas <address@hidden>
+
+# This module has nothing Texinfo-specific.  In contrast with the existing
+# modules Text::Wrap and Text::Format, it keeps the state of the paragraph
+# and waits for text to be fed.
+
+package Texinfo::Convert::Paragraph;
+
+use 5.006;
+use strict;
+
+use Unicode::EastAsianWidth;
+use Carp qw(cluck);
+
+# Create a paragraph formatter object.
+#
+# $class is the class to bless into.  $conf is an optional hash reference;
+# each of its keys overrides, or adds to, the defaults listed below.
+# Returns the blessed hash reference.
+sub new($;$)
+{
+  my ($class, $conf) = @_;
+
+  # Defaults.  'counter' and 'space' are running state that the other subs
+  # of this package read and update.
+  my %state = (
+    'max'           => 72,
+    'indent_length' => 0,
+    'counter'       => 0,
+    'space'         => '',
+    'frenchspacing' => 0,
+  );
+  %state = (%state, %$conf) if (defined($conf));
+  return bless \%state, $class;
+}
+
+# Width of a string in fixed-width columns, taking into account that East
+# Asian full-width characters take two columns.
+#
+# Every character matching Unicode::EastAsianWidth::InFullwidth counts for
+# 2, any other character for 1.  An undefined argument triggers a
+# Carp::cluck() stack trace and then yields a width of 0.
+sub _string_width($)
+{
+  my $string = shift;
+
+  Carp::cluck() unless (defined($string));
+
+  my $width = 0;
+  foreach my $character (split(//, $string)) {
+    $width += ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/)
+                ? 2 : 1;
+  }
+  return $width;
+}
+
+# End the current output line.
+#
+# Resets the column counter and the pending inter-word space.  When an
+# 'indent_length_next' value has been set on the paragraph, it becomes the
+# 'indent_length' used from now on and the key is removed.
+# Returns the newline to append to the output.
+sub _end_line($)
+{
+  my $paragraph = shift;
+  $paragraph->{'counter'} = 0;
+  $paragraph->{'space'} = '';
+  # Test definedness rather than truth: an 'indent_length_next' of 0 is a
+  # legitimate request to drop the indentation on the following lines, and
+  # a truth test would silently ignore it and leave the key in place.
+  if (defined($paragraph->{'indent_length_next'})) {
+    $paragraph->{'indent_length'} = delete $paragraph->{'indent_length_next'};
+  }
+  print STDERR "END_LINE\n" if ($paragraph->{'debug'});
+  return "\n";
+}
+
+# Flush the pending word, if any, into the output.
+#
+# The word is preceded either by the padding needed to reach
+# 'indent_length' (when the line has not reached that column yet) or,
+# otherwise, by the pending 'space'.  'counter' is advanced by the width of
+# everything emitted and the pending word is cleared.
+# Returns the text to output, '' when no word is pending.
+sub add_pending_word($)
+{
+  my $paragraph = shift;
+
+  return '' if (!defined($paragraph->{'word'}));
+
+  my $result = '';
+  my $missing_indent
+    = $paragraph->{'indent_length'} - $paragraph->{'counter'};
+  if ($missing_indent > 0) {
+    # Indentation replaces the pending space at the start of a line.
+    $result .= ' ' x $missing_indent;
+    $paragraph->{'counter'} = $paragraph->{'indent_length'};
+    print STDERR "INDENT($paragraph->{'counter'})\n"
+      if ($paragraph->{'debug'});
+  } elsif ($paragraph->{'space'}) {
+    $result .= $paragraph->{'space'};
+    $paragraph->{'counter'} += _string_width($paragraph->{'space'});
+    print STDERR "ADD_SPACES($paragraph->{'counter'})\n"
+      if ($paragraph->{'debug'});
+  }
+
+  $result .= $paragraph->{'word'};
+  $paragraph->{'counter'} += _string_width($paragraph->{'word'});
+  print STDERR "ADD_WORD[$paragraph->{'word'}]($paragraph->{'counter'})\n"
+    if ($paragraph->{'debug'});
+  $paragraph->{'word'} = undef;
+
+  return $result;
+}
+
+# Finish the paragraph.
+#
+# Flushes the pending word and terminates the current line with a newline
+# unless the line is empty.  Returns the remaining output text.
+sub end($)
+{
+  my $paragraph = shift;
+  print STDERR "PARA END\n" if ($paragraph->{'debug'});
+
+  my $result = $paragraph->add_pending_word();
+  if ($paragraph->{'counter'} != 0) {
+    $result .= "\n";
+  }
+
+  # Resetting the state only matters if the paragraph object is reused.
+  @{$paragraph}{'counter', 'space', 'word'} = (0, '', undef);
+  return $result;
+}
+
+# Add a word and/or spaces to the paragraph.
+#
+# $word, when defined, is appended to the pending word; the line is ended
+# first if the pending space plus the pending word no longer fit within
+# 'max'.  $space, when defined, flushes the pending word and becomes the
+# new pending space; the line is ended if that space would not fit.
+# Returns the output text produced by these steps.
+sub add_next($;$$)
+{
+  my ($paragraph, $word, $space) = @_;
+  my $result = '';
+
+  if (defined($word)) {
+    my $pending = defined($paragraph->{'word'}) ? $paragraph->{'word'} : '';
+    $paragraph->{'word'} = $pending . $word;
+    print STDERR "WORD+ $word -> $paragraph->{'word'}\n"
+      if ($paragraph->{'debug'});
+    # Nothing is done at the start of a line: this avoids an additional
+    # empty line in the output when a single word is longer than the max.
+    if ($paragraph->{'counter'} != 0) {
+      my $line_width = $paragraph->{'counter'}
+                       + _string_width($paragraph->{'word'})
+                       + _string_width($paragraph->{'space'});
+      if ($line_width > $paragraph->{'max'}) {
+        $result .= $paragraph->_end_line();
+      }
+    }
+  }
+
+  if (defined($space)) {
+    $result .= $paragraph->add_pending_word();
+    $paragraph->{'space'} = $space;
+    my $line_width = $paragraph->{'counter'}
+                     + _string_width($paragraph->{'space'});
+    if ($line_width > $paragraph->{'max'}) {
+      $result .= $paragraph->_end_line();
+    }
+  }
+
+  return $result;
+}
+
+# Characters that may end a sentence, and characters that may follow them
+# while still counting as the end of the sentence.  Both are escaped so
+# that they can be interpolated into bracketed character classes.
+my $end_sentence_character = quotemeta('.?!');
+my $after_punctuation_characters = quotemeta('"\')]');
+
+# Wrap a chunk of text.
+#
+# $text is consumed piece by piece: a run of whitespace flushes the pending
+# word and selects the next pending space, a full-width East Asian
+# character is flushed immediately as a word of its own, and any other run
+# of characters is appended to the pending word through add_next().
+# Returns the output produced so far; a word not yet followed by
+# whitespace stays pending in the paragraph object.
+sub wrap_next($$)
+{
+  my ($paragraph, $text) = @_;
+  my $result = '';
+
+  while ($text ne '') {
+    if ($paragraph->{'debug'}) {
+      my $word = defined($paragraph->{'word'}) ? $paragraph->{'word'}
+                                               : 'UNDEF';
+      print STDERR "($paragraph->{'counter'}) s `$paragraph->{'space'}', w `$word'\n";
+    }
+    if ($text =~ s/^\s+//) {
+      print STDERR "SPACES($paragraph->{'counter'})\n"
+        if ($paragraph->{'debug'});
+      # Remember the word before it is flushed: it decides the spacing.
+      my $flushed_word = $paragraph->{'word'};
+      $result .= $paragraph->add_pending_word();
+      if (!defined($flushed_word)) {
+        $paragraph->{'space'} = ' ';
+      } elsif ($paragraph->{'frenchspacing'}
+               or $flushed_word !~
+                 /[$end_sentence_character][$after_punctuation_characters]*$/
+               or $flushed_word =~
+                 /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+        # Not an end of sentence, or frenchspacing is set.
+        $paragraph->{'space'} = ' ';
+        print STDERR "NEW_SPACE_1\n" if ($paragraph->{'debug'});
+      } else {
+        # End of sentence: the word ends with sentence punctuation that is
+        # not preceded by an upper-case letter.
+        # NOTE(review): this branch assigns the same one-character string
+        # as the branch above in this copy; the archived text may have
+        # collapsed a two-space literal -- confirm against the repository.
+        $paragraph->{'space'} = ' ';
+        print STDERR "NEW_SPACE_2\n" if ($paragraph->{'debug'});
+      }
+      my $line_width = $paragraph->{'counter'}
+                       + _string_width($paragraph->{'space'});
+      if ($line_width > $paragraph->{'max'}) {
+        $result .= $paragraph->_end_line();
+      }
+    } elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
+      my $wide_character = $1;
+      print STDERR "EAST_ASIAN\n" if ($paragraph->{'debug'});
+      my $pending = defined($paragraph->{'word'}) ? $paragraph->{'word'} : '';
+      $paragraph->{'word'} = $pending . $wide_character;
+      if ($paragraph->{'counter'} != 0) {
+        my $line_width = $paragraph->{'counter'}
+                         + _string_width($paragraph->{'word'});
+        if ($line_width > $paragraph->{'max'}) {
+          $result .= $paragraph->_end_line();
+        }
+      }
+      # Flushed right away with no pending space afterwards, so that the
+      # line can be ended after any full-width character.
+      $result .= $paragraph->add_pending_word();
+      $paragraph->{'space'} = '';
+    } elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
+      $result .= $paragraph->add_next($1);
+    }
+  }
+  return $result;
+}
+
+1;
Index: t/paragraph.t
===================================================================
RCS file: t/paragraph.t
diff -N t/paragraph.t
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ t/paragraph.t 6 Nov 2010 13:19:37 -0000 1.1
@@ -0,0 +1,61 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl paragraph.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use strict;
+
+#use Test;
+use Test::More;
+BEGIN { plan tests => 19 };
+use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
+#push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
+use Texinfo::Convert::Paragraph;
+ok(1, "modules loading"); # If we made it this far, we're ok.
+
+#########################
+
+# Insert your test code below, the Test::More module is use()ed here so read
+# its man page ( perldoc Test::More ) for help writing this test script.
+
+# Run one paragraph formatting test.
+#
+# $args is an array reference of text chunks fed in order to wrap_next(),
+# $reference is the expected complete output, $name is the test name and
+# $conf is an optional configuration hash reference passed to new().
+sub test_para($$$;$)
+{
+  my ($args, $reference, $name, $conf) = @_;
+
+  #$conf = {'debug' => 1} if (!defined($conf));
+  $conf = {} unless (defined($conf));
+  my $para = Texinfo::Convert::Paragraph->new($conf);
+
+  # Concatenate what each chunk produces, then what closing the paragraph
+  # still has to output.
+  my $result = join('', map { $para->wrap_next($_) } @$args);
+  $result .= $para->end();
+  #print STDERR "$result\n";
+  is($result, $reference, $name);
+}
+
+test_para(['word'], "word\n", 'word'); # a lone word comes back followed by a newline
+test_para(['word other'], "word other\n", 'two_words'); # default settings: both words stay on one line
+test_para(['word other'], "word\nother\n", 'two_words_max', {'max' => 2}); # each word is wider than max: one word per line
+test_para(['word other'], "word\nother\n", 'two_words_max_less_one', {'max' =>
3});
+test_para(['word other'], "word\nother\n", 'two_words_max_exact', {'max' =>
4});
+test_para(['word other'], "word\nother\n", 'two_words_max_plus_one', {'max' =>
5});
+test_para(['word other'], "word\nother\n", 'two_words_max_plus_two', {'max' =>
6});
+test_para(['word o'], "word\no\n", 'word_letter_max_exact', {'max' => 5}); # "word o" is 6 columns wide, so max 5 forces a break
+test_para(['word o'], "word o\n", 'word_letter_max_plus_two', {'max' => 6}); # with max 6 the same text fits on one line
+test_para(['word other'], "word other\n", 'two_words_two_spaces'); # NOTE(review): runs of spaces in this and the following dot tests may have been collapsed in this archived copy -- check against the repository
+test_para(['word. other'], "word. other\n", 'two_words_dot');
+test_para(['word. other'], "word. other\n", 'two_words_dot_one_space');
+test_para(['worD. other'], "worD. other\n", 'two_words_dot_upper');
+test_para(['word','other'], "wordother\n", 'concatenate'); # chunks fed without whitespace between them form a single word
+test_para(['word','other'], "wordother\n", 'concatenate_max', {'max' => 2}); # a word wider than max is not split
+test_para(['word ','other'], "word\nother\n", 'two_elements_max', {'max' =>
2}); # trailing whitespace in the first chunk separates the two words
+test_para(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\n\x{4e00}\nother\n",
'east_asian', {'max' => 2}); # each of the two ideographs ends up on its own line with max 2
+test_para(['word. other'], "word. other\n", 'two_words_dot_frenshspacing',
{'frenchspacing' => 1});
+
+1;
- texinfo/tp Texinfo/Convert/Paragraph.pm t/parag...,
Patrice Dumas <=