[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...
From: |
Patrice Dumas |
Subject: |
texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve... |
Date: |
Mon, 08 Nov 2010 23:50:47 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 10/11/08 23:50:47
Modified files:
tp : TODO
tp/Texinfo : Parser.pm
tp/Texinfo/Convert: Paragraph.pm
tp/t : 01use.t paragraph.t
Added files:
tp/Texinfo/Convert: Line.pm
Log message:
New code using the same interface as paragraph for line text.
Prepare for situations of space protection, like in @w.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/TODO?cvsroot=texinfo&r1=1.52&r2=1.53
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Parser.pm?cvsroot=texinfo&r1=1.143&r2=1.144
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Paragraph.pm?cvsroot=texinfo&r1=1.3&r2=1.4
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Line.pm?cvsroot=texinfo&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/01use.t?cvsroot=texinfo&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/paragraph.t?cvsroot=texinfo&r1=1.3&r2=1.4
Patches:
Index: TODO
===================================================================
RCS file: /sources/texinfo/texinfo/tp/TODO,v
retrieving revision 1.52
retrieving revision 1.53
diff -u -b -r1.52 -r1.53
--- TODO 1 Nov 2010 19:18:04 -0000 1.52
+++ TODO 8 Nov 2010 23:50:46 -0000 1.53
@@ -41,6 +41,10 @@
use definfoenclose information in Convert::Text?
+
+Incorporation of texi2html tests
+--------------------------------
+
tests in test/macros, not incorporated
glossary.texi
bib-example.texi (big test)
Index: Texinfo/Parser.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Parser.pm,v
retrieving revision 1.143
retrieving revision 1.144
diff -u -b -r1.143 -r1.144
--- Texinfo/Parser.pm 6 Nov 2010 00:41:28 -0000 1.143
+++ Texinfo/Parser.pm 8 Nov 2010 23:50:47 -0000 1.144
@@ -2046,7 +2046,7 @@
# not def line
and $self->{'context_stack'}->[-1] ne 'def') {
print STDERR "BEGIN LINE\n" if ($self->{'debug'});
- $line =~ s/([^\S\n]*)//;
+ $line =~ s/^([^\S\n]*)//;
push @{$current->{'contents'}}, { 'type' => 'empty_line',
'text' => $1,
'parent' => $current };
Index: Texinfo/Convert/Paragraph.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Paragraph.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -r1.3 -r1.4
--- Texinfo/Convert/Paragraph.pm 8 Nov 2010 00:04:37 -0000 1.3
+++ Texinfo/Convert/Paragraph.pm 8 Nov 2010 23:50:47 -0000 1.4
@@ -35,7 +35,7 @@
my $class = shift;
my $conf = shift;
my $self = {'max' => 72, 'indent_length' => 0, 'counter' => 0,
- 'space' => '', 'frenchspacing' => 0};
+ 'word_counter' => 0, 'space' => '', 'frenchspacing' => 0};
if (defined($conf)) {
foreach my $key (keys(%$conf)) {
$self->{$key} = $conf->{$key};
@@ -46,28 +46,11 @@
# string fixed length size takeing into account that east asian characters
# may take 2 spaces.
-sub _string_width($)
-{
- my $string = shift;
-
- if (! defined($string)) {
- Carp::cluck();
- }
- my $width = 0;
- foreach my $character(split '', $string) {
- if ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/) {
- $width += 2;
- } else {
- $width += 1;
- }
- }
- return $width;
-}
-
# end a line.
sub end_line($)
{
my $paragraph = shift;
+ return '' if ($paragraph->{'protected_spaces'});
$paragraph->{'counter'} = 0;
$paragraph->{'space'} = '';
if ($paragraph->{'indent_length_next'}) {
@@ -88,18 +71,22 @@
if ($paragraph->{'indent_length'} > $paragraph->{'counter'}) {
$result .= ' ' x ($paragraph->{'indent_length'} -
$paragraph->{'counter'});
$paragraph->{'counter'} = $paragraph->{'indent_length'};
- print STDERR "INDENT($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
+ print STDERR "INDENT($paragraph->{'counter'}+$paragraph->{'word_counter'})\n"
+ if ($paragraph->{'debug'});
} elsif ($paragraph->{'space'}) {
$result .= $paragraph->{'space'};
- $paragraph->{'counter'} += _string_width($paragraph->{'space'});
- print STDERR "ADD_SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
+ $paragraph->{'counter'} += length($paragraph->{'space'});
+ print STDERR "ADD_SPACES($paragraph->{'counter'}+$paragraph->{'word_counter'})\n"
+ if ($paragraph->{'debug'});
}
$result .= $paragraph->{'word'};
- $paragraph->{'counter'} += _string_width($paragraph->{'word'});
- print STDERR "ADD_WORD[$paragraph->{'word'}]($paragraph->{'counter'})\n"
+ $paragraph->{'counter'} += $paragraph->{'word_counter'};
+ print STDERR "ADD_WORD[$paragraph->{'word'}]+$paragraph->{'word_counter'} ($paragraph->{'counter'})\n"
if ($paragraph->{'debug'});
$paragraph->{'word'} = undef;
+ $paragraph->{'word_counter'} = 0;
+ $paragraph->{'space'} = '';
}
return $result;
}
@@ -115,6 +102,7 @@
$paragraph->{'counter'} = 0;
$paragraph->{'space'} = '';
$paragraph->{'word'} = undef;
+ $paragraph->{'word_counter'} = 0;
return $result;
}
@@ -130,19 +118,20 @@
if (defined($word)) {
$paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
$paragraph->{'word'} .= $word;
+ $paragraph->{'word_counter'} += length($word);
print STDERR "WORD+ $word -> $paragraph->{'word'}\n" if ($paragraph->{'debug'});
# The $paragraph->{'counter'} != 0 is here to avoid having an
# additional line output when the text is longer than the max.
if ($paragraph->{'counter'} != 0 and
- $paragraph->{'counter'} + _string_width($paragraph->{'word'}) +
- _string_width($paragraph->{'space'}) > $paragraph->{'max'}) {
+ $paragraph->{'counter'} + $paragraph->{'word_counter'} +
+ length($paragraph->{'space'}) > $paragraph->{'max'}) {
$result .= $paragraph->end_line();
}
}
if (defined($space)) {
$result .= $paragraph->add_pending_word();
$paragraph->{'space'} = $space;
- if ($paragraph->{'counter'} + _string_width($paragraph->{'space'})
+ if ($paragraph->{'counter'} + length($paragraph->{'space'})
> $paragraph->{'max'}) {
$result .= $paragraph->end_line();
}
@@ -159,11 +148,26 @@
$paragraph->{'end_sentence'} = 0;
}
+sub set_space_protected($$)
+{
+ my $paragraph = shift;
+ my $space_protection = shift;
+ $paragraph->{'protected_spaces'} = $space_protection;
+ # flush the spaces already existing
+ if ($space_protection) {
+ my $new_space = $paragraph->{'space'};
+ $paragraph->{'counter'} += length($new_space);
+ $paragraph->{'space'} = '';
+ return $new_space;
+ }
+ return '';
+}
+
my $end_sentence_character = quotemeta('.?!');
my $after_punctuation_characters = quotemeta('"\')]');
# wrap a text.
-sub wrap_next($$)
+sub add_text($$)
{
my $paragraph = shift;
my $text = shift;
@@ -173,12 +177,23 @@
if ($paragraph->{'debug'}) {
my $word = 'UNDEF';
$word = $paragraph->{'word'} if (defined($paragraph->{'word'}));
- print STDERR "($paragraph->{'counter'}) s `$paragraph->{'space'}', w `$word'\n";
+ print STDERR "($paragraph->{'counter'}+$paragraph->{'word_counter'}) s `$paragraph->{'space'}', w `$word'\n";
}
- if ($text =~ s/^\s+//) {
+ if ($text =~ s/^(\s+)//) {
print STDERR "SPACES($paragraph->{'counter'})\n" if ($paragraph->{'debug'});
my $added_word = $paragraph->{'word'};
$result .= $paragraph->add_pending_word();
+ if ($paragraph->{'protected_spaces'}) {
+ $paragraph->{'space'} .= $1;
+ if ($paragraph->{'space'} =~ s/\n/ /g
+ and !$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}
+ and length($paragraph->{'space'}) < 2) {
+ $paragraph->{'space'} = ' ';
+ }
+ $result .= $paragraph->{'space'};
+ $paragraph->{'counter'} += length($paragraph->{'space'});
+ $paragraph->{'space'} = '';
+ } else {
if ($paragraph->{'counter'} != 0) {
if (!$paragraph->{'frenchspacing'} and $paragraph->{'end_sentence'}) {
$paragraph->{'space'} = ' ';
@@ -186,8 +201,9 @@
$paragraph->{'space'} = ' ';
}
}
+ }
delete $paragraph->{'end_sentence'};
- if ($paragraph->{'counter'} + _string_width($paragraph->{'space'})
+ if ($paragraph->{'counter'} + length($paragraph->{'space'})
> $paragraph->{'max'}) {
$result .= $paragraph->end_line();
}
@@ -196,8 +212,9 @@
print STDERR "EAST_ASIAN\n" if ($paragraph->{'debug'});
$paragraph->{'word'} = '' if (!defined($paragraph->{'word'}));
$paragraph->{'word'} .= $added;
+ $paragraph->{'word_counter'} += 2;
if ($paragraph->{'counter'} != 0 and
- $paragraph->{'counter'} + _string_width($paragraph->{'word'})
+ $paragraph->{'counter'} + $paragraph->{'word_counter'}
> $paragraph->{'max'}) {
$result .= $paragraph->end_line();
}
Index: t/01use.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/01use.t,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- t/01use.t 6 Nov 2010 00:41:28 -0000 1.2
+++ t/01use.t 8 Nov 2010 23:50:47 -0000 1.3
@@ -1,5 +1,5 @@
# Before `make install' is performed this script should be runnable with
-# `make test'. After `make install' it should work as `perl Texinfo-Parser.t'
+# `make test'. After `make install' it should work as `perl 01use.t'
#########################
Index: t/paragraph.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/paragraph.t,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -r1.3 -r1.4
--- t/paragraph.t 8 Nov 2010 00:04:37 -0000 1.3
+++ t/paragraph.t 8 Nov 2010 23:50:47 -0000 1.4
@@ -9,10 +9,11 @@
#use Test;
use Test::More;
-BEGIN { plan tests => 38 };
+BEGIN { plan tests => 82 };
use lib '../texi2html/lib/Unicode-EastAsianWidth/lib/';
#push @INC, '../texi2html/lib/Unicode-EastAsianWidth/lib/';
use Texinfo::Convert::Paragraph;
+use Texinfo::Convert::Line;
ok(1, "modules loading"); # If we made it this far, we're ok.
#########################
@@ -32,7 +33,7 @@
$conf = {} if (!defined($conf));
my $para = Texinfo::Convert::Paragraph->new($conf);
foreach my $arg (@$args) {
- $result .= $para->wrap_next($arg);
+ $result .= $para->add_text($arg);
}
$result .= $para->end();
is ($result, $reference, $name);
@@ -41,6 +42,8 @@
test_para(['word'], "word\n", 'word');
test_para(['word other'], "word other\n", 'two_words');
+test_para(['word other'], "word other\n", 'two_words two spaces');
+test_para(['word ', ' other'], "word other\n", 'two_words feed, space inside');
test_para(['word '], "word\n", 'trailing spaces');
test_para([' word'], "word\n", 'leading spaces');
test_para([' ', ' word'], "word\n", 'double leading spaces');
@@ -66,23 +69,23 @@
my $para = Texinfo::Convert::Paragraph->new();
my $result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$result .= $para->add_next('_');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.)_ after\n", 'add char after end sentence');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
$result .= $para->end_line();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "\nafter\n", 'space after end_line');
#print STDERR "$result";
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$result .= $para->add_pending_word();
is ($result, 'aa.)', 'call add_pending_word');
$result = $para->end_line();
@@ -92,71 +95,301 @@
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$result .= $para->add_pending_word();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.) after\n", 'space after sentence and add_pending_word');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aA');
+$result .= $para->add_text('aA');
$result .= $para->add_next('.', undef, 1);
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aA. after\n", 'force end sentence after upper case');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aA');
-$result .= $para->wrap_next('.');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aA');
+$result .= $para->add_text('.');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aA. after\n", 'end sentence after upper case');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
-$result .= $para->wrap_next('))');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aa.)');
+$result .= $para->add_text('))');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.))) after\n", 'continue with after_punctuation_characters');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$para->inhibit_end_sentence();
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.) after\n", 'inhibit end sentence');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$para->inhibit_end_sentence();
$result .= $para->add_next('_');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.)_ after\n", 'inhibit end sentence then add next');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$para->inhibit_end_sentence();
-$result .= $para->wrap_next('aa.)');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('aa.)');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.)aa.) after\n", 'cancel inhibit end sentence');
$para = Texinfo::Convert::Paragraph->new();
$result = '';
-$result .= $para->wrap_next('aa.)');
+$result .= $para->add_text('aa.)');
$para->inhibit_end_sentence();
-$result .= $para->wrap_next('))');
-$result .= $para->wrap_next(' after');
+$result .= $para->add_text('))');
+$result .= $para->add_text(' after');
$result .= $para->end();
is ($result, "aa.))) after\n", 'inhibit end sentence and ))');
+$para = Texinfo::Convert::Paragraph->new({'max' => 2});
+$result = '';
+$para->set_space_protected(1);
+$result .= $para->add_text('aa.) bb ');
+is ($result, 'aa.) bb ', 'space protected 2 words');
+$result = $para->add_text(' eee ');
+is ($result, ' eee ', 'space protected more word');
+$result = $para->add_text(' .)');
+$result .= $para->add_next('_');
+$result .= $para->add_text("\n");
+is ($result, ' .)_ ', 'space protected and end of sentence and line added');
+$result = $para->add_text("aa\n");
+is ($result, 'aa ', 'protected space after end of line');
+$result = $para->add_text(' . gg');
+is ($result, ' . ', 'protected space with dot inside');
+$result = $para->add_text(". \n");
+$result .= $para->add_text("a");
+is ($result, 'gg. ', 'protected space end line with already a space');
+$result = $para->add_text(' ');
+$para->set_space_protected(0);
+$result .= $para->add_text("c ");
+is ($result, "a \nc", 'end protected space end of line');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result .= $para->add_text('aa');
+$para->set_space_protected(1);
+$result .= $para->add_text(' f f');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa f fggg', 'protected space within words');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa');
+$para->set_space_protected(1);
+$result .= $para->add_text(' f f ');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa f f ggg', 'protected space and space within words');
+$para->end();
+
$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text(' f f ');
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa f f ggg', 'text space protected space and space within words');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text(' f f ');
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa f f ggg', 'text space protected space and space after');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa ggg', 'empty protected 2 space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text("ggg\n");
+is ($result, 'aa ggg', 'empty protected 1 before space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa');
+$result .= $para->set_space_protected(1);
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa ggg', 'empty protected 1 after space');
+$para->end();
+
+$para = Texinfo::Convert::Paragraph->new();
+$result = '';
+$result = $para->add_text('aa ');
+$result .= $para->set_space_protected(1);
+$result .= $para->add_text(' ');
+$para->set_space_protected(0);
+$result .= $para->add_text(" ggg\n");
+is ($result, 'aa ggg', 'space protected space');
+$para->end();
+
+
+#print STDERR "$result";
+#exit;
+
+sub test_line($$$;$)
+{
+ my $args = shift;
+ my $reference = shift;
+ my $name = shift;
+ my $conf = shift;
+
+ my $result = '';
+ #$conf = {'debug' => 1} if (!defined($conf));
+ $conf = {} if (!defined($conf));
+ my $line = Texinfo::Convert::Line->new($conf);
+ foreach my $arg (@$args) {
+ $result .= $line->add_text($arg);
+ }
+ $result .= $line->end();
+ is ($result, $reference, "line $name");
+ #print STDERR "$result\n";
+}
+
+test_line(['word'], "word\n", 'word');
+test_line(['word other'], "word other\n", 'two_words');
+test_line(['word '], "word\n", 'trailing spaces');
+test_line([' word'], "word\n", 'leading spaces');
+test_line([' ', ' word'], "word\n", 'double leading spaces');
+test_line(['word other'], "word other\n", 'two_words_two_spaces');
+test_line(['word. other'], "word. other\n", 'two_words_dot');
+test_line(['word. other'], "word. other\n", 'two_words_dot_one_space');
+test_line(['word.) other'], "word.) other\n", 'two_words_dot_paren_one_space');
+test_line(['worD. other'], "worD. other\n", 'two_words_dot_upper');
+test_line(['word','other'], "wordother\n", 'concatenate');
+test_line(["\x{7b2c}\x{4e00} ",'other'], "\x{7b2c}\x{4e00} other\n", 'east_asian');
+test_line(['word. other'], "word. other\n", 'two_words_dot_frenchspacing', {'frenchspacing' => 1});
+test_line(["aa.)\x{7b2c} b"], "aa.)\x{7b2c} b\n", 'end_sentence_east_asian');
+
+my $line = Texinfo::Convert::Line->new();
+my $result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_next('_');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)_ after\n", 'line add char after end sentence');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->end_line();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "\nafter\n", 'line space after end_line');
+#print STDERR "$result";
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_pending_word();
+is ($result, 'aa.)', 'line call add_pending_word');
+$result = $line->end_line();
+is ($result, "\n", 'line call end_line after add_pending_word');
+$result = $line->end();
+is ($result, "\n", 'line call end after end_line');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_pending_word();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.) after\n", 'line space after sentence and add_pending_word');
+
+$para = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aA');
+$result .= $line->add_next('.', undef, 1);
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aA. after\n", 'line force end sentence after upper case');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aA');
+$result .= $line->add_text('.');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aA. after\n", 'line end sentence after upper case');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$result .= $line->add_text('))');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.))) after\n", 'line continue with after_punctuation_characters');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.) after\n", 'line inhibit end sentence');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_next('_');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)_ after\n", 'line inhibit end sentence then add next');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text('aa.)');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.)aa.) after\n", 'line cancel inhibit end sentence');
+
+$line = Texinfo::Convert::Line->new();
+$result = '';
+$result .= $line->add_text('aa.)');
+$line->inhibit_end_sentence();
+$result .= $line->add_text('))');
+$result .= $line->add_text(' after');
+$result .= $line->end();
+is ($result, "aa.))) after\n", 'line inhibit end sentence and ))');
1;
Index: Texinfo/Convert/Line.pm
===================================================================
RCS file: Texinfo/Convert/Line.pm
diff -N Texinfo/Convert/Line.pm
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ Texinfo/Convert/Line.pm 8 Nov 2010 23:50:47 -0000 1.1
@@ -0,0 +1,196 @@
+# Line.pm: handle line of text.
+#
+# Copyright 2010 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Original author: Patrice Dumas <address@hidden>
+
+# this module has nothing Texinfo specific. It is similar with
+# Texinfo::Convert::Paragraph, but simpler.
+# It could be even simpler: there is no need to delay outputting a word.
+
+package Texinfo::Convert::Line;
+
+use 5.006;
+use strict;
+
+use Unicode::EastAsianWidth;
+use Carp qw(cluck);
+
+# initialize a paragraph object.
+sub new($;$)
+{
+ my $class = shift;
+ my $conf = shift;
+ my $self = {'max' => 72, 'indent_length' => 0, 'counter' => 0,
+ 'space' => '', 'frenchspacing' => 0, 'line_beginning' => 1};
+ if (defined($conf)) {
+ foreach my $key (keys(%$conf)) {
+ if ($key eq 'text') {
+ $self->{'counter'} = _string_width($conf->{$key});
+ $self->{'line_beginning'} = 0 if ($self->{'counter'});
+ } else {
+ $self->{$key} = $conf->{$key};
+ }
+ }
+ }
+ bless $self, $class;
+}
+
+# string fixed length size takeing into account that east asian characters
+# may take 2 spaces.
+sub _string_width($)
+{
+ my $string = shift;
+
+ if (! defined($string)) {
+ Carp::cluck();
+ }
+ my $width = 0;
+ foreach my $character(split '', $string) {
+ if ($character =~ /\p{Unicode::EastAsianWidth::InFullwidth}/) {
+ $width += 2;
+ } else {
+ $width += 1;
+ }
+ }
+ return $width;
+}
+
+# end a line.
+sub end_line($)
+{
+ my $line = shift;
+ my $result = $line->add_pending_word();
+ $line->{'line_beginning'} = 1;
+ $line->{'space'} = '';
+ print STDERR "END_LINE\n" if ($line->{'debug'});
+ return "$result\n";
+}
+
+# put a pending word and spaces in the result string.
+sub add_pending_word($)
+{
+ my $line = shift;
+ my $result = '';
+
+ if (defined($line->{'word'})) {
+ if ($line->{'line_beginning'}) {
+ if ($line->{'indent_length'}) {
+ $result .= ' ' x ($line->{'indent_length'} - $line->{'counter'});
+ print STDERR "INDENT($line->{'counter'})\n" if ($line->{'debug'});
+ }
+ $line->{'line_beginning'} = 0;
+ } elsif ($line->{'space'}) {
+ $result .= $line->{'space'};
+ print STDERR "ADD_SPACES\n" if ($line->{'debug'});
+ }
+ $result .= $line->{'word'};
+ print STDERR "ADD_WORD[$line->{'word'}]\n" if ($line->{'debug'});
+ $line->{'word'} = undef;
+ }
+ return $result;
+}
+
+# end a paragraph
+sub end($)
+{
+ my $line = shift;
+ return $line->end_line();
+}
+
+# add a word and/or spaces and end of sentence.
+sub add_next($;$$$)
+{
+ my $line = shift;
+ my $word = shift;
+ my $space = shift;
+ my $end_sentence = shift;
+ my $result = '';
+
+ if (defined($word)) {
+ $line->{'word'} = '' if (!defined($line->{'word'}));
+ $line->{'word'} .= $word;
+ print STDERR "WORD+ $word -> $line->{'word'}\n" if ($line->{'debug'});
+ }
+ if (defined($space)) {
+ $result .= $line->add_pending_word();
+ $line->{'space'} = $space;
+ }
+ if (defined($end_sentence)) {
+ $line->{'end_sentence'} = $end_sentence;
+ }
+ return $result;
+}
+
+sub inhibit_end_sentence($)
+{
+ my $line = shift;
+ $line->{'end_sentence'} = 0;
+}
+
+my $end_sentence_character = quotemeta('.?!');
+my $after_punctuation_characters = quotemeta('"\')]');
+
+# wrap a text.
+sub add_text($$)
+{
+ my $line = shift;
+ my $text = shift;
+ my $result = '';
+
+ while ($text ne '') {
+ if ($line->{'debug'}) {
+ my $word = 'UNDEF';
+ $word = $line->{'word'} if (defined($line->{'word'}));
+ print STDERR "s `$line->{'space'}', w `$word'\n";
+ }
+ if ($text =~ s/^\s+//) {
+ print STDERR "SPACES\n" if ($line->{'debug'});
+ my $added_word = $line->{'word'};
+ $result .= $line->add_pending_word();
+ if (!$line->{'begin'}) {
+ if (!$line->{'frenchspacing'} and $line->{'end_sentence'}) {
+ $line->{'space'} = ' ';
+ } else {
+ $line->{'space'} = ' ';
+ }
+ }
+ delete $line->{'end_sentence'};
+ } elsif ($text =~ s/^([^\s\p{Unicode::EastAsianWidth::InFullwidth}]+)//) {
+ my $added_word = $1;
+ $result .= $line->add_next($added_word);
+ # now check if it is considered as an end of sentence
+ if (defined($line->{'end_sentence'}) and
+ $added_word =~ /^[$after_punctuation_characters]*$/) {
+ # do nothing in the case of a continuation of after_punctuation_characters
+ } elsif ($line->{'word'} =~ /[$end_sentence_character][$after_punctuation_characters]*$/
+ and $line->{'word'} !~ /[[:upper:]][$end_sentence_character][$after_punctuation_characters]*$/) {
+ $line->{'end_sentence'} = 1;
+ }
+ } elsif ($text =~ s/^(\p{Unicode::EastAsianWidth::InFullwidth})//) {
+ my $added = $1;
+ print STDERR "EAST_ASIAN\n" if ($line->{'debug'});
+ $line->{'word'} = '' if (!defined($line->{'word'}));
+ $line->{'word'} .= $added;
+ $result .= $line->add_pending_word();
+ delete $line->{'end_sentence'};
+ $line->{'space'} = '';
+ }
+ }
+ return $result;
+}
+
+1;
- texinfo/tp TODO Texinfo/Parser.pm Texinfo/Conve...,
Patrice Dumas <=