[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp Texinfo/Parser.pm t/80include.t t/te...
From: |
Patrice Dumas |
Subject: |
texinfo/tp Texinfo/Parser.pm t/80include.t t/te... |
Date: |
Wed, 27 Oct 2010 18:44:30 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 10/10/27 18:44:30
Modified files:
tp/Texinfo : Parser.pm
tp/t : 80include.t test_utils.pl
Log message:
More informations on the parser code and file organization.
Ignore @setfilename in @included files.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Parser.pm?cvsroot=texinfo&r1=1.114&r2=1.115
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/80include.t?cvsroot=texinfo&r1=1.5&r2=1.6
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/test_utils.pl?cvsroot=texinfo&r1=1.27&r2=1.28
Patches:
Index: Texinfo/Parser.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Parser.pm,v
retrieving revision 1.114
retrieving revision 1.115
diff -u -b -r1.114 -r1.115
--- Texinfo/Parser.pm 26 Oct 2010 19:36:51 -0000 1.114
+++ Texinfo/Parser.pm 27 Oct 2010 18:44:30 -0000 1.115
@@ -18,6 +18,13 @@
# Original author: Patrice Dumas <address@hidden>
# Parts (also from Patrice Dumas) come from texi2html.pl or texi2html.init.
+# The organization of the file is the following:
+# module definitions.
+# default parser state. With explanation of the internal structures.
+# initializations, mostly determination of command types.
+# user visible subroutines.
+# internal subroutines, doing the parsing.
+# code used to transform a texinfo tree into texinfo text.
package Texinfo::Parser;
@@ -116,7 +123,7 @@
# encoding Current encoding set by @documentencoding
# input a stack, with last at bottom. Holds the opened files
# or text. Pending macro expansion or text expansion
-# is also in that structure
+# is also in that structure.
# misc_commands the same than %misc_commands below, but with index
# entry commands dynamically added
# no_paragraph_commands the same than %default_no_paragraph_commands
@@ -127,6 +134,23 @@
# errors_warnings a structure with the errors and warnings.
# error_nrs number of errors.
+# A line information is an hash reference with the keys:
+# line_nr the line number
+# file_name the file name
+# macro if in a macro expansion, the name of the macro
+#
+# A text fragment information is a 2 element array reference, the first is the
+# text fragment, the second is the line information.
+#
+# The input structure is an array, the first is the most recently included
+# file. The last element may be a file if the parsing is done on a file,
+# with parse_texi_file, or simply pending text, if called as parse_texi_text.
+# each element of the array is a hash reference. The key are:
+# pending an array reference containing pending text fragments, either the
+# text given as parse_texi_text or macro expansion text.
+# name file name
+# line_nr current line number in the file
+# fh filehandle for the file
my %no_brace_commands; # commands never taking braces
@@ -320,11 +344,6 @@
$brace_commands{$five_arg_command} = 5;
}
-my %no_paragraph_contexts;
-foreach my $no_paragraph_context ('math', 'preformatted', 'menu', 'def') {
- $no_paragraph_contexts{$no_paragraph_context} = 1;
-};
-
# commands delimiting blocks, with an @end.
# Value is either the number of arguments on the line separated by
@@ -659,6 +678,13 @@
$full_text_commands{'center'} = 1;
$full_text_commands{'exdent'} = 1;
+# contexts on the context_stack stack where empty line don't trigger
+# paragraph
+my %no_paragraph_contexts;
+foreach my $no_paragraph_context ('math', 'preformatted', 'menu', 'def') {
+ $no_paragraph_contexts{$no_paragraph_context} = 1;
+};
+
my %canonical_texinfo_encodings;
# These are the encodings from the texinfo manual
foreach my $canonical_encoding('us-ascii', 'utf-8', 'iso-8859-1',
@@ -708,7 +734,8 @@
return $struct;
}
-# enter all the commands associated with an index name.
+# enter all the commands associated with an index name using the prefix
+# list
sub _enter_index_commands ($$)
{
my $self = shift;
@@ -720,7 +747,11 @@
}
}
-# initialize a parser
+# initialization entry point. Set up a parser.
+# The last argument, optional, is a hash provided by the user to change
+# the default values for what is present in %default_configuration.
+# The exact arguments of the function depend on how it was called,
+# in a object oriented way or not.
sub parser(;$$)
{
my $class = shift;
@@ -766,11 +797,14 @@
}
}
}
+
+ # Now initialize command hash that are dynamically modified, notably
+ # those for index commands, and lists, based on defaults and user provided.
$parser->{'misc_commands'} = _deep_copy (\%misc_commands);
$parser->{'simple_text_commands'} = _deep_copy (\%simple_text_commands);
$parser->{'no_paragraph_commands'} = { %default_no_paragraph_commands };
$parser->{'index_names'} = _deep_copy (\%index_names);
- # a hash is simply concatenated
+ # a hash is simply concatenated. It should be like %index_names.
if (ref($parser->{'indices'}) eq 'HASH') {
%{$parser->{'index_names'}} = (%{$parser->{'index_names'}},
%{$parser->{'indices'}});
@@ -785,6 +819,8 @@
$parser->{'errors_warnings'} = [];
$parser->{'errors_nrs'} = 0;
$parser->{'context_stack'} = [ $parser->{'context'} ];
+ # turn the array to a hash for speed. Not sure it really matters for such
+ # a small array.
foreach my $expanded_format(@{$parser->{'expanded_formats'}}) {
$parser->{'expanded_formats_hash'}->{$expanded_format} = 1;
}
@@ -803,7 +839,7 @@
return $lines;
}
-# construct a line numbers array matching a lines array, based on information
+# construct a text fragments array matching a lines array, based on information
# supplied.
# If $fixed_line_number is set the line number is not increased, otherwise
# it is increased, beginning at $first_line.
@@ -904,6 +940,13 @@
return ($self->{'index_names'}, $self->{'merged_indices'});
}
+# Following are the internal subsections. The most important are
+# _parse_texi: the main parser loop.
+# _end_line: called at an end of line. Opening if @include lines is
+# done here.
+# _next_text: present the next text fragment, from pending text or line,
+# as described above.
+
# for debugging
sub _print_current($)
{
@@ -980,7 +1023,7 @@
}
# parse a @macro line
-sub _parse_macro_command($$$$$;$)
+sub _parse_macro_command_line($$$$$;$)
{
my $self = shift;
my $command = shift;
@@ -1089,6 +1132,56 @@
return $current;
}
+# close the current command, with error messages and give the parent.
+# If the last argument is given it is the command being closed if
+# there was no error, currently only block command, used for a
+# better error message.
+sub _close_current($$$;$)
+{
+ my $self = shift;
+ my $current = shift;
+ my $line_nr = shift;
+ my $command = shift;
+
+ if ($current->{'cmdname'}) {
+ if (exists($brace_commands{$current->{'cmdname'}})) {
+ pop @{$self->{'context_stack'}}
+ if (exists $context_brace_commands{$current->{'cmdname'}});
+ $current = _close_brace_command($self, $current, $line_nr);
+ } elsif (exists($block_commands{$current->{'cmdname'}})) {
+ if (defined($command)) {
+ $self->_line_error(sprintf($self->__("`\@end' expected `%s', but saw `%s'"),
+ $current->{'cmdname'}, $command), $line_nr);
+ } else {
+ $self->_line_error(sprintf($self->__("No matching `%cend %s'"),
+ ord('@'), $current->{'cmdname'}), $line_nr);
+ }
+ pop @{$self->{'context_stack'}} if
+ ($preformatted_commands{$current->{'cmdname'}}
+ or $menu_commands{$current->{'cmdname'}});
+ $current = $current->{'parent'};
+ } else { # FIXME is this possible? And does it make sense?
+ # silently close containers and @-commands without brace nor @end
+ #_line_error($self, sprintf($self->__("Closing address@hidden"),
+ # $current->{'cmdname'}), $line_nr);
+ $current = $current->{'parent'};
+ }
+ } elsif ($current->{'type'}) {
+ if ($current->{'type'} eq 'bracketed') {
+ # FIXME record the line number in the bracketed and use it
+ _line_error ($self, sprintf($self->__("Misplaced %c"),
+ ord('{')), $line_nr);
+ $current = $current->{'parent'};
+ } else {
+ $current = $current->{'parent'} if ($current->{'parent'});
+ }
+ } else { # Should never go here.
+ $current = $current->{'parent'} if ($current->{'parent'});
+ print STDERR "BUG: Where am I? "._print_current($current);
+ }
+ return $current;
+}
+
# a command arg means closing until that command is found.
# no command arg means closing until the root or a root_command
# is found.
@@ -1553,55 +1646,6 @@
return address@hidden, @args_results];
}
-# close the current command, with error messages and give the parent.
-# If the last argument is given it is the command being closed if
-# there was no error, currently only block command, used for a
-# better error message.
-sub _close_current($$$;$)
-{
- my $self = shift;
- my $current = shift;
- my $line_nr = shift;
- my $command = shift;
-
- if ($current->{'cmdname'}) {
- if (exists($brace_commands{$current->{'cmdname'}})) {
- pop @{$self->{'context_stack'}}
- if (exists $context_brace_commands{$current->{'cmdname'}});
- $current = _close_brace_command($self, $current, $line_nr);
- } elsif (exists($block_commands{$current->{'cmdname'}})) {
- if (defined($command)) {
- $self->_line_error(sprintf($self->__("`\@end' expected `%s', but saw `%s'"),
- $current->{'cmdname'}, $command), $line_nr);
- } else {
- $self->_line_error(sprintf($self->__("No matching `%cend %s'"),
- ord('@'), $current->{'cmdname'}), $line_nr);
- }
- pop @{$self->{'context_stack'}} if
- ($preformatted_commands{$current->{'cmdname'}}
- or $menu_commands{$current->{'cmdname'}});
- $current = $current->{'parent'};
- } else { # FIXME is this possible? And does it make sense?
- # silently close containers and @-commands without brace nor @end
- #_line_error($self, sprintf($self->__("Closing address@hidden"),
- # $current->{'cmdname'}), $line_nr);
- $current = $current->{'parent'};
- }
- } elsif ($current->{'type'}) {
- if ($current->{'type'} eq 'bracketed') {
- # FIXME record the line number in the bracketed and use it
- _line_error ($self, sprintf($self->__("Misplaced %c"),
- ord('{')), $line_nr);
- $current = $current->{'parent'};
- } else {
- $current = $current->{'parent'} if ($current->{'parent'});
- }
- } else { # Should never go here.
- $current = $current->{'parent'} if ($current->{'parent'});
- print STDERR "BUG: Where am I? "._print_current($current);
- }
- return $current;
-}
# close constructs and do stuff at end of line (or end of the document)
sub _end_line($$$);
@@ -1874,8 +1918,10 @@
}
}
$current = $current->{'parent'};
- if ($included_file) {
- # remove completly the include file command
+ # if filie was included, remove completly the include file command.
+ # Also ignore @setfilename in included file, as said in the manual.
+ if ($included_file or ($command eq 'setfilename'
+ and scalar(@{$self->{'input'}}) > 1)) {
pop @{$current->{'contents'}};
# columnfractions
} elsif ($command eq 'columnfractions') {
@@ -2690,7 +2736,7 @@
# @-command with matching @end
} elsif (exists($block_commands{$command})) {
if ($command eq 'macro' or $command eq 'rmacro') {
- my $macro = _parse_macro_command ($self, $command, $line,
+ my $macro = _parse_macro_command_line ($self, $command, $line,
$current, $line_nr);
push @{$current->{'contents'}}, $macro;
$current = $current->{'contents'}->[-1];
@@ -3306,6 +3352,11 @@
return $args;
}
+
+
+# Following subroutines deal with transforming a texinfo tree into texinfo
+# text. Should give the text that was used parsed, except for a few cases.
+
# expand a tree to the corresponding texinfo.
sub tree_to_texi ($)
{
Index: t/80include.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/80include.t,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -b -r1.5 -r1.6
--- t/80include.t 18 Oct 2010 00:16:52 -0000 1.5
+++ t/80include.t 27 Oct 2010 18:44:30 -0000 1.6
@@ -122,7 +122,10 @@
['include_at_end_line',
'@include inc_file.texi@
-After.']
+After.'],
+['include_with_setfilename',
+'@include included_file_with_setfilename.texi
+']
);
our ($arg_test_case, $arg_generate, $arg_debug);
Index: t/test_utils.pl
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/test_utils.pl,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -b -r1.27 -r1.28
--- t/test_utils.pl 26 Oct 2010 22:28:43 -0000 1.27
+++ t/test_utils.pl 27 Oct 2010 18:44:30 -0000 1.28
@@ -129,8 +129,11 @@
$perl_string_converted_text =~ s/'/\\'/g;
$out_result .= "\n".'$result_texis{\''.$test_name.'\'} = \''.$perl_string_result."';\n\n";
$out_result .= "\n".'$result_texts{\''.$test_name.'\'} = \''.$perl_string_converted_text."';\n\n";
+ {
+ local $Data::Dumper::Sortkeys = 1;
$out_result .= "".Data::Dumper->Dump([$errors], ['$result_errors{\''.$test_name.'\'}']) ."\n\n";
$out_result .= "".Data::Dumper->Dump([$indices], ['$result_indices{\''.$test_name.'\'}']) ."\n\n";
+ }
$out_result .= "1;\n";
print OUT $out_result;
close (OUT);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- texinfo/tp Texinfo/Parser.pm t/80include.t t/te...,
Patrice Dumas <=