texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

texinfo/tp Texinfo/Parser.pm t/80include.t t/te...


From: Patrice Dumas
Subject: texinfo/tp Texinfo/Parser.pm t/80include.t t/te...
Date: Wed, 27 Oct 2010 18:44:30 +0000

CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        10/10/27 18:44:30

Modified files:
        tp/Texinfo     : Parser.pm 
        tp/t           : 80include.t test_utils.pl 

Log message:
        More informations on the parser code and file organization.
        Ignore @setfilename in @included files.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Parser.pm?cvsroot=texinfo&r1=1.114&r2=1.115
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/80include.t?cvsroot=texinfo&r1=1.5&r2=1.6
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/t/test_utils.pl?cvsroot=texinfo&r1=1.27&r2=1.28

Patches:
Index: Texinfo/Parser.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Parser.pm,v
retrieving revision 1.114
retrieving revision 1.115
diff -u -b -r1.114 -r1.115
--- Texinfo/Parser.pm   26 Oct 2010 19:36:51 -0000      1.114
+++ Texinfo/Parser.pm   27 Oct 2010 18:44:30 -0000      1.115
@@ -18,6 +18,13 @@
 # Original author: Patrice Dumas <address@hidden>
 # Parts (also from Patrice Dumas) come from texi2html.pl or texi2html.init.
 
+# The organization of the file is the following:
+#  module definitions.
+#  default parser state.  With explanation of the internal structures.
+#  initializations, mostly determination of command types.
+#  user visible subroutines.
+#  internal subroutines, doing the parsing.
+#  code used to transform a texinfo tree into texinfo text.
 
 package Texinfo::Parser;
 
@@ -116,7 +123,7 @@
 # encoding                Current encoding set by @documentencoding
 # input                   a stack, with last at bottom.  Holds the opened files
 #                         or text.  Pending macro expansion or text expansion
-#                         is also in that structure
+#                         is also in that structure.
 # misc_commands           the same than %misc_commands below, but with index
 #                         entry commands dynamically added
 # no_paragraph_commands   the same than %default_no_paragraph_commands
@@ -127,6 +134,23 @@
 # errors_warnings         a structure with the errors and warnings.
 # error_nrs               number of errors.
 
+# A line information is a hash reference with the keys:
+# line_nr        the line number
+# file_name      the file name
+# macro          if in a macro expansion, the name of the macro
+#
+# A text fragment information is a 2 element array reference, the first is the
+# text fragment, the second is the line information.
+#
+# The input structure is an array, the first is the most recently included
+# file.  The last element may be a file if the parsing is done on a file, 
+# with parse_texi_file, or simply pending text, if called as parse_texi_text.
+# Each element of the array is a hash reference.  The keys are:
+# pending    an array reference containing pending text fragments, either the
+#            text given as parse_texi_text or macro expansion text.
+# name       file name
+# line_nr    current line number in the file
+# fh         filehandle for the file
 
 
 my %no_brace_commands;             # commands never taking braces
@@ -320,11 +344,6 @@
   $brace_commands{$five_arg_command} = 5;
 }
 
-my %no_paragraph_contexts;
-foreach my $no_paragraph_context ('math', 'preformatted', 'menu', 'def') {
-  $no_paragraph_contexts{$no_paragraph_context} = 1;
-};
-
 
 # commands delimiting blocks, with an @end.
 # Value is either the number of arguments on the line separated by
@@ -659,6 +678,13 @@
 $full_text_commands{'center'} = 1;
 $full_text_commands{'exdent'} = 1;
 
+# contexts on the context_stack stack where an empty line doesn't trigger a
+# paragraph
+my %no_paragraph_contexts;
+foreach my $no_paragraph_context ('math', 'preformatted', 'menu', 'def') {
+  $no_paragraph_contexts{$no_paragraph_context} = 1;
+};
+
 my %canonical_texinfo_encodings;
 # These are the encodings from the texinfo manual
 foreach my $canonical_encoding('us-ascii', 'utf-8', 'iso-8859-1',
@@ -708,7 +734,8 @@
   return $struct;
 }
 
-# enter all the commands associated with an index name.
+# enter all the commands associated with an index name using the prefix
+# list
 sub _enter_index_commands ($$)
 {
   my $self = shift;
@@ -720,7 +747,11 @@
   }
 }
 
-# initialize a parser
+# initialization entry point.  Set up a parser.
+# The last argument, optional, is a hash provided by the user to change
+# the default values for what is present in %default_configuration.
+# The exact arguments of the function depend on how it was called,
+# in a object oriented way or not.
 sub parser(;$$)
 {
   my $class = shift;
@@ -766,11 +797,14 @@
       }
     }
   }
+
+  # Now initialize command hash that are dynamically modified, notably
+  # those for index commands, and lists, based on defaults and user provided.
   $parser->{'misc_commands'} = _deep_copy (\%misc_commands);
   $parser->{'simple_text_commands'} = _deep_copy (\%simple_text_commands);
   $parser->{'no_paragraph_commands'} = { %default_no_paragraph_commands };
   $parser->{'index_names'} = _deep_copy (\%index_names);
-  # a hash is simply concatenated
+  # a hash is simply concatenated.  It should be like %index_names.
   if (ref($parser->{'indices'}) eq 'HASH') {
     %{$parser->{'index_names'}} = (%{$parser->{'index_names'}}, 
                                    %{$parser->{'indices'}});
@@ -785,6 +819,8 @@
   $parser->{'errors_warnings'} = [];
   $parser->{'errors_nrs'} = 0;
   $parser->{'context_stack'} = [ $parser->{'context'} ];
+  # turn the array to a hash for speed.  Not sure it really matters for such
+  # a small array.
   foreach my $expanded_format(@{$parser->{'expanded_formats'}}) {
     $parser->{'expanded_formats_hash'}->{$expanded_format} = 1;
   }
@@ -803,7 +839,7 @@
   return $lines;
 }
 
-# construct a line numbers array matching a lines array, based on information
+# construct a text fragments array matching a lines array, based on information
 # supplied.
 # If $fixed_line_number is set the line number is not increased, otherwise
 # it is increased, beginning at $first_line.
@@ -904,6 +940,13 @@
   return ($self->{'index_names'}, $self->{'merged_indices'});
 }
 
+# Following are the internal subroutines.  The most important are
+# _parse_texi:  the main parser loop.
+# _end_line:    called at an end of line.  Opening of @include lines is
+#               done here.
+# _next_text:   present the next text fragment, from pending text or line,
+#               as described above.
+
 # for debugging
 sub _print_current($)
 {
@@ -980,7 +1023,7 @@
 }
 
 # parse a @macro line
-sub _parse_macro_command($$$$$;$)
+sub _parse_macro_command_line($$$$$;$)
 {
   my $self = shift;
   my $command = shift;
@@ -1089,6 +1132,56 @@
   return $current;
 }
 
+# close the current command, with error messages and give the parent.
+# If the last argument is given it is the command being closed if
+# there was no error, currently only block command, used for a
+# better error message.
+sub _close_current($$$;$)
+{
+  my $self = shift;
+  my $current = shift;
+  my $line_nr = shift;
+  my $command = shift;
+
+  if ($current->{'cmdname'}) {
+    if (exists($brace_commands{$current->{'cmdname'}})) {
+      pop @{$self->{'context_stack'}}
+         if (exists $context_brace_commands{$current->{'cmdname'}});
+      $current = _close_brace_command($self, $current, $line_nr);
+    } elsif (exists($block_commands{$current->{'cmdname'}})) {
+      if (defined($command)) {
+        $self->_line_error(sprintf($self->__("address@hidden' expected `%s', but saw `%s'"),
+                                   $current->{'cmdname'}, $command), $line_nr);
+      } else {
+        $self->_line_error(sprintf($self->__("No matching `%cend %s'"),
+                                   ord('@'), $current->{'cmdname'}), $line_nr);
+      }
+      pop @{$self->{'context_stack'}} if
+         ($preformatted_commands{$current->{'cmdname'}}
+           or $menu_commands{$current->{'cmdname'}});
+      $current = $current->{'parent'};
+    } else { # FIXME is this possible? And does it make sense?
+      # silently close containers and @-commands without brace nor @end
+      #_line_error($self, sprintf($self->__("Closing address@hidden"), 
+      #                          $current->{'cmdname'}), $line_nr);
+      $current = $current->{'parent'};
+    }
+  } elsif ($current->{'type'}) {
+    if ($current->{'type'} eq 'bracketed') {
+    # FIXME record the line number in the bracketed and use it
+      _line_error ($self, sprintf($self->__("Misplaced %c"),
+                                             ord('{')), $line_nr);
+      $current = $current->{'parent'};
+    } else {
+      $current = $current->{'parent'} if ($current->{'parent'});
+    }
+  } else { # Should never go here.
+    $current = $current->{'parent'} if ($current->{'parent'});
+    print STDERR "BUG: Where am I? "._print_current($current);
+  }
+  return $current;
+}
+
 # a command arg means closing until that command is found.
 # no command arg means closing until the root or a root_command
 # is found.
@@ -1553,55 +1646,6 @@
   return address@hidden, @args_results];
 }
 
-# close the current command, with error messages and give the parent.
-# If the last argument is given it is the command being closed if
-# there was no error, currently only block command, used for a
-# better error message.
-sub _close_current($$$;$)
-{
-  my $self = shift;
-  my $current = shift;
-  my $line_nr = shift;
-  my $command = shift;
-
-  if ($current->{'cmdname'}) {
-    if (exists($brace_commands{$current->{'cmdname'}})) {
-      pop @{$self->{'context_stack'}}
-         if (exists $context_brace_commands{$current->{'cmdname'}});
-      $current = _close_brace_command($self, $current, $line_nr);
-    } elsif (exists($block_commands{$current->{'cmdname'}})) {
-      if (defined($command)) {
-        $self->_line_error(sprintf($self->__("address@hidden' expected `%s', but saw `%s'"),
-                                   $current->{'cmdname'}, $command), $line_nr);
-      } else {
-        $self->_line_error(sprintf($self->__("No matching `%cend %s'"),
-                                   ord('@'), $current->{'cmdname'}), $line_nr);
-      }
-      pop @{$self->{'context_stack'}} if
-         ($preformatted_commands{$current->{'cmdname'}}
-           or $menu_commands{$current->{'cmdname'}});
-      $current = $current->{'parent'};
-    } else { # FIXME is this possible? And does it make sense?
-      # silently close containers and @-commands without brace nor @end
-      #_line_error($self, sprintf($self->__("Closing address@hidden"), 
-      #                          $current->{'cmdname'}), $line_nr);
-      $current = $current->{'parent'};
-    }
-  } elsif ($current->{'type'}) {
-    if ($current->{'type'} eq 'bracketed') {
-    # FIXME record the line number in the bracketed and use it
-      _line_error ($self, sprintf($self->__("Misplaced %c"),
-                                             ord('{')), $line_nr);
-      $current = $current->{'parent'};
-    } else {
-      $current = $current->{'parent'} if ($current->{'parent'});
-    }
-  } else { # Should never go here.
-    $current = $current->{'parent'} if ($current->{'parent'});
-    print STDERR "BUG: Where am I? "._print_current($current);
-  }
-  return $current;
-}
 
 # close constructs and do stuff at end of line (or end of the document)
 sub _end_line($$$);
@@ -1874,8 +1918,10 @@
       }
     }
     $current = $current->{'parent'};
-    if ($included_file) {
-      # remove completly the include file command
+    # if file was included, remove completely the include file command.
+    # Also ignore @setfilename in included file, as said in the manual.
+    if ($included_file or ($command eq 'setfilename'
+                           and scalar(@{$self->{'input'}}) > 1)) {
       pop @{$current->{'contents'}};
     # columnfractions 
     } elsif ($command eq 'columnfractions') {
@@ -2690,7 +2736,7 @@
         # @-command with matching @end
         } elsif (exists($block_commands{$command})) {
           if ($command eq 'macro' or $command eq 'rmacro') {
-            my $macro = _parse_macro_command ($self, $command, $line, 
+            my $macro = _parse_macro_command_line ($self, $command, $line, 
                                  $current, $line_nr);
             push @{$current->{'contents'}}, $macro;
             $current = $current->{'contents'}->[-1];
@@ -3306,6 +3352,11 @@
   return $args;
 }
 
+
+
+# Following subroutines deal with transforming a texinfo tree into texinfo
+# text.  Should give the text that was parsed, except for a few cases.
+
 # expand a tree to the corresponding texinfo.
 sub tree_to_texi ($)
 {

Index: t/80include.t
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/80include.t,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -b -r1.5 -r1.6
--- t/80include.t       18 Oct 2010 00:16:52 -0000      1.5
+++ t/80include.t       27 Oct 2010 18:44:30 -0000      1.6
@@ -122,7 +122,10 @@
 ['include_at_end_line',
 '@include inc_file.texi@
 
-After.']
+After.'],
+['include_with_setfilename',
+'@include included_file_with_setfilename.texi 
+']
 );
 
 our ($arg_test_case, $arg_generate, $arg_debug);

Index: t/test_utils.pl
===================================================================
RCS file: /sources/texinfo/texinfo/tp/t/test_utils.pl,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -b -r1.27 -r1.28
--- t/test_utils.pl     26 Oct 2010 22:28:43 -0000      1.27
+++ t/test_utils.pl     27 Oct 2010 18:44:30 -0000      1.28
@@ -129,8 +129,11 @@
     $perl_string_converted_text =~ s/'/\\'/g;
     $out_result .= "\n".'$result_texis{\''.$test_name.'\'} = \''.$perl_string_result."';\n\n";
     $out_result .= "\n".'$result_texts{\''.$test_name.'\'} = \''.$perl_string_converted_text."';\n\n";
+    {
+      local $Data::Dumper::Sortkeys = 1;
     $out_result .= "".Data::Dumper->Dump([$errors], ['$result_errors{\''.$test_name.'\'}']) ."\n\n";
     $out_result .= "".Data::Dumper->Dump([$indices], ['$result_indices{\''.$test_name.'\'}']) ."\n\n";
+    }
     $out_result .= "1;\n";
     print OUT $out_result;
     close (OUT);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]