octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Parsing "commands"


From: John W. Eaton
Subject: Parsing "commands"
Date: Sat, 14 Feb 2009 20:12:43 -0500

About a year ago, I wrote:

| I'm thinking about modifying the parser to remove the special "command"
| and "rawcommand" tags for symbols and instead use the surrounding
| context to decide when the parser is looking at a command-style
| function.  This change would improve compatibility with Matlab.
| 
| I resisted this kind of change in the past because I didn't like the
| following behavior of Matlab:
| 
| Given a function foo, the expression
| 
|   foo -1
| 
| would call foo with the character string "-1" as an argument but
| 
|   foo - 1
|   foo-1
|   foo- 1
| 
| would all call foo with no arguments, then subtract 1 from the value
| it returned.
| 
| Now I'm beginning to think the advantage of improved compatibility and
| not having to tag functions as commands outweighs the trouble that
| might be caused by this small inconsistency.
| 
| Also, we no longer mark any functions in Octave or Octave Forge with
| the "rawcommand" tag.  It was only used to support the special syntax
| used by gset/gplot/gsplot.  Now that those are gone I see no reason to
| keep the "rawcommand" tag in any case.
| 
| Comments?

The complete discussion is here:

  http://www.nabble.com/proposed-parser-changes-to16035583.html#a16035583

It seems that most people were in favor of this type of change.  Since
I've been looking at the lexer recently, I decided to try to implement
the change.  My first attempt is below.  I tried to implement what is
described here:

  
  http://www.mathworks.com/access/helpdesk/help/techdoc/index.html?/access/helpdesk/help/techdoc/matlab_prog/f7-58170.html#f7-58289

under the heading

  Recognizing Function Calls That Use Command Syntax

but that description is not entirely precise.  It includes the statement

  The rules are complicated and have exceptions.  In general, when
  MATLAB recognizes an identifier (which might name a function or a
  variable), it analyzes the characters that follow the identifier to
  determine what kind of expression exists.  The expression is usually
  a function call using command syntax when all of the following are
  true:

Note the use of "usually", along with no explanation of precisely what
the additional exceptions might be!

Anyway, my best effort is attached below.  To apply this, you will
also need to apply the previous patch for the lexer and parser that I
sent to the address@hidden list earlier today:

  https://www-old.cae.wisc.edu/pipermail/bug-octave/2009-February/007996.html

Note that I had to make a few changes to the tests.  This change will
break code that does things like

  x = load -text foo.dat

Now if you want to assign the result of a function call, you must use
function call syntax:

  x = load ("text", "foo.dat");

Also, I have not yet removed the code that handles marking things as
commands/rawcommands.  I propose making mark_as_command and
unmark_command do nothing.  What should is_command return?  Always
true, since any function may be a command?  Likewise for the raw
versions.  They can issue an optional warning once per session if they
are used.  We'll also remove all the PKG_ADD: lines that call
mark_as_command, and convert all the uses of the DEFCMD macro to be
DEFUN instead, and remove the DEFCMD macro.

Although I see all tests passing with this change, it is still quite
experimental and probably not quite right yet, so I have not checked
it in.  But it would help for it and the previous lexer/parser patch
to get some additional testing.

Comments?

jwe

diff --git a/scripts/miscellaneous/edit.m b/scripts/miscellaneous/edit.m
--- a/scripts/miscellaneous/edit.m
+++ b/scripts/miscellaneous/edit.m
@@ -498,13 +498,13 @@
 endfunction
 
 %!test
-%! s.editor = edit get editor;
-%! s.home = edit get home;
-%! s.author = edit get author;
-%! s.email = edit get email;
-%! s.license = edit get license;
-%! s.editinplace = edit get editinplace;
-%! s.mode = edit get mode;
+%! s.editor = edit ("get", "editor");
+%! s.home = edit ("get", "home");
+%! s.author = edit ("get", "author");
+%! s.email = edit ("get", "email");
+%! s.license = edit ("get", "license");
+%! s.editinplace = edit ("get", "editinplace");
+%! s.mode = edit ("get", "mode");
 %! edit editor none
 %! edit home none
 %! edit author none
diff --git a/src/lex.h b/src/lex.h
--- a/src/lex.h
+++ b/src/lex.h
@@ -81,6 +81,10 @@
   // TRUE means that we should convert spaces to a comma inside a
   // matrix definition.
   bool convert_spaces_to_comma;
+
+  // TRUE means we are at the beginning of a statement, where a
+  // command name is possible.
+  bool at_beginning_of_statement;
 
   // TRUE means we're in the middle of defining a function.
   bool defining_func;
diff --git a/src/lex.l b/src/lex.l
--- a/src/lex.l
+++ b/src/lex.l
@@ -1,7 +1,7 @@
 /*
 
 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-              2002, 2003, 2004, 2005, 2006, 2007 John W. Eaton
+              2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 John W. Eaton
 
 This file is part of Octave.
 
@@ -156,7 +156,7 @@
     } \
   while (0)
 
-#define BIN_OP_RETURN(tok, convert) \
+#define BIN_OP_RETURN(tok, convert, bos) \
   do \
     { \
       yylval.tok_val = new token (input_line_number, current_input_column); \
@@ -165,15 +165,16 @@
       lexer_flags.quote_is_transpose = false; \
       lexer_flags.convert_spaces_to_comma = convert; \
       lexer_flags.looking_for_object_index = false; \
+      lexer_flags.at_beginning_of_statement = bos; \
       COUNT_TOK_AND_RETURN (tok); \
     } \
   while (0)
 
-#define XBIN_OP_RETURN(tok, convert) \
+#define XBIN_OP_RETURN(tok, convert, bos) \
   do \
     { \
       gripe_matlab_incompatible_operator (yytext); \
-      BIN_OP_RETURN (tok, convert); \
+      BIN_OP_RETURN (tok, convert, bos); \
     } \
   while (0)
 
@@ -339,6 +340,9 @@
 
     BEGIN (NESTED_FUNCTION_BEGIN);
     xunput (yytext[0], yytext);
+
+    lexer_flags.at_beginning_of_statement = true;
+
     COUNT_TOK_AND_RETURN (';');
   }
 
@@ -347,7 +351,9 @@
 
     BEGIN (INITIAL);
     xunput (yytext[0], yytext);
+
     prep_for_nested_function ();
+
     COUNT_TOK_AND_RETURN (FCN);
   }
 
@@ -368,6 +374,7 @@
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.doing_rawcommand = false;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = true;
 
     COUNT_TOK_AND_RETURN ('\n');
   }
@@ -376,6 +383,7 @@
     LEXER_DEBUG ("<COMMAND_START>[\\;\\,]");
 
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = true;
 
     if (lexer_flags.doing_rawcommand)
       TOK_PUSH_AND_RETURN (yytext, SQ_STRING);
@@ -391,6 +399,8 @@
 <COMMAND_START>[\"\'] {
     LEXER_DEBUG ("<COMMAND_START>[\\\"\\']");
 
+    lexer_flags.at_beginning_of_statement = false;
+
     current_input_column++;
     int tok = handle_string (yytext[0], true);
 
@@ -403,6 +413,7 @@
     std::string tok = strip_trailing_whitespace (yytext);
 
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     TOK_PUSH_AND_RETURN (tok, SQ_STRING);
   }
@@ -430,6 +441,7 @@
     lexer_flags.looking_at_object_index.pop_front ();
 
     lexer_flags.looking_for_object_index = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     int c = yytext[yyleng-1];
     int cont_is_spc = eat_continuation ();
@@ -455,6 +467,7 @@
     lexer_flags.looking_at_object_index.pop_front ();
 
     lexer_flags.looking_for_object_index = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     int c = yytext[yyleng-1];
     int cont_is_spc = eat_continuation ();
@@ -483,6 +496,7 @@
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     if (! lexer_flags.looking_at_object_index.front ())
       {
@@ -508,6 +522,8 @@
     LEXER_DEBUG ("<MATRIX_START>{S}+");
 
     current_input_column += yyleng;
+
+    lexer_flags.at_beginning_of_statement = false;
 
     int tmp = eat_continuation ();
 
@@ -556,6 +572,7 @@
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     COUNT_TOK_AND_RETURN (';');
   }
@@ -578,6 +595,7 @@
 
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     if (nesting_level.none ())
       return LEXICAL_ERROR;
@@ -602,6 +620,7 @@
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     if (lexer_flags.defining_func && ! lexer_flags.parsed_function_name)
       lexer_flags.looking_at_return_list = true;
@@ -624,6 +643,7 @@
     lexer_flags.looking_at_object_index.pop_front ();
 
     lexer_flags.looking_for_object_index = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     TOK_RETURN (']');
   }
@@ -727,6 +747,7 @@
     lexer_flags.convert_spaces_to_comma = false;
     lexer_flags.looking_at_function_handle++;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     COUNT_TOK_AND_RETURN ('@');
   }
@@ -742,12 +763,20 @@
 
     input_line_number++;
     current_input_column = 1;
+
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
+
     if (nesting_level.none ())
-      COUNT_TOK_AND_RETURN ('\n');
+      {
+       lexer_flags.at_beginning_of_statement = true;
+       COUNT_TOK_AND_RETURN ('\n');
+      }
     else if (nesting_level.is_paren ())
-      gripe_matlab_incompatible ("bare newline inside parentheses");
+      {
+       lexer_flags.at_beginning_of_statement = false;
+       gripe_matlab_incompatible ("bare newline inside parentheses");
+      }
     else if (nesting_level.is_bracket_or_brace ())
       return LEXICAL_ERROR;
   }
@@ -821,6 +850,7 @@
     current_input_column = 1;
     block_comment_nesting_level++;
     promptflag--;
+
     bool eof = false;
     process_comment (true, eof);
   }
@@ -829,49 +859,50 @@
 // Other operators.
 %}
 
-":"     { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false); }
+":"     { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false, false); }
 
-".+"   { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false); }
-".-"   { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false); }
-".*"   { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false); }
-"./"   { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false); }
-".\\"  { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false); }
-".^"   { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false); }
-".**"  { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false); }
-".'"   { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN 
(TRANSPOSE, true); }
-"++"   { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN 
(PLUS_PLUS, true); }
-"--"   { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN 
(MINUS_MINUS, true); }
-"<="   { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false); }
-"=="   { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false); }
-"~="   { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false); }
-"!="   { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false); }
-">="   { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false); }
-"&"    { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false); }
-"|"    { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false); }
-"<"    { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false); }
-">"    { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false); }
-"+"     { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false); }
-"-"     { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false); }
-"*"    { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false); }
-"/"    { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false); }
-"\\"   { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false); }
-";"    { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true); }
-","    { LEXER_DEBUG (","); BIN_OP_RETURN (',', true); }
-"^"    { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false); }
-"**"   { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false); }
-"="    { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true); }
-"&&"   { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false); }
-"||"   { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false); }
-"<<"   { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false); }
-">>"   { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false); }
+".+"   { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false, false); }
+".-"   { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false, false); }
+".*"   { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false, false); }
+"./"   { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false, false); }
+".\\"  { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false, false); }
+".^"   { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false, false); }
+".**"  { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false, false); }
+".'"   { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN 
(TRANSPOSE, true, false); }
+"++"   { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN 
(PLUS_PLUS, true, false); }
+"--"   { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN 
(MINUS_MINUS, true, false); }
+"<="   { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false, false); }
+"=="   { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false, false); }
+"~="   { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false, false); }
+"!="   { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false, false); }
+">="   { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false, false); }
+"&"    { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false, false); }
+"|"    { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false, false); }
+"<"    { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false, false); }
+">"    { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false, false); }
+"+"     { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false, false); }
+"-"     { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false, false); }
+"*"    { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false, false); }
+"/"    { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false, false); }
+"\\"   { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false, false); }
+";"     { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true, true); }
+","     { LEXER_DEBUG (","); BIN_OP_RETURN (',', true, ! 
lexer_flags.looking_at_object_index.front ()); }
+"^"    { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false, false); }
+"**"   { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false, false); }
+"="    { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true, false); }
+"&&"   { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false, false); }
+"||"   { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false, false); }
+"<<"   { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false, false); }
+">>"   { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false, false); }
+
 
 {NOT} {
     LEXER_DEBUG ("{NOT}");
 
     if (yytext[0] == '~')
-      BIN_OP_RETURN (EXPR_NOT, false);
+      BIN_OP_RETURN (EXPR_NOT, false, false);
     else
-      XBIN_OP_RETURN (EXPR_NOT, false);
+      XBIN_OP_RETURN (EXPR_NOT, false, false);
   }
 
 "(" {
@@ -887,6 +918,7 @@
 
     lexer_flags.looking_at_indirect_ref = false;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     nesting_level.paren ();
     promptflag--;
@@ -905,6 +937,7 @@
     lexer_flags.quote_is_transpose = true;
     lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace ();
     lexer_flags.looking_for_object_index = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     do_comma_insert_check ();
 
@@ -915,26 +948,27 @@
     LEXER_DEBUG (".");
 
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     TOK_RETURN ('.');
   }
 
-"+="   { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false); }
-"-="   { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false); }
-"*="   { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false); }
-"/="   { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false); }
-"\\="  { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false); }
-".+="  { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false); }
-".-="  { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false); }
-".*="  { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false); }
-"./="  { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false); }
-".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false); }
-{POW}=  { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false); }
-{EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false); }
-"&="   { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false); }
-"|="   { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false); }
-"<<="  { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false); }
-">>="  { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false); }
+"+="   { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false, false); }
+"-="   { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false, false); }
+"*="   { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false, false); }
+"/="   { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false, false); }
+"\\="  { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false, false); }
+".+="  { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false, false); }
+".-="  { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false, false); }
+".*="  { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false, false); }
+"./="  { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false, false); }
+".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false, false); }
+{POW}=  { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false, false); }
+{EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false, false); }
+"&="   { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false, false); }
+"|="   { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false, false); }
+"<<="  { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false, false); }
+">>="  { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false, false); }
 
 \{{S}* {
     LEXER_DEBUG ("\\{{S}*");
@@ -948,6 +982,7 @@
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.looking_for_object_index = false;
+    lexer_flags.at_beginning_of_statement = false;
 
     promptflag--;
     eat_whitespace ();
@@ -963,6 +998,7 @@
     lexer_flags.looking_at_object_index.pop_front ();
 
     lexer_flags.looking_for_object_index = true;
+    lexer_flags.at_beginning_of_statement = false;
 
     nesting_level.remove ();
 
@@ -1416,18 +1452,21 @@
       switch (kw->kw_id)
        {
        case break_kw:
-       case case_kw:
        case catch_kw:
        case continue_kw:
        case else_kw:
+       case otherwise_kw:
+       case return_kw:
+       case unwind_protect_cleanup_kw:
+         lexer_flags.at_beginning_of_statement = true;
+         break;
+
+       case case_kw:
        case elseif_kw:
        case global_kw:
-       case otherwise_kw:
-       case return_kw:
        case static_kw:
        case until_kw:
-       case unwind_protect_cleanup_kw:
-         break;
+         break;
 
        case end_kw:
          if (inside_any_object_index ()
@@ -1442,24 +1481,28 @@
              else
                {
                  yylval.tok_val = new token (token::simple_end, l, c);
+                 lexer_flags.at_beginning_of_statement = true;
                  end_tokens_expected--;
                }
            }
          break;
 
        case end_try_catch_kw:
+         yylval.tok_val = new token (token::try_catch_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::try_catch_end, l, c);
          break;
 
        case end_unwind_protect_kw:
+         yylval.tok_val = new token (token::unwind_protect_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::unwind_protect_end, l, c);
          break;
 
        case endfor_kw:
+         yylval.tok_val = new token (token::for_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::for_end, l, c);
          break;
 
        case endfunction_kw:
@@ -1469,40 +1512,52 @@
            else
              {
                yylval.tok_val = new token (token::function_end, l, c);
+               lexer_flags.at_beginning_of_statement = true;
                end_tokens_expected--;
              }
          }
          break;
 
        case endif_kw:
+         yylval.tok_val = new token (token::if_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::if_end, l, c);
          break;
 
        case endswitch_kw:
+         yylval.tok_val = new token (token::switch_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::switch_end, l, c);
          break;
 
        case endwhile_kw:
+         yylval.tok_val = new token (token::while_end, l, c);
+         lexer_flags.at_beginning_of_statement = true;
          end_tokens_expected--;
-         yylval.tok_val = new token (token::while_end, l, c);
          break;
 
        case for_kw:
        case while_kw:
          end_tokens_expected++;
-         // Fall through...
-
-       case do_kw:
          promptflag--;
          lexer_flags.looping++;
          break;
 
+       case do_kw:
+         lexer_flags.at_beginning_of_statement = true;
+         promptflag--;
+         lexer_flags.looping++;
+         break;
+
+       case try_kw:
+       case unwind_protect_kw:
+         lexer_flags.at_beginning_of_statement = true;
+         end_tokens_expected++;
+         promptflag--;
+         break;
+
        case if_kw:
-       case try_kw:
        case switch_kw:
-       case unwind_protect_kw:
          end_tokens_expected++;
          promptflag--;
          break;
@@ -1519,6 +1574,8 @@
 
                        yylval.tok_val = new token (token::function_end, l, c);
                        token_stack.push (yylval.tok_val);
+
+                       lexer_flags.at_beginning_of_statement = true;
 
                        return END;
                      }
@@ -1888,6 +1945,7 @@
   current_input_column = 1;
   lexer_flags.quote_is_transpose = false;
   lexer_flags.convert_spaces_to_comma = true;
+  lexer_flags.at_beginning_of_statement = true;
 
   if (YY_START == COMMAND_START)
     BEGIN (INITIAL);
@@ -2332,6 +2390,7 @@
   lexer_flags.quote_is_transpose = true;
   lexer_flags.convert_spaces_to_comma = true;
   lexer_flags.looking_for_object_index = true;
+  lexer_flags.at_beginning_of_statement = false;
 
   yylval.tok_val = new token (value, yytext, input_line_number,
                              current_input_column);
@@ -2568,6 +2627,7 @@
                    gripe_single_quote_string ();
 
                   lexer_flags.looking_for_object_index = true;
+                 lexer_flags.at_beginning_of_statement = false;
 
                  return delim == '"' ? DQ_STRING : SQ_STRING;
                }
@@ -2781,6 +2841,257 @@
     }
 }
 
+static bool
+next_token_can_follow_bin_op (void)
+{
+  std::stack<char> buf;
+
+  int c = EOF;
+
+  // Skip whitespace in current statement on current line
+  while (true)
+    {
+      c = text_yyinput ();
+
+      if (! match_any (c, ",;\n") && (c == ' ' || c == '\t'))
+       buf.push (c);
+      else
+       break;
+    }
+
+  // Restore input.
+  while (! buf.empty ())
+    {
+      xunput (buf.top (), yytext);
+
+      buf.pop ();
+    }
+
+  return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
+}
+
+static bool
+looks_like_command_arg (void)
+{
+  bool retval = true;
+
+  int c0 = text_yyinput ();
+
+  switch (c0)
+    {
+    // = ==
+    case '=':
+      {
+       int c1 = text_yyinput ();
+
+       if (c1 == '=')
+         {
+           int c2 = text_yyinput ();
+
+           if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+               && next_token_can_follow_bin_op ())
+             retval = false;
+
+           xunput (c2, yytext);
+         }
+       else
+         retval = false;
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    case '(':
+    case '{':
+      // Indexing.
+      retval = false;
+      break;
+
+    case '\n':
+      // EOL.
+      break;
+
+    case '\'':
+    case '"':
+      // Beginning of a character string.
+      break;
+
+    // + - ++ -- += -=
+    case '+':
+    case '-':
+      {
+       int c1 = text_yyinput ();
+
+       switch (c1)
+         {
+         case '\n':
+           // EOL.
+         case '+':
+         case '-':
+           // Unary ops, spacing doesn't matter.
+           break;
+
+         case '\t':
+         case ' ':
+           {
+             if (next_token_can_follow_bin_op ())
+               retval = false;
+           }
+           break;
+
+         case '=':
+           {
+             int c2 = text_yyinput ();
+
+             if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+                 && next_token_can_follow_bin_op ())
+               retval = false;
+
+             xunput (c2, yytext);
+           }
+           break;
+         }
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    case ':':
+    case '/':
+    case '\\':
+    case '^':
+      {
+       int c1 = text_yyinput ();
+
+       if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
+           && next_token_can_follow_bin_op ())
+         retval = false;
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    // .+ .- ./ .\ .^ .* .**
+    case '.':
+      {
+       int c1 = text_yyinput ();
+
+       if (match_any (c1, "+-/\\^*"))
+         {
+           int c2 = text_yyinput ();
+
+           if (c2 == '=')
+             {
+               int c3 = text_yyinput ();
+
+               if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
+                   && next_token_can_follow_bin_op ())
+                 retval = false;
+
+               xunput (c3, yytext);
+             }
+           else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+                    && next_token_can_follow_bin_op ())
+             retval = false;
+
+           xunput (c2, yytext);
+         }
+       else if (! match_any (c1, ",;\n")
+                && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
+                    && c1 != '.'))
+         {
+           // Structure reference.  FIXME -- is this a complete check?
+
+           retval = false;
+         }
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    // & && | || * **
+    case '&':
+    case '|':
+    case '*':
+      {
+       int c1 = text_yyinput ();
+
+       if (c1 == c0)
+         {
+           int c2 = text_yyinput ();
+
+           if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+               && next_token_can_follow_bin_op ())
+             retval = false;
+
+           xunput (c2, yytext);
+         }
+       else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
+                && next_token_can_follow_bin_op ())
+         retval = false;
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    // < <= > >=
+    case '<':
+    case '>':
+      {
+       int c1 = text_yyinput ();
+
+       if (c1 == '=')
+         {
+           int c2 = text_yyinput ();
+
+           if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+               && next_token_can_follow_bin_op ())
+             retval = false;
+
+           xunput (c2, yytext);
+         }
+       else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
+                && next_token_can_follow_bin_op ())
+         retval = false;
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    // ~= !=
+    case '~':
+    case '!':
+      {
+       int c1 = text_yyinput ();
+
+       // ~ and ! can be unary ops, so require following =.
+       if (c1 == '=')
+         {
+           int c2 = text_yyinput ();
+
+           if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
+               && next_token_can_follow_bin_op ())
+             retval = false;
+
+           xunput (c2, yytext);
+         }
+       else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
+                && next_token_can_follow_bin_op ())
+         retval = false;
+
+       xunput (c1, yytext);
+      }
+      break;
+
+    default:
+      break;
+    }
+
+  xunput (c0, yytext);
+
+  return retval;
+}
+
 // Figure out exactly what kind of token to return when we have seen
 // an identifier.  Handles keywords.  Return -1 if the identifier
 // should be ignored.
@@ -2788,6 +3099,8 @@
 static int
 handle_identifier (void)
 {
+  bool at_bos = lexer_flags.at_beginning_of_statement;
+
   std::string tok = strip_trailing_whitespace (yytext);
 
   int c = yytext[yyleng-1];
@@ -2820,7 +3133,18 @@
       return STRUCT_ELT;
     }
 
+  lexer_flags.at_beginning_of_statement = false;
+
+  // The is_keyword_token may reset
+  // lexer_flags.at_beginning_of_statement.  For example, if it sees
+  // an else token, then the next token is at the beginning of a
+  // statement.
+
   int kw_token = is_keyword_token (tok);
+
+  // If we found a keyword token, then the beginning_of_statement flag
+  // is already set.  Otherwise, we won't be at the beginning of a
+  // statement.
 
   if (lexer_flags.looking_at_function_handle)
     {
@@ -2864,8 +3188,6 @@
 
   int c1 = text_yyinput ();
 
-  bool next_tok_is_paren = (c1 == '(');
-
   bool next_tok_is_eq = false;
   if (c1 == '=')
     {
@@ -2888,13 +3210,17 @@
   // something like [ab,cd] = foo (), force the symbol to be inserted
   // as a variable in the current symbol table.
 
-  if (is_command_name (tok) && ! is_variable (tok))
+  if (! is_variable (tok))
     {
-      if (next_tok_is_eq
-         || lexer_flags.looking_at_decl_list
-         || lexer_flags.looking_at_return_list
-         || (lexer_flags.looking_at_parameter_list
-             && ! lexer_flags.looking_at_initializer_expression))
+      if (at_bos && spc_gobbled && looks_like_command_arg ())
+       {
+         BEGIN (COMMAND_START);
+       }
+      else if (next_tok_is_eq
+              || lexer_flags.looking_at_decl_list
+              || lexer_flags.looking_at_return_list
+              || (lexer_flags.looking_at_parameter_list
+                  && ! lexer_flags.looking_at_initializer_expression))
        {
          force_local_variable (tok);
        }
@@ -2902,14 +3228,8 @@
        {
          lexer_flags.pending_local_variables.insert (tok);
        }
-      else if (! (next_tok_is_paren
-                 || lexer_flags.looking_at_object_index.front ()))
-       {
-         BEGIN (COMMAND_START);
-       }
-
-      if (is_rawcommand_name (tok)
-         && ! lexer_flags.looking_at_object_index.front ())
+      else if (is_rawcommand_name (tok)
+              && ! lexer_flags.looking_at_object_index.front ())
        {
          lexer_flags.doing_rawcommand = true;
          BEGIN (COMMAND_START);
@@ -2932,7 +3252,7 @@
 
   lexer_flags.convert_spaces_to_comma = true;
 
-  if (! next_tok_is_eq)
+  if (! (next_tok_is_eq || YY_START == COMMAND_START))
     {
       lexer_flags.quote_is_transpose = true;
 
@@ -2987,6 +3307,9 @@
 
   // Object index not possible until we've seen something.
   looking_for_object_index = false;
+
+  // Yes, we are at the beginning of a statement.
+  at_beginning_of_statement = true;
 
   // No need to do comma insert or convert spaces to comma at
   // beginning of input. 
diff --git a/src/parse.y b/src/parse.y
--- a/src/parse.y
+++ b/src/parse.y
@@ -1,7 +1,7 @@
 /*
 
 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-              2002, 2003, 2004, 2005, 2006, 2007, 2008 John W. Eaton
+              2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 John W. Eaton
 
 This file is part of Octave.
 
@@ -544,6 +544,29 @@
                  { $$ = make_statement ($1); }
                | command
                  { $$ = make_statement ($1); }
+               | word_list_cmd
+                 { $$ = make_statement ($1); }
+               ;
+
+// =================
+// Word-list command
+// =================
+
+// These are not really like expressions since they can't appear on
+// the RHS of an assignment.  But they are also not like commands (IF,
+// WHILE, etc.).
+
+word_list_cmd  : identifier word_list
+                 { $$ = make_index_expression ($1, $2, '('); }
+               ;
+
+word_list      : string
+                 { $$ = new tree_argument_list ($1); }
+               | word_list string
+                 {
+                   $1->append ($2);
+                   $$ = $1;
+                 }
                ;
 
 // ===========
@@ -855,22 +878,7 @@
                  { $$ = make_assign_op (OR_EQ, $1, $2, $3); }
                ;
 
-word_list_cmd  : identifier word_list
-                 { $$ = make_index_expression ($1, $2, '('); }
-               ;
-
-word_list      : string
-                 { $$ = new tree_argument_list ($1); }
-               | word_list string
-                 {
-                   $1->append ($2);
-                   $$ = $1;
-                 }
-               ;
-
 expression     : simple_expr
-                 { $$ = $1; }
-               | word_list_cmd
                  { $$ = $1; }
                | assign_expr
                  { $$ = $1; }
diff --git a/test/test_io.m b/test/test_io.m
--- a/test/test_io.m
+++ b/test/test_io.m
@@ -196,7 +196,7 @@
 %! STR.struct_fld.y = 1;
 %! 
 %! save struct.dat -struct STR;
-%! STR = load struct.dat ;
+%! STR = load ("struct.dat");
 %!
 %! assert(STR.scalar_fld == 1 && ...
 %!     STR.matrix_fld == [1.1,2;3,4] && ...
@@ -206,7 +206,7 @@
 %!
 %!
 %! save -binary struct.dat -struct STR matrix_fld str*_fld;
-%! STR = load struct.dat ;
+%! STR = load ("struct.dat");
 %!
 %! assert(!isfield(STR,"scalar_fld") && ...
 %!     STR.matrix_fld == [1.1,2;3,4] && ...
diff --git a/test/test_prefer.m b/test/test_prefer.m
--- a/test/test_prefer.m
+++ b/test/test_prefer.m
@@ -134,7 +134,7 @@
 %! ped = print_empty_dimensions ();
 %! print_empty_dimensions (0);
 %! a = cell (1, 1);
-%! b = type -q a;
+%! b = type ("-q", "a");
 %! assert (!isempty (findstr (b{1}, "[]")));
 %! assert (isempty (findstr (b{1} ,"[](0x0)")));
 %! print_empty_dimensions (ped);
@@ -144,7 +144,7 @@
 %! ped = print_empty_dimensions ();
 %! print_empty_dimensions (1);
 %! a = cell (1, 1);
-%! b = type -q a;
+%! b = type ("-q", "a");
 %! assert (!isempty (findstr (b{1}, "[](0x0)")));
 %! print_empty_dimensions (ped);
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]