bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: xgettext PHP parsing bug with associative arrays


From: Bruno Haible
Subject: Re: xgettext PHP parsing bug with associative arrays
Date: Mon, 19 Mar 2007 14:51:01 +0100
User-agent: KMail/1.9.1

Hello,

Robert Vock wrote:
> we are just switching to gettext in our PHP project and I noticed a
> bug in xgettext, when extracting from PHP files with associative
> arrays:
> 
> <?php
> $array = array( 'key' => 'value' );
> echo gettext( $array['key'] );
> ?>
> 
> xgettext extracts the msgid "key" from this code example.
> The resulting po file will contain:
> msgid "key"
> msgstr ""
> 
> I think, to fix this bug, the x-php.c must also parse the square
> brackets and ignore the inner contents. (Just as it does with the
> parentheses)

Yes, you are right. Find attached a patch that implements this. It will
be contained in gettext 0.16.2.

> PS:
> Is there a bugtracker for GNU projects? I did not find one and I could
> not find anyone who had the same problem...

"xgettext --help" says to report bugs to this mailing list. If a bug
tracker were the preferred way of working, we would mention it in the
"xgettext --help" output.

Bruno


2007-03-17  Bruno Haible  <address@hidden>

        * x-php.c (enum token_type_ty): New values token_type_lbracket,
        token_type_rbracket.
        (x_php_lex): Recognize also token_type_lbracket, token_type_rbracket.
        (extract_balanced): Renamed from extract_parenthesized. Add 'delim'
        argument. Handle token_type_lbracket and token_type_rbracket.
        (extract_php): Update.
        Reported by Robert Vock <address@hidden>.

*** gettext-tools/src/x-php.c   19 Feb 2007 03:31:37 -0000      1.22
--- gettext-tools/src/x-php.c   17 Mar 2007 11:52:02 -0000
***************
*** 739,744 ****
--- 739,746 ----
    token_type_lparen,          /* ( */
    token_type_rparen,          /* ) */
    token_type_comma,           /* , */
+   token_type_lbracket,                /* [ */
+   token_type_rbracket,                /* ] */
    token_type_string_literal,  /* "abc" */
    token_type_symbol,          /* symbol, number */
    token_type_other            /* misc. operator */
***************
*** 1092,1097 ****
--- 1094,1107 ----
          tp->type = token_type_comma;
          return;
  
+       case '[':
+         tp->type = token_type_lbracket;
+         return;
+ 
+       case ']':
+         tp->type = token_type_rbracket;
+         return;
+ 
        case '<':
          {
            int c2 = phase1_getc ();
***************
*** 1261,1274 ****
     and msgid_plural can contain subexpressions of the same form.  */
  
  
! /* Extract messages until the next balanced closing parenthesis.
     Extracted messages are added to MLP.
     Return true upon eof, false upon closing parenthesis.  */
  static bool
! extract_parenthesized (message_list_ty *mlp,
!                      flag_context_ty outer_context,
!                      flag_context_list_iterator_ty context_iter,
!                      struct arglist_parser *argparser)
  {
    /* Current argument number.  */
    int arg = 1;
--- 1271,1287 ----
     and msgid_plural can contain subexpressions of the same form.  */
  
  
! /* Extract messages until the next balanced closing parenthesis or bracket.
     Extracted messages are added to MLP.
+    DELIM can be either token_type_rparen or token_type_rbracket, or
+    token_type_eof to accept both.
     Return true upon eof, false upon closing parenthesis.  */
  static bool
! extract_balanced (message_list_ty *mlp,
!                 token_type_ty delim,
!                 flag_context_ty outer_context,
!                 flag_context_list_iterator_ty context_iter,
!                 struct arglist_parser *argparser)
  {
    /* Current argument number.  */
    int arg = 1;
***************
*** 1317,1325 ****
          continue;
  
        case token_type_lparen:
!         if (extract_parenthesized (mlp, inner_context, next_context_iter,
!                                    arglist_parser_alloc (mlp,
!                                                          state ? next_shapes : NULL)))
            {
              arglist_parser_done (argparser, arg);
              return true;
--- 1330,1339 ----
          continue;
  
        case token_type_lparen:
!         if (extract_balanced (mlp, token_type_rparen,
!                               inner_context, next_context_iter,
!                               arglist_parser_alloc (mlp,
!                                                     state ? next_shapes : NULL)))
            {
              arglist_parser_done (argparser, arg);
              return true;
***************
*** 1329,1336 ****
          continue;
  
        case token_type_rparen:
!         arglist_parser_done (argparser, arg);
!         return false;
  
        case token_type_comma:
          arg++;
--- 1343,1356 ----
          continue;
  
        case token_type_rparen:
!         if (delim == token_type_rparen || delim == token_type_eof)
!           {
!             arglist_parser_done (argparser, arg);
!             return false;
!           }
!         next_context_iter = null_context_list_iterator;
!         state = 0;
!         continue;
  
        case token_type_comma:
          arg++;
***************
*** 1342,1347 ****
--- 1362,1386 ----
          state = 0;
          continue;
  
+       case token_type_lbracket:
+         if (extract_balanced (mlp, token_type_rbracket,
+                               null_context, null_context_list_iterator,
+                               arglist_parser_alloc (mlp, NULL)))
+           {
+             arglist_parser_done (argparser, arg);
+             return true;
+           }
+ 
+       case token_type_rbracket:
+         if (delim == token_type_rbracket || delim == token_type_eof)
+           {
+             arglist_parser_done (argparser, arg);
+             return false;
+           }
+         next_context_iter = null_context_list_iterator;
+         state = 0;
+         continue;
+ 
        case token_type_string_literal:
          {
            lex_pos_ty pos;
***************
*** 1400,1409 ****
    /* Initial mode is HTML mode, not PHP mode.  */
    skip_html ();
  
!   /* Eat tokens until eof is seen.  When extract_parenthesized returns
       due to an unbalanced closing parenthesis, just restart it.  */
!   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
!                                arglist_parser_alloc (mlp, NULL)))
      ;
  
    /* Close scanner.  */
--- 1439,1449 ----
    /* Initial mode is HTML mode, not PHP mode.  */
    skip_html ();
  
!   /* Eat tokens until eof is seen.  When extract_balanced returns
       due to an unbalanced closing parenthesis, just restart it.  */
!   while (!extract_balanced (mlp, token_type_eof,
!                           null_context, null_context_list_iterator,
!                           arglist_parser_alloc (mlp, NULL)))
      ;
  
    /* Close scanner.  */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]