[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: xgettext PHP parsing bug with associative arrays
From: Bruno Haible
Subject: Re: xgettext PHP parsing bug with associative arrays
Date: Mon, 19 Mar 2007 14:51:01 +0100
User-agent: KMail/1.9.1
Hello,
Robert Vock wrote:
> we are just switching to gettext in our PHP project and I noticed a
> bug in xgettext, when extracting from PHP files with associative
> arrays:
>
> <?php
> $array = array( 'key' => 'value' );
> echo gettext( $array['key'] );
> ?>
>
> xgettext extracts the msgid "key" from this code example.
> The resulting po file will contain:
> msgid "key"
> msgstr ""
>
> I think, to fix this bug, the x-php.c must also parse the square
> brackets and ignore the inner contents. (Just as it does with the
> parentheses)
Yes, you are right. Please find attached a patch that implements this. It
will be included in gettext 0.16.2.
> PS:
> Is there a bugtracker for GNU projects? I did not find one and I could
> not find anyone who had the same problem...
"xgettext --help" tells you to report bugs to this mailing list. If a bug
tracker were the preferred way of working, we would mention it in the
"xgettext --help" output.
Bruno
2007-03-17 Bruno Haible <address@hidden>
* x-php.c (enum token_type_ty): New values token_type_lbracket,
token_type_rbracket.
(x_php_lex): Recognize also token_type_lbracket, token_type_rbracket.
(extract_balanced): Renamed from extract_parenthesized. Add 'delim'
argument. Handle token_type_lbracket and token_type_rbracket.
(extract_php): Update.
Reported by Robert Vock <address@hidden>.
*** gettext-tools/src/x-php.c 19 Feb 2007 03:31:37 -0000 1.22
--- gettext-tools/src/x-php.c 17 Mar 2007 11:52:02 -0000
***************
*** 739,744 ****
--- 739,746 ----
token_type_lparen, /* ( */
token_type_rparen, /* ) */
token_type_comma, /* , */
+ token_type_lbracket, /* [ */
+ token_type_rbracket, /* ] */
token_type_string_literal, /* "abc" */
token_type_symbol, /* symbol, number */
token_type_other /* misc. operator */
***************
*** 1092,1097 ****
--- 1094,1107 ----
tp->type = token_type_comma;
return;
+ case '[':
+ tp->type = token_type_lbracket;
+ return;
+
+ case ']':
+ tp->type = token_type_rbracket;
+ return;
+
case '<':
{
int c2 = phase1_getc ();
***************
*** 1261,1274 ****
and msgid_plural can contain subexpressions of the same form. */
! /* Extract messages until the next balanced closing parenthesis.
Extracted messages are added to MLP.
Return true upon eof, false upon closing parenthesis. */
static bool
! extract_parenthesized (message_list_ty *mlp,
! flag_context_ty outer_context,
! flag_context_list_iterator_ty context_iter,
! struct arglist_parser *argparser)
{
/* Current argument number. */
int arg = 1;
--- 1271,1287 ----
and msgid_plural can contain subexpressions of the same form. */
! /* Extract messages until the next balanced closing parenthesis or bracket.
Extracted messages are added to MLP.
+ DELIM can be either token_type_rparen or token_type_rbracket, or
+ token_type_eof to accept both.
Return true upon eof, false upon closing parenthesis. */
static bool
! extract_balanced (message_list_ty *mlp,
! token_type_ty delim,
! flag_context_ty outer_context,
! flag_context_list_iterator_ty context_iter,
! struct arglist_parser *argparser)
{
/* Current argument number. */
int arg = 1;
***************
*** 1317,1325 ****
continue;
case token_type_lparen:
! if (extract_parenthesized (mlp, inner_context, next_context_iter,
! arglist_parser_alloc (mlp,
! state ? next_shapes
: NULL)))
{
arglist_parser_done (argparser, arg);
return true;
--- 1330,1339 ----
continue;
case token_type_lparen:
! if (extract_balanced (mlp, token_type_rparen,
! inner_context, next_context_iter,
! arglist_parser_alloc (mlp,
! state ? next_shapes :
NULL)))
{
arglist_parser_done (argparser, arg);
return true;
***************
*** 1329,1336 ****
continue;
case token_type_rparen:
! arglist_parser_done (argparser, arg);
! return false;
case token_type_comma:
arg++;
--- 1343,1356 ----
continue;
case token_type_rparen:
! if (delim == token_type_rparen || delim == token_type_eof)
! {
! arglist_parser_done (argparser, arg);
! return false;
! }
! next_context_iter = null_context_list_iterator;
! state = 0;
! continue;
case token_type_comma:
arg++;
***************
*** 1342,1347 ****
--- 1362,1391 ----
state = 0;
continue;
+ case token_type_lbracket:
+ if (extract_balanced (mlp, token_type_rbracket,
+ null_context, null_context_list_iterator,
+ arglist_parser_alloc (mlp, NULL)))
+ {
+ arglist_parser_done (argparser, arg);
+ return true;
+ }
+ /* The bracketed subexpression is complete. Keep scanning at this level
+ instead of falling through into the token_type_rbracket case. */
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_rbracket:
+ if (delim == token_type_rbracket || delim == token_type_eof)
+ {
+ arglist_parser_done (argparser, arg);
+ return false;
+ }
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
case token_type_string_literal:
{
lex_pos_ty pos;
***************
*** 1400,1409 ****
/* Initial mode is HTML mode, not PHP mode. */
skip_html ();
! /* Eat tokens until eof is seen. When extract_parenthesized returns
due to an unbalanced closing parenthesis, just restart it. */
! while (!extract_parenthesized (mlp, null_context,
null_context_list_iterator,
! arglist_parser_alloc (mlp, NULL)))
;
/* Close scanner. */
--- 1439,1449 ----
/* Initial mode is HTML mode, not PHP mode. */
skip_html ();
! /* Eat tokens until eof is seen. When extract_balanced returns
due to an unbalanced closing parenthesis, just restart it. */
! while (!extract_balanced (mlp, token_type_eof,
! null_context, null_context_list_iterator,
! arglist_parser_alloc (mlp, NULL)))
;
/* Close scanner. */