[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [bug-gettext] [PATCH 4/4] xg-js: fix end-of string miss bug due to backslashes
From: |
Daiki Ueno |
Subject: |
Re: [bug-gettext] [PATCH 4/4] xg-js: fix end-of string miss bug due to backslashes |
Date: |
Sun, 12 May 2013 17:10:48 +0900 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/23.4 (gnu/linux) |
Daiki Ueno <address@hidden> writes:
> Thanks for looking into this. However, I doubt that backslash_counter
> is even needed for JavaScript parser. The original code in the Python
> parser has a check like this:
>
> if (c == quote_char && (interpret_ansic || (*backslash_counter & 1) == 0))
>
> In JavaScript parser, interpret_ansic is always true
I've simplified phase7_getuc by removing the BACKSLASH_COUNTER argument and
rebased the patches (pushed to the maint branch).
> That means, "\xxx" should be treated as "xxx", while the current
> implementation treats it as "\\xxx".
Attached is a patch for this. Testing would be appreciated.
Regards,
--
Daiki Ueno
From d9a927107b878ed313245815fdaa3f44b7f4a08e Mon Sep 17 00:00:00 2001
From: Daiki Ueno <address@hidden>
Date: Sun, 12 May 2013 16:54:58 +0900
Subject: [PATCH] Improve backslash handling in JavaScript scanner
---
gettext-tools/src/ChangeLog | 5 +
gettext-tools/src/x-javascript.c | 223 ++++++++++++++---------------
gettext-tools/tests/ChangeLog | 4 +
gettext-tools/tests/xgettext-javascript-4 | 6 +-
4 files changed, 125 insertions(+), 113 deletions(-)
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index d329dfc..eb96da9 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,5 +1,10 @@
2013-05-12 Daiki Ueno <address@hidden>
+ * x-javascript.c (phase7_getuc): Treat non-legitimate character
+ escape sequences more strictly.
+
+2013-05-12 Daiki Ueno <address@hidden>
+
Fix end-of-string handling in JavaScript scanner.
* x-javascript.c (phase7_getuc): Remove unused BACKSLASH_COUNTER
argument; all callers changed.
diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c
index a9296df..6542355 100644
--- a/gettext-tools/src/x-javascript.c
+++ b/gettext-tools/src/x-javascript.c
@@ -960,7 +960,10 @@ free_token (token_ty *tp)
sequences or deprecated octal escape sequences:
\xXX, \OOO
Any unicode point can be entered using Unicode escape sequences:
- \uNNNN */
+ \uNNNN
+ If a sequence after a backslash is not a legitimate character
+ escape sequence, the character value is the sequence itself without
+ a backslash. For example, \xxx is treated as xxx. */
static int
phase7_getuc (int quote_char)
@@ -976,7 +979,7 @@ phase7_getuc (int quote_char)
return P7_EOF;
if (c == quote_char)
- return P7_STRING_END;
+ return P7_STRING_END;
if (c == '\n')
{
@@ -989,128 +992,124 @@ phase7_getuc (int quote_char)
}
if (c != '\\')
- return UNICODE (c);
+ return UNICODE (c);
/* Dispatch according to the character following the backslash. */
c = phase2_getc ();
if (c == UEOF)
- return UNICODE ('\\');
+ return P7_EOF;
- switch (c)
+ switch (c)
+ {
+ case '\n':
+ continue;
+ case 'b':
+ return UNICODE ('\b');
+ case 'f':
+ return UNICODE ('\f');
+ case 'n':
+ return UNICODE ('\n');
+ case 'r':
+ return UNICODE ('\r');
+ case 't':
+ return UNICODE ('\t');
+ case 'v':
+ return UNICODE ('\v');
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
{
- case '\n':
- continue;
- case '\\':
- return UNICODE (c);
- case '\'': case '"':
- return UNICODE (c);
- case 'b':
- return UNICODE ('\b');
- case 'f':
- return UNICODE ('\f');
- case 'n':
- return UNICODE ('\n');
- case 'r':
- return UNICODE ('\r');
- case 't':
- return UNICODE ('\t');
- case 'v':
- return UNICODE ('\v');
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
- {
- int n = c - '0';
+ int n = c - '0';
- c = phase2_getc ();
- if (c != UEOF)
- {
- if (c >= '0' && c <= '7')
- {
- n = (n << 3) + (c - '0');
- c = phase2_getc ();
- if (c != UEOF)
- {
- if (c >= '0' && c <= '7')
- n = (n << 3) + (c - '0');
- else
- phase2_ungetc (c);
- }
- }
- else
- phase2_ungetc (c);
- }
- return UNICODE (n);
- }
- case 'x':
- {
- int c1 = phase2_getc ();
- int n1;
-
- if (c1 >= '0' && c1 <= '9')
- n1 = c1 - '0';
- else if (c1 >= 'A' && c1 <= 'F')
- n1 = c1 - 'A' + 10;
- else if (c1 >= 'a' && c1 <= 'f')
- n1 = c1 - 'a' + 10;
- else
- n1 = -1;
+ c = phase2_getc ();
+ if (c != UEOF)
+ {
+ if (c >= '0' && c <= '7')
+ {
+ n = (n << 3) + (c - '0');
+ c = phase2_getc ();
+ if (c != UEOF)
+ {
+ if (c >= '0' && c <= '7')
+ n = (n << 3) + (c - '0');
+ else
+ phase2_ungetc (c);
+ }
+ }
+ else
+ phase2_ungetc (c);
+ }
+ return UNICODE (n);
+ }
+ case 'x':
+ {
+ int c1 = phase2_getc ();
+ int n1;
+
+ if (c1 >= '0' && c1 <= '9')
+ n1 = c1 - '0';
+ else if (c1 >= 'A' && c1 <= 'F')
+ n1 = c1 - 'A' + 10;
+ else if (c1 >= 'a' && c1 <= 'f')
+ n1 = c1 - 'a' + 10;
+ else
+ n1 = -1;
- if (n1 >= 0)
- {
- int c2 = phase2_getc ();
- int n2;
-
- if (c2 >= '0' && c2 <= '9')
- n2 = c2 - '0';
- else if (c2 >= 'A' && c2 <= 'F')
- n2 = c2 - 'A' + 10;
- else if (c2 >= 'a' && c2 <= 'f')
- n2 = c2 - 'a' + 10;
- else
- n2 = -1;
-
- if (n2 >= 0)
- {
- int n = (n1 << 4) + n2;
- return UNICODE (n);
- }
+ if (n1 >= 0)
+ {
+ int c2 = phase2_getc ();
+ int n2;
+
+ if (c2 >= '0' && c2 <= '9')
+ n2 = c2 - '0';
+ else if (c2 >= 'A' && c2 <= 'F')
+ n2 = c2 - 'A' + 10;
+ else if (c2 >= 'a' && c2 <= 'f')
+ n2 = c2 - 'a' + 10;
+ else
+ n2 = -1;
+
+ if (n2 >= 0)
+ {
+ int n = (n1 << 4) + n2;
+ return UNICODE (n);
+ }
- phase2_ungetc (c2);
- }
- phase2_ungetc (c1);
- phase2_ungetc (c);
- return UNICODE ('\\');
- }
- case 'u':
- {
- unsigned char buf[4];
- unsigned int n = 0;
- int i;
+ phase2_ungetc (c2);
+ }
+ phase2_ungetc (c1);
+ return UNICODE (c);
+ }
+ case 'u':
+ {
+ unsigned char buf[4];
+ unsigned int n = 0;
+ int i;
- for (i = 0; i < 4; i++)
- {
- int c1 = phase2_getc ();
-
- if (c1 >= '0' && c1 <= '9')
- n = (n << 4) + (c1 - '0');
- else if (c1 >= 'A' && c1 <= 'F')
- n = (n << 4) + (c1 - 'A' + 10);
- else if (c1 >= 'a' && c1 <= 'f')
- n = (n << 4) + (c1 - 'a' + 10);
- else
- {
- phase2_ungetc (c1);
- while (--i >= 0)
- phase2_ungetc (buf[i]);
- phase2_ungetc (c);
- return UNICODE ('\\');
- }
+ for (i = 0; i < 4; i++)
+ {
+ int c1 = phase2_getc ();
+
+ if (c1 >= '0' && c1 <= '9')
+ n = (n << 4) + (c1 - '0');
+ else if (c1 >= 'A' && c1 <= 'F')
+ n = (n << 4) + (c1 - 'A' + 10);
+ else if (c1 >= 'a' && c1 <= 'f')
+ n = (n << 4) + (c1 - 'a' + 10);
+ else
+ {
+ phase2_ungetc (c1);
+ while (--i >= 0)
+ phase2_ungetc (buf[i]);
+ return UNICODE (c);
+ }
- buf[i] = c1;
- }
- return UNICODE (n);
- }
+ buf[i] = c1;
+ }
+ return UNICODE (n);
}
+ default:
+ return UNICODE (c);
+ }
}
}
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog
index 3727ac4..721c29a 100644
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,7 @@
+2013-05-12 Daiki Ueno <address@hidden>
+
+ * xgettext-javascript-4: Add a test for normal escape sequences.
+
2013-05-12 Andreas Stricker <address@hidden>
Improve JavaScript scanner tests.
diff --git a/gettext-tools/tests/xgettext-javascript-4 b/gettext-tools/tests/xgettext-javascript-4
index 92805e9..3b4ff47 100755
--- a/gettext-tools/tests/xgettext-javascript-4
+++ b/gettext-tools/tests/xgettext-javascript-4
@@ -1,6 +1,6 @@
#!/bin/sh
-# Test of JavaScript Unicode support.
+# Test of JavaScript escape sequences in string literals.
tmpfiles=""
trap 'rm -fr $tmpfiles' 1 2 3 15
@@ -9,6 +9,7 @@ tmpfiles="$tmpfiles xg-js-4.js"
cat <<\EOF > xg-js-4.js
var s1 = _("Unicode escape \u3042");
var s2 = _("Surrogate pair \uD835\uDC9C");
+var s3 = _("Escape sequence \1411 \x622 \xxx \y");
EOF
tmpfiles="$tmpfiles xg-js-4.err xg-js-4.tmp xg-js-4.pot"
@@ -44,6 +45,9 @@ msgstr ""
msgid "Surrogate pair 𝒜"
msgstr ""
+
+msgid "Escape sequence a1 b2 xxx y"
+msgstr ""
EOF
: ${DIFF=diff}
--
1.7.10.4
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [bug-gettext] [PATCH 4/4] xg-js: fix end-of string miss bug due to backslashes,
Daiki Ueno <=