[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-wget] [PATCH] Use u8_check() instead of our own utf8 checking
From: |
Ángel González |
Subject: |
[Bug-wget] [PATCH] Use u8_check() instead of our own utf8 checking |
Date: |
Mon, 06 Jul 2015 02:05:57 +0200 |
User-agent: |
Thunderbird |
* bootstrap.conf: Enable u8-check module
* src/iri.c: Remove _utf8_is_valid()
---
This is probably the shortest-lived function in wget :)
I didn't change the URLs, but there are probably more suitable ones.
bootstrap.conf | 1 +
src/iri.c | 62 ++++++++++++++--------------------------------------------
2 files changed, 16 insertions(+), 47 deletions(-)
diff --git a/bootstrap.conf b/bootstrap.conf
index 4fff711..376a549 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -85,6 +85,7 @@ strtoll
timegm
tmpdir
unlocked-io
+unistr/u8-check
update-copyright
vasprintf
vsnprintf
diff --git a/src/iri.c b/src/iri.c
index a6b1c6e..7d66e9d 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -43,6 +43,7 @@ as that of the covered work. */
#include "url.h"
#include "c-strcase.h"
#include "c-strcasestr.h"
+#include "unistr.h"
#include "xstrndup.h"
/* RFC3987 section 3.1 mandates STD3 ASCII RULES */
@@ -220,50 +221,6 @@ locale_to_utf8 (const char *str)
return str;
}
-/*
- * Work around a libidn <= 1.30 vulnerability.
- *
- * The function checks for a valid UTF-8 character sequence before
- * passing it to idna_to_ascii_8z().
- *
- * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
- * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
- * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
- */
-static bool
-_utf8_is_valid(const char *utf8)
-{
- const unsigned char *s = (const unsigned char *) utf8;
-
- while (*s)
- {
- if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
- s++;
- else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
- {
- if ((s[1] & 0xC0) != 0x80)
- return false;
- s+=2;
- }
- else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
- {
- if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
- return false;
- s+=3;
- }
- else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
- {
- if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
- return false;
- s+=4;
- }
- else
- return false;
- }
-
- return true;
-}
-
/* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
on error. */
char *
@@ -272,6 +229,7 @@ idn_encode (const struct iri *i, const char *host)
int ret;
char *ascii_encoded;
char *utf8_encoded = NULL;
+ const char *utf8_host;
/* Encode to UTF-8 if not done */
if (!i->utf8_encode)
@@ -280,16 +238,26 @@ idn_encode (const struct iri *i, const char *host)
return NULL; /* Nothing to encode or an error occured */
}
- if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+ utf8_host = utf8_encoded ? utf8_encoded : host;
+
+ /*
+ * Verify that utf8_host is a valid UTF-8 character sequence before
+ * passing it to idna_to_ascii_8z().
+ *
+ * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+ * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+ * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+ */
+ if (u8_check (utf8_host, strlen(utf8_host)))
{
logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"),
- quote(utf8_encoded ? utf8_encoded : host));
+ quote (utf8_host));
xfree (utf8_encoded);
return NULL;
}
/* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
- ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
+ ret = idna_to_ascii_8z (utf8_host, &ascii_encoded, IDNA_FLAGS);
xfree (utf8_encoded);
if (ret != IDNA_SUCCESS)
--
2.4.3