From d10bdeeb31225238f47a9002e891c62c06cd6a47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?=
Date: Tue, 30 Jun 2015 09:55:14 +0200
Subject: [PATCH] Work around a libidn <= 1.30 vulnerability

* src/iri.c: Add _utf8_is_valid() to check UTF-8 sequences before
passing them to idna_to_ascii_8z().
---
 src/iri.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/iri.c b/src/iri.c
index 10ae994..faeca90 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -219,6 +219,51 @@ locale_to_utf8 (const char *str)
   return str;
 }
 
+/*
+ * Work around a libidn <= 1.30 vulnerability.
+ *
+ * The function checks for a valid UTF-8 character sequence before
+ * passing it to idna_to_ascii_8z().
+ *
+ * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+ * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+ * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+ */
+static bool
+_utf8_is_valid(const char *utf8)
+{
+  int i, offset = 0;
+  const unsigned char *s = (const unsigned char *) utf8;
+
+  while (*s)
+    {
+      if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
+        offset = 1;
+      else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
+        offset = 2;
+      else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
+        offset = 3;
+      else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+        offset = 4;
+      else if ((*s & 0xFC) == 0xF8)
+        offset = 5;
+      else if ((*s & 0xFE) == 0xFC)
+        offset = 6;
+      else
+        return false;
+
+      for (i = 1; i < offset; i++)
+        {
+          if ((s[i] & 0xC0) != 0x80)
+            return false;
+        }
+
+      s += offset;
+    }
+
+  return true;
+}
+
 /* Try to "ASCII encode" UTF-8 host.  Return the new domain on success or NULL
    on error.  */
 char *
@@ -235,6 +280,13 @@ idn_encode (struct iri *i, char *host)
           return NULL;  /* Nothing to encode or an error occured */
     }
 
+  if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+    {
+      xfree (utf8_encoded);
+      logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence\n"));
+      return NULL;
+    }
+
   /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
   ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
   xfree (utf8_encoded);
-- 
1.9.1