>From 5a3f0714a14a5d9554185d72da337be471f25484 Mon Sep 17 00:00:00 2001 From: Ander Juaristi Date: Mon, 13 Apr 2015 16:28:36 +0200 Subject: [PATCH] Fixed incorrect handling of reserved characters when converting to UTF-8. * src/url.c (url_unescape_1): New static function. (url_unescape): Calls url_unescape_1 with mask zero. Preserves same behavior as before. Only code changes. (url_unescape_except_reserved): New function. Unescapes unsafe characters only (leaves reserved characters in their original percent encoding). --- src/iri.c | 2 +- src/url.c | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/iri.c b/src/iri.c index 5278dee..10ae994 100644 --- a/src/iri.c +++ b/src/iri.c @@ -136,7 +136,7 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz /* iconv() has to work on an unescaped string */ in_save = in = xstrndup (in_org, inlen); - url_unescape(in); + url_unescape_except_reserved (in); inlen = strlen(in); len = outlen = inlen * 2; diff --git a/src/url.c b/src/url.c index e78dbc6..73c8dd0 100644 --- a/src/url.c +++ b/src/url.c @@ -161,17 +161,8 @@ static const unsigned char urlchr_table[256] = #undef U #undef RU -/* URL-unescape the string S. - - This is done by transforming the sequences "%HH" to the character - represented by the hexadecimal digits HH. If % is not followed by - two hexadecimal digits, it is inserted literally. - - The transformation is done in place. If you need the original - string intact, make a copy before calling this function. 
*/ - -void -url_unescape (char *s) +static void +url_unescape_1 (char *s, unsigned char mask) { char *t = s; /* t - tortoise */ char *h = s; /* h - hare */ @@ -190,6 +181,8 @@ url_unescape (char *s) if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2]))) goto copychar; c = X2DIGITS_TO_NUM (h[1], h[2]); + if (urlchr_test(c, mask)) + goto copychar; /* Don't unescape %00 because there is no way to insert it into a C string without effectively truncating it. */ if (c == '\0') @@ -201,6 +194,31 @@ url_unescape (char *s) *t = '\0'; } +/* URL-unescape the string S. + + This is done by transforming the sequences "%HH" to the character + represented by the hexadecimal digits HH. If % is not followed by + two hexadecimal digits, it is inserted literally. + + The transformation is done in place. If you need the original + string intact, make a copy before calling this function. */ +void +url_unescape (char *s) +{ + url_unescape_1 (s, 0); +} + +/* URL-unescape the string S. + + This function behaves identically to url_unescape(), but does not + convert characters from "reserved". In other words, it only converts + "unsafe" characters. */ +void +url_unescape_except_reserved (char *s) +{ + url_unescape_1 (s, urlchr_reserved); +} + /* The core of url_escape_* functions. Escapes the characters that match the provided mask in urlchr_table. -- 1.9.1