From 79ea716e209a14f3cdb567883b60c529df621b62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20P=C3=BDcha?=
Date: Fri, 14 Feb 2014 11:27:03 +0100
Subject: [PATCH] URL-decode the filename parameter of Content-Disposition HTTP header if it is encoded

---
 src/ChangeLog | 10 ++++++++++
 src/cookies.c |  6 +++---
 src/http.c    | 33 ++++++++++++++++++++++++++-------
 src/http.h    |  2 +-
 src/url.c     |  2 +-
 src/url.h     |  1 +
 6 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index b7b6753..4144b4e 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,13 @@
+2014-02-14 Vladimír Pýcha
+
+	* http.c (parse_content_disposition, extract_param)
+	(append_value_to_filename): URL-decode the filename parameter of
+	Content-Disposition HTTP header if it is encoded. This is related to
+	--content-disposition.
+	New parameter of extract_param(), "is_url_encoded".
+	All callers of extract_param() changed.
+	* url.c (url_unescape): Remove "static" modifier.
+
 2014-02-06 Giuseppe Scrivano
 
 	* main.c (print_version): Move copyright year out of the localized
diff --git a/src/cookies.c b/src/cookies.c
index 4efda88..6ba7b5a 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -346,7 +346,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
   struct cookie *cookie = cookie_new ();
   param_token name, value;
 
-  if (!extract_param (&ptr, &name, &value, ';'))
+  if (!extract_param (&ptr, &name, &value, ';', NULL))
     goto error;
   if (!value.b)
     goto error;
@@ -360,7 +360,7 @@ parse_set_cookie (const char *set_cookie, bool silent)
   cookie->attr = strdupdelim (name.b, name.e);
   cookie->value = strdupdelim (value.b, value.e);
 
-  while (extract_param (&ptr, &name, &value, ';'))
+  while (extract_param (&ptr, &name, &value, ';', NULL))
     {
       if (TOKEN_IS (name, "domain"))
         {
@@ -1377,7 +1377,7 @@ test_cookies (void)
       param_token name, value;
       const char *ptr = data;
       int j = 0;
-      while (extract_param (&ptr, &name, &value, ';'))
+      while (extract_param (&ptr, &name, &value, ';', NULL))
         {
           char *n = strdupdelim (name.b, name.e);
           char *v = strdupdelim (value.b, value.e);
diff --git a/src/http.c b/src/http.c
index 5715df6..690fcde 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1060,13 +1060,21 @@ modify_param_value (param_token *value, int encoding_type )
    filename=\"foo bar\"", the first call to this function will return
    the token named "attachment" and no value, and the second call will
    return the token named "filename" and value "foo bar". The third
-   call will return false, indicating no more valid tokens. */
+   call will return false, indicating no more valid tokens.
+
+   is_url_encoded is an out parameter. If not NULL, a boolean value will be
+   stored into it, letting the caller know whether or not the extracted value is
+   URL-encoded. The caller can then decode it with url_unescape(), which however
+   performs decoding in-place. URL-encoding is used by RFC 2231 to support
+   non-US-ASCII characters in HTTP header values. */
 
 bool
 extract_param (const char **source, param_token *name, param_token *value,
-               char separator)
+               char separator, bool *is_url_encoded)
 {
   const char *p = *source;
+  if (is_url_encoded)
+    *is_url_encoded = false; /* initializing the out parameter */
 
   while (c_isspace (*p)) ++p;
   if (!*p)
@@ -1125,6 +1133,8 @@ extract_param (const char **source, param_token *name, param_token *value,
   int param_type = modify_param_name(name);
   if (NOT_RFC2231 != param_type)
     {
+      if (RFC2231_ENCODING == param_type && is_url_encoded)
+        *is_url_encoded = true;
       modify_param_value(value, param_type);
     }
   return true;
@@ -1137,13 +1147,16 @@ extract_param (const char **source, param_token *name, param_token *value,
 /* Appends the string represented by VALUE to FILENAME */
 
 static void
-append_value_to_filename (char **filename, param_token const * const value)
+append_value_to_filename (char **filename, param_token const * const value,
+                          bool is_url_encoded)
 {
   int original_length = strlen(*filename);
   int new_length = strlen(*filename) + (value->e - value->b);
   *filename = xrealloc (*filename, new_length+1);
   memcpy (*filename + original_length, value->b, (value->e - value->b));
   (*filename)[new_length] = '\0';
+  if (is_url_encoded)
+    url_unescape (*filename + original_length);
 }
 
 #undef MAX
@@ -1176,7 +1189,9 @@ parse_content_disposition (const char *hdr, char **filename)
 {
   param_token name, value;
   *filename = NULL;
-  while (extract_param (&hdr, &name, &value, ';'))
+  bool is_url_encoded = false;
+  for ( ; extract_param (&hdr, &name, &value, ';', &is_url_encoded);
+        is_url_encoded = false)
     {
       int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
       if ( isFilename && value.b != NULL)
@@ -1192,9 +1207,13 @@ parse_content_disposition (const char *hdr, char **filename)
             continue;
 
           if (*filename)
-            append_value_to_filename (filename, &value);
+            append_value_to_filename (filename, &value, is_url_encoded);
           else
-            *filename = strdupdelim (value.b, value.e);
+            {
+              *filename = strdupdelim (value.b, value.e);
+              if (is_url_encoded)
+                url_unescape (*filename);
+            }
         }
     }
 
@@ -3730,7 +3749,7 @@ digest_authentication_encode (const char *au, const char *user,
   realm = opaque = nonce = algorithm = qop = NULL;
 
   au += 6; /* skip over `Digest' */
-  while (extract_param (&au, &name, &value, ','))
+  while (extract_param (&au, &name, &value, ',', NULL))
     {
       size_t i;
       size_t namelen = name.e - name.b;
diff --git a/src/http.h b/src/http.h
index 389456b..21f1ed5 100644
--- a/src/http.h
+++ b/src/http.h
@@ -43,7 +43,7 @@ typedef struct {
   /* A token consists of characters in the [b, e) range. */
   const char *b, *e;
 } param_token;
-bool extract_param (const char **, param_token *, param_token *, char);
+bool extract_param (const char **, param_token *, param_token *, char, bool *);
 
 
 #endif /* HTTP_H */
diff --git a/src/url.c b/src/url.c
index f554432..f32c726 100644
--- a/src/url.c
+++ b/src/url.c
@@ -169,7 +169,7 @@ static const unsigned char urlchr_table[256] =
    The transformation is done in place. If you need the original
    string intact, make a copy before calling this function. */
 
-static void
+void
 url_unescape (char *s)
 {
   char *t = s; /* t - tortoise */
diff --git a/src/url.h b/src/url.h
index cd3782b..6d18ed8 100644
--- a/src/url.h
+++ b/src/url.h
@@ -101,6 +101,7 @@ struct url
 
 char *url_escape (const char *);
 char *url_escape_unsafe_and_reserved (const char *);
+void url_unescape (char *);
 
 struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
 char *url_error (const char *, int);
-- 
1.7.9.5