>From 43ec7008f21195dfc33daacbbb760fa456fd77f5 Mon Sep 17 00:00:00 2001
From: Matthew White
Date: Wed, 17 Aug 2016 16:50:18 +0200
Subject: [PATCH 11/27] Enforce Metalink file name verification, strip
directory if necessary
* NEWS: Mention the use of a safe Metalink destination path
* src/metalink.h: Add declaration of functions get_metalink_basename(),
last_component(), metalink_check_safe_path()
* src/metalink.c: Add directive #include "dosname.h"
* src/metalink.c: Add function get_metalink_basename() to return the
basename of a file name, strip w32's drive letter prefixes
* src/metalink.c (retrieve_from_metalink): Enforce Metalink file name
verification, if the file name is unsafe try its basename
* doc/metalink.txt: Update document. Explain --directory-prefix
The function get_metalink_basename() uses FILE_SYSTEM_PREFIX_LEN to
catch any 'C:D:file' (w32 environment), then it removes each drive
letter prefix, i.e. 'C:' and 'D:'.
Unsafe file names contain an absolute, relative, or home path. Safe
paths can be verified by libmetalink's metalink_check_safe_path().
---
NEWS | 7 +++++++
doc/metalink.txt | 4 ++++
src/metalink.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/metalink.h | 4 ++++
4 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/NEWS b/NEWS
index bfb3bef..72f8728 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,13 @@ Please send GNU Wget bug reports to .
* Changes in Wget X.Y.Z
+* When processing a Metalink file, enforce a safe destination path.
+ Remove any drive letter prefix under w32, i.e. 'C:D:file'. Call
+ libmetalink's metalink_check_safe_path() to prevent absolute,
+ relative, or home paths:
+ https://tools.ietf.org/html/rfc5854#section-4.1.2.1
+ https://tools.ietf.org/html/rfc5854#section-4.2.8.3
+
* When processing a Metalink file, --directory-prefix= sets
the top of the retrieval tree to prefix for Metalink downloads.
diff --git a/doc/metalink.txt b/doc/metalink.txt
index 94a07ba..9d9dea2 100644
--- a/doc/metalink.txt
+++ b/doc/metalink.txt
@@ -159,3 +159,7 @@ References:
Set the top of the retrieval tree to prefix for both Metalink/XML
and Metalink/HTTP downloads, see wget(1).
+
+ If combining the prefix with the file name results in an absolute,
+ relative, or home path, the directory components are stripped and
+ only the basename is used. See '4.1 Implemented safety features'.
diff --git a/src/metalink.c b/src/metalink.c
index 3e03aee..be723dc 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -40,6 +40,7 @@ as that of the covered work. */
#include "sha1.h"
#include "sha256.h"
#include "sha512.h"
+#include "dosname.h"
#include "xstrndup.h"
#include "c-strcase.h"
#include
@@ -87,6 +88,8 @@ retrieve_from_metalink (const metalink_t* metalink)
metalink_file_t *mfile = *mfile_ptr;
metalink_resource_t **mres_ptr;
char *filename = NULL;
+ char *basename = NULL;
+ char *safename = NULL;
char *destname = NULL;
bool hash_ok = false;
@@ -110,6 +113,23 @@ retrieve_from_metalink (const metalink_t* metalink)
DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
+ /* Enforce libmetalink's metalink_check_safe_path(). */
+ basename = get_metalink_basename (filename);
+ safename = metalink_check_safe_path (filename) ? filename : basename;
+
+ if (filename != safename)
+ logprintf (LOG_NOTQUIET,
+ _("Unsafe metalink file %s. Stripping directory...\n"),
+ quote (filename));
+
+ if (!basename)
+ {
+ logprintf (LOG_NOTQUIET,
+ _("Rejecting metalink file. Invalid basename.\n"));
+ xfree (filename);
+ continue;
+ }
+
/* Resources are sorted by priority. */
for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
{
@@ -170,6 +190,12 @@ retrieve_from_metalink (const metalink_t* metalink)
/* Avoid recursive Metalink from HTTP headers. */
bool _metalink_http = opt.metalink_over_http;
+ /* FIXME: could be useless. */
+ if (strcmp (url->file, basename))
+ logprintf (LOG_VERBOSE,
+ _("URL file name %s and Metalink file name %s are different.\n"),
+ quote_n (0, url->file), quote_n (1, basename));
+
/* If output_stream is not NULL, then we have failed on
previous resource and are retrying. Thus, continue
with the next resource. Do not close output_stream
@@ -188,10 +214,10 @@ retrieve_from_metalink (const metalink_t* metalink)
after we are finished with the file. */
if (opt.always_rest)
/* continue previous download */
- output_stream = fopen (filename, "ab");
+ output_stream = fopen (safename, "ab");
else
/* create a file with an unique name */
- output_stream = unique_create (filename, true, &destname);
+ output_stream = unique_create (safename, true, &destname);
}
output_stream_regular = true;
@@ -212,7 +238,7 @@ retrieve_from_metalink (const metalink_t* metalink)
NULL, create the opt.output_document "path/file"
*/
if (!destname)
- destname = xstrdup (filename);
+ destname = xstrdup (safename);
/* Store the real file name for displaying in messages,
and for proper RFC5854 "path/file" handling. */
@@ -600,7 +626,7 @@ gpg_skip_verification:
if (retr_err != RETROK)
{
logprintf (LOG_VERBOSE, _("Failed to download %s. Skipping resource.\n"),
- quote (destname ? destname : filename));
+ quote (destname ? destname : safename));
}
else if (!hash_ok)
{
@@ -646,6 +672,34 @@ gpg_skip_verification:
return last_retr_err;
}
+/*
+ Strip the directory components from the given name.
+
+ Return a pointer to the end of the leading directory components.
+ Return NULL if the resulting name is unsafe or invalid.
+
+ Due to security issues posed by saving files with unsafe names, here
+ the use of libmetalink's metalink_check_safe_path() is enforced. If
+ this appears redundant because the given name was already verified,
+ just remember to never underestimate unsafe file names.
+*/
+char *
+get_metalink_basename (char *name)
+{
+ int n;
+ char *basename;
+
+ if (!name)
+ return NULL;
+
+ basename = last_component (name);
+
+ while ((n = FILE_SYSTEM_PREFIX_LEN (basename)) > 0)
+ basename += n;
+
+ return metalink_check_safe_path (basename) ? basename : NULL;
+}
+
/* Append the suffix ".badhash" to the file NAME, except without
overwriting an existing file with that name and suffix. */
void
diff --git a/src/metalink.h b/src/metalink.h
index 020fdf5..d052b70 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -47,6 +47,10 @@ uerr_t retrieve_from_metalink (const metalink_t *metalink);
int metalink_res_cmp (const void *res1, const void *res2);
+int metalink_check_safe_path (const char *path);
+
+char *last_component (char const *name);
+char *get_metalink_basename (char *name);
void badhash_suffix (char *name);
void badhash_or_remove (char *name);
--
2.7.3