From c6a259bdc1de21b2487943d329f155ca4b14ff38 Mon Sep 17 00:00:00 2001 From: Sean Burford Date: Thu, 21 Jul 2016 14:15:49 +1000 Subject: [PATCH] [xattr] Keep fetched URLs in POSIX extended attributes These attributes provide a lightweight method of later determining where a file was downloaded from. This patch changes: * autoconf detects whether extended attributes are available and enables the code if they are. * The new flags --xattr and --no-xattr control whether xattr is enabled. * The new command "xattr = (on|off)" can be used in ~/.wgetrc or /etc/wgetrc * The original and redirected URLs are recorded as shown below. * This works for both single fetches and recursive mode. The attributes that are set are: user.xdg.origin.url: The URL that the content was fetched from. user.xdg.referrer.url: The URL that was originally requested. Here is an example, where http://archive.org redirects to https://archive.org: $ wget --xattr http://archive.org ... $ getfattr -d index.html user.xdg.origin.url="https://archive.org/" user.xdg.referrer.url="http://archive.org/" These attributes were chosen based on those stored by Google Chrome https://bugs.chromium.org/p/chromium/issues/detail?id=45903 and curl https://github.com/curl/curl/blob/master/src/tool_xattr.c --- configure.ac | 27 ++++++++++++++++++++++ src/Makefile.am | 8 +++++-- src/ftp.c | 10 ++++++++ src/http.c | 23 ++++++++++++++++--- src/init.c | 9 ++++++++ src/main.c | 7 ++++++ src/options.h | 2 ++ src/xattr.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/xattr.h | 45 ++++++++++++++++++++++++++++++++++++ 9 files changed, 197 insertions(+), 5 deletions(-) create mode 100644 src/xattr.c create mode 100644 src/xattr.h diff --git a/configure.ac b/configure.ac index eb303f0..cce20c6 100644 --- a/configure.ac +++ b/configure.ac @@ -771,11 +771,37 @@ AS_IF([test "X$with_cares" == "Xyes"],[ RESOLVER_INFO="libc, --bind-dns-address and --dns-servers not available" ]) +dnl +dnl Extended Attribute support 
+dnl + +AC_ARG_ENABLE([xattr], + [AS_HELP_STRING([--disable-xattr], [disable support for POSIX Extended Attributes])], + [ENABLE_XATTR=$enableval], + [ENABLE_XATTR=yes]) + +case "$host_os" in + *linux* | *darwin*) xattr_syscalls="fsetxattr" ;; + freebsd*) xattr_syscalls="extattr_set_fd" ;; + *) AC_MSG_NOTICE([Disabling Extended Attribute support: your system is not known to support extended attributes.]) + ENABLE_XATTR=no +esac + +if test "X${ENABLE_XATTR}" = "Xyes"; then + AC_CHECK_FUNCS([$xattr_syscalls], [], [ + AC_MSG_NOTICE([Disabling Extended Attribute support: your system does not support $xattr_syscalls]) + ENABLE_XATTR=no + ]) +fi + +test "X${ENABLE_XATTR}" = "Xyes" && AC_DEFINE([ENABLE_XATTR], 1, + [Define if you want file meta-data storing into POSIX Extended Attributes compiled in.]) dnl Needed by src/Makefile.am AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"]) AM_CONDITIONAL([WITH_SSL], [test "X$with_ssl" != "Xno"]) AM_CONDITIONAL([METALINK_IS_ENABLED], [test "X$with_metalink" != "Xno"]) +AM_CONDITIONAL([WITH_XATTR], [test "X$ENABLE_XATTR" != "Xno"]) dnl dnl Create output @@ -801,6 +827,7 @@ AC_MSG_NOTICE([Summary of build options: Digest: $ENABLE_DIGEST NTLM: $ENABLE_NTLM OPIE: $ENABLE_OPIE + POSIX xattr: $ENABLE_XATTR Debugging: $ENABLE_DEBUG Assertions: $ENABLE_ASSERTION Valgrind: $VALGRIND_INFO diff --git a/src/Makefile.am b/src/Makefile.am index ef0b37e..5311bba 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -39,6 +39,10 @@ if METALINK_IS_ENABLED METALINK_OBJ = metalink.c endif +if WITH_XATTR +XATTR_OBJ = xattr.c +endif + # The following line is losing on some versions of make! 
DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\" @@ -49,14 +53,14 @@ wget_SOURCES = connect.c convert.c cookies.c ftp.c \ css_.c css-url.c \ ftp-basic.c ftp-ls.c hash.c host.c hsts.c html-parse.c html-url.c \ http.c init.c log.c main.c netrc.c progress.c ptimer.c \ - recur.c res.c retr.c spider.c url.c warc.c \ + recur.c res.c retr.c spider.c url.c warc.c $(XATTR_OBJ) \ utils.c exits.c build_info.c $(IRI_OBJ) $(METALINK_OBJ) \ css-url.h css-tokens.h connect.h convert.h cookies.h \ ftp.h hash.h host.h hsts.h html-parse.h html-url.h \ http.h http-ntlm.h init.h log.h mswindows.h netrc.h \ options.h progress.h ptimer.h recur.h res.h retr.h \ spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \ - exits.h version.h metalink.h + exits.h version.h metalink.h xattr.h nodist_wget_SOURCES = version.c EXTRA_wget_SOURCES = iri.c LDADD = $(LIBOBJS) ../lib/libgnu.a $(GETADDRINFO_LIB) $(HOSTENT_LIB) $(INET_NTOP_LIB) $(LIBSOCKET)\ diff --git a/src/ftp.c b/src/ftp.c index 88a9777..27d90d6 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -52,6 +52,9 @@ as that of the covered work. */ #include "recur.h" /* for INFINITE_RECURSION */ #include "warc.h" #include "c-strcase.h" +#ifdef ENABLE_XATTR +#include "xattr.h" +#endif #ifdef __VMS # include "vms.h" @@ -1546,6 +1549,13 @@ Error in server response, closing control connection.\n")); tmrate = retr_rate (rd_size, con->dltime); total_download_time += con->dltime; +#ifdef ENABLE_XATTR + if (opt.enable_xattr) + { + set_file_metadata (u->url, NULL, fp); + } +#endif + fd_close (local_sock); /* Close the local file. */ if (!output_stream || con->cmd & DO_LIST) diff --git a/src/http.c b/src/http.c index 7e60a07..0cd142c 100644 --- a/src/http.c +++ b/src/http.c @@ -66,6 +66,9 @@ as that of the covered work. 
*/ # include "metalink.h" # include "xstrndup.h" #endif +#ifdef ENABLE_XATTR +#include "xattr.h" +#endif #ifdef TESTING #include "test.h" @@ -2892,8 +2895,8 @@ fail: If PROXY is non-NULL, the connection will be made to the proxy server, and u->url will be requested. */ static uerr_t -gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, - struct iri *iri, int count) +gethttp (struct url *u, struct url *original_url, struct http_stat *hs, + int *dt, struct url *proxy, struct iri *iri, int count) { struct request *req = NULL; @@ -3754,6 +3757,20 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, goto cleanup; } +#ifdef ENABLE_XATTR + if (opt.enable_xattr) + { + if (original_url != u) + { + set_file_metadata (u->url, original_url->url, fp); + } + else + { + set_file_metadata (u->url, NULL, fp); + } + } +#endif + err = read_response_body (hs, sock, fp, contlen, contrange, chunked_transfer_encoding, u->url, warc_timestamp_str, @@ -3972,7 +3989,7 @@ http_loop (struct url *u, struct url *original_url, char **newloc, *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ - err = gethttp (u, &hstat, dt, proxy, iri, count); + err = gethttp (u, original_url, &hstat, dt, proxy, iri, count); /* Time? 
*/ tms = datetime_str (time (NULL)); diff --git a/src/init.c b/src/init.c index d043d83..06d2e44 100644 --- a/src/init.c +++ b/src/init.c @@ -339,6 +339,9 @@ static const struct { #ifdef USE_WATT32 { "wdebug", &opt.wdebug, cmd_boolean }, #endif +#ifdef ENABLE_XATTR + { "xattr", &opt.enable_xattr, cmd_boolean }, +#endif }; /* Look up CMDNAME in the commands[] and return its position in the @@ -482,6 +485,12 @@ defaults (void) /* HSTS is enabled by default */ opt.hsts = true; #endif + +#ifdef ENABLE_XATTR + opt.enable_xattr = true; +#else + opt.enable_xattr = false; +#endif } /* Return the user's home directory (strdup-ed), or NULL if none is diff --git a/src/main.c b/src/main.c index a5617da..4d69e03 100644 --- a/src/main.c +++ b/src/main.c @@ -436,6 +436,9 @@ static struct cmdline_option option_data[] = #ifdef USE_WATT32 { "wdebug", 0, OPT_BOOLEAN, "wdebug", -1 }, #endif +#ifdef ENABLE_XATTR + { "xattr", 0, OPT_BOOLEAN, "xattr", -1 }, +#endif }; #undef IF_SSL @@ -704,6 +707,10 @@ Download:\n"), N_("\ --preferred-location preferred location for Metalink resources\n"), #endif +#ifdef ENABLE_XATTR + N_("\ + --no-xattr turn off storage of metadata in extended file attributes\n"), +#endif "\n", N_("\ diff --git a/src/options.h b/src/options.h index 85d2de1..b2e31a8 100644 --- a/src/options.h +++ b/src/options.h @@ -127,6 +127,8 @@ struct options bool warc_keep_log; /* Store the log file in a WARC record. */ char **warc_user_headers; /* User-defined WARC header(s). */ + bool enable_xattr; /* Store metadata in POSIX extended attributes. */ + char *user; /* Generic username */ char *passwd; /* Generic password */ bool ask_passwd; /* Ask for password? */ diff --git a/src/xattr.c b/src/xattr.c new file mode 100644 index 0000000..360b032 --- /dev/null +++ b/src/xattr.c @@ -0,0 +1,71 @@ +/* xattr.c -- POSIX Extended Attribute support. + + Copyright (C) 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include "wget.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "log.h"
+#include "xattr.h"
+
+#ifdef USE_XATTR
+/* Store VALUE as attribute NAME on FP; 0 on success, negative on error. */
+static int
+write_xattr_metadata (const char *name, const char *value, FILE *fp) {
+  int retval = -1;
+  if (name && value && fp)
+    {
+      retval = fsetxattr (fileno (fp), name, value, strlen (value), 0);
+      /* FreeBSD's extattr_set_fd returns the attribute length; map it to 0. */
+      retval = (retval < 0) ? retval : 0;
+    }
+  return retval;
+}
+
+#else /* USE_XATTR */
+/* No xattr backend on this platform: ignore the arguments, report success. */
+static int
+write_xattr_metadata (const char *name, const char *value, FILE *fp) {
+  (void)name;
+  (void)value;
+  (void)fp;
+
+  return 0;
+}
+
+#endif /* USE_XATTR */
+
+int
+set_file_metadata (const char *origin_url, const char *referrer_url, FILE *fp) {
+  /* Record FP's download source in user-namespace extended attributes:
+   * ORIGIN_URL as user.xdg.origin.url and, if non-NULL, REFERRER_URL as
+   * user.xdg.referrer.url, both via escnonprint_uri.  Returns -1 if
+   * ORIGIN_URL or FP is NULL, else the last write's result.
+   * NOTE(review): URLs are stored as passed in; confirm they carry no
+   * credentials.  See [http://freedesktop.org/wiki/CommonExtendedAttributes]
+   * and [http://0pointer.de/lennart/projects/mod_mime_xattr/]. */
+  int retval = -1;
+  if (!origin_url || !fp)
+    return retval;
+  retval = write_xattr_metadata ("user.xdg.origin.url", escnonprint_uri (origin_url), fp);
+  if ((!retval) && referrer_url)
+    {
+      retval = write_xattr_metadata ("user.xdg.referrer.url", escnonprint_uri (referrer_url), fp);
+    }
+  return retval;
+}
diff --git a/src/xattr.h b/src/xattr.h
new file mode 100644
index 0000000..375d34f
--- /dev/null
+++ b/src/xattr.h
@@ -0,0 +1,45 @@
+/* xattr.h -- POSIX Extended Attribute function mappings.
+
+   Copyright (C) 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+#ifndef _XATTR_H
+#define _XATTR_H
+
+/* Store origin_url (and referrer_url, if non-NULL) as xattrs on fp. */
+int set_file_metadata (const char *origin_url, const char *referrer_url, FILE *fp);
+
+#if defined(__linux)
+/* libc on Linux has fsetxattr (5 arguments). */
+# include <sys/xattr.h>
+# define USE_XATTR
+#elif defined(__APPLE__)
+/* libc on OS/X has fsetxattr (6 arguments). */
+# include <sys/xattr.h>
+# define fsetxattr(file, name, buffer, size, flags) \
+  fsetxattr((file), (name), (buffer), (size), 0, (flags))
+# define USE_XATTR
+#elif defined(__FreeBSD_version) && (__FreeBSD_version > 500000)
+/* FreeBSD. NOTE(review): __FreeBSD_version needs <sys/param.h> - confirm. */
+# include <sys/types.h>
+# include <sys/extattr.h>
+# define fsetxattr(file, name, buffer, size, flags) \
+  extattr_set_fd((file), EXTATTR_NAMESPACE_USER, (name), (buffer), (size))
+# define USE_XATTR
+#endif
+
+#endif /* _XATTR_H */
-- 
2.8.0.rc3.226.g39d4020