bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Disable automatic wget headers.


From: adham elkarn
Subject: [Bug-wget] [PATCH] Disable automatic wget headers.
Date: Sat, 4 May 2019 15:30:54 +0000

From: sulfastor <address@hidden>

Hi, thank you Darshit for your feedback and code review.
Here are the updates:

        * doc/wget.texi: Added --disable-header documentation.
        * fuzz/wget_options_fuzzer.dict: Update with --disable-header inputs.
        * src/http.c (disabled_header): Check whether a header is disabled.
        (request_set_header): Do not set a header that is disabled.
        (gethttp): Free the disabled headers so that user headers can override them.
        * src/init.c (cmd_dis_header, check_user_disabled_header,
        vec_remove_header): Add new option disabled_headers.
        * src/main.c: added new option --disable-header, added help description
        * src/options.h: added new option --disable-header
        * testenv/Makefile.am: Added new test files
        * testenv/server/http/http_server.py: Added new rule RejectHeaderField
        * testenv/conf/reject_header_field.py: Added new rule RejectHeaderField
        * testenv/README: Added help description for new rule
        * testenv/Test-disable-default-headers.py: Test without using --header
        * testenv/Test-disable-headers-after.py: Test using --header before 
--disable-header
        * testenv/Test-disable-headers-before.py: Test using --header after 
--disable-header

Signed-off-by: sulfastor <address@hidden>, adham elkarn <address@hidden>
---
 doc/wget.texi                           | 19 ++++++
 fuzz/wget_options_fuzzer.dict           | 17 +++++
 src/http.c                              | 31 +++++++++
 src/init.c                              | 88 +++++++++++++++++++++++++
 src/main.c                              |  4 ++
 src/options.h                           |  1 +
 testenv/Makefile.am                     |  3 +
 testenv/README                          |  4 ++
 testenv/Test-disable-default-headers.py | 73 ++++++++++++++++++++
 testenv/Test-disable-headers-after.py   | 77 ++++++++++++++++++++++
 testenv/Test-disable-headers-before.py  | 77 ++++++++++++++++++++++
 testenv/conf/reject_header_field.py     | 12 ++++
 testenv/server/http/http_server.py      |  8 +++
 13 files changed, 414 insertions(+)
 create mode 100644 testenv/Test-disable-default-headers.py
 create mode 100644 testenv/Test-disable-headers-after.py
 create mode 100644 testenv/Test-disable-headers-before.py
 create mode 100644 testenv/conf/reject_header_field.py

diff --git a/doc/wget.texi b/doc/wget.texi
index 7eada2dd..a43224a3 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1542,6 +1542,25 @@ wget --header="Host: foo.bar" http://localhost/
 In versions of Wget prior to 1.10 such use of @samp{--header} caused
 sending of duplicate headers.
 
+@cindex disable header, choose
+@item --disable-header=@var{header-field}
+Remove @var{header-field} from the headers sent in each @sc{http} request.
+
+You may disable more than one header field by specifying
+@samp{--disable-header} more than once, as with @samp{--header}.
+
+@example
+@group
+wget --disable-header='Accept'          \
+     --disable-header='User-Agent'      \
+     --disable-header='Accept-Encoding' \
+        https://example.com/
+@end group
+@end example
+
+Specifying a header field with @samp{--header} after disabling it
+will override it and include it in the @sc{http} request headers.
+
 @cindex Content-Encoding, choose
 @item address@hidden
 Choose the type of compression to be used.  Legal values are
diff --git a/fuzz/wget_options_fuzzer.dict b/fuzz/wget_options_fuzzer.dict
index 9a2dbd8e..12d54d60 100644
--- a/fuzz/wget_options_fuzzer.dict
+++ b/fuzz/wget_options_fuzzer.dict
@@ -30,6 +30,22 @@
 "human"
 "csv"
 "json"
+"Authorization"
+"User-Agent"
+"Referer"
+"Cache-Control"
+"Pragma"
+"If-Modified-Since"
+"Range"
+"Accept"
+"Accept-Encoding"
+"Host"
+"Connection"
+"Proxy-Connection"
+"Content-Type"
+"Content-Length"
+"Proxy-Authorization"
+"Cookie"
 "accept="
 "accept-regex="
 "adjust-extension="
@@ -66,6 +82,7 @@
 "delete-after="
 "directories="
 "directory-prefix="
+"disable-header="
 "dns-caching="
 "dns-timeout="
 "domains="
diff --git a/src/http.c b/src/http.c
index 289d1101..225be265 100644
--- a/src/http.c
+++ b/src/http.c
@@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, const 
char *);
 static bool known_authentication_scheme_p (const char *, const char *);
 static void ensure_extension (struct http_stat *, const char *, int *);
 static void load_cookies (void);
+static bool disabled_header (char*);
 
 static bool cookies_loaded_p;
 static struct cookie_jar *wget_cookie_jar;
@@ -236,6 +237,9 @@ request_set_header (struct request *req, const char *name, 
const char *value,
   struct request_header *hdr;
   int i;
 
+  if (disabled_header ((char*) name))
+    return;
+
   if (!value)
     {
       /* A NULL value is a no-op; if freeing the name is requested,
@@ -1842,6 +1846,27 @@ time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
   return RETROK;
 }
 
+/* Return true if HEADER_NAME names a header that is listed in
+   opt.disabled_headers, false otherwise (including when the list is
+   unset).
+
+   HEADER_NAME may be a bare field name ("User-Agent") or a whole header
+   line ("User-Agent: foo"); only the part before the first ':' takes
+   part in the comparison.  Field names are compared case-insensitively
+   because HTTP header field names are case-insensitive.  */
+static bool
+disabled_header (char *header_name)
+{
+  char **entry;
+  size_t name_len;
+
+  if (!opt.disabled_headers)
+    return false;
+
+  /* Length of the field name: up to the first ':' or, when there is
+     none, the end of the string.  It does not depend on the list entry,
+     so it is computed once, outside the loop.  */
+  name_len = strcspn (header_name, ":");
+
+  for (entry = opt.disabled_headers; *entry; ++entry)
+    if (name_len == strlen (*entry)
+        && 0 == c_strncasecmp (header_name, *entry, name_len))
+      return true;
+
+  return false;
+}
+
 static struct request *
 initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct 
url *proxy,
                     bool inhibit_keep_alive, bool *basic_auth_finished,
@@ -3263,6 +3288,11 @@ gethttp (const struct url *u, struct url *original_url, 
struct http_stat *hs,
   if (opt.user_headers)
     {
       int i;
+      /* Empty the list of disabled headers: it is no longer needed, and
+         clearing it lets the user's --header values override it.  */
+      free_vec (opt.disabled_headers);
+      opt.disabled_headers = NULL;
+
       for (i = 0; opt.user_headers[i]; i++)
         request_set_user_header (req, opt.user_headers[i]);
     }
@@ -5274,6 +5304,7 @@ ensure_extension (struct http_stat *hs, const char *ext, 
int *dt)
   shortext[0] = '\0';
   len = strlen (ext);
   if (len == 5)
+
     {
       memcpy (shortext, ext, len - 1);
       shortext[len - 1] = '\0';
diff --git a/src/init.c b/src/init.c
index 9b6665a6..aaf99ba6 100644
--- a/src/init.c
+++ b/src/init.c
@@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression);
 #endif
 CMD_DECLARE (cmd_spec_dirstruct);
 CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_dis_header);
 CMD_DECLARE (cmd_spec_warc_header);
 CMD_DECLARE (cmd_spec_htmlify);
 CMD_DECLARE (cmd_spec_mirror);
@@ -183,6 +184,7 @@ static const struct {
   { "deleteafter",      &opt.delete_after,      cmd_boolean },
   { "dirprefix",        &opt.dir_prefix,        cmd_directory },
   { "dirstruct",        NULL,                   cmd_spec_dirstruct },
+  { "disableheader",    NULL,                   cmd_dis_header},
   { "dnscache",         &opt.dns_cache,         cmd_boolean },
 #ifdef HAVE_LIBCARES
   { "dnsservers",       &opt.dns_servers,       cmd_string },
@@ -398,6 +400,7 @@ defaults (void)
   opt.metalink_index = -1;
 #endif
 
+  opt.disabled_headers = NULL;
   opt.cookies = true;
   opt.verbose = -1;
   opt.ntry = 20;
@@ -990,6 +993,7 @@ struct decode_item {
 static bool decode_string (const char *, const struct decode_item *, int, int 
*);
 static bool simple_atof (const char *, const char *, double *);
 
+
 #define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
 
 #define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0)        \
@@ -1459,6 +1463,8 @@ cmd_cert_type (const char *com, const char *val, void 
*place)
    options specially.  */
 
 static bool check_user_specified_header (const char *);
+static bool check_user_disabled_header (const char *);
+static char ** vec_remove_header (char **, const char *);
 
 #ifdef HAVE_LIBZ
 static bool
@@ -1493,6 +1499,29 @@ cmd_spec_dirstruct (const char *com, const char *val, 
void *place_ignored _GL_UN
   return true;
 }
 
+/* Handler for the `disableheader' command (--disable-header=VAL).
+
+   An empty VAL frees opt.disabled_headers and resets it to NULL.
+   Otherwise VAL is validated with check_user_disabled_header; on failure
+   an error naming COM and VAL is printed to stderr and false is
+   returned.  On success VAL is removed from opt.user_headers and
+   appended to opt.disabled_headers.  Returns true on success.  */
+static bool
+cmd_dis_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
+{
+  if (val[0] != '\0')
+    {
+      if (check_user_disabled_header (val))
+        {
+          /* A header disabled here must not survive among the headers
+             the user defined earlier.  */
+          opt.user_headers = vec_remove_header (opt.user_headers, val);
+          opt.disabled_headers = vec_append (opt.disabled_headers, val);
+          return true;
+        }
+
+      fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
+               exec_name, com, quote (val));
+      return false;
+    }
+
+  /* Empty value means reset the list of disabled headers.  */
+  free_vec (opt.disabled_headers);
+  opt.disabled_headers = NULL;
+  return true;
+}
+
 static bool
 cmd_spec_header (const char *com, const char *val, void *place_ignored 
_GL_UNUSED)
 {
@@ -1850,6 +1879,23 @@ simple_atof (const char *beg, const char *end, double 
*dest)
    contain a colon preceded by non-white-space characters and must not
    contain newlines.  */
 
+
+/* Validate S as a header field name given to --disable-header.
+
+   Return true only if S is non-empty and contains neither whitespace
+   (which includes newlines) nor a colon.  A value such as "Name: value"
+   or "Name junk" could never equal a header's field name, so it is
+   rejected here instead of being accepted and silently ignored.  */
+static bool
+check_user_disabled_header (const char *s)
+{
+  const char *p;
+
+  if (*s == '\0')
+    return false;
+
+  for (p = s; *p; p++)
+    if (*p == ':' || c_isspace (*p))
+      return false;
+
+  return true;
+}
+
 static bool
 check_user_specified_header (const char *s)
 {
@@ -1867,6 +1913,47 @@ check_user_specified_header (const char *s)
   return true;
 }
 
+/* Remove from the NULL-terminated vector VEC every entry whose header
+   field name (the text before the first ':') equals STR, compared
+   case-insensitively.
+
+   The vector is compacted in place: removed strings are freed, the
+   remaining entries keep their relative order, and the result stays
+   NULL-terminated.  VEC itself is returned (an empty, non-NULL vector
+   if every entry was removed); a NULL VEC yields NULL.  */
+
+static char **
+vec_remove_header (char **vec, const char *str)
+{
+  size_t len, src, dst = 0;
+
+  if (vec == NULL)
+    return NULL;
+
+  len = strlen (str);
+
+  for (src = 0; vec[src]; src++)
+    {
+      if (strcspn (vec[src], ":") == len
+          && 0 == c_strncasecmp (vec[src], str, len))
+        {
+          /* Matching entry: release it instead of keeping it.  */
+          xfree (vec[src]);
+        }
+      else
+        {
+          /* Keep this entry, sliding it down over any removed ones.  */
+          vec[dst++] = vec[src];
+        }
+    }
+
+  vec[dst] = NULL;
+  return vec;
+}
+
 /* Decode VAL into a number, according to ITEMS. */
 
 static bool
@@ -1977,6 +2064,7 @@ cleanup (void)
   xfree (opt.http_passwd);
   xfree (opt.dot_style);
   free_vec (opt.user_headers);
+  free_vec (opt.disabled_headers);
   free_vec (opt.warc_user_headers);
 # ifdef HAVE_SSL
   xfree (opt.cert_file);
diff --git a/src/main.c b/src/main.c
index 65b7f3f3..a73b39d9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -304,6 +304,7 @@ static struct cmdline_option option_data[] =
     { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
     { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
     { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
+    { "disable-header", 0, OPT_VALUE, "disableheader", -1 },
     { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
 #ifdef HAVE_LIBCARES
     { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 },
@@ -544,6 +545,7 @@ init_switches (void)
              identical to "--foo", except it has opposite meaning and
              it doesn't allow an argument.  */
           longopt = &long_options[o++];
+
           longopt->name = no_prefix (cmdopt->long_name);
           longopt->has_arg = no_argument;
           /* Mask the value so we'll be able to recognize that we're
@@ -792,6 +794,8 @@ HTTP options:\n"),
        --ignore-length             ignore 'Content-Length' header field\n"),
     N_("\
        --header=STRING             insert STRING among the headers\n"),
+    N_("\
+       --disable-header=STRING     disable STRING among the headers\n"),
 #ifdef HAVE_LIBZ
     N_("\
        --compression=TYPE          choose compression, one of auto, gzip and 
none. (default: none)\n"),
diff --git a/src/options.h b/src/options.h
index 881e2b2e..5559c6fa 100644
--- a/src/options.h
+++ b/src/options.h
@@ -147,6 +147,7 @@ struct options
   char *http_user;              /* HTTP username. */
   char *http_passwd;            /* HTTP password. */
   char **user_headers;          /* User-defined header(s). */
+  char **disabled_headers;       /* User-disabled header(s) */
   bool http_keep_alive;         /* whether we use keep-alive */
 
   bool use_proxy;               /* Do we use proxy? */
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index b5a39ad2..4b3e2d08 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -95,6 +95,9 @@ if HAVE_PYTHON3
     Test-cookie-domain-mismatch.py                  \
     Test-cookie-expires.py                          \
     Test-cookie.py                                  \
+    Test-disable-default-headers.py                 \
+    Test-disable-headers-after.py                   \
+    Test-disable-headers-before.py                  \
     Test-Head.py                                    \
     Test-hsts.py                                    \
     Test--https.py                                  \
diff --git a/testenv/README b/testenv/README
index 6580bc99..d2f38a67 100644
--- a/testenv/README
+++ b/testenv/README
@@ -182,6 +182,10 @@ This section lists the currently supported File Rules and 
their structure.
     * RejectHeader  : This list of Headers must NEVER occur in a request. It
     uses the same value format as ExpectHeader.
 
+    * RejectHeaderField  : This list of Header Fields must NOT appear in a request.
+    The value for this key is a list of strings where each header field is represented as:
+    |-->Header Field: <Header Field Name>
+
     * SendHeader    : This list of Headers will be sent in EVERY response to a
     request for the respective file. It follows the same value format as
     ExpectHeader. Additionally you can specify a list of strings as <Header 
Data>
diff --git a/testenv/Test-disable-default-headers.py 
b/testenv/Test-disable-default-headers.py
new file mode 100644
index 00000000..22ea54ad
--- /dev/null
+++ b/testenv/Test-disable-default-headers.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+    This test ensures that the --disable-header option removes the default
+    request headers. No user-defined headers are passed to wget.
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+    'Authorization',
+    'User-Agent',
+    'Referer',
+    'Cache-Control',
+    'Pragma',
+    'If-Modified-Since',
+    'Range',
+    'Accept',
+    'Accept-Encoding',
+    'Host',
+    'Connection',
+    'Proxy-Connection',
+    'Content-Type',
+    'Content-Length',
+    'Proxy-Authorization',
+    'Cookie',
+    'MyHeader',
+}
+
+# One server file per header field: each file carries a RejectHeaderField
+# rule for its own field, and wget is asked to disable every field.
+Files = [[]]
+WGET_URLS = [[]]
+disable_options = []
+
+for number, field in enumerate(Headers, start=1):
+    name = "File" + str(number)
+    rules = {"RejectHeaderField": {field}}
+    Files[0].append(WgetFile(name, file_content, rules=rules))
+    WGET_URLS[0].append(name)
+    disable_options.append(' --disable-header="' + field + '"')
+
+WGET_OPTIONS = ''.join(disable_options)
+
+################ Pre and Post Test Hooks #####################################
+err = HTTPTest(
+    pre_hook={"ServerFiles": Files},
+    test_params={"WgetCommands": WGET_OPTIONS, "Urls": WGET_URLS},
+    post_hook={"ExpectedRetcode": 0},
+    protocols=[HTTP],
+).begin()
+
+exit(err)
diff --git a/testenv/Test-disable-headers-after.py 
b/testenv/Test-disable-headers-after.py
new file mode 100644
index 00000000..c0ffc84d
--- /dev/null
+++ b/testenv/Test-disable-headers-after.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+    This test ensures that the --disable-header option removes user headers
+    from the HTTP request when it is placed after --header="header: value".
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+    'Authorization',
+    'User-Agent',
+    'Referer',
+    'Cache-Control',
+    'Pragma',
+    'If-Modified-Since',
+    'Range',
+    'Accept',
+    'Accept-Encoding',
+    'Host',
+    'Connection',
+    'Proxy-Connection',
+    'Content-Type',
+    'Content-Length',
+    'Proxy-Authorization',
+    'Cookie',
+    'MyHeader',
+}
+
+# The user-defined headers come first on the command line ...
+WGET_OPTIONS = ''.join(' --header="' + field + ': any"' for field in Headers)
+
+# ... followed by the --disable-header options. Each server file carries a
+# RejectHeader rule for one "<field>: any" header.
+Files = [[]]
+WGET_URLS = [[]]
+
+for number, field in enumerate(Headers, start=1):
+    name = "File" + str(number)
+    rules = {"RejectHeader": {field: 'any'}}
+    Files[0].append(WgetFile(name, file_content, rules=rules))
+    WGET_OPTIONS += ' --disable-header="' + field + '"'
+    WGET_URLS[0].append(name)
+
+################ Pre and Post Test Hooks #####################################
+err = HTTPTest(
+    pre_hook={"ServerFiles": Files},
+    test_params={"WgetCommands": WGET_OPTIONS, "Urls": WGET_URLS},
+    post_hook={"ExpectedRetcode": 0},
+    protocols=[HTTP],
+).begin()
+
+exit(err)
diff --git a/testenv/Test-disable-headers-before.py 
b/testenv/Test-disable-headers-before.py
new file mode 100644
index 00000000..d442b008
--- /dev/null
+++ b/testenv/Test-disable-headers-before.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+    This test ensures that the --disable-header option does not remove user
+    headers from the HTTP request when it is placed before
+    --header="header: value".
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+    'Authorization',
+    'User-Agent',
+    'Referer',
+    'Cache-Control',
+    'Pragma',
+    'If-Modified-Since',
+    'Range',
+    'Accept',
+    'Accept-Encoding',
+    'Host',
+    'Connection',
+    'Proxy-Connection',
+    'Content-Type',
+    'Content-Length',
+    'Proxy-Authorization',
+    'Cookie',
+    'MyHeader',
+}
+
+# The --disable-header options come first on the command line. Each server
+# file carries an ExpectHeader rule for one "<field>: any" header.
+Files = [[]]
+WGET_URLS = [[]]
+WGET_OPTIONS = ''
+
+for number, field in enumerate(Headers, start=1):
+    name = "File" + str(number)
+    rules = {"ExpectHeader": {field: 'any'}}
+    Files[0].append(WgetFile(name, file_content, rules=rules))
+    WGET_OPTIONS += ' --disable-header="' + field + '"'
+    WGET_URLS[0].append(name)
+
+# ... followed by the user-defined headers.
+WGET_OPTIONS += ''.join(' --header="' + field + ': any"' for field in Headers)
+
+################ Pre and Post Test Hooks #####################################
+err = HTTPTest(
+    pre_hook={"ServerFiles": Files},
+    test_params={"WgetCommands": WGET_OPTIONS, "Urls": WGET_URLS},
+    post_hook={"ExpectedRetcode": 0},
+    protocols=[HTTP],
+).begin()
+
+exit(err)
diff --git a/testenv/conf/reject_header_field.py 
b/testenv/conf/reject_header_field.py
new file mode 100644
index 00000000..e1009cdd
--- /dev/null
+++ b/testenv/conf/reject_header_field.py
@@ -0,0 +1,12 @@
+from conf import rule
+
+""" Rule: RejectHeaderField
+This is a server side rule which expects a string list of Header Fields
+which should be blacklisted by the server for a particular file's requests.
+"""
+
+
+@rule()
+class RejectHeaderField:
+    """Server-side rule holding the header fields to reject for a file."""
+
+    def __init__(self, header_fields):
+        # header_fields: the header field names given as the rule's value.
+        self.header_fields = header_fields
diff --git a/testenv/server/http/http_server.py 
b/testenv/server/http/http_server.py
index 2cc82fb9..6f358335 100644
--- a/testenv/server/http/http_server.py
+++ b/testenv/server/http/http_server.py
@@ -370,6 +370,14 @@ class _Handler(BaseHTTPRequestHandler):
                                 header_line)
                 raise ServerError("Header " + header_line + ' received')
 
+    def RejectHeaderField(self, header_fields_obj):
+        """Reject the request if it carries a blacklisted header field.
+
+        header_fields_obj.header_fields lists the field names to check.
+        For the first one found in self.headers, a 400 error is sent and
+        ServerError is raised.
+        """
+        received = self.headers
+        for name in header_fields_obj.header_fields:
+            if name not in received:
+                continue
+            self.send_error(400, 'Blacklisted Header Field %s received' %
+                            name)
+            raise ServerError('Header Field %s received' % name)
+
     def __log_request(self, method):
         req = method + " " + self.path
         self.server.request_headers.append(req)
-- 
2.21.0


reply via email to

[Prev in Thread] Current Thread [Next in Thread]