bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Disable automatic wget headers.


From: adham elkarn
Subject: [Bug-wget] [PATCH] Disable automatic wget headers.
Date: Sat, 27 Apr 2019 16:34:43 +0000

From: sulfastor <address@hidden>

Hello,
We've worked on this feature (bug #54769 
(https://savannah.gnu.org/bugs/?54769)), taking into account the comments made 
by Darshit some time ago. We would like you to review our changes and code. 
Thank you for helping us improve our programming skills.

* src/http.c: skip disabled headers when building a request; an empty header 
value now removes that header from the request
* src/init.c: added new functions to validate and record user-disabled headers
* src/main.c: added new option disable-header, added help description
* src/options.h: added new option disable-header

From bug #54769 (https://savannah.gnu.org/bugs/?54769).
Some servers don't handle some headers well. A --disable-header option will 
ensure that a request header
will not be included in the request. In addition, an empty header value in 
--header="headername: " will also disable
that request header.

Signed-off-by: sulfastor <address@hidden>, adham elkarn <address@hidden>
---
 src/http.c    | 137 ++++++++++++++++++++++++++++++++++----------------
 src/init.c    |  43 ++++++++++++++++
 src/main.c    |   4 ++
 src/options.h |   1 +
 4 files changed, 142 insertions(+), 43 deletions(-)

diff --git a/src/http.c b/src/http.c
index 289d1101..79cd2168 100644
--- a/src/http.c
+++ b/src/http.c
@@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, const 
char *);
 static bool known_authentication_scheme_p (const char *, const char *);
 static void ensure_extension (struct http_stat *, const char *, int *);
 static void load_cookies (void);
+static bool disabled_header(char*);
 
 static bool cookies_loaded_p;
 static struct cookie_jar *wget_cookie_jar;
@@ -152,6 +153,8 @@ struct request {
   int hcount, hcapacity;
 };
 
+/* Forward decls. */
+static bool request_remove_header (struct request*, const char*);
 
 /* Create a new, empty request. Set the request's method and its
    arguments.  METHOD should be a literal string (or it should outlive
@@ -245,6 +248,13 @@ request_set_header (struct request *req, const char *name, 
const char *value,
       return;
     }
 
+  /* A empty value is a disabled header; so remove it from the request */
+  if (!*value)
+    {
+      request_remove_header(req, name);
+      return;
+    }
+
   for (i = 0; i < req->hcount; i++)
     {
       hdr = &req->headers[i];
@@ -436,7 +446,7 @@ maybe_send_basic_creds (const char *hostname, const char 
*user,
       DEBUGP (("Host %s has not issued a general basic challenge.\n",
               quote (hostname)));
     }
-  if (do_challenge)
+  if (!disabled_header("Authorization") && do_challenge)
     {
       request_set_header (req, "Authorization",
                           basic_authentication_encode (user, passwd),
@@ -1770,23 +1780,29 @@ read_response_body (struct http_stat *hs, int sock, 
FILE *fp, wgint contlen,
 
 #ifdef __VMS
 #define SET_USER_AGENT(req) do {                                         \
-  if (!opt.useragent)                                                    \
-    request_set_header (req, "User-Agent",                               \
-                        aprintf ("Wget/%s (VMS %s %s)",                  \
-                        version_string, vms_arch(), vms_vers()),         \
-                        rel_value);                                      \
-  else if (*opt.useragent)                                               \
-    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
+  if(!disabled_header("User-Agent"))                                    \
+    {                                                                    \
+      if (!opt.useragent)                                               \
+       request_set_header (req, "User-Agent",                           \
+                           aprintf ("Wget/%s (VMS %s %s)",              \
+                           version_string, vms_arch(), vms_vers()),     \
+                           rel_value);                                  \
+      else if (*opt.useragent)                                           \
+       request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+    }                                                                   \
 } while (0)
 #else /* def __VMS */
 #define SET_USER_AGENT(req) do {                                         \
-  if (!opt.useragent)                                                    \
-    request_set_header (req, "User-Agent",                               \
-                        aprintf ("Wget/%s (%s)",                         \
-                        version_string, OS_TYPE),                        \
-                        rel_value);                                      \
-  else if (*opt.useragent)                                               \
-    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
+  if(!disabled_header("User-Agent"))                                     \
+    {                                                                    \
+      if (!opt.useragent)                                                \
+       request_set_header (req, "User-Agent",                           \
+                           aprintf ("Wget/%s (%s)",                     \
+                           version_string, OS_TYPE),                    \
+                           rel_value);                                  \
+      else if (*opt.useragent)                                           \
+       request_set_header (req, "User-Agent", opt.useragent, rel_none); \
+    }                                                                    \
 } while (0)
 #endif /* def __VMS [else] */
 
@@ -1842,6 +1858,24 @@ time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
   return RETROK;
 }
 
+/* Return true if HEADER_NAME is listed in opt.disabled_headers.  Only the
+   text before any ':' is compared, on both sides; the name must match in
+   full, ignoring case (so "Accept" does not hit "Accept-Encoding").  */
+static bool
+disabled_header (char *header_name)
+{
+  char **p = opt.disabled_headers;
+  size_t len = strcspn (header_name, ":");
+
+  if (!p)
+    return false;
+
+  for (; *p != NULL; ++p)
+    if (strcspn (*p, ":") == len && !c_strncasecmp (header_name, *p, len))
+      return true;
+  return false;
+}
+
 static struct request *
 initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct 
url *proxy,
                     bool inhibit_keep_alive, bool *basic_auth_finished,
@@ -1874,15 +1908,17 @@ initialize_request (const struct url *u, struct 
http_stat *hs, int *dt, struct u
       meth_arg = url_full_path (u);
     req = request_new (meth, meth_arg);
   }
-
-  request_set_header (req, "Referer", (char *) hs->referer, rel_none);
+  if(!disabled_header("Referer"))
+    request_set_header (req, "Referer", (char *) hs->referer, rel_none);
   if (*dt & SEND_NOCACHE)
     {
       /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms...  */
-      request_set_header (req, "Cache-Control", "no-cache", rel_none);
+      if(!disabled_header("Cache-Control"))
+       request_set_header (req, "Cache-Control", "no-cache", rel_none);
 
       /* ... but some HTTP/1.0 caches doesn't implement Cache-Control.  */
-      request_set_header (req, "Pragma", "no-cache", rel_none);
+      if(!disabled_header("Pragma"))
+       request_set_header (req, "Pragma", "no-cache", rel_none);
     }
   if (*dt & IF_MODIFIED_SINCE)
     {
@@ -1896,21 +1932,27 @@ initialize_request (const struct url *u, struct 
http_stat *hs, int *dt, struct u
                                   "time.\n"));
           strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT");
         }
-      request_set_header (req, "If-Modified-Since", xstrdup (strtime), 
rel_value);
+      if(!disabled_header("If-Modified-Since"))
+       request_set_header (req, "If-Modified-Since", xstrdup (strtime), 
rel_value);
     }
-  if (hs->restval)
+  if (!disabled_header("Range") && hs->restval)
     request_set_header (req, "Range",
                         aprintf ("bytes=%s-",
                                  number_to_static_string (hs->restval)),
                         rel_value);
+
   SET_USER_AGENT (req);
-  request_set_header (req, "Accept", "*/*", rel_none);
+
+  if(!disabled_header("Accept"))    
+    request_set_header (req, "Accept", "*/*", rel_none);
+  
 #ifdef HAVE_LIBZ
-  if (opt.compression != compression_none)
+  if (!disabled_header("Accept-Encoding") && opt.compression != 
compression_none)
     request_set_header (req, "Accept-Encoding", "gzip", rel_none);
   else
 #endif
-    request_set_header (req, "Accept-Encoding", "identity", rel_none);
+    if(!disabled_header("Accept-Encoding"))
+      request_set_header (req, "Accept-Encoding", "identity", rel_none);
 
   /* Find the username with priority */
   if (u->user)
@@ -1966,17 +2008,19 @@ initialize_request (const struct url *u, struct 
http_stat *hs, int *dt, struct u
     };
     int add_port = u->port != scheme_default_port (u->scheme);
     int add_squares = strchr (u->host, ':') != NULL;
-    request_set_header (req, "Host",
-                        aprintf (hfmt[add_port][add_squares], u->host, 
u->port),
-                        rel_value);
+    if(!disabled_header("Host"))
+      request_set_header (req, "Host",
+                         aprintf (hfmt[add_port][add_squares], u->host, 
u->port),
+                         rel_value);
   }
 
-  if (inhibit_keep_alive)
+  if (!disabled_header("Connection") && inhibit_keep_alive)
     request_set_header (req, "Connection", "Close", rel_none);
   else
     {
-      request_set_header (req, "Connection", "Keep-Alive", rel_none);
-      if (proxy)
+      if (!disabled_header("Connection"))
+       request_set_header (req, "Connection", "Keep-Alive", rel_none);
+      if (!disabled_header("Proxy-Connection") && proxy)
         request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
     }
 
@@ -1985,8 +2029,9 @@ initialize_request (const struct url *u, struct http_stat 
*hs, int *dt, struct u
 
       if (opt.body_data || opt.body_file)
         {
-          request_set_header (req, "Content-Type",
-                              "application/x-www-form-urlencoded", rel_none);
+         if (!disabled_header("Content-Type"))
+           request_set_header (req, "Content-Type",
+                               "application/x-www-form-urlencoded", rel_none);
 
           if (opt.body_data)
             *body_data_size = strlen (opt.body_data);
@@ -2002,11 +2047,13 @@ initialize_request (const struct url *u, struct 
http_stat *hs, int *dt, struct u
                   return NULL;
                 }
             }
-          request_set_header (req, "Content-Length",
-                              xstrdup (number_to_static_string 
(*body_data_size)),
-                              rel_value);
+         if (!disabled_header("Content-Length"))
+           request_set_header (req, "Content-Length",
+                               xstrdup (number_to_static_string 
(*body_data_size)),
+                               rel_value);
         }
-      else if (c_strcasecmp (opt.method, "post") == 0
+      else if (!disabled_header("Content-Length")
+              && c_strcasecmp (opt.method, "post") == 0
                || c_strcasecmp (opt.method, "put") == 0
                || c_strcasecmp (opt.method, "patch") == 0)
         request_set_header (req, "Content-Length", "0", rel_none);
@@ -2043,7 +2090,8 @@ initialize_proxy_configuration (const struct url *u, 
struct request *req,
 #ifdef HAVE_SSL
   if (u->scheme != SCHEME_HTTPS)
 #endif
-    request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
+    if (!disabled_header("Proxy-Authorization"))
+      request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
 }
 
 static uerr_t
@@ -2133,8 +2181,9 @@ establish_connection (const struct url *u, const struct 
url **conn_ref,
              CONNECT method to request passthrough.  */
           struct request *connreq = request_new ("CONNECT",
                               aprintf ("%s:%d", u->host, u->port));
+
           SET_USER_AGENT (connreq);
-          if (proxyauth)
+          if (!disabled_header("Proxy-Authorization") && proxyauth)
             {
               request_set_header (connreq, "Proxy-Authorization",
                                   *proxyauth, rel_value);
@@ -2143,9 +2192,10 @@ establish_connection (const struct url *u, const struct 
url **conn_ref,
                  the regular request below.  */
               *proxyauth = NULL;
             }
-          request_set_header (connreq, "Host",
-                              aprintf ("%s:%d", u->host, u->port),
-                              rel_value);
+         if (!disabled_header("Host"))
+           request_set_header (connreq, "Host",
+                               aprintf ("%s:%d", u->host, u->port),
+                               rel_value);
 
           write_error = request_send (connreq, sock, 0);
           request_free (&connreq);
@@ -2456,7 +2506,7 @@ check_auth (const struct url *u, char *user, char 
*passwd, struct response *resp
           auth_err = *auth_stat;
           xfree (auth_stat);
           xfree (pth);
-          if (auth_err == RETROK)
+          if (!disabled_header("Authorisation") && auth_err == RETROK)
             {
               request_set_header (req, "Authorization", value, rel_value);
 
@@ -3264,7 +3314,8 @@ gethttp (const struct url *u, struct url *original_url, 
struct http_stat *hs,
     {
       int i;
       for (i = 0; opt.user_headers[i]; i++)
-        request_set_user_header (req, opt.user_headers[i]);
+       if (!disabled_header(opt.user_headers[i]))
+         request_set_user_header (req, opt.user_headers[i]);
     }
 
   proxyauth = NULL;
diff --git a/src/init.c b/src/init.c
index 9b6665a6..a2b6f311 100644
--- a/src/init.c
+++ b/src/init.c
@@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression);
 #endif
 CMD_DECLARE (cmd_spec_dirstruct);
 CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_dis_header);
 CMD_DECLARE (cmd_spec_warc_header);
 CMD_DECLARE (cmd_spec_htmlify);
 CMD_DECLARE (cmd_spec_mirror);
@@ -183,6 +184,7 @@ static const struct {
   { "deleteafter",      &opt.delete_after,      cmd_boolean },
   { "dirprefix",        &opt.dir_prefix,        cmd_directory },
   { "dirstruct",        NULL,                   cmd_spec_dirstruct },
+  { "disableheader",    NULL,                   cmd_dis_header},
   { "dnscache",         &opt.dns_cache,         cmd_boolean },
 #ifdef HAVE_LIBCARES
   { "dnsservers",       &opt.dns_servers,       cmd_string },
@@ -398,6 +400,7 @@ defaults (void)
   opt.metalink_index = -1;
 #endif
 
+  opt.disabled_headers = NULL;
   opt.cookies = true;
   opt.verbose = -1;
   opt.ntry = 20;
@@ -1459,6 +1462,7 @@ cmd_cert_type (const char *com, const char *val, void 
*place)
    options specially.  */
 
 static bool check_user_specified_header (const char *);
+static bool check_user_disabled_header (const char *);
 
 #ifdef HAVE_LIBZ
 static bool
@@ -1493,6 +1497,27 @@ cmd_spec_dirstruct (const char *com, const char *val, 
void *place_ignored _GL_UN
   return true;
 }
 
+/* Record VAL in opt.disabled_headers; an empty VAL clears the list.  */
+static bool
+cmd_dis_header (const char *com, const char *val, void *place_ignored _GL_UNUSED)
+{
+  if (!*val)
+    {
+      /* Nothing to add: forget every header disabled so far.  */
+      free_vec (opt.disabled_headers);
+      opt.disabled_headers = NULL;
+    }
+  else if (check_user_disabled_header (val))
+    opt.disabled_headers = vec_append (opt.disabled_headers, val);
+  else
+    {
+      fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
+               exec_name, com, quote (val));
+      return false;
+    }
+  return true;
+}
+
 static bool
 cmd_spec_header (const char *com, const char *val, void *place_ignored 
_GL_UNUSED)
 {
@@ -1850,6 +1875,23 @@ simple_atof (const char *beg, const char *end, double 
*dest)
    contain a colon preceded by non-white-space characters and must not
    contain newlines.  */
 
+
+/* Validate S as an argument to --disable-header.  Return false if S is
+   empty, begins with whitespace, or contains a newline; otherwise
+   return true.  */
+static bool
+check_user_disabled_header (const char *s)
+{
+  /* An empty or whitespace-led string has no header name to disable.  */
+  if (*s == '\0' || c_isspace (*s))
+    return false;
+
+  /* The header MUST NOT contain newlines.  */
+  if (strchr (s, '\n') != NULL)
+    return false;
+  return true;
+}
+
 static bool
 check_user_specified_header (const char *s)
 {
@@ -1977,6 +2019,7 @@ cleanup (void)
   xfree (opt.http_passwd);
   xfree (opt.dot_style);
   free_vec (opt.user_headers);
+  free_vec (opt.disabled_headers);
   free_vec (opt.warc_user_headers);
 # ifdef HAVE_SSL
   xfree (opt.cert_file);
diff --git a/src/main.c b/src/main.c
index 65b7f3f3..a73b39d9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -304,6 +304,7 @@ static struct cmdline_option option_data[] =
     { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
     { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
     { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
+    { "disable-header", 0, OPT_VALUE, "disableheader", -1 },
     { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
 #ifdef HAVE_LIBCARES
     { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 },
@@ -544,6 +545,7 @@ init_switches (void)
              identical to "--foo", except it has opposite meaning and
              it doesn't allow an argument.  */
           longopt = &long_options[o++];
+
           longopt->name = no_prefix (cmdopt->long_name);
           longopt->has_arg = no_argument;
           /* Mask the value so we'll be able to recognize that we're
@@ -792,6 +794,8 @@ HTTP options:\n"),
        --ignore-length             ignore 'Content-Length' header field\n"),
     N_("\
        --header=STRING             insert STRING among the headers\n"),
+    N_("\
+       --disable-header=STRING     disable STRING among the headers\n"),
 #ifdef HAVE_LIBZ
     N_("\
        --compression=TYPE          choose compression, one of auto, gzip and 
none. (default: none)\n"),
diff --git a/src/options.h b/src/options.h
index 881e2b2e..5559c6fa 100644
--- a/src/options.h
+++ b/src/options.h
@@ -147,6 +147,7 @@ struct options
   char *http_user;              /* HTTP username. */
   char *http_passwd;            /* HTTP password. */
   char **user_headers;          /* User-defined header(s). */
+  char **disabled_headers;       /* User-disabled header(s) */
   bool http_keep_alive;         /* whether we use keep-alive */
 
   bool use_proxy;               /* Do we use proxy? */
-- 
2.21.0


reply via email to

[Prev in Thread] Current Thread [Next in Thread]