bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Add support for --retry-on-http-error


From: Tom Szilagyi
Subject: [Bug-wget] [PATCH] Add support for --retry-on-http-error
Date: Mon, 6 Feb 2017 20:42:29 +0100
User-agent: Mutt/1.5.23 (2014-03-12)

Consider given HTTP response codes as non-fatal, transient errors.
Supply a comma-separated list of 3-digit HTTP response codes as
argument. Useful to work around special circumstances where retries
are required, but the server responds with an error code normally not
retried by Wget. Such errors might be 503 (Service Unavailable) and
429 (Too Many Requests). Retries enabled by this option are performed
subject to the normal retry timing and retry count limitations of
Wget.

Using this option is intended to support special use cases only and is
generally not recommended, as it can force retries even in cases where
the server is actually trying to decrease its load. Please use it
wisely and only if you know what you are doing.

Example use and a starting point for manual testing:
  wget --retry-on-http-error=429,503 http://httpbin.org/status/503
---
 doc/wget.texi | 15 +++++++++++++++
 src/http.c    | 29 +++++++++++++++++++++++++++++
 src/init.c    |  1 +
 src/main.c    |  1 +
 src/options.h |  1 +
 5 files changed, 47 insertions(+)

diff --git a/doc/wget.texi b/doc/wget.texi
index 8e57aaa..00a8d4b 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1718,6 +1718,21 @@ some few obscure servers, which never send HTTP 
authentication
 challenges, but accept unsolicited auth info, say, in addition to
 form-based authentication.
 
address@hidden address@hidden,code,...]}
+Consider given HTTP response codes as non-fatal, transient errors.
+Supply a comma-separated list of 3-digit HTTP response codes as
+argument. Useful to work around special circumstances where retries
+are required, but the server responds with an error code normally not
+retried by Wget. Such errors might be 503 (Service Unavailable) and
+429 (Too Many Requests). Retries enabled by this option are performed
+subject to the normal retry timing and retry count limitations of
+Wget.
+
+Using this option is intended to support special use cases only and is
+generally not recommended, as it can force retries even in cases where
+the server is actually trying to decrease its load. Please use it
+wisely and only if you know what you are doing.
+
 @end table
 
 @node HTTPS (SSL/TLS) Options, FTP Options, HTTP Options, Invoking
diff --git a/src/http.c b/src/http.c
index 3c3c8b2..6822bee 100644
--- a/src/http.c
+++ b/src/http.c
@@ -3982,6 +3982,30 @@ gethttp (const struct url *u, struct url *original_url, 
struct http_stat *hs,
   return retval;
 }
 
+/* Check whether the supplied HTTP status code is among those
+   listed for the --retry-on-http-error option. */
+static bool
+check_retry_on_http_error (const int statcode)
+{
+  if (!opt.retry_on_http_error)
+    return false;
+
+  bool ret = false;
+  char * retry_conf = strdup(opt.retry_on_http_error);
+  char * tok = strtok(retry_conf, ",");
+  while (tok)
+    {
+      if (atoi(tok) == statcode)
+        {
+          ret = true;
+          break;
+        }
+      tok = strtok(NULL, ",");
+    }
+  free(retry_conf);
+  return ret;
+}
+
 /* The genuine HTTP loop!  This is the part where the retrieval is
    retried, and retried, and retried, and...  */
 uerr_t
@@ -4319,6 +4343,11 @@ http_loop (const struct url *u, struct url 
*original_url, char **newloc,
               logprintf (LOG_NOTQUIET, _("\
 Remote file does not exist -- broken link!!!\n"));
             }
+          else if (check_retry_on_http_error(hstat.statcode))
+            {
+              printwhat (count, opt.ntry);
+              continue;
+            }
           else
             {
               logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
diff --git a/src/init.c b/src/init.c
index 623ef9d..01dcb06 100644
--- a/src/init.c
+++ b/src/init.c
@@ -304,6 +304,7 @@ static const struct {
   { "restrictfilenames", NULL,                  cmd_spec_restrict_file_names },
   { "retrsymlinks",     &opt.retr_symlinks,     cmd_boolean },
   { "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
+  { "retryonhttperror", &opt.retry_on_http_error, cmd_string },
   { "robots",           &opt.use_robots,        cmd_boolean },
   { "savecookies",      &opt.cookies_output,    cmd_file },
   { "saveheaders",      &opt.save_headers,      cmd_boolean },
diff --git a/src/main.c b/src/main.c
index e393597..581a33d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -404,6 +404,7 @@ static struct cmdline_option option_data[] =
     { "restrict-file-names", 0, OPT_BOOLEAN, "restrictfilenames", -1 },
     { "retr-symlinks", 0, OPT_BOOLEAN, "retrsymlinks", -1 },
     { "retry-connrefused", 0, OPT_BOOLEAN, "retryconnrefused", -1 },
+    { "retry-on-http-error", 0, OPT_VALUE, "retryonhttperror", -1 },
     { "save-cookies", 0, OPT_VALUE, "savecookies", -1 },
     { "save-headers", 0, OPT_BOOLEAN, "saveheaders", -1 },
     { IF_SSL ("secure-protocol"), 0, OPT_VALUE, "secureprotocol", -1 },
diff --git a/src/options.h b/src/options.h
index 8a818ca..3972945 100644
--- a/src/options.h
+++ b/src/options.h
@@ -43,6 +43,7 @@ struct options
   bool quiet;                   /* Are we quiet? */
   int ntry;                     /* Number of tries per URL */
   bool retry_connrefused;       /* Treat CONNREFUSED as non-fatal. */
+  char *retry_on_http_error;    /* Treat given HTTP errors as non-fatal. */
   bool background;              /* Whether we should work in background. */
   bool ignore_length;           /* Do we heed content-length at all?  */
   bool recursive;               /* Are we recursive? */
-- 
2.1.4

Attachment: signature.asc
Description: Digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]