bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH 1/3] Add Content-Encoding support to --adjust-extension


From: Tim Schlueter
Subject: [Bug-wget] [PATCH 1/3] Add Content-Encoding support to --adjust-extension
Date: Fri, 28 Jul 2017 18:45:21 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1

When -E or --adjust-extension is specified and the remote web
server responds with the Content-Encoding header set to gzip, deflate,
compress, or br, wget will now add the .gz, .zlib, .Z, or .br extension,
respectively.

This was inspired by Yuriy M. Kaminskiy's patch set:
https://lists.gnu.org/archive/html/bug-wget/2014-12/msg00087.html
---
 src/http.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/src/http.c b/src/http.c
index f5d9caf..a8c6e18 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1539,6 +1539,16 @@ persistent_available_p (const char *host, int port,
   fd = -1;                                      \
 } while (0)

+typedef enum                   /* Content-Encoding of an HTTP response body */
+{
+  ENC_INVALID = -1,             /* unrecognized Content-Encoding value */
+  ENC_NONE = 0,                 /* no encoding ("identity" or empty value) */
+  ENC_GZIP,                     /* gzip compression ("gzip", "x-gzip") */
+  ENC_DEFLATE,                  /* deflate compression ("deflate") */
+  ENC_COMPRESS,                 /* compress ("compress", "x-compress") */
+  ENC_BROTLI                    /* brotli compression ("br") */
+} encoding_t;
+
 struct http_stat
 {
   wgint len;                    /* received length */
@@ -1569,6 +1579,9 @@ struct http_stat
 #ifdef HAVE_METALINK
   metalink_t *metalink;
 #endif
+
+  encoding_t local_encoding;    /* the encoding of the local file */
+
   bool temporary;               /* downloading a temporary file */
 };

@@ -3189,6 +3202,7 @@ gethttp (const struct url *u, struct url *
   xfree (hs->remote_time);
   hs->error = NULL;
   hs->message = NULL;
+  hs->local_encoding = ENC_NONE;

   conn = u;

@@ -3639,6 +3653,49 @@ gethttp (const struct url *u, struct url *
         }
     }

+  if (resp_header_copy (resp, "Content-Encoding", hdrval, sizeof (hdrval)))
+    {
+      hs->local_encoding = ENC_INVALID;
+
+      switch (hdrval[0])
+        {
+        case 'b': case 'B':
+          if (0 == c_strcasecmp(hdrval, "br"))
+            hs->local_encoding = ENC_BROTLI;
+          break;
+        case 'c': case 'C':
+          if (0 == c_strcasecmp(hdrval, "compress"))
+            hs->local_encoding = ENC_COMPRESS;
+          break;
+        case 'd': case 'D':
+          if (0 == c_strcasecmp(hdrval, "deflate"))
+            hs->local_encoding = ENC_DEFLATE;
+          break;
+        case 'g': case 'G':
+          if (0 == c_strcasecmp(hdrval, "gzip"))
+            hs->local_encoding = ENC_GZIP;
+          break;
+        case 'i': case 'I':
+          if (0 == c_strcasecmp(hdrval, "identity"))
+            hs->local_encoding = ENC_NONE;
+          break;
+        case 'x': case 'X':
+          if (0 == c_strcasecmp(hdrval, "x-compress"))
+            hs->local_encoding = ENC_COMPRESS;
+          else if (0 == c_strcasecmp(hdrval, "x-gzip"))
+            hs->local_encoding = ENC_GZIP;
+          break;
+        case '\0':
+          hs->local_encoding = ENC_NONE;
+        }
+
+      if (hs->local_encoding == ENC_INVALID)
+        {
+          DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
+          hs->local_encoding = ENC_NONE;
+        }
+    }
+
   /* 20x responses are counted among successful by default.  */
   if (H_20X (statcode))
     *dt |= RETROKF;
@@ -3767,6 +3824,35 @@ gethttp (const struct url *u, struct url *

   if (opt.adjust_extension)
     {
+      const char *encoding_ext = NULL;
+      switch (hs->local_encoding)
+        {
+        case ENC_INVALID:
+        case ENC_NONE:
+          break;
+        case ENC_BROTLI:
+          encoding_ext = ".br";
+          break;
+        case ENC_COMPRESS:
+          encoding_ext = ".Z";
+          break;
+        case ENC_DEFLATE:
+          encoding_ext = ".zlib";
+          break;
+        case ENC_GZIP:
+          encoding_ext = ".gz";
+          break;
+        default:
+          DEBUGP (("No extension found for encoding %d\n",
+                   hs->local_encoding));
+      }
+      if (encoding_ext != NULL)
+        {
+          char *file_ext = strrchr (hs->local_file, '.');
+          /* strip Content-Encoding extension (it will be re-added later) */
+          if (file_ext != NULL && 0 == strcasecmp (file_ext, encoding_ext))
+            *file_ext = '\0';
+        }
       if (*dt & TEXTHTML)
         /* -E / --adjust-extension / adjust_extension = on was specified,
            and this is a text/html file.  If some case-insensitive
@@ -3779,6 +3865,10 @@ gethttp (const struct url *u, struct url *
         {
           ensure_extension (hs, ".css", dt);
         }
+      if (encoding_ext != NULL)
+        {
+          ensure_extension (hs, encoding_ext, dt);
+        }
     }

   if (cond_get)
-- 

Attachment: signature.asc
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]