bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Bug-wget] [PATCH 1/3] Add Content-Encoding support to --adjust-extension


From: Tim Rühsen
Subject: Re: [Bug-wget] [PATCH 1/3] Add Content-Encoding support to --adjust-extension
Date: Sat, 29 Jul 2017 11:56:19 +0200
User-agent: KMail/5.2.3 (Linux/4.11.0-2-amd64; KDE/5.28.0; x86_64; ; )

Hey Tim, great work!

I would like to put it into its own branch until everything is complete,
including the FSF assignment.

Could you please rewrite your commit messages in GNU style? See 'git log' for
examples.
Also, please send patches as attachments generated with 'git format-patch', so
that I can easily apply, review, and test them.

We also need documentation in doc/wget.texi. Please also consider writing some
test code (see the Python test suite in testenv/).

With Best Regards, Tim

On Freitag, 28. Juli 2017 18:45:21 CEST Tim Schlueter wrote:
> When -E or --adjust-extension are specified, and the remote web
> server responds with the content-encoding header set to gzip, deflate,
> compress, or br, wget will now add the .gz, .zlib, .Z, and .br extensions
> respectively.
> 
> This was inspired by Yuriy M. Kaminskiy's patch set:
> https://lists.gnu.org/archive/html/bug-wget/2014-12/msg00087.html
> ---
>  src/http.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 90 insertions(+)
> 
> diff --git a/src/http.c b/src/http.c
> index f5d9caf..a8c6e18 100644
> --- a/src/http.c
> +++ b/src/http.c
> @@ -1539,6 +1539,16 @@ persistent_available_p (const char *host, int port,
>    fd = -1;                                      \
>  } while (0)
> 
> +typedef enum
> +{
> +  ENC_INVALID = -1,             /* invalid encoding */
> +  ENC_NONE = 0,                 /* no special encoding */
> +  ENC_GZIP,                     /* gzip compression */
> +  ENC_DEFLATE,                  /* deflate compression */
> +  ENC_COMPRESS,                 /* compress compression */
> +  ENC_BROTLI                    /* brotli compression */
> +} encoding_t;
> +
>  struct http_stat
>  {
>    wgint len;                    /* received length */
> @@ -1569,6 +1579,9 @@ struct http_stat
>  #ifdef HAVE_METALINK
>    metalink_t *metalink;
>  #endif
> +
> +  encoding_t local_encoding;    /* the encoding of the local file */
> +
>    bool temporary;               /* downloading a temporary file */
>  };
> 
> @@ -3189,6 +3202,7 @@ gethttp (const struct url *u, struct url *
>    xfree (hs->remote_time);
>    hs->error = NULL;
>    hs->message = NULL;
> +  hs->local_encoding = ENC_NONE;
> 
>    conn = u;
> 
> @@ -3639,6 +3653,49 @@ gethttp (const struct url *u, struct url *
>          }
>      }
> 
> +  if (resp_header_copy (resp, "Content-Encoding", hdrval, sizeof (hdrval)))
> +    {
> +      hs->local_encoding = ENC_INVALID;
> +
> +      switch (hdrval[0])
> +        {
> +        case 'b': case 'B':
> +          if (0 == c_strcasecmp(hdrval, "br"))
> +            hs->local_encoding = ENC_BROTLI;
> +          break;
> +        case 'c': case 'C':
> +          if (0 == c_strcasecmp(hdrval, "compress"))
> +            hs->local_encoding = ENC_COMPRESS;
> +          break;
> +        case 'd': case 'D':
> +          if (0 == c_strcasecmp(hdrval, "deflate"))
> +            hs->local_encoding = ENC_DEFLATE;
> +          break;
> +        case 'g': case 'G':
> +          if (0 == c_strcasecmp(hdrval, "gzip"))
> +            hs->local_encoding = ENC_GZIP;
> +          break;
> +        case 'i': case 'I':
> +          if (0 == c_strcasecmp(hdrval, "identity"))
> +            hs->local_encoding = ENC_NONE;
> +          break;
> +        case 'x': case 'X':
> +          if (0 == c_strcasecmp(hdrval, "x-compress"))
> +            hs->local_encoding = ENC_COMPRESS;
> +          else if (0 == c_strcasecmp(hdrval, "x-gzip"))
> +            hs->local_encoding = ENC_GZIP;
> +          break;
> +        case '\0':
> +          hs->local_encoding = ENC_NONE;
> +        }
> +
> +      if (hs->local_encoding == ENC_INVALID)
> +        {
> +          DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval));
> +          hs->local_encoding = ENC_NONE;
> +        }
> +    }
> +
>    /* 20x responses are counted among successful by default.  */
>    if (H_20X (statcode))
>      *dt |= RETROKF;
> @@ -3767,6 +3824,35 @@ gethttp (const struct url *u, struct url *
> 
>    if (opt.adjust_extension)
>      {
> +      const char *encoding_ext = NULL;
> +      switch (hs->local_encoding)
> +        {
> +        case ENC_INVALID:
> +        case ENC_NONE:
> +          break;
> +        case ENC_BROTLI:
> +          encoding_ext = ".br";
> +          break;
> +        case ENC_COMPRESS:
> +          encoding_ext = ".Z";
> +          break;
> +        case ENC_DEFLATE:
> +          encoding_ext = ".zlib";
> +          break;
> +        case ENC_GZIP:
> +          encoding_ext = ".gz";
> +          break;
> +        default:
> +          DEBUGP (("No extension found for encoding %d\n",
> +                   hs->local_encoding));
> +      }
> +      if (encoding_ext != NULL)
> +        {
> +          char *file_ext = strrchr (hs->local_file, '.');
> +          /* strip Content-Encoding extension (it will be re-added later) */
> +          if (file_ext != NULL && 0 == strcasecmp (file_ext, encoding_ext))
> +            *file_ext = '\0';
> +        }
>        if (*dt & TEXTHTML)
>          /* -E / --adjust-extension / adjust_extension = on was specified,
>             and this is a text/html file.  If some case-insensitive
> @@ -3779,6 +3865,10 @@ gethttp (const struct url *u, struct url *
>          {
>            ensure_extension (hs, ".css", dt);
>          }
> +      if (encoding_ext != NULL)
> +        {
> +          ensure_extension (hs, encoding_ext, dt);
> +        }
>      }
> 
>    if (cond_get)

Attachment: signature.asc
Description: This is a digitally signed message part.


reply via email to

[Prev in Thread] Current Thread [Next in Thread]