bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] Re: [PATCH] Fixed problem under content-disposition filename


From: Giuseppe Scrivano
Subject: [Bug-wget] Re: [PATCH] Fixed problem under content-disposition filename and recursive downloading
Date: Fri, 24 Sep 2010 14:00:32 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.2 (gnu/linux)

"Dennis, CHENG Renquan" <address@hidden> writes:

> On Thu, Sep 16, 2010 at 4:55 PM, Giuseppe Scrivano <address@hidden> wrote:
>> Hi Cheng,
>>
>> the development version is hosted on the bazaar repository.
>>
>> If you have problems accessing the bazaar repository then you can try
>> with this alpha version, rebasing your patch on it:
>>
>>  ftp://alpha.gnu.org/gnu/wget/wget-1.12-2416.tar.bz2
>
> This attached "wget-content-disposition-filename-recursive.patch
> (text/x-patch) 8K" is rebased on above wget-1.12-2416 snapshot;

Thanks!

I am going to push it.  I have done some small changes.  I have attached
the final version I will commit.

Giuseppe



=== modified file 'src/ChangeLog'
--- src/ChangeLog       2010-09-14 10:14:34 +0000
+++ src/ChangeLog       2010-09-24 10:48:40 +0000
@@ -1,3 +1,16 @@
+2010-09-24  Dennis, CHENG Renquan <address@hidden>
+
+       Fix problem when content-disposition is used with recursive downloading.
+       * url.h (url_file_name): Add a new argument `replaced_filename'.
+       * url.c (url_file_name): Likewise.
+       * http.c (parse_content_disposition): Do not add a prefix to the return
+       value.
+       (test_parse_content_disposition): Adjust tests.
+       (gethttp): Pass additional parameter to `url_file_name'.
+       (http_loop): Likewise.
+       * ftp.c (ftp_loop_internal, ftp_get_listing, ftp_retrieve_list)
+       (ftp_loop): Likewise.
+
 2010-09-14  Giuseppe Scrivano  <address@hidden>
 
        * convert.c (local_quote_string): Accept new parameter `no_html_quote'.

=== modified file 'src/ftp.c'
--- src/ftp.c   2010-07-28 19:22:22 +0000
+++ src/ftp.c   2010-09-24 09:21:44 +0000
@@ -1382,7 +1382,7 @@
   else
     {
       /* URL-derived file.  Consider "-O file" name. */
-      con->target = url_file_name (u);
+      con->target = url_file_name (u, NULL);
       if (!opt.output_document)
         locf = con->target;
       else
@@ -1496,7 +1496,7 @@
             {
               /* Re-determine the file name. */
               xfree_null (con->target);
-              con->target = url_file_name (u);
+              con->target = url_file_name (u, NULL);
               locf = con->target;
             }
           continue;
@@ -1624,7 +1624,7 @@
   /* Find the listing file name.  We do it by taking the file name of
      the URL and replacing the last component with the listing file
      name.  */
-  uf = url_file_name (u);
+  uf = url_file_name (u, NULL);
   lf = file_merge (uf, LIST_FILENAME);
   xfree (uf);
   DEBUGP ((_("Using %s as listing tmp file.\n"), quote (lf)));
@@ -1718,7 +1718,7 @@
       ofile = xstrdup (u->file);
       url_set_file (u, f->name);
 
-      con->target = url_file_name (u);
+      con->target = url_file_name (u, NULL);
       err = RETROK;
 
       dlthis = true;
@@ -2168,7 +2168,7 @@
               char *filename = (opt.output_document
                                 ? xstrdup (opt.output_document)
                                 : (con.target ? xstrdup (con.target)
-                                   : url_file_name (u)));
+                                   : url_file_name (u, NULL)));
               res = ftp_index (filename, u, f);
               if (res == FTPOK && opt.verbose)
                 {

=== modified file 'src/http.c'
--- src/http.c  2010-08-09 10:56:49 +0000
+++ src/http.c  2010-09-24 10:48:06 +0000
@@ -1149,71 +1149,44 @@
    false.
 
    The file name is stripped of directory components and must not be
-   empty.  */
+   empty.
+
+   Historically, this function returned filename prefixed with opt.dir_prefix,
+   now that logic is handled by the caller, new code should pay attention,
+   changed by crq, Sep 2010.
+
+*/
 static bool
 parse_content_disposition (const char *hdr, char **filename)
 {
+  param_token name, value;
   *filename = NULL;
-  param_token name, value;
   while (extract_param (&hdr, &name, &value, ';'))
     {
       int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
       if ( isFilename && value.b != NULL)
-       {
-         /* Make the file name begin at the last slash or backslash. */
-         const char *last_slash = memrchr (value.b, '/', value.e - value.b);
-         const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
-         if (last_slash && last_bs)
-           value.b = 1 + MAX (last_slash, last_bs);
-         else if (last_slash || last_bs)
-           value.b = 1 + (last_slash ? last_slash : last_bs);
-         if (value.b == value.e)
-           continue;
-         /* Start with the directory prefix, if specified. */
-         if (opt.dir_prefix)
-           {
-             if (!(*filename))
-               {
-                 int prefix_length = strlen (opt.dir_prefix);
-                 bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
-                 int total_length;
-                 
-                 if (add_slash) 
-                   ++prefix_length;
-                 total_length = prefix_length + (value.e - value.b);           
 
-                 *filename = xmalloc (total_length + 1);
-                 strcpy (*filename, opt.dir_prefix);
-                 if (add_slash) 
-                   (*filename)[prefix_length - 1] = '/';
-                 memcpy (*filename + prefix_length, value.b, (value.e - 
value.b));
-                 (*filename)[total_length] = '\0';
-               }
-             else
-               {
-                 append_value_to_filename (filename, &value);
-               }  
-           }
-         else
-           {
-             if (*filename)
-               {
-                 append_value_to_filename (filename, &value);
-               }
-             else
-               {
-                 *filename = strdupdelim (value.b, value.e);
-               }
-           }
-       }
+        {
+          /* Make the file name begin at the last slash or backslash. */
+          const char *last_slash = memrchr (value.b, '/', value.e - value.b);
+          const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
+          if (last_slash && last_bs)
+            value.b = 1 + MAX (last_slash, last_bs);
+          else if (last_slash || last_bs)
+            value.b = 1 + (last_slash ? last_slash : last_bs);
+          if (value.b == value.e)
+            continue;
+
+          if (*filename)
+            append_value_to_filename (filename, &value);
+          else
+            *filename = strdupdelim (value.b, value.e);
+        }
     }
+
   if (*filename)
-    {
-      return true;
-    }
+    return true;
   else
-    {
-      return false;
-    }
+    return false;
 }
 

@@ -2163,15 +2136,23 @@
    * hstat.local_file is set by http_loop to the argument of -O. */
   if (!hs->local_file)
     {
+      char *local_file = NULL;
+
       /* Honor Content-Disposition whether possible. */
       if (!opt.content_disposition
           || !resp_header_copy (resp, "Content-Disposition",
                                 hdrval, sizeof (hdrval))
-          || !parse_content_disposition (hdrval, &hs->local_file))
+          || !parse_content_disposition (hdrval, &local_file))
         {
           /* The Content-Disposition header is missing or broken.
            * Choose unique file name according to given URL. */
-          hs->local_file = url_file_name (u);
+          hs->local_file = url_file_name (u, NULL);
+        }
+      else
+        {
+          DEBUGP (("Parsed filename from Content-Disposition: %s\n",
+                  local_file));
+          hs->local_file = url_file_name (u, local_file);
         }
     }
 
@@ -2647,7 +2628,7 @@
   else if (!opt.content_disposition)
     {
       hstat.local_file =
-        url_file_name (opt.trustservernames ? u : original_url);
+        url_file_name (opt.trustservernames ? u : original_url, NULL);
       got_name = true;
     }
 
@@ -2685,7 +2666,7 @@
 
   /* Send preliminary HEAD request if -N is given and we have an existing
    * destination file. */
-  file_name = url_file_name (opt.trustservernames ? u : original_url);
+  file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
   if (opt.timestamping && (file_exists_p (file_name)
                            || opt.content_disposition))
     send_head_first = true;
@@ -3549,20 +3530,15 @@
   int i;
   struct {
     char *hdrval;
-    char *opt_dir_prefix;
     char *filename;
     bool result;
   } test_array[] = {
-    { "filename=\"file.ext\"", NULL, "file.ext", true },
-    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
-    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true 
},
-    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", "somedir", 
"somedir/file.ext", true },
-    { "attachment", NULL, NULL, false },
-    { "attachment", "somedir", NULL, false },
-    { "attachement; filename*=UTF-8'en-US'hello.txt", NULL, "hello.txt", true 
},
-    { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", NULL, 
"helloworld.txt", true },
+    { "filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+    { "attachment", NULL, false },
+    { "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
+    { "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", 
"helloworld.txt", true },
   };
 
   for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
@@ -3570,7 +3546,6 @@
       char *filename;
       bool res;
 
-      opt.dir_prefix = test_array[i].opt_dir_prefix;
       res = parse_content_disposition (test_array[i].hdrval, &filename);
 
       mu_assert ("test_parse_content_disposition: wrong result",

=== modified file 'src/url.c'
--- src/url.c   2010-05-08 19:56:15 +0000
+++ src/url.c   2010-09-24 09:21:44 +0000
@@ -1499,7 +1499,7 @@
    possible.  Does not create directories on the file system.  */
 
 char *
-url_file_name (const struct url *u)
+url_file_name (const struct url *u, char *replaced_filename)
 {
   struct growable fnres;        /* stands for "file name result" */
 
@@ -1554,18 +1554,29 @@
       append_dir_structure (u, &fnres);
     }
 
-  /* Add the file name. */
-  if (fnres.tail)
-    append_char ('/', &fnres);
-  u_file = *u->file ? u->file : index_filename;
-  append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
+  if (!replaced_filename)
+    {
+      /* Add the file name. */
+      if (fnres.tail)
+       append_char ('/', &fnres);
+      u_file = *u->file ? u->file : index_filename;
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
 
-  /* Append "?query" to the file name. */
-  u_query = u->query && *u->query ? u->query : NULL;
-  if (u_query)
+      /* Append "?query" to the file name. */
+      u_query = u->query && *u->query ? u->query : NULL;
+      if (u_query)
+       {
+         append_char (FN_QUERY_SEP, &fnres);
+         append_uri_pathel (u_query, u_query + strlen (u_query),
+                            true, &fnres);
+       }
+    }
+  else
     {
-      append_char (FN_QUERY_SEP, &fnres);
-      append_uri_pathel (u_query, u_query + strlen (u_query), true, &fnres);
+      if (fnres.tail)
+       append_char ('/', &fnres);
+      u_file = replaced_filename;
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
     }
 
   /* Zero-terminate the file name. */

=== modified file 'src/url.h'
--- src/url.h   2010-05-08 19:56:15 +0000
+++ src/url.h   2010-09-24 09:21:44 +0000
@@ -99,7 +99,7 @@
 void scheme_disable (enum url_scheme);
 
 char *url_string (const struct url *, enum url_auth_mode);
-char *url_file_name (const struct url *);
+char *url_file_name (const struct url *, char *);
 
 char *uri_merge (const char *, const char *);
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]