bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Fixed problem under content-disposition filename and recursive


From: Cheng Renquan
Subject: [Bug-wget] [PATCH] Fixed problem under content-disposition filename and recursive
Date: Fri, 22 May 2009 03:57:52 +0800

The original wget-1.11.4 has a problem when both content-disposition filename
handling and recursive mode are enabled:

  $ wget -P ~/archives/ -e contentdisposition=on -mS 
'http://tree.celinuxforum.org/CelfPubWiki/ELC2009Presentations?action=AttachFile&do=get&target=LTTng-presentation-celf-2009-0.2.pdf'
This writes the file to
  ~/archives/LTTng-presentation-celf-2009-0.2.pdf
as if mirror downloading (which enables recursive mode) had no effect,
while the desired result is
  ~/archives/tree.celinuxforum.org/CelfPubWiki/LTTng-presentation-celf-2009-0.2.pdf

Changes:
1. url_file_name interface: add a replaced_filename parameter, so that it can
   return either the default filename or a custom one supplied by the caller;
   in this example, that is the filename parsed from the content-disposition
   header.
2. parse_content_disposition function's return value: it is no longer
   prefixed with opt.dir_prefix; the function just returns the bare parsed
   filename, which makes it tidier.
3. Accordingly, the unit test of parse_content_disposition has also been changed:
   the opt.dir_prefix test items have been removed.

Signed-off-by: Cheng Renquan <address@hidden>

Index: wget-1.11.4/src/ftp.c
===================================================================
--- wget-1.11.4.orig/src/ftp.c
+++ wget-1.11.4/src/ftp.c
@@ -1090,7 +1090,7 @@ ftp_loop_internal (struct url *u, struct
   struct_stat st;
 
   if (!con->target)
-    con->target = url_file_name (u);
+    con->target = url_file_name (u, NULL);
 
   /* If the output_document was given, then this check was already done and
      the file didn't exist. Hence the !opt.output_document */
@@ -1203,7 +1203,7 @@ ftp_loop_internal (struct url *u, struct
             {
               /* Re-determine the file name. */
               xfree_null (con->target);
-              con->target = url_file_name (u);
+              con->target = url_file_name (u, NULL);
               locf = con->target;
             }
           continue;
@@ -1318,7 +1318,7 @@ ftp_get_listing (struct url *u, ccon *co
   /* Find the listing file name.  We do it by taking the file name of
      the URL and replacing the last component with the listing file
      name.  */
-  uf = url_file_name (u);
+  uf = url_file_name (u, NULL);
   lf = file_merge (uf, LIST_FILENAME);
   xfree (uf);
   DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
@@ -1406,7 +1406,7 @@ ftp_retrieve_list (struct url *u, struct
       ofile = xstrdup (u->file);
       url_set_file (u, f->name);
 
-      con->target = url_file_name (u);
+      con->target = url_file_name (u, NULL);
       err = RETROK;
 
       dlthis = true;
@@ -1829,7 +1829,7 @@ ftp_loop (struct url *u, int *dt, struct
               char *filename = (opt.output_document
                                 ? xstrdup (opt.output_document)
                                 : (con.target ? xstrdup (con.target)
-                                   : url_file_name (u)));
+                                   : url_file_name (u, NULL)));
               res = ftp_index (filename, u, f);
               if (res == FTPOK && opt.verbose)
                 {
Index: wget-1.11.4/src/http.c
===================================================================
--- wget-1.11.4.orig/src/http.c
+++ wget-1.11.4/src/http.c
@@ -1025,7 +1025,13 @@ extract_param (const char **source, para
    false.
 
    The file name is stripped of directory components and must not be
-   empty.  */
+   empty.
+
+   Historically, this function returned filename prefixed with opt.dir_prefix,
+   now that logic is handled in the caller, new code should pay attention,
+   changes by crq.
+
+   */
 
 static bool
 parse_content_disposition (const char *hdr, char **filename)
@@ -1043,25 +1049,7 @@ parse_content_disposition (const char *h
           value.b = 1 + (last_slash ? last_slash : last_bs);
         if (value.b == value.e)
           continue;
-        /* Start with the directory prefix, if specified. */
-        if (opt.dir_prefix)
-          {
-            int prefix_length = strlen (opt.dir_prefix);
-            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
-            int total_length;
-
-            if (add_slash) 
-              ++prefix_length;
-            total_length = prefix_length + (value.e - value.b);            
-            *filename = xmalloc (total_length + 1);
-            strcpy (*filename, opt.dir_prefix);
-            if (add_slash) 
-              (*filename)[prefix_length - 1] = '/';
-            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
-            (*filename)[total_length] = '\0';
-          }
-        else
-          *filename = strdupdelim (value.b, value.e);
+        *filename = strdupdelim (value.b, value.e);
         return true;
       }
   return false;
@@ -1807,18 +1795,24 @@ gethttp (struct url *u, struct http_stat
    * hstat.local_file is set by http_loop to the argument of -O. */
   if (!hs->local_file)
     {
+      char *local_file = NULL;
       /* Honor Content-Disposition whether possible. */
       if (!opt.content_disposition
           || !resp_header_copy (resp, "Content-Disposition", 
                                 hdrval, sizeof (hdrval))
-          || !parse_content_disposition (hdrval, &hs->local_file))
+          || !parse_content_disposition (hdrval, &local_file))
         {
           /* The Content-Disposition header is missing or broken. 
            * Choose unique file name according to given URL. */
-          hs->local_file = url_file_name (u);
+          hs->local_file = url_file_name (u, NULL);
+        }
+      else
+        {
+          DEBUGP(("Parsed filename from Content-Disposition: %s\n", 
local_file));
+          hs->local_file = url_file_name(u, local_file);
         }
     }
-  
+
   /* TODO: perform this check only once. */
   if (!hs->existence_checked && file_exists_p (hs->local_file))
     {
@@ -2370,7 +2364,7 @@ http_loop (struct url *u, char **newloc,
     }
   else if (!opt.content_disposition)
     {
-      hstat.local_file = url_file_name (u);
+      hstat.local_file = url_file_name (u, NULL);
       got_name = true;
     }
 
@@ -2412,7 +2406,7 @@ File `%s' already there; not retrieving.
    * destination file. */
   if (opt.timestamping 
       && !opt.content_disposition
-      && file_exists_p (url_file_name (u)))
+      && file_exists_p (url_file_name (u, NULL)))
     send_head_first = true;
   
   /* THE loop */
@@ -3202,26 +3196,20 @@ test_parse_content_disposition()
   int i;
   struct {
     char *hdrval;    
-    char *opt_dir_prefix;
     char *filename;
     bool result;
   } test_array[] = {
-    { "filename=\"file.ext\"", NULL, "file.ext", true },
-    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
-    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true 
},
-    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", "somedir", 
"somedir/file.ext", true },
-    { "attachment", NULL, NULL, false },
-    { "attachment", "somedir", NULL, false },
+    { "filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+    { "attachment", NULL, false },
   };
-  
+
   for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i) 
     {
       char *filename;
       bool res;
 
-      opt.dir_prefix = test_array[i].opt_dir_prefix;
       res = parse_content_disposition (test_array[i].hdrval, &filename);
 
       mu_assert ("test_parse_content_disposition: wrong result", 
Index: wget-1.11.4/src/url.c
===================================================================
--- wget-1.11.4.orig/src/url.c
+++ wget-1.11.4/src/url.c
@@ -1425,7 +1425,7 @@ append_dir_structure (const struct url *
    possible.  Does not create directories on the file system.  */
 
 char *
-url_file_name (const struct url *u)
+url_file_name (const struct url *u, char *replaced_filename)
 {
   struct growable fnres;        /* stands for "file name result" */
 
@@ -1474,19 +1474,29 @@ url_file_name (const struct url *u)
       append_dir_structure (u, &fnres);
     }
 
-  /* Add the file name. */
-  if (fnres.tail)
-    append_char ('/', &fnres);
-  u_file = *u->file ? u->file : "index.html";
-  append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
-
-  /* Append "?query" to the file name. */
-  u_query = u->query && *u->query ? u->query : NULL;
-  if (u_query)
-    {
-      append_char (FN_QUERY_SEP, &fnres);
-      append_uri_pathel (u_query, u_query + strlen (u_query), true, &fnres);
-    }
+  if (!replaced_filename)
+         {
+      /* Add the file name. */
+      if (fnres.tail)
+        append_char ('/', &fnres);
+      u_file = *u->file ? u->file : "index.html";
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
+
+      /* Append "?query" to the file name. */
+      u_query = u->query && *u->query ? u->query : NULL;
+      if (u_query)
+        {
+          append_char (FN_QUERY_SEP, &fnres);
+          append_uri_pathel (u_query, u_query + strlen (u_query), true, 
&fnres);
+        }
+         }
+  else
+         {
+      if (fnres.tail)
+        append_char ('/', &fnres);
+      u_file = replaced_filename;
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
+         }
 
   /* Zero-terminate the file name. */
   append_char ('\0', &fnres);
Index: wget-1.11.4/src/url.h
===================================================================
--- wget-1.11.4.orig/src/url.h
+++ wget-1.11.4/src/url.h
@@ -97,7 +97,7 @@ int scheme_default_port (enum url_scheme
 void scheme_disable (enum url_scheme);
 
 char *url_string (const struct url *, enum url_auth_mode);
-char *url_file_name (const struct url *);
+char *url_file_name (const struct url *, char *);
 
 char *uri_merge (const char *, const char *);
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]