From a15b4f08efdf59471c45d8f322c72248d75ebd54 Mon Sep 17 00:00:00 2001
From: Jure Grabnar
Date: Sun, 8 Jun 2014 08:08:38 +0200
Subject: [PATCH] Download to single temporary file.
---
src/ChangeLog | 14 ++++++++++
src/http.c | 10 ++++---
src/multi.c | 87 ++++++++++++++++++++++++++---------------------------------
src/multi.h | 11 ++++----
src/retr.c | 18 ++++++-------
5 files changed, 74 insertions(+), 66 deletions(-)
diff --git a/src/ChangeLog b/src/ChangeLog
index 55a1278..fd76bb1 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,19 @@
2014-05-07 Jure Grabnar
+ * multi.c: Parallel download is now stored in one temporary file rather than
+ multiple files.
+ (SUFFIX_TEMP): Define.
+ (name_temp_files): Use mkstemp() instead of tmpnam(); mkstemp() is safer
+ and allows for a customized path.
+ (init_temp_files, name_temp_files, delete_temp_files, clean_temp_files):
+ Rewritten to work with one file.
+ (merge_temp_files): Remove.
+ (rename_temp_file): Add.
+ * retr.c (retrieve_from_file): Change code to work with one temporary file.
+ * http.c (gethttp): Likewise.
+
+2014-05-07 Jure Grabnar
+
* multi.c: Add condition to fix memory corruption and downloading in parallel
in general.
* retr.c: Increase buffer size by 1 for '\0' to avoid memory corruption.
diff --git a/src/http.c b/src/http.c
index 388530c..d44530e 100644
--- a/src/http.c
+++ b/src/http.c
@@ -150,7 +150,6 @@ struct request {
};
extern int numurls;
-
/* Create a new, empty request. Set the request's method and its
arguments. METHOD should be a literal string (or it should outlive
the request) because it will not be freed. ARG will be freed by
@@ -2784,7 +2783,7 @@ read_header:
REGISTER_PERSISTENT_CONNECTION (4);
return RETRUNNEEDED;
}
- else if (!ALLOW_CLOBBER)
+ else if (!ALLOW_CLOBBER && !opt.metalink_file)
{
char *unique = unique_name (hs->local_file, true);
if (unique != hs->local_file)
@@ -3167,7 +3166,12 @@ read_header:
#endif /* def __VMS [else] */
/* Open the local file. */
- if (!output_stream)
+ if (opt.jobs > 1)
+ {
+ fp = fopen (hs->local_file, "r+b");
+ fseek (fp, hs->restval, SEEK_SET);
+ }
+ else if (!output_stream)
{
mkalldirs (hs->local_file);
if (opt.backups)
diff --git a/src/multi.c b/src/multi.c
index 43c2f73..665b50e 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -36,90 +36,79 @@ as that of the covered work. */
#include
#include
#include
+#include
#include "multi.h"
#include "url.h"
+
+/* Suffix for temporary files. Last 6 chars must be 'X' because of mkstemp(). */
+#define SUFFIX_TEMP ".tmp.XXXXXX"
+
static struct range *ranges;
-char **files;
+char *main_file;
/* Allocate space for temporary file names. */
void
-init_temp_files()
+init_temp_files(char *file_name)
{
- int i;
+ int alloc_size = (opt.dir_prefix ? strlen (opt.dir_prefix) + (sizeof "/") : 0)
+ + strlen (file_name) + (sizeof SUFFIX_TEMP) + 1;
- if(!(files = malloc (opt.jobs * (sizeof *files))))
+ if(!(main_file = malloc (alloc_size)))
{
- logprintf (LOG_VERBOSE, "Space for temporary file data could not be allocated.\n");
+ logprintf (LOG_VERBOSE, "Space for temporary file names could not be allocated.\n");
exit(1);
}
- for (i = 0; i < opt.jobs; ++i)
- if(!(files[i] = malloc (L_tmpnam * sizeof(char))))
- {
- logprintf (LOG_VERBOSE, "Space for temporary file names could not be allocated.\n");
- exit(1);
- }
}
/* Assign names to temporary files to be used. */
void
-name_temp_files()
+name_temp_files(char *file_name, long long int file_size)
{
- int i;
+ int fd;
- for (i = 0; i < opt.jobs; ++i)
- if(!tmpnam(files[i]))
- {
- logprintf (LOG_VERBOSE, "Temporary file name could not be assigned.\n");
- exit(1);
- }
+ if(opt.dir_prefix)
+ sprintf(main_file, "%s/%s%s", opt.dir_prefix, file_name, SUFFIX_TEMP);
+ else
+ sprintf(main_file, "%s%s", file_name, SUFFIX_TEMP);
+
+ if((fd = mkstemp (main_file)) < 0) /* mkstemp() returns -1 on failure; 0 is a valid descriptor. */
+ {
+ logprintf (LOG_VERBOSE, "Temporary file name could not be assigned.\n");
+ exit(1);
+ }
+
+ if (posix_fallocate(fd, 0, file_size))
+ {
+ logprintf (LOG_VERBOSE, "File could not be allocated.\n");
+ exit(1);
+ }
+ close (fd);
}
-/* Merge the temporary files in which the chunks are stored to form the
- resulting file(output). */
+/* Rename the temporary file used to the final file name. */
void
-merge_temp_files(char *output)
+rename_temp_file (char *new_file_name)
{
- FILE *out, *in;
- int j, ret;
- void *buf = malloc (MIN_CHUNK_SIZE);
+ rename (main_file, new_file_name);
- out = fopen (output, "wb");
- for(j = 0; j < opt.jobs; ++j)
- {
- in = fopen(files[j],"rb");
- ret = MIN_CHUNK_SIZE;
- while(ret == MIN_CHUNK_SIZE)
- {
- ret = fread(buf, 1, MIN_CHUNK_SIZE, in);
- fwrite(buf, 1, ret, out);
- }
- fclose(in);
- }
- fclose(out);
- free(buf);
+ free (main_file);
+ main_file = xstrdup (new_file_name);
}
/* Delete the temporary files used. */
void
delete_temp_files()
{
- int j = 0;
-
- while(j < opt.jobs)
- unlink(files[j++]);
+ unlink (main_file);
}
/* Clean the space allocated for temporary files data. */
void
clean_temp_files()
{
- int i;
-
- for (i = 0; i < opt.jobs; ++i)
- free (files[i]);
- free(files);
+ free (main_file);
}
/* Allocate ranges array to store the ranges data. */
@@ -188,7 +177,7 @@ spawn_thread (struct s_thread_ctx *thread_ctx, int index, int resource)
if(!thread_ctx[index].url_parsed)
return 1;
- thread_ctx[index].file = files[index];
+ thread_ctx[index].file = main_file;
thread_ctx[index].range = ranges + index;
(thread_ctx[index].range)->is_assigned = 1;
(thread_ctx[index].range)->resources[resource] = true;
diff --git a/src/multi.h b/src/multi.h
index ad9bd21..2c28a36 100644
--- a/src/multi.h
+++ b/src/multi.h
@@ -32,6 +32,7 @@ as that of the covered work. */
#define MULTI_H
#include
+#include
#include "wget.h"
@@ -62,15 +63,15 @@ struct s_thread_ctx
uerr_t status;
};
-void init_temp_files();
+void init_temp_files(char *);
-void name_temp_files();
+void name_temp_files(char *, long long int);
-void merge_temp_files(char *);
+void rename_temp_file (char *);
-void delete_temp_files();
+void delete_temp_files ();
-void clean_temp_files();
+void clean_temp_files ();
void init_ranges();
diff --git a/src/retr.c b/src/retr.c
index 2f45fa5..35f500c 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -1087,7 +1087,7 @@ retrieve_from_file (const char *file, bool html, int *count)
elect_resources (mlink);
elect_checksums (mlink);
- init_temp_files();
+ init_temp_files(mlink->files->name);
init_ranges ();
thread_ctx = malloc (opt.jobs * (sizeof *thread_ctx));
@@ -1108,9 +1108,10 @@ retrieve_from_file (const char *file, bool html, int *count)
if (j < opt.jobs)
opt.jobs = j;
- name_temp_files ();
+ name_temp_files (file->name, file->size);
sem_init (&retr_sem, 0, 0);
+
j = ranges_covered = 0;
resource = file->resources;
@@ -1255,10 +1256,10 @@ retrieve_from_file (const char *file, bool html, int *count)
sprintf(file_path, "%s/%s", opt.dir_prefix, file->name);
else
sprintf(file_path, "%s", file->name);
- mkalldirs(file_path);
- merge_temp_files(file_path);
+
+ rename_temp_file (file_path);
res = verify_file_hash(file_path, file->checksums);
- free(file_path);
+ free (file_path);
if(!res)
{
++*count;
@@ -1274,13 +1275,12 @@ retrieve_from_file (const char *file, bool html, int *count)
logprintf (LOG_VERBOSE,
_("Retrying to download(%s). (TRY #%d)\n"),
file->name, ++retries + 1);
+ delete_temp_files();
continue;
}
}
}
- delete_temp_files();
-
clean_range_res_data();
if (opt.quota && total_downloaded_bytes > opt.quota)
{
@@ -1292,7 +1292,6 @@ retrieve_from_file (const char *file, bool html, int *count)
free(thread_ctx);
clean_ranges ();
- clean_temp_files ();
delete_mlink(mlink);
}
else
@@ -1481,7 +1480,8 @@ rotate_backups(const char *fname)
sprintf (from, "%s%s%d", fname, SEP, i - 1);
rename (from, to);
}
-
+ sprintf (to, "%s%s%d", fname, SEP, 1);
+ rename(fname, to);
sprintf (to, "%s%s%d", fname, SEP, 1);
rename(fname, to);
}
--
2.0.0