>From a15b4f08efdf59471c45d8f322c72248d75ebd54 Mon Sep 17 00:00:00 2001 From: Jure Grabnar Date: Sun, 8 Jun 2014 08:08:38 +0200 Subject: [PATCH] Download to single temporary file. --- src/ChangeLog | 14 ++++++++++ src/http.c | 10 ++++--- src/multi.c | 87 ++++++++++++++++++++++++++--------------------------------- src/multi.h | 11 ++++---- src/retr.c | 18 ++++++------- 5 files changed, 74 insertions(+), 66 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 55a1278..fd76bb1 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,19 @@ 2014-05-07 Jure Grabnar + * multi.c: Parallel download is now stored in one temporary file rather than + multiple files. + (SUFFIX_TEMP): Define. + (name_temp_files): Use function mkstemp() instead of tmpnam() which is safer + and allows for customized path. + (init_temp_files, name_temp_files, delete_temp_files, clean_temp_files): + Rewritten to work with one file. + (merge_temp_files): Remove. + (rename_temp_file): Add. + * retr.c (retrieve_from_file): Change code to work with one temporary file. + * http.c (gethttp): Likewise. + +2014-05-07 Jure Grabnar + * multi.c: Add condition to fix memory corruption and downloading in parallel in general. * retr.c: Increase buffer size by 1 for '\0' to avoid memory corruption. diff --git a/src/http.c b/src/http.c index 388530c..d44530e 100644 --- a/src/http.c +++ b/src/http.c @@ -150,7 +150,6 @@ struct request { }; extern int numurls; - /* Create a new, empty request. Set the request's method and its arguments. METHOD should be a literal string (or it should outlive the request) because it will not be freed. ARG will be freed by @@ -2784,7 +2783,7 @@ read_header: REGISTER_PERSISTENT_CONNECTION (4); return RETRUNNEEDED; } - else if (!ALLOW_CLOBBER) + else if (!ALLOW_CLOBBER && !opt.metalink_file) { char *unique = unique_name (hs->local_file, true); if (unique != hs->local_file) @@ -3167,7 +3166,12 @@ read_header: #endif /* def __VMS [else] */ /* Open the local file. */ - if (!output_stream) + if (opt.jobs > 1) + { + fp = fopen (hs->local_file, "r+b"); + fseek (fp, hs->restval, SEEK_SET); + } + else if (!output_stream) { mkalldirs (hs->local_file); if (opt.backups) diff --git a/src/multi.c b/src/multi.c index 43c2f73..665b50e 100644 --- a/src/multi.c +++ b/src/multi.c @@ -36,90 +36,79 @@ as that of the covered work. */ #include #include #include +#include #include "multi.h" #include "url.h" + +/* Suffix for temporary files. Last 6 chars must be 'X' because of mkstemp(). */ +#define SUFFIX_TEMP ".tmp.XXXXXX" + static struct range *ranges; -char **files; +char *main_file; /* Allocate space for temporary file names. */ void -init_temp_files() +init_temp_files(char *file_name) { - int i; + int alloc_size = (opt.dir_prefix ? strlen (opt.dir_prefix) + (sizeof "/") : 0) + + strlen (file_name) + (sizeof SUFFIX_TEMP) + 1; - if(!(files = malloc (opt.jobs * (sizeof *files)))) + if(!(main_file = malloc (alloc_size))) { - logprintf (LOG_VERBOSE, "Space for temporary file data could not be allocated.\n"); + logprintf (LOG_VERBOSE, "Space for temporary file names could not be allocated.\n"); exit(1); } - for (i = 0; i < opt.jobs; ++i) - if(!(files[i] = malloc (L_tmpnam * sizeof(char)))) - { - logprintf (LOG_VERBOSE, "Space for temporary file names could not be allocated.\n"); - exit(1); - } } /* Assign names to temporary files to be used. */ void -name_temp_files() +name_temp_files(char *file_name, long long int file_size) { - int i; + int fd; - for (i = 0; i < opt.jobs; ++i) - if(!tmpnam(files[i])) - { - logprintf (LOG_VERBOSE, "Temporary file name could not be assigned.\n"); - exit(1); - } + if(opt.dir_prefix) + sprintf(main_file, "%s/%s%s", opt.dir_prefix, file_name, SUFFIX_TEMP); + else + sprintf(main_file, "%s%s", file_name, SUFFIX_TEMP); + + if(!(fd = mkstemp (main_file))) + { + logprintf (LOG_VERBOSE, "Temporary file name could not be assigned.\n"); + exit(1); + } + + if (posix_fallocate(fd, 0, file_size)) + { + logprintf (LOG_VERBOSE, "File could not be allocated.\n"); + exit(1); + } + close (fd); } -/* Merge the temporary files in which the chunks are stored to form the - resulting file(output). */ +/* Rename the temporary file used to the final file name. */ void -merge_temp_files(char *output) +rename_temp_file (char *new_file_name) { - FILE *out, *in; - int j, ret; - void *buf = malloc (MIN_CHUNK_SIZE); + rename (main_file, new_file_name); - out = fopen (output, "wb"); - for(j = 0; j < opt.jobs; ++j) - { - in = fopen(files[j],"rb"); - ret = MIN_CHUNK_SIZE; - while(ret == MIN_CHUNK_SIZE) - { - ret = fread(buf, 1, MIN_CHUNK_SIZE, in); - fwrite(buf, 1, ret, out); - } - fclose(in); - } - fclose(out); - free(buf); + free (main_file); + main_file = xstrdup (new_file_name); } /* Delete the temporary files used. */ void delete_temp_files() { - int j = 0; - - while(j < opt.jobs) - unlink(files[j++]); + unlink (main_file); } /* Clean the space allocated for temporary files data. */ void clean_temp_files() { - int i; - - for (i = 0; i < opt.jobs; ++i) - free (files[i]); - free(files); + free (main_file); } /* Allocate ranges array to store the ranges data. */ @@ -188,7 +177,7 @@ spawn_thread (struct s_thread_ctx *thread_ctx, int index, int resource) if(!thread_ctx[index].url_parsed) return 1; - thread_ctx[index].file = files[index]; + thread_ctx[index].file = main_file; thread_ctx[index].range = ranges + index; (thread_ctx[index].range)->is_assigned = 1; (thread_ctx[index].range)->resources[resource] = true; diff --git a/src/multi.h b/src/multi.h index ad9bd21..2c28a36 100644 --- a/src/multi.h +++ b/src/multi.h @@ -32,6 +32,7 @@ as that of the covered work. */ #define MULTI_H #include +#include #include "wget.h" @@ -62,15 +63,15 @@ struct s_thread_ctx uerr_t status; }; -void init_temp_files(); +void init_temp_files(char *); -void name_temp_files(); +void name_temp_files(char *, long long int); -void merge_temp_files(char *); +void rename_temp_file (char *); -void delete_temp_files(); +void delete_temp_files (); -void clean_temp_files(); +void clean_temp_files (); void init_ranges(); diff --git a/src/retr.c b/src/retr.c index 2f45fa5..35f500c 100644 --- a/src/retr.c +++ b/src/retr.c @@ -1087,7 +1087,7 @@ retrieve_from_file (const char *file, bool html, int *count) elect_resources (mlink); elect_checksums (mlink); - init_temp_files(); + init_temp_files(mlink->files->name); init_ranges (); thread_ctx = malloc (opt.jobs * (sizeof *thread_ctx)); @@ -1108,9 +1108,10 @@ retrieve_from_file (const char *file, bool html, int *count) if (j < opt.jobs) opt.jobs = j; - name_temp_files (); + name_temp_files (file->name, file->size); sem_init (&retr_sem, 0, 0); + j = ranges_covered = 0; resource = file->resources; @@ -1255,10 +1256,10 @@ retrieve_from_file (const char *file, bool html, int *count) sprintf(file_path, "%s/%s", opt.dir_prefix, file->name); else sprintf(file_path, "%s", file->name); - mkalldirs(file_path); - merge_temp_files(file_path); + + rename_temp_file (file_path); res = verify_file_hash(file_path, file->checksums); - free(file_path); + free (file_path); if(!res) { ++*count; @@ -1274,13 +1275,12 @@ retrieve_from_file (const char *file, bool html, int *count) logprintf (LOG_VERBOSE, _("Retrying to download(%s). (TRY #%d)\n"), file->name, ++retries + 1); + delete_temp_files(); continue; } } } - delete_temp_files(); - clean_range_res_data(); if (opt.quota && total_downloaded_bytes > opt.quota) { @@ -1292,7 +1292,6 @@ retrieve_from_file (const char *file, bool html, int *count) free(thread_ctx); clean_ranges (); - clean_temp_files (); delete_mlink(mlink); } else @@ -1481,7 +1480,8 @@ rotate_backups(const char *fname) sprintf (from, "%s%s%d", fname, SEP, i - 1); rename (from, to); } - + sprintf (to, "%s%s%d", fname, SEP, 1); + rename(fname, to); sprintf (to, "%s%s%d", fname, SEP, 1); rename(fname, to); } -- 2.0.0