diff --git a/doc/wget.texi b/doc/wget.texi index f42773e..2990408 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -2357,6 +2357,11 @@ your shell from expanding it, like in @samp{-A "*.mp3"} or @samp{-A '*.mp3'}. @itemx --reject-regex @var{urlregex} Specify a regular expression to accept or reject the complete URL. +Note that the effect of @samp{--accept-regex} and +@samp{--reject-regex} is suppressed for +fetching redirected URLs and for fetching page requisite URLs if +@samp{--page-requisites} is specified. + @item --regex-type @var{regextype} Specify the regular expression type. Possible types are @samp{posix} or @samp{pcre}. Note that to be able to use @samp{pcre} type, wget has to be @@ -2437,12 +2442,21 @@ Specify a comma-separated list of directories you wish to exclude from download (@pxref{Directory-Based Limits}). Elements of @var{list} may contain wildcards. +Note that the effect of @samp{--include-directories} and +@samp{--exclude-directories} is suppressed for +fetching redirected URLs and for fetching page requisite URLs if +@samp{--page-requisites} is specified. + @item -np @item --no-parent Do not ever ascend to the parent directory when retrieving recursively. This is a useful option, since it guarantees that only the files @emph{below} a certain hierarchy will be downloaded. @xref{Directory-Based Limits}, for more details. + +Note that the effect of @samp{--no-parent} is suppressed for +fetching redirected URLs and for fetching page requisite URLs if +@samp{--page-requisites} is specified. 
@end table @c man end diff --git a/src/convert.h b/src/convert.h index e3ff6f0..af0ab79 100644 --- a/src/convert.h +++ b/src/convert.h @@ -72,6 +72,7 @@ struct urlpos { unsigned int link_noquote_html_p :1; /* from HTML, but doesn't need " */ unsigned int link_expect_html :1; /* expected to contain HTML */ unsigned int link_expect_css :1; /* expected to contain CSS */ + unsigned int link_redirect_p :1; /* the url comes from a redirection */ unsigned int link_refresh_p :1; /* link was received from */ diff --git a/src/recur.c b/src/recur.c index 1469e31..7bbcd44 100644 --- a/src/recur.c +++ b/src/recur.c @@ -651,13 +651,14 @@ download_child (const struct urlpos *upos, struct url *parent, int depth, If we descended to a different host or changed the scheme, ignore opt.no_parent. Also ignore it for documents needed to display - the parent page when in -p mode. */ + the parent page when in -p mode, or for redirections. */ if (opt.no_parent && schemes_are_similar_p (u->scheme, start_url_parsed->scheme) && 0 == strcasecmp (u->host, start_url_parsed->host) && (u->scheme != start_url_parsed->scheme || u->port == start_url_parsed->port) - && !(opt.page_requisites && upos->link_inline_p)) + && !(opt.page_requisites && upos->link_inline_p) + && !upos->link_redirect_p) { if (!subdir_p (start_url_parsed->dir, u->dir)) { @@ -670,21 +671,28 @@ download_child (const struct urlpos *upos, struct url *parent, int depth, /* 5. If the file does not match the acceptance list, or is on the rejection list, chuck it out. The same goes for the directory - exclusion and inclusion lists. */ - if (opt.includes || opt.excludes) - { - if (!accdir (u->dir)) - { - DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir)); - reason = WG_RR_LIST; - goto out; - } - } - if (!accept_url (url)) + exclusion and inclusion lists. + + Ignore this test for documents needed to display the parent page + when in -p mode, or for redirections. 
*/ + if (!(opt.page_requisites && upos->link_inline_p) + && !upos->link_redirect_p) { - DEBUGP (("%s is excluded/not-included through regex.\n", url)); - reason = WG_RR_REGEX; - goto out; + if (opt.includes || opt.excludes) + { + if (!accdir (u->dir)) + { + DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir)); + reason = WG_RR_LIST; + goto out; + } + } + if (!accept_url (url)) + { + DEBUGP (("%s is excluded/not-included through regex.\n", url)); + reason = WG_RR_REGEX; + goto out; + } } /* 6. Check for acceptance/rejection rules. We ignore these rules @@ -800,18 +808,13 @@ descend_redirect (const char *redirected, struct url *orig_parsed, int depth, upos = xnew0 (struct urlpos); upos->url = new_parsed; + upos->link_redirect_p = 1; reason = download_child (upos, orig_parsed, depth, start_url_parsed, blacklist, iri); if (reason == WG_RR_SUCCESS) blacklist_add (blacklist, upos->url->url); - else if (reason == WG_RR_LIST || reason == WG_RR_REGEX) - { - DEBUGP (("Ignoring decision for redirects, decided to load it.\n")); - blacklist_add (blacklist, upos->url->url); - reason = WG_RR_SUCCESS; - } else DEBUGP (("Redirection \"%s\" failed the test.\n", redirected));