diff -Naru wget-1.12/configure.ac wget-1.12.new/configure.ac --- wget-1.12/configure.ac 2009-09-22 18:39:49.000000000 +0200 +++ wget-1.12.new/configure.ac 2010-01-25 01:55:19.000000000 +0100 @@ -548,6 +548,61 @@ dnl Needed by src/Makefile.am AM_CONDITIONAL([IRI_IS_ENABLED], [test "X$iri" != "Xno"]) +AC_ARG_ENABLE(djvu, + AC_HELP_STRING([--disable-djvu],[disable DjVu indirect documents support]), + [case "${enable_djvu}" in + no) + dnl Disable DjVu checking + AC_MSG_NOTICE([disabling DjVu at user request]) + djvu=no + ;; + yes) + dnl DjVu explicitly enabled: a missing library is then a hard error + djvu=yes + force_djvu=yes + ;; + auto) + dnl Auto-detect DjVu + djvu=yes + ;; + *) + AC_MSG_ERROR([Invalid --enable-djvu argument \`$enable_djvu']) + ;; + esac + ], [ + dnl If nothing is specified, assume auto-detection + djvu=yes + ] +) + +AC_ARG_WITH(ddjvuapi, AC_HELP_STRING([--with-ddjvuapi=[DIR]], + [Support DjVu (needs DjVuLibre)]), + ddjvuapi=$withval, ddjvuapi="") +if test "X$djvu" != "Xno"; then + if test "$ddjvuapi" != ""; then + DDJVUAPI_LIBS="-L$ddjvuapi/lib -ldjvulibre" + DDJVUAPI_CFLAGS="-I$ddjvuapi/include" + else + PKG_CHECK_MODULES(DDJVUAPI, ddjvuapi, [:], [if test "X$force_djvu" = "Xyes"; then AC_MSG_ERROR([DjVu support was requested but ddjvuapi was not found]); fi; djvu=no]) + fi + + # pkg-config or --with-ddjvuapi only says where DjVuLibre should be. + # Make sure the preprocessor can really find libdjvu/ddjvuapi.h; + # if it cannot, djvu is set to "no" and the feature is left out + # of the build.
+ AC_CHECK_HEADER(libdjvu/ddjvuapi.h, ,[djvu=no]) + + if test "X$djvu" != "Xno" ; then + AC_DEFINE([ENABLE_DJVU], 1, [Define if DjVu support is enabled.]) + AC_MSG_NOTICE([Enabling support for DjVu.]) + else + AC_MSG_WARN([Libdjvu not found]) + fi +fi + +dnl Needed by src/Makefile.am +AM_CONDITIONAL([DJVU_IS_ENABLED], [test "X$djvu" != "Xno"]) + dnl dnl Create output diff -Naru wget-1.12/src/Makefile.am wget-1.12.new/src/Makefile.am --- wget-1.12/src/Makefile.am 2009-09-22 18:39:49.000000000 +0200 +++ wget-1.12.new/src/Makefile.am 2010-01-25 02:01:27.000000000 +0100 @@ -38,13 +38,18 @@ DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\" LIBS = @LIBSSL@ @LIBGNUTLS@ @LIBICONV@ @LIBINTL@ @LIBS@ +if DJVU_IS_ENABLED +DJVU_OBJ = djvu-url.c +LIBS += @DDJVUAPI_LIBS@ +endif + bin_PROGRAMS = wget wget_SOURCES = cmpt.c connect.c convert.c cookies.c ftp.c \ css.l css-url.c \ ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \ http.c init.c log.c main.c netrc.c progress.c ptimer.c \ recur.c res.c retr.c snprintf.c spider.c url.c \ - utils.c exits.c build_info.c $(IRI_OBJ) \ + utils.c exits.c build_info.c $(IRI_OBJ) $(DJVU_OBJ) \ css-url.h css-tokens.h connect.h convert.h cookies.h \ ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h \ http.h http-ntlm.h init.h log.h mswindows.h netrc.h \ @@ -54,7 +59,7 @@ nodist_wget_SOURCES = version.c EXTRA_wget_SOURCES = mswindows.c iri.c LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@ -AM_CPPFLAGS = -I$(top_srcdir)/lib @MD5_CPPFLAGS@ +AM_CPPFLAGS = -I$(top_srcdir)/lib @MD5_CPPFLAGS@ @DDJVUAPI_CFLAGS@ ../lib/libgnu.a: cd ../lib && $(MAKE) $(AM_MAKEFLAGS) diff -Naru wget-1.12/src/djvu-url.c wget-1.12.new/src/djvu-url.c --- wget-1.12/src/djvu-url.c 1970-01-01 01:00:00.000000000 +0100 +++ wget-1.12.new/src/djvu-url.c 2010-01-25 01:37:01.000000000 +0100 @@ -0,0 +1,95 @@ +#include + +#include +#ifdef HAVE_STRING_H +# include +#else +# include +#endif +#include +#include +#include + +#include "wget.h" 
+#include "utils.h" +#include "convert.h" +#include "html-url.h" + +#include +#include + +int handle_ddjvu_messages(ddjvu_context_t *ctx, int wait, ddjvu_message_tag_t wait_tag) +{ + int found = 0; + const ddjvu_message_t *msg; + if (wait) + ddjvu_message_wait(ctx); + while (!found && (msg = ddjvu_message_peek(ctx))) + { + switch(msg->m_any.tag) + { + case DDJVU_ERROR: + logprintf (LOG_NOTQUIET, "DjVu error\n"); + return 1; +/* case DDJVU_INFO: .... ; break; + case DDJVU_NEWSTREAM: .... ; break; + .... +*/ default: break; + } + if (msg->m_any.tag == wait_tag) + found=1; + ddjvu_message_pop(ctx); + if (wait_tag == 0) + break; + } + return 0; +} + + +struct urlpos * +get_urls_djvu (const char *file, const char *url) +{ + struct file_memory *fm; + struct map_context ctx; + + ctx.head = ctx.tail = NULL; + ctx.base = NULL; + ctx.parent_base = url ? url : opt.base_href; + ctx.document_file = file; + ctx.nofollow = 0; + + DEBUGP(("Opening djvu: %s\n", file)); + + ddjvu_context_t *djvu_ctx = ddjvu_context_create("wget"); + ddjvu_document_t *djvu_doc = ddjvu_document_create_by_filename(djvu_ctx, file, 1); + + handle_ddjvu_messages(djvu_ctx, 1, DDJVU_DOCINFO); + if (ddjvu_document_get_type(djvu_doc) == DDJVU_DOCTYPE_INDIRECT) { + int files = ddjvu_document_get_filenum(djvu_doc); + DEBUGP(("[DJVU] %d files\n", files)); + int i; // filenum + for (i=0;ilink_inline_p = 1; + } + } else {/* error */ + break; + } + } + } + + return ctx.head; +} + + + + + diff -Naru wget-1.12/src/djvu-url.h wget-1.12.new/src/djvu-url.h --- wget-1.12/src/djvu-url.h 1970-01-01 01:00:00.000000000 +0100 +++ wget-1.12.new/src/djvu-url.h 2010-01-20 23:57:19.000000000 +0100 @@ -0,0 +1,35 @@ +/* Declarations for djvu-url.c. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +This file is part of GNU Wget. 
+ +GNU Wget is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at +your option) any later version. + +GNU Wget is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Wget. If not, see <http://www.gnu.org/licenses/>. + +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ + +#ifndef DJVU_URL_H +#define DJVU_URL_H + +struct urlpos *get_urls_djvu (const char *, const char *); + +#endif /* DJVU_URL_H */ diff -Naru wget-1.12/src/http.c wget-1.12.new/src/http.c --- wget-1.12/src/http.c 2009-09-22 05:02:18.000000000 +0200 +++ wget-1.12.new/src/http.c 2010-01-25 02:05:18.000000000 +0100 @@ -93,6 +93,8 @@ #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" #define TEXTCSS_S "text/css" +#define IMAGEDJVU_S "image/vnd.djvu" +#define IMAGEDJVU2_S "image/x.djvu" /* Some status code validation macros: */ #define H_20X(x) (((x) >= 200) && ((x) < 300)) @@ -2010,6 +2012,10 @@ /* If its suffix is "html" or "htm" or similar, assume text/html.
*/ if (has_html_suffix_p (hs->local_file)) *dt |= TEXTHTML; +#ifdef ENABLE_DJVU + if (has_djvu_suffix_p (hs->local_file)) + *dt |= IMAGEDJVU; +#endif /* ENABLE_DJVU */ xfree (head); xfree_null (message); @@ -2180,6 +2186,16 @@ else *dt &= ~TEXTCSS; +#ifdef ENABLE_DJVU + if (type && ( + 0 == strncasecmp (type, IMAGEDJVU_S, strlen (IMAGEDJVU_S)) || + 0 == strncasecmp (type, IMAGEDJVU2_S, strlen (IMAGEDJVU2_S)))) + *dt |= IMAGEDJVU; + else + *dt &= ~IMAGEDJVU; + DEBUGP(("type: %s\n", type)); +#endif /* ENABLE_DJVU */ + if (opt.adjust_extension) { if (*dt & TEXTHTML) @@ -2194,6 +2210,12 @@ { ensure_extension (hs, ".css", dt); } +#ifdef ENABLE_DJVU + else if (*dt & IMAGEDJVU) + { + ensure_extension (hs, ".djvu", dt); + } +#endif /* ENABLE_DJVU */ } if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE @@ -2478,6 +2500,10 @@ /* If its suffix is "html" or "htm" or similar, assume text/html. */ if (has_html_suffix_p (hstat.local_file)) *dt |= TEXTHTML; +#ifdef ENABLE_DJVU + if (has_djvu_suffix_p (hstat.local_file)) + *dt |= IMAGEDJVU; +#endif /* ENABLE_DJVU */ ret = RETROK; goto exit; @@ -2755,7 +2781,7 @@ bool finished = true; if (opt.recursive) { - if (*dt & TEXTHTML) + if (*dt & (TEXTHTML | IMAGEDJVU)) { logputs (LOG_VERBOSE, _("\ Remote file exists and could contain links to other resources -- retrieving.\n\n")); diff -Naru wget-1.12/src/recur.c wget-1.12.new/src/recur.c --- wget-1.12/src/recur.c 2009-09-04 18:31:54.000000000 +0200 +++ wget-1.12.new/src/recur.c 2010-01-25 02:11:11.000000000 +0100 @@ -51,6 +51,9 @@ #include "html-url.h" #include "css-url.h" #include "spider.h" +#ifdef ENABLE_DJVU +# include "djvu-url.h" +#endif /* ENABLE_DJVU */ /* Functions for maintaining the URL queue. 
*/ @@ -231,6 +234,7 @@ int depth; bool html_allowed, css_allowed; bool is_css = false; + bool is_djvu = false; bool dash_p_leaf_HTML = false; if (opt.quota && total_downloaded_bytes > opt.quota) @@ -292,6 +296,16 @@ is_css = false; } +#ifdef ENABLE_DJVU + /* Treat DJVU same as html */ + if (html_allowed && file && status == RETROK + && (dt & RETROKF) && (dt & IMAGEDJVU)) + { + descend = true; + is_djvu = true; + is_css = false; + } +#endif /* ENABLE_DJVU */ /* a little different, css_allowed can override content type lots of web servers serve css with an incorrect content type */ @@ -368,8 +382,11 @@ { bool meta_disallow_follow = false; struct urlpos *children - = is_css ? get_urls_css_file (file, url) : - get_urls_html (file, url, &meta_disallow_follow, i); + = is_css ? get_urls_css_file (file, url) : ( +#ifdef ENABLE_DJVU + is_djvu ? get_urls_djvu (file, url) : +#endif /* ENABLE_DJVU */ + get_urls_html (file, url, &meta_disallow_follow, i)); if (opt.use_robots && meta_disallow_follow) { diff -Naru wget-1.12/src/utils.c wget-1.12.new/src/utils.c --- wget-1.12/src/utils.c 2009-09-22 05:05:52.000000000 +0200 +++ wget-1.12.new/src/utils.c 2010-01-25 02:12:16.000000000 +0100 @@ -1092,6 +1092,20 @@ return false; } +#ifdef ENABLE_DJVU +bool +has_djvu_suffix_p (const char *fname) +{ + char *suf; + + if ((suf = suffix (fname)) == NULL) + return false; + if (!strcasecmp (suf, "djvu")) + return true; + return false; +} +#endif /* ENABLE_DJVU */ + /* Read a line from FP and return the pointer to freshly allocated storage. The storage space is obtained through malloc() and should be freed with free() when it is no longer needed. 
diff -Naru wget-1.12/src/utils.h wget-1.12.new/src/utils.h --- wget-1.12/src/utils.h 2009-09-12 04:29:35.000000000 +0200 +++ wget-1.12.new/src/utils.h 2010-01-25 02:12:35.000000000 +0100 @@ -95,6 +95,9 @@ bool has_wildcards_p (const char *); bool has_html_suffix_p (const char *); +#ifdef ENABLE_DJVU +bool has_djvu_suffix_p (const char *); +#endif /* ENABLE_DJVU */ char *read_whole_line (FILE *); struct file_memory *read_file (const char *); diff -Naru wget-1.12/src/wget.h wget-1.12.new/src/wget.h --- wget-1.12/src/wget.h 2009-09-22 05:07:58.000000000 +0200 +++ wget-1.12.new/src/wget.h 2010-01-17 01:05:09.000000000 +0100 @@ -320,8 +320,9 @@ HEAD_ONLY = 0x0004, /* only send the HEAD request */ SEND_NOCACHE = 0x0008, /* send Pragma: no-cache directive */ ACCEPTRANGES = 0x0010, /* Accept-ranges header was found */ - ADDED_HTML_EXTENSION = 0x0020, /* added ".html" extension due to -E */ - TEXTCSS = 0x0040 /* document is of type text/css */ + ADDED_HTML_EXTENSION = 0x0020, /* added ".html" extension due to -E */ + TEXTCSS = 0x0040, /* document is of type text/css */ + IMAGEDJVU = 0x0080 /* document is of image/vnd.djvu */ }; /* Universal error type -- used almost everywhere. Error reporting of