>From 57327307c1ce56f0478e95bb32b8818ec0d9aa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Gonz=C3=A1lez?= Date: Mon, 16 Sep 2013 01:33:40 +0200 Subject: [PATCH 2/2] Expose wget functionality for extracting links from a web page. Provided by a new program called get-urls --- src/get-urls.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/get-urls.c diff --git a/src/get-urls.c b/src/get-urls.c new file mode 100644 index 0000000..9393a62 --- /dev/null +++ b/src/get-urls.c @@ -0,0 +1,75 @@ +#include "wget.h" + +#include +#include + +#include "url.h" +#include "convert.h" +#include "html-url.h" +#include "css-url.h" + + +void +print_urls (const char *file, const char *url, bool is_css) +{ + bool meta_disallow_follow = false; /* Output value */ + struct urlpos *child; + struct urlpos *children + = is_css ? get_urls_css_file (file, url) : + get_urls_html (file, url, &meta_disallow_follow, NULL); + + printf ("# %s\n", url); + + child = children; + for (; child; child = child->next) + { + printf ("%s #", child->url->url); + + if (child->ignore_when_downloading) + printf(" ignore"); + if (child->link_relative_p) + printf(" relative"); + if (child->link_complete_p) + printf(" complete"); + if (child->link_base_p) + printf(" base"); + if (child->link_inline_p) + printf(" inline"); + if (child->link_css_p) + printf(" fromcss"); + if (child->link_expect_html) + printf(" html"); + if (child->link_expect_css) + printf(" css"); + if (child->link_refresh_p) + printf(" refresh"); + printf("\n"); + } + + free_urlpos (children); +} + +const char *exec_name = "get-urls"; +struct options opt; + +int +main (int argc, char *argv[]) +{ + bool is_css = false; + + if (argc > 1 && !strcmp("--css", argv[1])) + { + is_css = true; + argc--; + argv++; + } + + if (argc < 2) + { + fprintf(stderr, _("Usage: %s [--css] file original-URL\n"), exec_name); + return 1; + } + + print_urls (argv[1], argv[2], is_css); + return 0; +} -- 1.8.4