bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] SIDB


From: Rodrigo S Wanderley
Subject: [Bug-wget] SIDB
Date: Mon, 13 Jul 2009 12:49:29 -0300
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.0.91 (gnu/linux)

Hi,

I'm trying to implement the Session Info Database.  I'm at the beginning
of the development, so the code does not handle errors yet.
Also, the reader and some other functionality are missing, and I am
handling only the first use case (see wiki).

Following is a diff against mainline; any comments or suggestions would
be appreciated.  I am sending the code now so I can see if anyone spots
any bad design issues that are better fixed at an early stage.

diff -r 1590345d2328 -r bd1566e9eea2 src/Makefile.am
--- a/src/Makefile.am   Sat Jul 11 13:50:52 2009 +0200
+++ b/src/Makefile.am   Mon Jul 13 12:11:03 2009 -0300
@@ -43,13 +43,13 @@
               css.l css-url.c css-tokens.h \
               ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \
               http.c init.c log.c main.c netrc.c progress.c ptimer.c     \
-              recur.c res.c retr.c snprintf.c spider.c url.c             \
+              recur.c res.c retr.c sidb.c snprintf.c spider.c url.c      \
               utils.c $(IRI_OBJ)                                         \
               css-url.h connect.h convert.h cookies.h                    \
               ftp.h gen-md5.h hash.h host.h html-parse.h html-url.h      \
               http.h http-ntlm.h init.h log.h mswindows.h netrc.h        \
               options.h progress.h ptimer.h recur.h res.h retr.h         \
-              spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h
+              sidb.h spider.h ssl.h sysdep.h url.h utils.h wget.h iri.h
 nodist_wget_SOURCES = build_info.c version.c
 EXTRA_wget_SOURCES = mswindows.c iri.c
 LDADD = $(LIBOBJS) ../lib/libgnu.a @MD5_LDADD@
diff -r 1590345d2328 -r bd1566e9eea2 src/sidb.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sidb.c        Mon Jul 13 12:11:03 2009 -0300
@@ -0,0 +1,287 @@
+/* Session Info DataBase (SIDB).
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GNU Wget.
+
+GNU Wget is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+GNU Wget is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
+
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work.  */
+
+/* FIXME: Uris should be escaped, maybe using url_escape(). */
+
+#include "wget.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hash.h"
+#include "utils.h"
+#include "url.h"
+#include "sidb.h"
+
+#ifdef TESTING
+#include "test.h"
+#endif
+
+/* The indent sequence written in front of every field that belongs to
+   an entry (the redirect and local_path lines) in the database file.
+   Declared const because it points at a string literal, which must
+   never be modified.  */
+static const char *sidb_indent = "  ";
+
+/* Create a writer that will store its database in FILENAME.  The
+   name is copied and the entry list starts out empty.  Returns the
+   new writer, or NULL if xnew or xstrdup returns NULL.  */
+sidb_writer *
+sidb_writer_start(const char *filename)
+{
+  sidb_writer *writer = xnew(sidb_writer);
+
+  if (writer != NULL)
+    {
+      writer->fname = xstrdup(filename);
+      if (writer->fname != NULL)
+        {
+          writer->we_list = NULL;
+          return writer;
+        }
+    }
+
+  /* Something could not be allocated: give back what we did get.  */
+  xfree_null(writer);
+  return NULL;
+}
+
+/* Write every entry of W to the file named W->fname, truncating
+   whatever the file held before.  For each entry this emits a
+   "uri:" line, one indented "redirect-N:" line per redirect and an
+   indented "local_path:" line.  If the file cannot be opened nothing
+   is written and the failure is not reported.  */
+void
+sidb_writer_flush(sidb_writer *w)
+{
+  sidb_entry *we;
+  sidb_redirect *r;
+  FILE *fp;
+  int i;
+
+  fp = fopen(w->fname, "w");
+  if (fp == NULL)
+    {
+      /* FIXME: Deal with error, think just not doing anything and
+         recording the error for later retrieval should be the best
+         thing. */
+      return;
+    }
+
+  for (we = w->we_list; we != NULL; we = we->next)
+    {
+      fprintf(fp, "uri: %s\n", we->uri);
+
+      /* Redirects are numbered from 1 in list order; because
+         sidb_entry_redirect inserts at the head of the list,
+         redirect-1 is the one that was recorded last.  */
+      i = 1;
+      for (r = we->redirects; r != NULL; r = r->next)
+        {
+          fprintf(fp, "%sredirect-%d: %s %d\n",
+                  sidb_indent, i, r->uri, r->http_status_code);
+          i++;
+        }
+
+      /* local_path is only set by sidb_entry_local_path, so it may
+         still be NULL here.  Passing NULL to %s is undefined
+         behavior, so the line is left out for such entries.  */
+      if (we->local_path != NULL)
+        fprintf(fp, "%slocal_path: %s\n", sidb_indent, we->local_path);
+    }
+
+  /* FIXME: the result of fclose is ignored, so a failed write goes
+     unnoticed, just like the fopen failure above.  */
+  fclose(fp);
+}
+
+/* Finish using W: write its contents to disk with sidb_writer_flush,
+   then free every entry (uri, local path and the complete redirect
+   list), the file name and W itself.  W must not be used afterwards.  */
+void
+sidb_writer_end(sidb_writer *w) {
+  sidb_entry *cur, *next;
+  sidb_redirect *r, *rnext;
+
+  sidb_writer_flush(w);
+
+  for (cur = w->we_list; cur != NULL; cur = next)
+    {
+      /* Remember the successor before CUR is freed.  */
+      next = cur->next;
+
+      /* The redirects form a linked list whose nodes each own a
+         copy of their uri; freeing only the head node would leak
+         the remaining nodes and all of the uri strings.  */
+      for (r = cur->redirects; r != NULL; r = rnext)
+        {
+          rnext = r->next;
+          xfree_null(r->uri);
+          xfree(r);
+        }
+
+      xfree_null(cur->uri);
+      xfree_null(cur->local_path);
+      xfree(cur);
+    }
+
+  xfree(w->fname);
+  xfree(w);
+}
+
+/* Return the entry of W whose uri matches URI according to
+   are_urls_equal.  If there is none yet, create one holding a copy
+   of URI, insert it at the head of W's entry list and return it.
+   Returns NULL, leaving W unchanged, if the new entry cannot be
+   allocated.  */
+sidb_entry *
+sidb_entry_new(sidb_writer *w, const char *uri)
+{
+  sidb_entry *we;
+
+  /* if we have the uri stored on the database return the
+     sidb_entry containing it, otherwise create a new entry. */
+  for (we = w->we_list; we != NULL; we = we->next)
+    {
+      if (are_urls_equal(we->uri, uri))
+        return we;
+    }
+
+  /* The remaining fields (local_path, redirects) are never assigned
+     here, so this relies on xnew0 handing back zeroed memory.  */
+  we = xnew0(sidb_entry);
+  if (we == NULL)
+    return NULL;                /* nothing allocated, nothing to undo */
+
+  we->uri = xstrdup(uri);
+  if (we->uri == NULL)
+    {
+      /* Release the half-built entry instead of leaking it; it has
+         not been linked into the list yet.  */
+      xfree(we);
+      return NULL;
+    }
+
+  /* head insert it on the sidb_entry structure */
+  we->next = w->we_list;
+  w->we_list = we;
+
+  return we;
+}
+
+/* Record on WE a redirect to URI together with
+   REDIRECT_HTTP_STATUS_CODE.  URI is copied, and the new node becomes
+   the head of WE's redirect list.  */
+void
+sidb_entry_redirect(sidb_entry *we, const char *uri,
+                          int redirect_http_status_code)
+{
+  sidb_redirect *node = xnew(sidb_redirect);
+
+  /* Fill in the node; it goes in front of the existing redirects.  */
+  node->next = we->redirects;
+  node->http_status_code = redirect_http_status_code;
+  node->uri = xstrdup(uri);
+
+  we->redirects = node;
+}
+
+/* Store a private copy of FNAME as the local path of WE, replacing
+   (and freeing) any path stored before.  If the copy cannot be made
+   the previous path is left in place.  */
+void
+sidb_entry_local_path(sidb_entry *we,
+                     const char *fname)
+{
+  char *lp = xstrdup(fname);
+
+  /* Don't touch the entry in case of errors */
+  if (lp == NULL)
+    return;
+
+  /* Hand over the copy made above rather than duplicating FNAME a
+     second time, which would leak LP on every call.  */
+  xfree_null(we->local_path);
+  we->local_path = lp;
+}
+
+void
+sidb_entry_remove(sidb_entry *we)
+{
+  /* Not implemented yet: this is an empty stub, so calling it leaves
+     WE and the list it belongs to untouched.
+
+     remove a entry, maybe if we were doubly linked this job would be
+     easier */
+}
+
+/* Search SR for URI.  An entry matches when are_urls_equal accepts
+   either its own uri or the uri of one of its redirects.  Returns
+   the first matching entry in list order, or NULL when URI is not in
+   the database.  */
+sidb_entry *
+sidb_lookup_uri(sidb_reader *sr, const char *uri)
+{
+  sidb_entry *entry = sr->we_list;
+
+  while (entry != NULL)
+    {
+      sidb_redirect *hop;
+
+      /* The entry's own uri is tried before its redirects.  */
+      if (are_urls_equal(entry->uri, uri))
+        return entry;
+
+      hop = entry->redirects;
+      while (hop != NULL)
+        {
+          if (are_urls_equal(hop->uri, uri))
+            return entry;
+          hop = hop->next;
+        }
+
+      entry = entry->next;
+    }
+
+  return NULL;
+}
+
+/* Return the local path stored in E, or NULL when E itself is NULL.
+   The returned pointer is the entry's own string, not a copy.  */
+const char *
+sidb_entry_get_local_path(sidb_entry *e)
+{
+  if (e == NULL)
+    return NULL;
+
+  return e->local_path;
+}
+
+#ifdef TESTING
+
+/* Unit test for sidb_lookup_uri.  Builds a writer with three entries
+   (the second one carrying three redirects) and checks that lookups
+   by entry uri and by redirect uri yield an entry with the expected
+   local path, and that an unknown uri is not found.  Returns NULL
+   when every check passes; mu_assert comes from test.h and is
+   presumably what returns the failure message otherwise.  As a side
+   effect sidb_writer_end writes the database to /tmp/sidb.  */
+const char *
+test_sidb_lookup_uri(void)
+{
+  sidb_writer *w = sidb_writer_start("/tmp/sidb");
+  sidb_entry *e1, *e2, *e3;
+  size_t i;                     /* compared against a sizeof quotient */
+  struct {
+    const char *uri;            /* uri to look up */
+    const char *expected_local_path;
+    bool result;                /* should the lookup succeed with
+                                   exactly that local path? */
+  } test_array[] = {
+    {"http://some_redirect3", "/etc/path2", true},
+    {"http://ex1.org", "/etc/path", true},
+    {"http://nonexistent.com", "/blah", false},
+    {"http://examplex.gov", "/home/user/sub/dir/file", true},
+  };
+
+  e1 = sidb_entry_new(w, "http://ex1.org");
+  sidb_entry_local_path(e1, "/etc/path");
+
+  e2 = sidb_entry_new(w, "http://www.another.com");
+  sidb_entry_local_path(e2, "/etc/path2");
+  sidb_entry_redirect(e2, "http://some_redirect", 500);
+  sidb_entry_redirect(e2, "http://some_redirect2", 501);
+  sidb_entry_redirect(e2, "http://some_redirect3", 503);
+
+  e3 = sidb_entry_new(w, "http://examplex.gov");
+  sidb_entry_local_path(e3, "/home/user/sub/dir/file");
+
+  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
+    {
+      bool res;
+      sidb_entry *se;
+
+      /* sidb_reader is a typedef of struct sidb_writer, so the
+         writer can be searched directly.  */
+      se = sidb_lookup_uri(w, test_array[i].uri);
+
+      if (se == NULL)          /* se == NULL means the uri is not in
+                                  the database */
+        {
+          mu_assert("test_sidb_lookup_uri: wrong result, not in db",
+                    test_array[i].result == false);
+        }
+      else
+        {
+          res = strcmp(test_array[i].expected_local_path,
+                       se->local_path) == 0;
+
+          mu_assert("test_sidb_lookup_uri: wrong result",
+                    test_array[i].result == res);
+        }
+    }
+
+  sidb_writer_end(w);
+
+  return NULL;
+}
+#endif /* TESTING */
diff -r 1590345d2328 -r bd1566e9eea2 src/sidb.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sidb.h        Mon Jul 13 12:11:03 2009 -0300
@@ -0,0 +1,85 @@
+/* Session Info DataBase (SIDB).
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GNU Wget.
+
+GNU Wget is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or
+(at your option) any later version.
+
+GNU Wget is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Wget.  If not, see <http://www.gnu.org/licenses/>.
+
+Additional permission under GNU GPL version 3 section 7
+
+If you modify this program, or any covered work, by linking or
+combining it with the OpenSSL project's OpenSSL library (or a
+modified version of that library), containing parts covered by the
+terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
+grants you additional permission to convey the resulting work.
+Corresponding Source for a non-source form of such a combination
+shall include the source code for the parts of OpenSSL used as well
+as that of the covered work.  */
+
+#ifndef SIDB_H
+#define SIDB_H
+
+typedef struct sidb_redirect {  /* one redirect recorded for an entry */
+  char *uri;                    /* uri of the redirect (copied by sidb_entry_redirect) */
+  int http_status_code;         /* status code recorded with the redirect */
+  struct sidb_redirect *next;   /* next redirect of the same entry, or NULL */
+} sidb_redirect;
+
+typedef struct sidb_entry {     /* one uri stored in the database */
+  char *uri;                    /* the uri itself (copied by sidb_entry_new) */
+  char *local_path;             /* set by sidb_entry_local_path; may be NULL */
+  sidb_redirect *redirects;     /* head of the redirect list, or NULL */
+  struct sidb_entry *next;      /* next entry of the database, or NULL */
+} sidb_entry;
+
+typedef struct sidb_writer {    /* a database being built in memory */
+  char *fname;                  /* name of the file the database is written to */
+  struct sidb_entry *we_list;   /* head of the entry list, or NULL when empty */
+} sidb_writer;
+
+typedef struct sidb_writer sidb_reader;  /* a reader has the same layout as a writer */
+
+/** SIDB Writer Facilities **/
+
+sidb_writer *
+sidb_writer_start(const char *filename);  /* new, empty writer that will store to FILENAME */
+
+void
+sidb_writer_end(sidb_writer *w);  /* write W to its file, then free W and its entries */
+
+sidb_entry *
+sidb_entry_new(sidb_writer *w, const char *uri);  /* entry of W for URI, created if absent */
+
+void
+sidb_entry_redirect(sidb_entry *rw, const char *uri,
+                   int redirect_http_status_code);  /* prepend a redirect (copy of URI, status) to RW */
+
+void
+sidb_entry_local_path(sidb_entry *rw,
+                     const char *fname);  /* set the local path of RW from FNAME */
+
+void
+sidb_entry_remove(sidb_entry *rw);  /* currently an empty stub in sidb.c */
+
+/** SIDB Reader Facilities **/
+/* sidb_reader * */
+/* sidb_read(const char *filename); */
+
+sidb_entry *
+sidb_lookup_uri(sidb_reader *r, const char *uri);  /* entry whose uri or a redirect uri equals URI, else NULL */
+
+const char *
+sidb_entry_get_local_path(sidb_entry *e);  /* local path of E; NULL when E is NULL */
+
+#endif /* SIDB_H */
diff -r 1590345d2328 -r bd1566e9eea2 src/test.c
--- a/src/test.c        Sat Jul 11 13:50:52 2009 +0200
+++ b/src/test.c        Mon Jul 13 12:11:03 2009 -0300
@@ -44,6 +44,7 @@
 const char *test_append_uri_pathel();
 const char *test_are_urls_equal();
 const char *test_is_robots_txt_url();
+const char *test_sidb_lookup_uri();
 
 int tests_run;
 
@@ -59,6 +60,7 @@
   mu_run_test (test_append_uri_pathel);
   mu_run_test (test_are_urls_equal);
   mu_run_test (test_is_robots_txt_url);
+  mu_run_test (test_sidb_lookup_uri);
   
   return NULL;
 }
diff -r 1590345d2328 -r bd1566e9eea2 ylwrap
--- a/ylwrap    Sat Jul 11 13:50:52 2009 +0200
+++ b/ylwrap    Mon Jul 13 12:11:03 2009 -0300
@@ -1,7 +1,7 @@
 #! /bin/sh
 # ylwrap - wrapper for lex/yacc invocations.
 
-scriptversion=2005-05-14.22
+scriptversion=2009-07-08.15
 
 # Copyright (C) 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005
 #   Free Software Foundation, Inc.
-- 
Rodrigo S. Wanderley <address@hidden>


reply via email to

[Prev in Thread] Current Thread [Next in Thread]