Re: [Bug-wget] SIDB

From: Rodrigo S Wanderley
Subject: Re: [Bug-wget] SIDB
Date: Mon, 20 Jul 2009 21:35:08 -0300
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/23.1.50 (gnu/linux)

I've had some extra work to do here on the farm, but today I managed
to spend some time thinking about a solution that I think is the
simplest one that solves the problems Micah commented on in the last
e-mail.  The sidb.c file follows; early in the file I put a comment
explaining the idea I had today.  Any criticism is welcome.

I didn't touch the "reader" side of the database; I want to have the
writer established first.

/* Session Info DataBase (SIDB).
   Copyright (C) 2009 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
 (at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPL version 3 section 7

If you modify this program, or any covered work, by linking or
combining it with the OpenSSL project's OpenSSL library (or a
modified version of that library), containing parts covered by the
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
grants you additional permission to convey the resulting work.
Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work.  */

/* FIXME: Uris should be escaped, maybe using url_escape(). */

#include "wget.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hash.h"
#include "utils.h"
#include "url.h"
#include "sidb.h"

/* The unit-test helpers are only needed when building the test suite. */
#ifdef TESTING
#include "test.h"
#endif

/* Entries in the database have the form:

   <number>-type args

   Where <number> is an index identifying a given writer entry, type
   identifies the field (e.g. url, local_path, redirect, ...) and
   args are the arguments the "type" expects.

   A line that is further down the file has higher priority, that is,
   if we have two "1-local_path" entries in the database, the one
   with higher offset will be the correct entry.

   Later we can make functions to put order on the database, by
   removing lines that are not useful and putting fields with the
   same idx together. */

/* Number of sidb_writer_entry slots allocated initially
   (SIDB_WE_INITIAL_SIZE). */

sidb_writer *
sidb_writer_start(const char *filename)
  sidb_writer *w = xnew(sidb_writer);

  /* truncate file if it already exists */
  w->fp = fopen(filename, "w");
  if (! w->fp)
      return NULL;
  /* ensuring that buffers get flushed after a line is written */
  setvbuf(w->fp, NULL, _IOLBF, 0);

  w->entries = xnew_array(struct sidb_writer_entry,
  w->ecount = 0;
  w->ecapacity = SIDB_WE_INITIAL_SIZE;

  return w;

/* Finish writing with W and release it.

   Closes the database file and frees the entry array and the writer
   itself.  Every sidb_writer_entry pointer obtained from W is invalid
   afterwards.  A NULL writer is ignored.

   NOTE(review): the return type and body are reconstructed; the
   version this was taken from kept only the declarator.  Confirm
   against the prototype in sidb.h. */
void
sidb_writer_end(sidb_writer *w)
{
  if (! w)
    return;

  if (w->fp)
    fclose(w->fp);

  xfree(w->entries);
  xfree(w);
}

/* Register URI with writer W and log it as a "<idx>-uri URI" line.

   The entry array is doubled when it is full.  The new entry gets the
   next index (indices start at 1) and a redirect count of zero.

   Returns a pointer to the new entry.

   NOTE(review): the returned pointer refers to a slot inside
   W->entries.  When the array is grown by xrealloc it may move, which
   would leave pointers returned by earlier calls dangling.  Storing
   the entries individually (or handing out indices) would avoid this,
   but requires changing the declarations in sidb.h. */
sidb_writer_entry *
sidb_writer_entry_new(sidb_writer *w, const char *uri)
{
  sidb_writer_entry *we;

  /* Grow only when full; both statements belong to the condition. */
  if (w->ecount >= w->ecapacity)
    {
      w->ecapacity *= 2;
      w->entries = xrealloc(w->entries,
                            w->ecapacity * sizeof(w->entries[0]));
    }

  we = &w->entries[w->ecount++];

  /* ecount has already been incremented, so the first entry is 1. */
  we->idx = w->ecount;
  we->rcount = 0;
  we->writer = w;

  fprintf(w->fp, "%d-uri %s\n", we->idx, uri);

  return we;
}

/* Record that entry WE was redirected to URI.

   Writes a "<idx>-redirect-<n> URI STATUS" line, where <n> is the
   running redirect count of WE (incremented here, so the first
   redirect is numbered 1) and STATUS is REDIRECT_HTTP_STATUS_CODE.

   NOTE(review): return type reconstructed as void; confirm against
   the prototype in sidb.h. */
void
sidb_writer_entry_redirect(sidb_writer_entry *we, const char *uri,
                           int redirect_http_status_code)
{
  fprintf(we->writer->fp, "%d-redirect-%d %s %d\n",
          we->idx, ++we->rcount, uri,
          redirect_http_status_code);
}

/* Record FNAME as the local path of entry WE.

   Writes a "<idx>-local_path FNAME" line.  If the same entry is given
   several local paths, the line further down the file is the one that
   counts.

   NOTE(review): return type reconstructed as void; confirm against
   the prototype in sidb.h. */
void
sidb_writer_entry_local_path(sidb_writer_entry *we,
                             const char *fname)
{
  fprintf(we->writer->fp, "%d-local_path %s\n",
          we->idx, fname);
}

/* sidb_reader * */
/* sidb_read(const char *filename) */
/* {} */

/* sidb_entry * */
/* sidb_lookup_uri(sidb_reader *sr, const char *uri) */
/* { */
/*   sidb_entry *e; */
/*   sidb_redirect *r; */

/*   for (e = sr->we_list; e != NULL; e = e->next) */
/*     { */
/*       if (are_urls_equal(e->uri, uri)) */
/*      return e; */

/*       /\* check redirects *\/ */
/*       for (r = e->redirects; r != NULL; r = r->next) */
/*      { */
/*        if (are_urls_equal(r->uri, uri)) */
/*          return e; */
/*      } */
/*     } */

/*   return NULL;                       /\* uri not found *\/ */
/* } */

/* const char * */
/* sidb_entry_get_local_path(sidb_entry *e) */
/* { */
/*   return e ? e->local_path : NULL; */
/* } */

#ifdef TESTING

const char *
  sidb_writer *w = sidb_writer_start("/tmp/db.sidb");
  sidb_writer_entry *wes[5];

  wes[0] = sidb_writer_entry_new(w, "http://www.example.com";);
  wes[1] = sidb_writer_entry_new(w, "http://bogus.url";);
  wes[2] = sidb_writer_entry_new(w, "ftp://a.ftp.site/url/ex";);
  wes[3] = sidb_writer_entry_new(w, "ftp://another.ftp";);
  wes[4] = sidb_writer_entry_new(w, "http://the.fifth";);

  sidb_writer_entry_local_path(wes[0], "/tmp/test");
  sidb_writer_entry_local_path(wes[1], "/tmp/test2");
  sidb_writer_entry_local_path(wes[2], "/tmp/test3");
  sidb_writer_entry_local_path(wes[3], "/tmp/test4");
  sidb_writer_entry_local_path(wes[4], "/tmp/test5");

  sidb_writer_entry_redirect(wes[0], "http://www.a.redirect";, 501);

  return NULL;
#endif  /* TESTING */

Rodrigo S. Wanderley <address@hidden>
  -- Blog: http://rsw.digi.com.br

