gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r8902 - in gnunet: . src/fs src/include


From: gnunet
Subject: [GNUnet-SVN] r8902 - in gnunet: . src/fs src/include
Date: Sun, 30 Aug 2009 15:07:10 -0600

Author: grothoff
Date: 2009-08-30 15:07:10 -0600 (Sun, 30 Aug 2009)
New Revision: 8902

Modified:
   gnunet/TODO
   gnunet/src/fs/fs.h
   gnunet/src/fs/fs_publish.c
   gnunet/src/include/gnunet_datastore_service.h
   gnunet/src/include/gnunet_protocols.h
Log:
adding indexing support

Modified: gnunet/TODO
===================================================================
--- gnunet/TODO 2009-08-30 21:06:23 UTC (rev 8901)
+++ gnunet/TODO 2009-08-30 21:07:10 UTC (rev 8902)
@@ -37,11 +37,10 @@
   - implement testcases
 * FS (anonymous FS only)
   - design network structs (CS)
-    + list-indexed, index, unindex
+    + list-indexed, unindex
     + search/download, response
   - implement basic FS library
     + sharing API
-      ~ publish (indexing)
       ~ unindex & list indexed!!! (need publish to be done)
       ~ search (need publish to be done)
       ~ download (need publish/search to be done)
@@ -68,6 +67,7 @@
     + location URIs (publish, search, download)
     + persistence support (publish, unindex, search, download)
     + datastore reservation (publishing)
+    + indexing: index-failure-cleanup
   - implement adv. testcases 
     + insert: sblocks, loc uris
     + download: loc uris

Modified: gnunet/src/fs/fs.h
===================================================================
--- gnunet/src/fs/fs.h  2009-08-30 21:06:23 UTC (rev 8901)
+++ gnunet/src/fs/fs.h  2009-08-30 21:07:10 UTC (rev 8902)
@@ -289,6 +289,21 @@
       void *reader_cls;
 
       /**
+       * Name of the file (must be an absolute path).
+       * Only required for indexing.  FIXME: not yet
+       * initialized!
+       */
+      char *filename;
+
+      /**
+       * If this file is being indexed, this value
+       * is set to the hash over the entire file
+       * (when the indexing process is started). 
+       * Otherwise this field is not used.
+       */
+      GNUNET_HashCode file_id;
+
+      /**
        * Size of the file (in bytes).
        */
       uint64_t file_size;
@@ -430,6 +445,13 @@
   GNUNET_SCHEDULER_TaskIdentifier upload_task;
 
   /**
+   * Our own client handle for the FS service;
+   * only briefly used when we start to index a
+   * file, otherwise NULL.
+   */
+  struct GNUNET_CLIENT_Connection *client;
+
+  /**
    * Typically GNUNET_NO.  Set to GNUNET_YES if
    * "upload_task" is GNUNET_SCHEDULER_NO_TASK
    * and we're waiting for a response from the
@@ -507,6 +529,29 @@
 
 
 /**
+ * @brief index block (indexing a DBlock that 
+ *        can be obtained directly from reading
+ *        the plaintext file)
+ */
+struct OnDemandBlock
+{
+  /**
+   * Hash code of the entire content of the
+   * file that was indexed (used to uniquely
+   * identify the plaintext file).
+   */
+  GNUNET_HashCode file_id;
+
+  /**
+   * At which offset should we be able to find
+   * this on-demand encoded block?
+   */
+  uint64_t offset;
+
+};
+
+
+/**
  * @brief keyword block (advertising data under a keyword)
  */
 struct KBlock
@@ -571,9 +616,58 @@
 };
 
 
+/**
+ * Message sent from a GNUnet (fs) publishing
+ * activity to the gnunet-fs-service to 
+ * initiate indexing of a file.  The service
+ * is supposed to check if the specified file
+ * is available and has the same cryptographic
+ * hash.  It should then respond with either
+ * a confirmation or a denial.
+ *
+ * On OSes where this works, it is considered
+ * acceptable if the service only checks that
+ * the path, device and inode match (it can
+ * then be assumed that the hash will also match
+ * without actually computing it; this is an
+ * optimization that should be safe given that
+ * the client is not our adversary).
+ */
 struct IndexStartMessage
 {
 
+  /**
+   * Message type will be 
+   * GNUNET_MESSAGE_TYPE_FS_INDEX_START.
+   */
+  struct GNUNET_MessageHeader header;
+
+  /**
+   * ID of device containing the file, as seen by the client.  This
+   * device ID is obtained using a call like "statvfs" (and converting
+   * the "f_fsid" field to a 32-bit big-endian number).  Use 0 if the
+   * OS does not support this, in which case the service must do a
+   * full hash recomputation.
+   */
+  uint32_t device;
+  
+  /**
+   * Inode of the file on the given device, as seen by the client
+   * ("st_ino" field from "struct stat").  Use 0 if the OS does not
+   * support this, in which case the service must do a full hash
+   * recomputation.
+   */
+  uint64_t inode;
+
+  /**
+   * Hash of the file that we would like to index.
+   */
+  GNUNET_HashCode file_id;
+
+  /* this is followed by a 0-terminated
+     filename of a file with the hash
+     "file_id" as seen by the client */
+
 };
 
 

Modified: gnunet/src/fs/fs_publish.c
===================================================================
--- gnunet/src/fs/fs_publish.c  2009-08-30 21:06:23 UTC (rev 8901)
+++ gnunet/src/fs/fs_publish.c  2009-08-30 21:07:10 UTC (rev 8902)
@@ -26,7 +26,7 @@
  * @author Christian Grothoff
  *
  * TODO:
- * - indexing support
+ * - indexing cleanup: unindex on failure (can wait)
  * - code-sharing with unindex (can wait)
  * - persistence support (can wait)
  * - datastore reservation support (optimization)
@@ -52,6 +52,14 @@
  */
 #define MAX_SBLOCK_SIZE 60000
 
+/**
+ * Blocksize to use when hashing files
+ * for indexing (blocksize for IO, not for
+ * the DBlocks).  Larger blocksizes can
+ * be more efficient but will be more disruptive
+ * as far as the scheduler is concerned.
+ */
+#define HASHING_BLOCKSIZE (1024 * 1024)
 
 /**
  * Main function that performs the upload.
@@ -471,6 +479,7 @@
   void *raw_data;
   char *dd;
   struct PutContCtx * dpc_cls;
+  struct OnDemandBlock odb;
 
   // FIXME: figure out how to share this code
   // with unindex!
@@ -593,8 +602,6 @@
                             enc);
   // NOTE: this block below is all that really differs
   // between publish/unindex!  Parameterize & move this code!
-  // FIXME: something around here would need to change
-  // for indexing!
   if (NULL == sc->dsh)
     {
       sc->upload_task
@@ -614,20 +621,42 @@
       dpc_cls->cont = &do_upload;
       dpc_cls->cont_cls = sc;
       dpc_cls->p = p;
-      GNUNET_DATASTORE_put (sc->dsh,
-                           sc->rid,
-                           &mychk->query,
-                           pt_size,
-                           enc,
-                           (p->current_depth == p->chk_tree_depth) 
-                           ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK 
-                           : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK,
-                           p->priority,
-                           p->anonymity,
-                           p->expirationTime,
-                           GNUNET_CONSTANTS_SERVICE_TIMEOUT,
-                           &ds_put_cont,
-                           dpc_cls);
+      if ( (p->is_directory) &&
+          (p->data.file.do_index) &&
+          (p->current_depth == p->chk_tree_depth) )
+       {
+         odb.offset = p->publish_offset;
+         odb.file_id = p->data.file.file_id;
+         GNUNET_DATASTORE_put (sc->dsh,
+                               sc->rid,
+                               &mychk->query,
+                               sizeof(struct OnDemandBlock),
+                               &odb,
+                               GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND,
+                               p->priority,
+                               p->anonymity,
+                               p->expirationTime,
+                               GNUNET_CONSTANTS_SERVICE_TIMEOUT,
+                               &ds_put_cont,
+                               dpc_cls);         
+       }
+      else
+       {
+         GNUNET_DATASTORE_put (sc->dsh,
+                               sc->rid,
+                               &mychk->query,
+                               pt_size,
+                               enc,
+                               (p->current_depth == p->chk_tree_depth) 
+                               ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK 
+                               : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK,
+                               p->priority,
+                               p->anonymity,
+                               p->expirationTime,
+                               GNUNET_CONSTANTS_SERVICE_TIMEOUT,
+                               &ds_put_cont,
+                               dpc_cls);
+       }
     }
   if (p->current_depth == p->chk_tree_depth)
     {
@@ -668,7 +697,154 @@
 }
 
 
+
+
 /**
+ * Process the response (or lack thereof) from
+ * the "fs" service to our 'start index' request.
+ *
+ * @param cls closure (of type "struct GNUNET_FS_PublishContext*"_)
+ * @param msg the response we got
+ */
+static void
+process_index_start_response (void *cls,
+                             const struct GNUNET_MessageHeader *msg)
+{
+  struct GNUNET_FS_PublishContext *sc = cls;
+  struct GNUNET_FS_FileInformation *p;
+  const char *emsg;
+  uint16_t msize;
+
+  GNUNET_CLIENT_disconnect (sc->client);
+  sc->client = NULL;
+  p = sc->fi_pos;
+  if (msg == NULL)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                 p->data.file.filename,
+                 _("timeout on index-start request to `fs' service"));
+      p->data.file.do_index = GNUNET_NO;
+      publish_content (sc, p);
+      return;
+    }
+  if (ntohs (msg->type) != GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK)
+    {
+      msize = ntohs (msg->size);
+      emsg = (const char *) &msg[1];
+      if ( (msize <= sizeof (struct GNUNET_MessageHeader)) ||
+          (emsg[msize - sizeof(struct GNUNET_MessageHeader) - 1] != '\0') )
+       emsg = gettext_noop ("unknown error");
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                 p->data.file.filename,
+                 gettext (emsg));
+      p->data.file.do_index = GNUNET_NO;
+      publish_content (sc, p);
+      return;
+    }
+  /* success! continue with indexing */
+  publish_content (sc, p);
+}
+
+
+#if LINUX
+#include <sys/statvfs.h>
+#endif
+
+/**
+ * Function called once the hash computation over an
+ * indexed file has completed.
+ *
+ * @param cls closure, our publishing context
+ * @param res resulting hash, NULL on error
+ */
+static void 
+hash_for_index_cb (void *cls,
+                  const GNUNET_HashCode *
+                  res)
+{
+  struct GNUNET_FS_PublishContext *sc = cls;
+  struct GNUNET_FS_FileInformation *p;
+  struct IndexStartMessage *ism;
+  size_t slen;
+  struct GNUNET_CLIENT_Connection *client;
+#if LINUX
+  struct stat sbuf;
+  struct statvfs fbuf;
+#endif
+
+  p = sc->fi_pos;
+  if (NULL == res) 
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                 p->data.file.filename,
+                 _("failed to compute hash"));
+      p->data.file.do_index = GNUNET_NO;
+      publish_content (sc, p);
+      return;
+    }
+  slen = strlen (p->data.file.filename) + 1;
+  if (slen > GNUNET_SERVER_MAX_MESSAGE_SIZE - sizeof(struct IndexStartMessage))
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                 p->data.file.filename,
+                 _("filename too long"));
+      p->data.file.do_index = GNUNET_NO;
+      publish_content (sc, p);
+      return;
+    }
+  client = GNUNET_CLIENT_connect (sc->h->sched,
+                                 "fs",
+                                 sc->h->cfg);
+  if (NULL == client)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                 p->data.file.filename,
+                 _("could not connect to `fs' service"));
+      p->data.file.do_index = GNUNET_NO;
+      publish_content (sc, p);
+      return;
+    }
+  p->data.file.file_id = *res;
+  ism = GNUNET_malloc (sizeof(struct IndexStartMessage) +
+                      slen);
+  ism->header.size = htons(sizeof(struct IndexStartMessage) +
+                          slen);
+  ism->header.type = htons(GNUNET_MESSAGE_TYPE_FS_INDEX_START);
+  /* FIXME: activate this on other OSes that
+     support it (or something very similar; make
+     sure to also adjust corresponding code
+     on the service-side) */
+  /* FIXME: the block below should probably be
+     abstracted into a function in the DISK API */
+#if LINUX
+  if ( (0 == stat(p->data.file.filename,
+                 &sbuf)) &&
+       (0 == statvfs (p->data.file.filename,
+                     &fbuf) ) )
+    {
+      ism->device = htonl ((uint32_t) fbuf.f_fsid);
+      ism->inode = GNUNET_htonll( (uint64_t) sbuf.st_ino);
+    }
+#endif
+  memcpy (&ism[1],
+         p->data.file.filename,
+         slen);
+  sc->client = client;
+  GNUNET_CLIENT_transmit_and_get_response (client,
+                                          &ism->header,
+                                          GNUNET_TIME_UNIT_FOREVER_REL,
+                                          &process_index_start_response,
+                                          sc);
+  GNUNET_free (ism);
+}
+
+
+/**
  * Main function that performs the upload.
  * @param cls "struct GNUNET_FS_PublishContext" identifies the upload
  * @param tc task context
@@ -744,9 +920,23 @@
   if ( (!p->is_directory) &&
        (p->data.file.do_index) )
     {
-      // FIXME: need to pre-compute hash over
-      // the entire file and ask FS to prepare
-      // for indexing!
+      if (NULL == p->data.file.filename)
+       {
+         p->data.file.do_index = GNUNET_NO;
+         GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                     _("Can not index file `%s': %s.  Will try to insert instead.\n"),
+                     "<no-name>",
+                     _("needs to be an actual file"));
+         publish_content (sc, p);
+         return;
+       }      
+      GNUNET_CRYPTO_hash_file (sc->h->sched,
+                              GNUNET_SCHEDULER_PRIORITY_IDLE,
+                              GNUNET_NO,
+                              p->data.file.filename,
+                              HASHING_BLOCKSIZE,
+                              &hash_for_index_cb,
+                              sc);
       return;
     }
   publish_content (sc, p);

Modified: gnunet/src/include/gnunet_datastore_service.h
===================================================================
--- gnunet/src/include/gnunet_datastore_service.h       2009-08-30 21:06:23 UTC (rev 8901)
+++ gnunet/src/include/gnunet_datastore_service.h       2009-08-30 21:07:10 UTC (rev 8902)
@@ -46,7 +46,8 @@
 #define GNUNET_DATASTORE_BLOCKTYPE_IBLOCK 2
 #define GNUNET_DATASTORE_BLOCKTYPE_KBLOCK 3
 #define GNUNET_DATASTORE_BLOCKTYPE_SBLOCK 4
-#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 5
+#define GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND 5
+#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 6 /* not yet used */
 
 /**
  * Handle to the datastore service.

Modified: gnunet/src/include/gnunet_protocols.h
===================================================================
--- gnunet/src/include/gnunet_protocols.h       2009-08-30 21:06:23 UTC (rev 8901)
+++ gnunet/src/include/gnunet_protocols.h       2009-08-30 21:07:10 UTC (rev 8902)
@@ -367,6 +367,24 @@
  */
 #define GNUNET_MESSAGE_TYPE_DATASTORE_DROP 102
 
+
+/**
+ * Message sent by fs client to start indexing.
+ */
+#define GNUNET_MESSAGE_TYPE_FS_INDEX_START 128
+
+/**
+ * Affirmative response to a request for start indexing.
+ */
+#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK 129
+
+
+/**
+ * Response to a request for start indexing that
+ * refuses.
+ */
+#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_FAILED 130
+
 /*
   TODO:
   - DV





reply via email to

[Prev in Thread] Current Thread [Next in Thread]