gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r18862 - gnunet-gtk/src/fs


From: gnunet
Subject: [GNUnet-SVN] r18862 - gnunet-gtk/src/fs
Date: Sat, 31 Dec 2011 00:52:54 +0100

Author: grothoff
Date: 2011-12-31 00:52:54 +0100 (Sat, 31 Dec 2011)
New Revision: 18862

Modified:
   gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c
Log:
-LRN: applying patch 4 from #2046 -  Count and propagate keywords instead of 
metadata

Modified: gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c
===================================================================
--- gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c  2011-12-30 
23:45:36 UTC (rev 18861)
+++ gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c  2011-12-30 
23:52:54 UTC (rev 18862)
@@ -329,6 +329,11 @@
   struct GNUNET_CONTAINER_MetaData *meta;
 
   /**
+   * Keywords for the file (derived from metadata).
+   */
+  struct GNUNET_FS_Uri *ksk_uri;
+
+  /**
    * Iterator for the entry.
    */
   GtkTreeIter iter;
@@ -336,10 +341,10 @@
 
 
 /**
- * Entry for each unique meta data entry to track how often
+ * Entry for each unique keyword to track how often
  * it occurred.  Contains the keyword and the counter.
  */
-struct MetaCounter
+struct KeywordCounter
 {
 
   /**
@@ -348,21 +353,6 @@
   const char *value;
 
   /**
-   * Mimetype of the value.
-   */
-  const char *value_mimetype;
-
-  /**
-   * Type of the value.
-   */
-  enum EXTRACTOR_MetaType type;
-
-  /**
-   * Format of the value.
-   */
-  enum EXTRACTOR_MetaFormat format;
-
-  /**
    * How many files have meta entries matching this value?
    * (type and format do not have to match).
    */
@@ -388,11 +378,11 @@
   GtkTreeStore *ts;
 
   /**
-   * Map from the hash over the meta value to an 'struct MetaCounter'
-   * counter that says how often this value was
+   * Map from the hash over the keyword to a 'struct KeywordCounter'
+   * counter that says how often this keyword was
    * encountered in the current directory.
    */
-  struct GNUNET_CONTAINER_MultiHashMap *metacounter;
+  struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
 
   /**
    * Map from the hash of a filename in the current directory
@@ -401,10 +391,10 @@
   struct GNUNET_CONTAINER_MultiHashMap *metamap;
 
   /**
-   * Metadata to exclude from using for KSK since it'll be associated
+   * Keywords to exclude from using for KSK since they'll be associated
    * with the parent as well.  NULL for nothing blocked.
    */
-  struct GNUNET_CONTAINER_MetaData *no_ksk;
+  struct GNUNET_FS_Uri *exclude_ksk;
 
   /**
    * Block options to use.
@@ -424,71 +414,45 @@
 
 
 /**
- * Add the given meta data item to the
- * meta data statistics tracker.
+ * Add the given keyword to the
+ * keyword statistics tracker.
  *
  * @param cls closure (user-defined)
- * @param plugin_name name of the plugin that produced this value;
- *        special values can be used (i.e. '<zlib>' for zlib being
- *        used in the main libextractor library and yielding
- *        meta data).
- * @param type libextractor-type describing the meta data
- * @param format basic format information about data
- * @param data_mime_type mime-type of data (not of the original file);
- *        can be NULL (if mime-type is not known)
- * @param data actual meta-data found
- * @param data_len number of bytes in data
- * @return 0 to continue extracting, 1 to abort
+ * @param keyword the keyword to count
+ * @param is_mandatory ignored
+ * @return always GNUNET_OK
  */
 static int
-add_to_meta_counter (void *cls, const char *plugin_name,
-                     enum EXTRACTOR_MetaType type,
-                     enum EXTRACTOR_MetaFormat format,
-                     const char *data_mime_type, const char *data,
-                     size_t data_len)
+add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
 {
   struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
-  struct MetaCounter *cnt;
+  struct KeywordCounter *cnt;
   GNUNET_HashCode hc;
-  size_t mlen;
-  size_t dlen;
+  size_t klen;
 
-  if ((format != EXTRACTOR_METAFORMAT_UTF8) &&
-      (format != EXTRACTOR_METAFORMAT_C_STRING))
-    return 0;
-  dlen = strlen (data) + 1;
-  GNUNET_CRYPTO_hash (data, dlen - 1, &hc);
+  klen = strlen (keyword) + 1;
+  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
   cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
   if (cnt == NULL)
   {
-    mlen = strlen (data_mime_type) + 1;
-    cnt = GNUNET_malloc (sizeof (struct MetaCounter) + dlen + mlen);
+    cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
     cnt->count = 1;
     cnt->value = (const char *) &cnt[1];
-    cnt->value_mimetype = &cnt->value[dlen];
-    memcpy (&cnt[1], data, dlen);
-    memcpy ((char *) cnt->value_mimetype, data_mime_type, mlen);
-    cnt->type = type;
-    cnt->format = format;
+    memcpy (&cnt[1], keyword, klen);
     GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt,
                                        
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
-
   }
   else
   {
     cnt->count++;
-    if (cnt->format == EXTRACTOR_METAFORMAT_C_STRING)
-      cnt->format = format;     /* possibly improve to UTF8 */
-    if (cnt->type == EXTRACTOR_METATYPE_UNKNOWN)
-      cnt->type = type;
   }
-  return 0;
+  return GNUNET_OK;
 }
 
 
 /**
  * Extract metadata from a file and add it to the metamap and
- * the metacounter.
+ * the keywordcounter.
  *
  * @param adc context to modify
  * @param filename name of the file to process
@@ -521,8 +485,8 @@
   GNUNET_CONTAINER_multihashmap_put (adc->metamap, &hc, pd,
                                      
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
   /* FIXME: what if this put fails? I think it actually can... Why unique 
only? */
-  GNUNET_CONTAINER_meta_data_iterate (pd->meta, &add_to_meta_counter,
-                                      adc->metacounter);
+  pd->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (pd->meta);
+  GNUNET_FS_uri_ksk_get_keywords (pd->ksk_uri, &add_to_keyword_counter, 
adc->keywordcounter);
 }
 
 
@@ -546,21 +510,22 @@
 
 /**
  * Add the specifics of the given entry to the tree store.
- * Derive KSK from the given meta data, but exclude meta
- * data given in "md_no_ksk" for keyword generation.
+ * Use keywords from ksk_uri, but exclude the ones given in
+ * "exclude_ksk".
  *
  * @param ts tree store to modify
  * @param iter position in the tree store for this file
  * @param filename file to add
  * @param bo block options
  * @param do_index should we index or insert?
- * @param md_no_ksk metadata with keywords NOT to add
- * @param meta metadata for the file
+ * @param ksk_uri keywords to use. Will be destroyed at the end.
+ * @param exclude_ksk keywords NOT to use. Won't be modified.
+ * @param meta metadata for the file. Will be destroyed at the end.
  */
 static void
 add_entry_to_ts (GtkTreeStore * ts, GtkTreeIter * iter, const char *filename,
                  const struct GNUNET_FS_BlockOptions *bo, int do_index,
-                 struct GNUNET_CONTAINER_MetaData *md_no_ksk,
+                 struct GNUNET_FS_Uri *ksk_uri, struct GNUNET_FS_Uri 
*exclude_ksk,
                  struct GNUNET_CONTAINER_MetaData *meta)
 {
   char *file_size_fancy;
@@ -568,8 +533,6 @@
   GtkTreeRowReference *row_reference;
   GtkTreePath *path;
   uint64_t file_size;
-  struct GNUNET_FS_Uri *ksk_uri;
-  struct GNUNET_FS_Uri *kill_ksk;
   const char *ss;
   const char *short_fn;
   struct stat sbuf;
@@ -588,12 +551,9 @@
       return;
     }
   }
-  ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
-  kill_ksk = GNUNET_FS_uri_ksk_create_from_meta_data (md_no_ksk);
-  if (kill_ksk != NULL)
+  if (exclude_ksk != NULL)
   {
-    GNUNET_FS_uri_ksk_get_keywords (kill_ksk, &remove_keyword, ksk_uri);
-    GNUNET_FS_uri_destroy (kill_ksk);
+    GNUNET_FS_uri_ksk_get_keywords (exclude_ksk, &remove_keyword, ksk_uri);
   }
   path = gtk_tree_model_get_path (GTK_TREE_MODEL (ts), iter);
   row_reference = gtk_tree_row_reference_new (GTK_TREE_MODEL (ts), path);
@@ -648,7 +608,7 @@
   GNUNET_CRYPTO_hash (filename, strlen (filename), &hc);
   pd = GNUNET_CONTAINER_multihashmap_get (adc->metamap, &hc);
   add_entry_to_ts (adc->ts, &pd->iter, filename, &adc->bo, adc->do_index,
-                   adc->no_ksk, pd->meta);
+                   pd->ksk_uri, adc->exclude_ksk, pd->meta);
   GNUNET_CONTAINER_multihashmap_remove (adc->metamap, &hc, pd);
   GNUNET_free (pd);
   return GNUNET_OK;
@@ -658,12 +618,12 @@
 /**
  * Context passed to 'migrate_and_drop'.
  */
-struct MetaProcessContext
+struct KeywordProcessContext
 {
   /**
-   * Metadata with all the keywords we migrated to the parent.
+   * All the keywords we migrated to the parent.
    */
-  struct GNUNET_CONTAINER_MetaData *md;
+  struct GNUNET_FS_Uri *ksk;
 
   /**
    * How often does a keyword have to occur to be
@@ -674,22 +634,19 @@
 
 
 /**
- * Copy "frequent" meta data entries over to the
- * target meta data struct, free the counters.
+ * Copy "frequent" keywords over to the
+ * target ksk uri, free the counters.
  *
  */
 static int
 migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value)
 {
-  struct MetaProcessContext *mpc = cls;
-  struct MetaCounter *counter = value;
+  struct KeywordProcessContext *kpc = cls;
+  struct KeywordCounter *counter = value;
 
-  if (counter->count >= mpc->threshold && counter->count > 1)
+  if (counter->count >= kpc->threshold && counter->count > 1)
   {
-    GNUNET_CONTAINER_meta_data_insert (mpc->md, "<gnunet-gtk>", counter->type,
-                                       counter->format, 
counter->value_mimetype,
-                                       counter->value,
-                                       strlen (counter->value) + 1);
+    GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO);
   }
   GNUNET_free (counter);
   return GNUNET_YES;
@@ -697,24 +654,31 @@
 
 
 /**
- * Go over the collected meta data from all entries in the
- * directory and push common meta data up one level (by
+ * Go over the collected keywords from all entries in the
+ * directory and push common keywords up one level (by
  * adding it to the returned struct).
  *
  * @param adc collection of child meta data
  * @return meta data to moved to parent
  */
-static struct GNUNET_CONTAINER_MetaData *
-process_metadata (struct AddDirContext *adc)
+static struct GNUNET_FS_Uri *
+process_keywords (struct AddDirContext *adc)
 {
-  struct MetaProcessContext mpc;
+  struct KeywordProcessContext kpc;
+  struct GNUNET_CONTAINER_MetaData *tmp;
 
-  mpc.md = GNUNET_CONTAINER_meta_data_create ();
-  mpc.threshold = (adc->dir_entry_count + 1) / 2;       /* 50% */
-  GNUNET_CONTAINER_multihashmap_iterate (adc->metacounter, &migrate_and_drop,
-                                         &mpc);
-  GNUNET_CONTAINER_multihashmap_destroy (adc->metacounter);
-  return mpc.md;
+  tmp = GNUNET_CONTAINER_meta_data_create ();
+
+  /* Surprisingly, it's impossible to create a ksk with 0 keywords directly.
+   * But we can create one from an empty metadata set
+   */
+  kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp);
+  GNUNET_CONTAINER_meta_data_destroy (tmp);
+  kpc.threshold = (adc->dir_entry_count + 1) / 2;       /* 50% */
+  GNUNET_CONTAINER_multihashmap_iterate (adc->keywordcounter, 
&migrate_and_drop,
+                                         &kpc);
+  GNUNET_CONTAINER_multihashmap_destroy (adc->keywordcounter);
+  return kpc.ksk;
 }
 
 
@@ -735,7 +699,7 @@
   struct PublishData *pd;
   GNUNET_HashCode hc;
   struct GNUNET_CONTAINER_MultiHashMap *mhm;
-  struct GNUNET_CONTAINER_MultiHashMap *mcm;
+  struct GNUNET_CONTAINER_MultiHashMap *kcm;
   unsigned int pc;
   const char *ss;
   const char *short_fn;
@@ -747,21 +711,22 @@
   {
     parent = adc->parent;
     mhm = adc->metamap;
-    mcm = adc->metacounter;
+    kcm = adc->keywordcounter;
     pc = adc->dir_entry_count;
     adc->metamap = GNUNET_CONTAINER_multihashmap_create (1024);
-    adc->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
+    adc->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
     adc->dir_entry_count = 0;
     pd = GNUNET_malloc (sizeof (struct PublishData));
     gtk_tree_store_insert_before (adc->ts, &pd->iter, parent, NULL);
     adc->parent = &pd->iter;
     GNUNET_DISK_directory_scan (filename, &scan_directory, adc);
-    pd->meta = process_metadata (adc);
-    adc->no_ksk = pd->meta;
+    pd->ksk_uri = process_keywords (adc);
+    pd->meta = GNUNET_CONTAINER_meta_data_create ();
+    adc->exclude_ksk = GNUNET_FS_uri_dup (pd->ksk_uri);
     GNUNET_DISK_directory_scan (filename, &publish_entry, adc);
     GNUNET_CONTAINER_multihashmap_destroy (adc->metamap);
     adc->metamap = mhm;
-    adc->metacounter = mcm;
+    adc->keywordcounter = kcm;
     adc->parent = parent;
     adc->dir_entry_count = pc + 1;
     short_fn = filename;
@@ -787,12 +752,13 @@
       GNUNET_CONTAINER_multihashmap_put (adc->metamap, &hc, pd,
                                          
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
       /* FIXME: what if this put fails? I think it actually can... Why unique 
only? */
-      GNUNET_CONTAINER_meta_data_iterate (pd->meta, &add_to_meta_counter, mcm);
+      GNUNET_FS_uri_ksk_get_keywords (pd->ksk_uri, &add_to_keyword_counter, 
kcm);
     }
     else
     {
+      GNUNET_assert (kcm == NULL);
       add_entry_to_ts (adc->ts, &pd->iter, filename, &adc->bo, adc->do_index,
-                       NULL, pd->meta);
+                       pd->ksk_uri, NULL, pd->meta);
     }
   }
   else




reply via email to

[Prev in Thread] Current Thread [Next in Thread]