[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r18862 - gnunet-gtk/src/fs
From: |
gnunet |
Subject: |
[GNUnet-SVN] r18862 - gnunet-gtk/src/fs |
Date: |
Sat, 31 Dec 2011 00:52:54 +0100 |
Author: grothoff
Date: 2011-12-31 00:52:54 +0100 (Sat, 31 Dec 2011)
New Revision: 18862
Modified:
gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c
Log:
-LRN: applying patch 4 from #2046 - Count and propagate keywords instead of
metadata
Modified: gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c
===================================================================
--- gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c 2011-12-30
23:45:36 UTC (rev 18861)
+++ gnunet-gtk/src/fs/gnunet-fs-gtk-main_window_file_publish.c 2011-12-30
23:52:54 UTC (rev 18862)
@@ -329,6 +329,11 @@
struct GNUNET_CONTAINER_MetaData *meta;
/**
+ * Keywords for the file (derived from metadata).
+ */
+ struct GNUNET_FS_Uri *ksk_uri;
+
+ /**
* Iterator for the entry.
*/
GtkTreeIter iter;
@@ -336,10 +341,10 @@
/**
- * Entry for each unique meta data entry to track how often
+ * Entry for each unique keyword to track how often
* it occurred. Contains the keyword and the counter.
*/
-struct MetaCounter
+struct KeywordCounter
{
/**
@@ -348,21 +353,6 @@
const char *value;
/**
- * Mimetype of the value.
- */
- const char *value_mimetype;
-
- /**
- * Type of the value.
- */
- enum EXTRACTOR_MetaType type;
-
- /**
- * Format of the value.
- */
- enum EXTRACTOR_MetaFormat format;
-
- /**
* How many files have meta entries matching this value?
* (type and format do not have to match).
*/
@@ -388,11 +378,11 @@
GtkTreeStore *ts;
/**
- * Map from the hash over the meta value to an 'struct MetaCounter'
- * counter that says how often this value was
+ * Map from the hash over the keyword to an 'struct KeywordCounter'
+ * counter that says how often this keyword was
* encountered in the current directory.
*/
- struct GNUNET_CONTAINER_MultiHashMap *metacounter;
+ struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
/**
* Map from the hash of a filename in the current directory
@@ -401,10 +391,10 @@
struct GNUNET_CONTAINER_MultiHashMap *metamap;
/**
- * Metadata to exclude from using for KSK since it'll be associated
+ * Keywords to exclude from using for KSK since they'll be associated
* with the parent as well. NULL for nothing blocked.
*/
- struct GNUNET_CONTAINER_MetaData *no_ksk;
+ struct GNUNET_FS_Uri *exclude_ksk;
/**
* Block options to use.
@@ -424,71 +414,45 @@
/**
- * Add the given meta data item to the
- * meta data statistics tracker.
+ * Add the given keyword to the
+ * keyword statistics tracker.
*
* @param cls closure (user-defined)
- * @param plugin_name name of the plugin that produced this value;
- * special values can be used (i.e. '<zlib>' for zlib being
- * used in the main libextractor library and yielding
- * meta data).
- * @param type libextractor-type describing the meta data
- * @param format basic format information about data
- * @param data_mime_type mime-type of data (not of the original file);
- * can be NULL (if mime-type is not known)
- * @param data actual meta-data found
- * @param data_len number of bytes in data
- * @return 0 to continue extracting, 1 to abort
+ * @param keyword the keyword to count
+ * @param is_mandatory ignored
+ * @return always GNUNET_OK
*/
static int
-add_to_meta_counter (void *cls, const char *plugin_name,
- enum EXTRACTOR_MetaType type,
- enum EXTRACTOR_MetaFormat format,
- const char *data_mime_type, const char *data,
- size_t data_len)
+add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
{
struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
- struct MetaCounter *cnt;
+ struct KeywordCounter *cnt;
GNUNET_HashCode hc;
- size_t mlen;
- size_t dlen;
+ size_t klen;
- if ((format != EXTRACTOR_METAFORMAT_UTF8) &&
- (format != EXTRACTOR_METAFORMAT_C_STRING))
- return 0;
- dlen = strlen (data) + 1;
- GNUNET_CRYPTO_hash (data, dlen - 1, &hc);
+ klen = strlen (keyword) + 1;
+ GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
if (cnt == NULL)
{
- mlen = strlen (data_mime_type) + 1;
- cnt = GNUNET_malloc (sizeof (struct MetaCounter) + dlen + mlen);
+ cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
cnt->count = 1;
cnt->value = (const char *) &cnt[1];
- cnt->value_mimetype = &cnt->value[dlen];
- memcpy (&cnt[1], data, dlen);
- memcpy ((char *) cnt->value_mimetype, data_mime_type, mlen);
- cnt->type = type;
- cnt->format = format;
+ memcpy (&cnt[1], keyword, klen);
GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
-
}
else
{
cnt->count++;
- if (cnt->format == EXTRACTOR_METAFORMAT_C_STRING)
- cnt->format = format; /* possibly improve to UTF8 */
- if (cnt->type == EXTRACTOR_METATYPE_UNKNOWN)
- cnt->type = type;
}
- return 0;
+ return GNUNET_OK;
}
/**
* Extract metadata from a file and add it to the metamap and
- * the metacounter.
+ * the keywordcounter.
*
* @param adc context to modify
* @param filename name of the file to process
@@ -521,8 +485,8 @@
GNUNET_CONTAINER_multihashmap_put (adc->metamap, &hc, pd,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
/* FIXME: what if this put fails? I think it actually can... Why unique only? */
- GNUNET_CONTAINER_meta_data_iterate (pd->meta, &add_to_meta_counter,
- adc->metacounter);
+ pd->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (pd->meta);
+ GNUNET_FS_uri_ksk_get_keywords (pd->ksk_uri, &add_to_keyword_counter, adc->keywordcounter);
}
@@ -546,21 +510,22 @@
/**
* Add the specifics of the given entry to the tree store.
- * Derive KSK from the given meta data, but exclude meta
- * data given in "md_no_ksk" for keyword generation.
+ * Use keywords from ksk_uri, but exclude the ones given in
+ * "exclude_ksk".
*
* @param ts tree store to modify
* @param iter position in the tree store for this file
* @param filename file to add
* @param bo block options
* @param do_index should we index or insert?
- * @param md_no_ksk metadata with keywords NOT to add
- * @param meta metadata for the file
+ * @param ksk_uri keywords to use. Will be destroyed at the end.
+ * @param exclude_ksk keywords NOT to use. Won't be modified.
+ * @param meta metadata for the file. Will be destroyed at the end.
*/
static void
add_entry_to_ts (GtkTreeStore * ts, GtkTreeIter * iter, const char *filename,
const struct GNUNET_FS_BlockOptions *bo, int do_index,
- struct GNUNET_CONTAINER_MetaData *md_no_ksk,
+ struct GNUNET_FS_Uri *ksk_uri, struct GNUNET_FS_Uri *exclude_ksk,
struct GNUNET_CONTAINER_MetaData *meta)
{
char *file_size_fancy;
@@ -568,8 +533,6 @@
GtkTreeRowReference *row_reference;
GtkTreePath *path;
uint64_t file_size;
- struct GNUNET_FS_Uri *ksk_uri;
- struct GNUNET_FS_Uri *kill_ksk;
const char *ss;
const char *short_fn;
struct stat sbuf;
@@ -588,12 +551,9 @@
return;
}
}
- ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
- kill_ksk = GNUNET_FS_uri_ksk_create_from_meta_data (md_no_ksk);
- if (kill_ksk != NULL)
+ if (exclude_ksk != NULL)
{
- GNUNET_FS_uri_ksk_get_keywords (kill_ksk, &remove_keyword, ksk_uri);
- GNUNET_FS_uri_destroy (kill_ksk);
+ GNUNET_FS_uri_ksk_get_keywords (exclude_ksk, &remove_keyword, ksk_uri);
}
path = gtk_tree_model_get_path (GTK_TREE_MODEL (ts), iter);
row_reference = gtk_tree_row_reference_new (GTK_TREE_MODEL (ts), path);
@@ -648,7 +608,7 @@
GNUNET_CRYPTO_hash (filename, strlen (filename), &hc);
pd = GNUNET_CONTAINER_multihashmap_get (adc->metamap, &hc);
add_entry_to_ts (adc->ts, &pd->iter, filename, &adc->bo, adc->do_index,
- adc->no_ksk, pd->meta);
+ pd->ksk_uri, adc->exclude_ksk, pd->meta);
GNUNET_CONTAINER_multihashmap_remove (adc->metamap, &hc, pd);
GNUNET_free (pd);
return GNUNET_OK;
@@ -658,12 +618,12 @@
/**
* Context passed to 'migrate_and_drop'.
*/
-struct MetaProcessContext
+struct KeywordProcessContext
{
/**
- * Metadata with all the keywords we migrated to the parent.
+ * All the keywords we migrated to the parent.
*/
- struct GNUNET_CONTAINER_MetaData *md;
+ struct GNUNET_FS_Uri *ksk;
/**
* How often does a keyword have to occur to be
@@ -674,22 +634,19 @@
/**
- * Copy "frequent" meta data entries over to the
- * target meta data struct, free the counters.
+ * Copy "frequent" keywords over to the
+ * target ksk uri, free the counters.
*
*/
static int
migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value)
{
- struct MetaProcessContext *mpc = cls;
- struct MetaCounter *counter = value;
+ struct KeywordProcessContext *kpc = cls;
+ struct KeywordCounter *counter = value;
- if (counter->count >= mpc->threshold && counter->count > 1)
+ if (counter->count >= kpc->threshold && counter->count > 1)
{
- GNUNET_CONTAINER_meta_data_insert (mpc->md, "<gnunet-gtk>", counter->type,
- counter->format, counter->value_mimetype,
- counter->value,
- strlen (counter->value) + 1);
+ GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO);
}
GNUNET_free (counter);
return GNUNET_YES;
@@ -697,24 +654,31 @@
/**
- * Go over the collected meta data from all entries in the
- * directory and push common meta data up one level (by
+ * Go over the collected keywords from all entries in the
+ * directory and push common keywords up one level (by
* adding it to the returned struct).
*
* @param adc collection of child meta data
* @return keywords to be moved to the parent
*/
-static struct GNUNET_CONTAINER_MetaData *
-process_metadata (struct AddDirContext *adc)
+static struct GNUNET_FS_Uri *
+process_keywords (struct AddDirContext *adc)
{
- struct MetaProcessContext mpc;
+ struct KeywordProcessContext kpc;
+ struct GNUNET_CONTAINER_MetaData *tmp;
- mpc.md = GNUNET_CONTAINER_meta_data_create ();
- mpc.threshold = (adc->dir_entry_count + 1) / 2; /* 50% */
- GNUNET_CONTAINER_multihashmap_iterate (adc->metacounter, &migrate_and_drop,
- &mpc);
- GNUNET_CONTAINER_multihashmap_destroy (adc->metacounter);
- return mpc.md;
+ tmp = GNUNET_CONTAINER_meta_data_create ();
+
+ /* Surprisingly, it's impossible to create a ksk with 0 keywords directly.
+ * But we can create one from an empty metadata set
+ */
+ kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp);
+ GNUNET_CONTAINER_meta_data_destroy (tmp);
+ kpc.threshold = (adc->dir_entry_count + 1) / 2; /* 50% */
+ GNUNET_CONTAINER_multihashmap_iterate (adc->keywordcounter, &migrate_and_drop,
+ &kpc);
+ GNUNET_CONTAINER_multihashmap_destroy (adc->keywordcounter);
+ return kpc.ksk;
}
@@ -735,7 +699,7 @@
struct PublishData *pd;
GNUNET_HashCode hc;
struct GNUNET_CONTAINER_MultiHashMap *mhm;
- struct GNUNET_CONTAINER_MultiHashMap *mcm;
+ struct GNUNET_CONTAINER_MultiHashMap *kcm;
unsigned int pc;
const char *ss;
const char *short_fn;
@@ -747,21 +711,22 @@
{
parent = adc->parent;
mhm = adc->metamap;
- mcm = adc->metacounter;
+ kcm = adc->keywordcounter;
pc = adc->dir_entry_count;
adc->metamap = GNUNET_CONTAINER_multihashmap_create (1024);
- adc->metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
+ adc->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
adc->dir_entry_count = 0;
pd = GNUNET_malloc (sizeof (struct PublishData));
gtk_tree_store_insert_before (adc->ts, &pd->iter, parent, NULL);
adc->parent = &pd->iter;
GNUNET_DISK_directory_scan (filename, &scan_directory, adc);
- pd->meta = process_metadata (adc);
- adc->no_ksk = pd->meta;
+ pd->ksk_uri = process_keywords (adc);
+ pd->meta = GNUNET_CONTAINER_meta_data_create ();
+ adc->exclude_ksk = GNUNET_FS_uri_dup (pd->ksk_uri);
GNUNET_DISK_directory_scan (filename, &publish_entry, adc);
GNUNET_CONTAINER_multihashmap_destroy (adc->metamap);
adc->metamap = mhm;
- adc->metacounter = mcm;
+ adc->keywordcounter = kcm;
adc->parent = parent;
adc->dir_entry_count = pc + 1;
short_fn = filename;
@@ -787,12 +752,13 @@
GNUNET_CONTAINER_multihashmap_put (adc->metamap, &hc, pd,
GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
/* FIXME: what if this put fails? I think it actually can... Why unique only? */
- GNUNET_CONTAINER_meta_data_iterate (pd->meta, &add_to_meta_counter, mcm);
+ GNUNET_FS_uri_ksk_get_keywords (pd->ksk_uri, &add_to_keyword_counter, kcm);
}
else
{
+ GNUNET_assert (kcm == NULL);
add_entry_to_ts (adc->ts, &pd->iter, filename, &adc->bo, adc->do_index,
- NULL, pd->meta);
+ pd->ksk_uri, NULL, pd->meta);
}
}
else
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r18862 - gnunet-gtk/src/fs,
gnunet <=