gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r18821 - in gnunet: doc/man src/fs src/include


From: gnunet
Subject: [GNUnet-SVN] r18821 - in gnunet: doc/man src/fs src/include
Date: Sun, 25 Dec 2011 22:16:11 +0100

Author: grothoff
Date: 2011-12-25 22:16:11 +0100 (Sun, 25 Dec 2011)
New Revision: 18821

Modified:
   gnunet/doc/man/gnunet-publish.1
   gnunet/src/fs/fs_file_information.c
   gnunet/src/fs/fs_uri.c
   gnunet/src/include/gnunet_fs_service.h
Log:
-remove code for keyword canonicalization/normalization, makes no sense in 
international application, normalization methods are questionable to begin with

Modified: gnunet/doc/man/gnunet-publish.1
===================================================================
--- gnunet/doc/man/gnunet-publish.1     2011-12-25 21:15:59 UTC (rev 18820)
+++ gnunet/doc/man/gnunet-publish.1     2011-12-25 21:16:11 UTC (rev 18821)
@@ -10,7 +10,7 @@
 .PP
 In order to start sharing files, the files must be added either using 
gnunet\-publish or a graphical interface such as gnunet\-gtk.  The command line 
tool gnunet\-publish is more useful if many files are supposed to be added.  
gnunet\-publish can automatically publish batches of files, recursively publish 
directories, create directories that can be browsed within GNUnet and publish 
file lists in a namespace.  When run on a directory, gnunet\-publish will 
always recursively publish all of the files in the directory.
 .PP
-gnunet\-publish can automatically extract keywords from the files that are 
shared.  Users that want to download files from GNUnet use keywords to search 
for the appropriate content.  You can disable keyword extraction with the \-D 
option.  You can manually add keywords using the \-k option. The keywords are 
case\-sensitive. (However, keyword normalization can also be used.)
+gnunet\-publish can automatically extract keywords from the files that are 
shared.  Users that want to download files from GNUnet use keywords to search 
for the appropriate content.  You can disable keyword extraction with the \-D 
option.  You can manually add keywords using the \-k option. The keywords are 
case\-sensitive.
 .PP
 You can use automatic meta\-data extraction (based on libextractor) or the 
command\-line option \-m to specify meta-data.  For the \-m option you need to 
use the form keyword\-type:value.  For example, use "\-m os:Linux" to specify 
that the operating system is Linux.  Common meta\-data types are "author name", 
"title" , "mimetype", "filename", "language", "subject" and "keywords".  A full 
list can be obtained from the extract tool using the option \-\-list.  The 
meta\-data is used to help users in searching for files on the network.  
 .PP

Modified: gnunet/src/fs/fs_file_information.c
===================================================================
--- gnunet/src/fs/fs_file_information.c 2011-12-25 21:15:59 UTC (rev 18820)
+++ gnunet/src/fs/fs_file_information.c 2011-12-25 21:16:11 UTC (rev 18821)
@@ -341,7 +341,6 @@
   struct DirScanCls *dsc = cls;
   struct stat sbuf;
   struct GNUNET_FS_FileInformation *fi;
-  struct GNUNET_FS_Uri *ksk_uri;
   struct GNUNET_FS_Uri *keywords;
   struct GNUNET_CONTAINER_MetaData *meta;
 
@@ -370,13 +369,11 @@
     meta = GNUNET_CONTAINER_meta_data_create ();
     GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors);
     keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
-    ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
     fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename,
-                                                      ksk_uri, meta,
+                                                      keywords, meta,
                                                       dsc->do_index, dsc->bo);
     GNUNET_CONTAINER_meta_data_destroy (meta);
     GNUNET_FS_uri_destroy (keywords);
-    GNUNET_FS_uri_destroy (ksk_uri);
   }
   dsc->proc (dsc->proc_cls, filename, fi);
   return GNUNET_OK;
@@ -723,7 +720,6 @@
   struct EntryProcCls dc;
   const char *fn;
   const char *ss;
-  struct GNUNET_FS_Uri *cksk;
   char *dn;
   struct GNUNET_FS_FileInformation *epos;
   unsigned int i;
@@ -747,21 +743,20 @@
                                          &compute_directory_keywords, &cdmc);
   GNUNET_CONTAINER_multihashmap_destroy (dc.metamap);
   GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap);
-  GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, 
GNUNET_NO);
-  cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk);
 
   /* remove keywords in children that are already in the
    * parent */
   for (epos = dc.entries; NULL != epos; epos = epos->next)
   {
-    for (i = 0; i < cksk->data.ksk.keywordCount; i++)
+    for (i = 0; i < cdmc.ksk->data.ksk.keywordCount; i++)
     {
-      kw = cksk->data.ksk.keywords[i];
+      kw = cdmc.ksk->data.ksk.keywords[i];
       GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]);
     }
   }
+  GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, 
GNUNET_NO);
   ret =
-      GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk,
+      GNUNET_FS_file_information_create_empty_directory (h, client_info, 
cdmc.ksk,
                                                          cdmc.meta, bo);
   GNUNET_CONTAINER_meta_data_destroy (cdmc.meta);
   GNUNET_FS_uri_destroy (cdmc.ksk);

Modified: gnunet/src/fs/fs_uri.c
===================================================================
--- gnunet/src/fs/fs_uri.c      2011-12-25 21:15:59 UTC (rev 18820)
+++ gnunet/src/fs/fs_uri.c      2011-12-25 21:16:11 UTC (rev 18821)
@@ -970,129 +970,6 @@
 
 
 /**
- * Canonicalize a keyword.
- *
- * @param in input string (the keyword)
- * @return canonicalized keyword
- */
-static char *
-canonicalize_keyword (const char *in)
-{
-  char *ret;
-  char *wpos;
-  const char *rpos;
-
-  ret = GNUNET_strdup (in);
-  wpos = ret;
-  rpos = in;
-  while ('\0' != *rpos)
-  {
-    switch (tolower ((unsigned char) *rpos))
-    {
-    case 'a':
-    case 'e':
-    case 'i':
-    case 'o':
-    case 'u':
-    case ' ':
-    case '\t':
-    case '\n':
-    case '\r':
-      /* skip characters listed above */
-      break;
-    case 'b':
-    case 'c':
-    case 'd':
-    case 'f':
-    case 'g':
-    case 'h':
-    case 'j':
-    case 'k':
-    case 'l':
-    case 'm':
-    case 'n':
-    case 'p':
-    case 'r':
-    case 's':
-    case 't':
-    case 'v':
-    case 'w':
-    case 'x':
-    case 'y':
-    case 'z':
-      /* convert characters listed above to lower case */
-      *wpos = tolower ((unsigned char) *rpos);
-      wpos++;
-      break;
-    case '!':
-    case '.':
-    case '?':
-    case '-':
-      /* keep characters listed above without changes */
-      *wpos = *rpos;
-      wpos++;
-      break;
-    default:
-      if (isspace ((unsigned char) *rpos) || isdigit ((unsigned char) *rpos))
-        break;
-      /* replace characters listed above with '_' */
-      *wpos = '_';
-      wpos++;
-      break;
-    }
-    rpos++;
-  }
-  *wpos = '\0';
-  return ret;
-}
-
-
-/**
- * Canonicalize keyword URI.  Performs operations such
- * as decapitalization and removal of certain characters.
- * (useful for search).
- *
- * @param uri the URI to canonicalize
- * @return canonicalized version of the URI, NULL on error
- */
-struct GNUNET_FS_Uri *
-GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri)
-{
-  struct GNUNET_FS_Uri *ret;
-  unsigned int kc;
-  unsigned int i;
-  const char *in;
-  char *sb;
-  char *cc;
-  const char *tok;
-
-  ret = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
-  ret->type = ksk;
-  kc = uri->data.ksk.keywordCount;
-  for (i = 0; i < kc; i++)
-  {
-    in = uri->data.ksk.keywords[i];
-    GNUNET_FS_uri_ksk_add_keyword (ret, &in[1],
-                                   (in[0] == '+') ? GNUNET_YES : GNUNET_NO);
-    sb = GNUNET_strdup (&in[1]);
-#define DELIMS " \\|\"'`/&@-_,.;!?+-*^$#~=[]{}()<>"
-    for (tok = strtok (sb, DELIMS); NULL != tok; tok = strtok (NULL, DELIMS))
-#undef DELIMS
-    {
-      if (strlen (tok) < 3)
-        continue;
-      GNUNET_FS_uri_ksk_add_keyword (ret, tok, GNUNET_NO);
-      cc = canonicalize_keyword (tok);
-      if (strlen (cc) > 2)
-        GNUNET_FS_uri_ksk_add_keyword (ret, cc, GNUNET_NO);
-    }
-    GNUNET_free (sb);
-  }
-  return ret;
-}
-
-
-/**
  * Merge the sets of keywords from two KSK URIs.
  * (useful for merging the canonicalized keywords with
  * the original keywords for sharing).

Modified: gnunet/src/include/gnunet_fs_service.h
===================================================================
--- gnunet/src/include/gnunet_fs_service.h      2011-12-25 21:15:59 UTC (rev 
18820)
+++ gnunet/src/include/gnunet_fs_service.h      2011-12-25 21:16:11 UTC (rev 
18821)
@@ -53,7 +53,7 @@
  * 6.1.x: with simplified namespace support
  * 9.0.0: CPS-style integrated API
  */
-#define GNUNET_FS_VERSION 0x00090000
+#define GNUNET_FS_VERSION 0x00090001
 
 
 /* ******************** URI API *********************** */
@@ -228,21 +228,7 @@
 
 
 /**
- * Canonicalize keyword URI.  Performs operations such
- * as decapitalization and removal of certain characters.
- * (useful for search).
- *
- * @param uri the URI to canonicalize
- * @return canonicalized version of the URI, NULL on error
- */
-struct GNUNET_FS_Uri *
-GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri);
-
-
-/**
  * Merge the sets of keywords from two KSK URIs.
- * (useful for merging the canonicalized keywords with
- * the original keywords for sharing).
  *
  * @param u1 first uri
  * @param u2 second uri
@@ -1898,9 +1884,11 @@
  * files (those starting with a ".").  Metadata will be extracted
  * using GNU libextractor; the specific list of plugins should be
  * specified in "cls", passing NULL will disable (!)  metadata
- * extraction.  Keywords will be derived from the metadata and be
- * subject to default canonicalization.  This is strictly a
- * convenience function.
+ * extraction.  Keywords will be derived from the metadata and
+ * associated with directories as appropriate.  This is strictly a
+ * convenience function (however, if all tools use it, there will
+ * be less of a chance of distinguishing users by the specific 
+ * user-interface they were using).
  *
  * @param cls must be of type "struct EXTRACTOR_Extractor*"
  * @param h handle to the file sharing subsystem




reply via email to

[Prev in Thread] Current Thread [Next in Thread]