gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r4082 - in Extractor: . doc src/main src/plugins src/plugins/thumbnail


From: grothoff
Subject: [GNUnet-SVN] r4082 - in Extractor: . doc src/main src/plugins src/plugins/thumbnail
Date: Thu, 28 Dec 2006 19:23:42 -0800 (PST)

Author: grothoff
Date: 2006-12-28 19:23:38 -0800 (Thu, 28 Dec 2006)
New Revision: 4082

Modified:
   Extractor/ChangeLog
   Extractor/doc/extract.1
   Extractor/src/main/extract.c
   Extractor/src/plugins/splitextractor.c
   Extractor/src/plugins/thumbnail/thumbnailextractor.c
Log:
fixing Mantis #1125 and bug in splitextractor

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/ChangeLog 2006-12-29 03:23:38 UTC (rev 4082)
@@ -1,3 +1,9 @@
+Thu Dec 28 20:22:20 MST 2006
+       Fixed bug in splitextractor, addressing also Mantis #1125.
+
+Thu Dec 28 18:12:15 MST 2006
+       Added -g (greppable output, Mantis #1157) option to extract.  
+
 Mon Nov 20 22:08:55 EET 2006
        Added an SID (C64 music file) plugin
 

Modified: Extractor/doc/extract.1
===================================================================
--- Extractor/doc/extract.1     2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/doc/extract.1     2006-12-29 03:23:38 UTC (rev 4082)
@@ -1,4 +1,4 @@
-.TH EXTRACT 1 "April 28, 2005" "libextractor 0.5.11"
+.TH EXTRACT 1 "Dec 29, 2006" "libextractor 0.5.17"
 .\" $Id
 .SH NAME
 extract
@@ -6,7 +6,7 @@
 .SH SYNOPSIS
 .B extract
 [
-.B \-abdfhLnrsvV
+.B \-abdfghLnrsvV
 ]
 [
 .B \-B
@@ -32,7 +32,7 @@
 \&...
 .br
 .SH DESCRIPTION
-This manual page documents version 0.5.11 of the 
+This manual page documents version 0.5.17 of the 
 .B extract 
 command.
 .PP
@@ -63,6 +63,9 @@
 .B \-f
 add the filename(s) (without directory) to the list of keywords.
 .TP 8
+.B \-g
+Use grep-friendly output (all keywords on a single line for each file).  Use the verbose option to print the filename first, followed by the keywords.  This option will not print keyword types or non-textual metadata.
+.TP 8
 .B \-h
 Print a brief summary of the options.
 .TP 8

Modified: Extractor/src/main/extract.c
===================================================================
--- Extractor/src/main/extract.c        2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/main/extract.c        2006-12-29 03:23:38 UTC (rev 4082)
@@ -132,6 +132,8 @@
       gettext_noop("remove duplicates only if types match") },
     { 'f', "filename", NULL,
       gettext_noop("use the filename as a keyword (loads filename-extractor plugin)") },
+    { 'g', "grep-friendly", NULL,
+      gettext_noop("produce grep-friendly output (all results on one line per file)") },
     { 'h', "help", NULL,
       gettext_noop("print this help") },
     { 'H', "hash", "ALGORITHM",
@@ -167,7 +169,7 @@
 
 /**
  * Print a keyword list to a file.
- * For debugging.
+ * 
  * @param handle the file to write to (stdout, stderr), may NOT be NULL
  * @param keywords the list of keywords to print, may be NULL
  * @param print array indicating which types to print
@@ -180,24 +182,19 @@
 {
   char * keyword;
   iconv_t cd;
-  char * buf;
 
-  cd = iconv_open(
-    nl_langinfo(CODESET)
-    , "UTF-8");
+  cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
   while (keywords != NULL) {
-    buf = NULL;
-    if (cd != (iconv_t) -1)
-      keyword = iconvHelper(cd,
-                           keywords->keyword);
-    else
-      keyword = strdup(keywords->keyword);
-
-    if (keywords->keywordType == EXTRACTOR_THUMBNAIL_DATA) {
+    if (EXTRACTOR_isBinaryType(keywords->keywordType)) {
       fprintf (handle,
               _("%s - (binary)\n"),
               _(EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)));
     } else {
+      if (cd != (iconv_t) -1)
+       keyword = iconvHelper(cd,
+                             keywords->keyword);
+      else
+       keyword = strdup(keywords->keyword);      
       if (NULL == EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)) {
        if (verbose == YES) {
          fprintf(handle,
@@ -209,8 +206,8 @@
                 "%s - %s\n",
                 _(EXTRACTOR_getKeywordTypeAsString(keywords->keywordType)),
                 keyword);
+      free(keyword);
     }
-    free(keyword);
     keywords = keywords->next;
   }
   if (cd != (iconv_t) -1)
@@ -218,6 +215,42 @@
 }
 
 /**
+ * Print a keyword list to a file in a grep-friendly manner.
+ *
+ * @param handle the file to write to (stdout, stderr), may NOT be NULL
+ * @param keywords the list of keywords to print, may be NULL
+ * @param print array indicating which types to print
+ */
+static void
+printSelectedKeywordsGrepFriendly(FILE * handle,
+                                 EXTRACTOR_KeywordList * keywords,
+                                 const int * print,
+                                 const int verbose)
+{
+  char * keyword;
+  iconv_t cd;
+
+  cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
+  while (keywords != NULL) {
+    if ( (EXTRACTOR_isBinaryType(EXTRACTOR_THUMBNAIL_DATA)) &&
+        (print[keywords->keywordType] == YES) ) {
+      if (cd != (iconv_t) -1)
+       keyword = iconvHelper(cd,
+                             keywords->keyword);
+      else
+       keyword = strdup(keywords->keyword);
+      fprintf (handle,
+              (keywords->next == NULL) ? "%s" : "%s ",
+              keyword); 
+      free(keyword);
+    }
+    keywords = keywords->next;
+  }
+  if (cd != (iconv_t) -1)
+    iconv_close(cd);
+}
+
+/**
  * Take title, auth, year and return a string
  */
 static char *
@@ -390,6 +423,7 @@
   int defaultAll = YES;
   int duplicates = EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN;
   int bibtex = NO;
+  int grepfriendly = NO;
   char * binary = NULL;
   int ret = 0;
 
@@ -413,6 +447,7 @@
        {"bibtex", 0, 0, 'b'},
        {"duplicates", 0, 0, 'd'},
        {"filename", 0, 0, 'f'},
+       {"grep-friendly", 0, 0, 'g'},
        {"help", 0, 0, 'h'},
        {"hash", 1, 0, 'H'},
        {"list", 0, 0, 'L'},
@@ -451,6 +486,9 @@
        case 'f':
          useFilename = YES;
          break;
+       case 'g':
+         grepfriendly = YES;
+         break;
        case 'h':
          printHelp();
          return 0;
@@ -612,10 +650,18 @@
     }
     if ( (duplicates != -1) || (bibtex == YES))
       keywords = EXTRACTOR_removeDuplicateKeywords (keywords, duplicates);
-    if (verbose == YES && bibtex == NO)
-      printf (_("Keywords for file %s:\n"), argv[i]);
+    if ( ( (verbose == YES) || (grepfriendly == YES) )
+        && (bibtex == NO) ) {
+      if (grepfriendly == YES)
+       printf ("%s", argv[i]);
+      else 
+       printf (_("Keywords for file %s:"), 
+               argv[i]);
+    }
     if (bibtex == YES)
       printSelectedKeywordsBibtex (stdout, keywords, print, argv[i]);
+    else if (grepfriendly == YES)
+      printSelectedKeywordsGrepFriendly(stdout, keywords, print, verbose);
     else
       printSelectedKeywords (stdout, keywords, print, verbose);
     if (verbose == YES && bibtex == NO)

Modified: Extractor/src/plugins/splitextractor.c
===================================================================
--- Extractor/src/plugins/splitextractor.c      2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/plugins/splitextractor.c      2006-12-29 03:23:38 UTC (rev 4082)
@@ -21,53 +21,63 @@
 #include "platform.h"
 #include "extractor.h"
 
-static char * TOKENIZERS = "._ ,address@hidden(){}";
+/**
+ * Default split characters.
+ */
+static const char * TOKENIZERS = "._ ,address@hidden(){}";
+
+/**
+ * Do not use keywords shorter than this minimum
+ * length.
+ */
 static int MINIMUM_KEYWORD_LENGTH = 4;
 
 static void addKeyword(struct EXTRACTOR_Keywords ** list,
-                      const char * keyword,
-                      EXTRACTOR_KeywordType type) {
+                      const char * keyword) {
   EXTRACTOR_KeywordList * next;
   next = malloc(sizeof(EXTRACTOR_KeywordList));
   next->next = *list;
   next->keyword = strdup(keyword);
-  next->keywordType = type;
+  next->keywordType = EXTRACTOR_SPLIT;
   *list = next;
 }
 
 static int token(char letter,
                 const char * options) {
-  int i;
-  
-  if (options == NULL)
-    options = TOKENIZERS;
-  for (i=0;i<strlen(TOKENIZERS);i++)
-    if (letter == TOKENIZERS[i])
+  size_t i;
+
+  i = 0;
+  while (options[i] != '\0') {
+    if (letter == options[i])
       return 1;
+    i++;
+  }
   return 0;
 }
 
 static void splitKeywords(const char * keyword,
-                         EXTRACTOR_KeywordType type,
                          struct EXTRACTOR_Keywords ** list,
                          const char * options) {
   char * dp;
-  int pos;
-  int last;
-  int len;
+  size_t pos;
+  size_t last;
+  size_t len;
 
   dp = strdup(keyword);
   len = strlen(dp);
   pos = 0;
   last = 0;
   while (pos < len) {
-    while ((!token(dp[pos],
-                  options)) && (pos < len))
+    while ( (0 == token(dp[pos], options)) &&
+           (pos < len) )
       pos++;
-    dp[pos++] = 0;
-    if (strlen(&dp[last]) >= MINIMUM_KEYWORD_LENGTH) {
-      addKeyword(list, &dp[last], type);
-    }
+    dp[pos++] = '\0';
+    if (pos - last > MINIMUM_KEYWORD_LENGTH) 
+      addKeyword(list, 
+                &dp[last]);    
+    while ( (1 == token(dp[pos], options)) &&
+           (pos < len) )
+      pos++;
     last = pos;
   }
   free(dp);
@@ -82,13 +92,16 @@
                           const char * options) {
   struct EXTRACTOR_Keywords * pos;
 
+  if (options == NULL)
+    options = TOKENIZERS;
   pos = prev;
   while (pos != NULL) {
-    splitKeywords(pos->keyword,
-                 EXTRACTOR_SPLIT,
+    splitKeywords(pos->keyword,                  
                  &prev,
                  options);
     pos = pos->next;
   }
   return prev;
 }
+
+/* end of splitextractor.c */

Modified: Extractor/src/plugins/thumbnail/thumbnailextractor.c
===================================================================
--- Extractor/src/plugins/thumbnail/thumbnailextractor.c        2006-12-29 00:44:34 UTC (rev 4081)
+++ Extractor/src/plugins/thumbnail/thumbnailextractor.c        2006-12-29 03:23:38 UTC (rev 4082)
@@ -75,10 +75,11 @@
   NULL,
 };
 
-struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename,
-                                                          const unsigned char * data,
-                                                          size_t size,
-                                                          struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_thumbnail_extract(const char * filename,
+                              const unsigned char * data,
+                              size_t size,
+                              struct EXTRACTOR_Keywords * prev) {
   GdkPixbufLoader * loader;
   GdkPixbuf * in;
   GdkPixbuf * out;





reply via email to

[Prev in Thread] Current Thread [Next in Thread]