gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r2138 - in Extractor: . po src/plugins


From: grothoff
Subject: [GNUnet-SVN] r2138 - in Extractor: . po src/plugins
Date: Thu, 22 Sep 2005 20:56:18 -0700 (PDT)

Author: grothoff
Date: 2005-09-22 20:56:15 -0700 (Thu, 22 Sep 2005)
New Revision: 2138

Modified:
   Extractor/AUTHORS
   Extractor/ChangeLog
   Extractor/po/de.po
   Extractor/src/plugins/tarextractor.c
Log:
sync

Modified: Extractor/AUTHORS
===================================================================
--- Extractor/AUTHORS   2005-09-22 08:08:33 UTC (rev 2137)
+++ Extractor/AUTHORS   2005-09-23 03:56:15 UTC (rev 2138)
@@ -21,7 +21,7 @@
 avi       - core team based in part on code from avinfo 1.0.0 alpha 11 and bitcollider 0.6.0
 mpeg      - core team based in part on code from avinfo 1.0.0 alpha 11 and bitcollider 0.6.0
 ole2      - core team based on code from libgsf
-tar       - core team
+tar       - core team and Ronan MELENNEC <address@hidden>
 tar.gz    - core team using zlib
 deb       - core team using zlib
 man       - core team using zlib (for man.gz)

Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2005-09-22 08:08:33 UTC (rev 2137)
+++ Extractor/ChangeLog 2005-09-23 03:56:15 UTC (rev 2138)
@@ -1,3 +1,10 @@
+Thu Sep 22 21:05:53 PDT 2005
+       Improved TAR extractor:
+       - it now accepts old-style (UNIX V7) archives
+       - it produces a mimetype for old-style archives
+       - it outputs the file names in the same order as in the TAR file
+       - its end-of-file mark detection is more robust
+
 Wed Sep 21 13:54:19 PDT 2005
        Added Irish translation.
 

Modified: Extractor/po/de.po
===================================================================
--- Extractor/po/de.po  2005-09-22 08:08:33 UTC (rev 2137)
+++ Extractor/po/de.po  2005-09-23 03:56:15 UTC (rev 2138)
@@ -7,10 +7,10 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: libextractor 0.5.6\n"
+"Project-Id-Version: libextractor 0.5.6a\n"
 "Report-Msgid-Bugs-To: address@hidden"
 "POT-Creation-Date: 2005-09-20 23:59-0700\n"
-"PO-Revision-Date: 2005-09-21 07:27+0200\n"
+"PO-Revision-Date: 2005-09-22 10:07+0200\n"
 "Last-Translator: Karl Eichwalder <address@hidden>\n"
 "Language-Team: German <address@hidden>\n"
 "MIME-Version: 1.0\n"
@@ -21,7 +21,7 @@
 #: src/plugins/language/language-compiler.c:37
 #, c-format
 msgid "Please provide a list of klp files as arguments.\n"
-msgstr ""
+msgstr "Geben Sie eine Liste der klp-Dateien als Argument an.\n"
 
 #: src/plugins/language/language-compiler.c:48
 #: src/plugins/printable/dictionary-builder.c:113
@@ -228,9 +228,8 @@
 msgstr "Punk"
 
 #: src/plugins/mp3extractor.c:73
-#, fuzzy
 msgid "Soundtrack"
-msgstr "Sonate"
+msgstr "Filmmusik (Soundtrack)"
 
 #: src/plugins/mp3extractor.c:74
 #, fuzzy
@@ -272,7 +271,7 @@
 
 #: src/plugins/mp3extractor.c:83
 msgid "Acid"
-msgstr ""
+msgstr "Acid"
 
 #: src/plugins/mp3extractor.c:84
 msgid "House"
@@ -390,9 +389,8 @@
 msgstr ""
 
 #: src/plugins/mp3extractor.c:111
-#, fuzzy
 msgid "Pop/Funk"
-msgstr "Punk"
+msgstr "Pop/Funk"
 
 #: src/plugins/mp3extractor.c:112
 msgid "Jungle"
@@ -415,9 +413,8 @@
 msgstr ""
 
 #: src/plugins/mp3extractor.c:117
-#, fuzzy
 msgid "Rave"
-msgstr "Reggae"
+msgstr "Rave"
 
 #: src/plugins/mp3extractor.c:118
 msgid "Showtunes"
@@ -446,27 +443,24 @@
 msgstr "Jazz"
 
 #: src/plugins/mp3extractor.c:124
-#, fuzzy
 msgid "Polka"
-msgstr "Folk"
+msgstr "Polka"
 
 #: src/plugins/mp3extractor.c:125
 msgid "Retro"
 msgstr ""
 
 #: src/plugins/mp3extractor.c:126
-#, fuzzy
 msgid "Musical"
-msgstr "Klassik"
+msgstr "Musical"
 
 #: src/plugins/mp3extractor.c:127
 msgid "Rock & Roll"
 msgstr "Rock & Roll"
 
 #: src/plugins/mp3extractor.c:128
-#, fuzzy
 msgid "Hard Rock"
-msgstr "Rock"
+msgstr "Hard Rock"
 
 #: src/plugins/mp3extractor.c:129
 msgid "Folk"
@@ -491,12 +485,11 @@
 
 #: src/plugins/mp3extractor.c:134
 msgid "Bebob"
-msgstr ""
+msgstr "Bebob"
 
 #: src/plugins/mp3extractor.c:135
-#, fuzzy
 msgid "Latin"
-msgstr "Ort"
+msgstr "Latin"
 
 #: src/plugins/mp3extractor.c:136
 msgid "Revival"
@@ -513,7 +506,7 @@
 
 #: src/plugins/mp3extractor.c:139
 msgid "Avantgarde"
-msgstr ""
+msgstr "Avantgarde"
 
 #: src/plugins/mp3extractor.c:140
 #, fuzzy
@@ -540,11 +533,11 @@
 
 #: src/plugins/mp3extractor.c:145
 msgid "Big Band"
-msgstr ""
+msgstr "Big Band"
 
 #: src/plugins/mp3extractor.c:146
 msgid "Chorus"
-msgstr ""
+msgstr "Chor"
 
 #: src/plugins/mp3extractor.c:147
 msgid "Easy Listening"
@@ -595,9 +588,8 @@
 msgstr ""
 
 #: src/plugins/mp3extractor.c:159
-#, fuzzy
 msgid "Satire"
-msgstr "Datum"
+msgstr "Satire"
 
 #: src/plugins/mp3extractor.c:160
 msgid "Slow Jam"
@@ -616,9 +608,8 @@
 msgstr "Samba"
 
 #: src/plugins/mp3extractor.c:164
-#, fuzzy
 msgid "Folklore"
-msgstr "Folk"
+msgstr "Folklore"
 
 #: src/plugins/mp3extractor.c:165
 msgid "Ballad"
@@ -704,7 +695,7 @@
 
 #: src/plugins/mp3extractor.c:184
 msgid "Beat"
-msgstr ""
+msgstr "Beat"
 
 #: src/plugins/mp3extractor.c:185
 msgid "Christian Gangsta Rap"
@@ -729,9 +720,8 @@
 msgstr ""
 
 #: src/plugins/mp3extractor.c:190
-#, fuzzy
 msgid "Christian Rock"
-msgstr "Klassischer Rock"
+msgstr "Christlicher Rock"
 
 #: src/plugins/mp3extractor.c:191
 msgid "Merengue"
@@ -830,8 +820,7 @@
 
 #: src/main/extract.c:52
 #, c-format
-msgid ""
-"Arguments mandatory for long options are also mandatory for short options.\n"
+msgid "Arguments mandatory for long options are also mandatory for short options.\n"
 msgstr ""
 "Argumente, die für lange Optionen notwendig sind, sind ebenfalls für die\n"
 "Optionen in Kurzform notwendig.\n"
@@ -845,12 +834,8 @@
 msgstr "Ausgabe im BibTeX format"
 
 #: src/main/extract.c:130
-msgid ""
-"use the generic plaintext extractor for the language with the 2-letter "
-"language code LANG"
-msgstr ""
-"generischen Klartext-extractor für die Sprache mit dem 2-Buchstabenkürzel "
-"LANG verwenden"
+msgid "use the generic plaintext extractor for the language with the 2-letter language code LANG"
+msgstr "generischen Klartext-extractor für die Sprache mit dem 2-Buchstabenkürzel LANG verwenden"
 
 #: src/main/extract.c:132
 msgid "remove duplicates only if types match"
@@ -858,9 +843,7 @@
 
 #: src/main/extract.c:134
 msgid "use the filename as a keyword (loads filename-extractor plugin)"
-msgstr ""
-"Dateinamen als Schlüsselwort verwenden (filename-extractor-Erweiterung wird "
-"geladen)"
+msgstr "Dateinamen als Schlüsselwort verwenden (filename-extractor-Erweiterung wird geladen)"
 
 #: src/main/extract.c:136
 msgid "print this help"
@@ -868,8 +851,7 @@
 
 #: src/main/extract.c:138
 msgid "compute hash using the given ALGORITHM (currently sha1 or md5)"
-msgstr ""
-"Hash gemäß dem angegebenen ALGORITHMUS errechnen (z.Zt. »sha1« oder »md5«)"
+msgstr "Hash gemäß dem angegebenen ALGORITHMUS errechnen (z.Zt. »sha1« oder »md5«)"
 
 #: src/main/extract.c:140
 msgid "load an extractor plugin named LIBRARY"
@@ -885,9 +867,7 @@
 
 #: src/main/extract.c:146
 msgid "print only keywords of the given TYPE (use -L to get a list)"
-msgstr ""
-"nur Schlüsselwörter einer bestimmten ART ausgeben (mit -L die Liste anzeigen "
-"lassen)"
+msgstr "nur Schlüsselwörter einer bestimmten ART ausgeben (mit -L die Liste anzeigen lassen)"
 
 #: src/main/extract.c:148
 msgid "remove duplicates even if keyword types do not match"
@@ -976,11 +956,9 @@
 msgstr "Seitenanzahl"
 
 #: src/main/extract.c:473
-#, fuzzy, c-format
+#, c-format
 msgid "You must specify an argument for the `%s' option (option ignored).\n"
-msgstr ""
-"Sie müssen ein Argument für die Option »%s« angeben (Option wird "
-"ignoriert).\n"
+msgstr "Sie müssen ein Argument für die Option »%s« angeben (Option wird ignoriert).\n"
 
 #: src/main/extract.c:540
 #, c-format
@@ -1239,9 +1217,8 @@
 msgstr ""
 
 #: src/main/extractor.c:115
-#, fuzzy
 msgid "publication date"
-msgstr "Datum der Erstellung"
+msgstr "Datum der Veröffentlichung"
 
 #: src/main/extractor.c:116
 msgid "camera make"
@@ -1265,7 +1242,7 @@
 
 #: src/main/extractor.c:121
 msgid "flash"
-msgstr ""
+msgstr "Blitz"
 
 #: src/main/extractor.c:122
 msgid "flash bias"
@@ -1297,7 +1274,7 @@
 
 #: src/main/extractor.c:129
 msgid "image quality"
-msgstr ""
+msgstr "Bildqualität"
 
 #: src/main/extractor.c:130
 msgid "white balance"
@@ -1315,20 +1292,18 @@
 
 #: src/main/extractor.c:372
 #, c-format
-msgid ""
-"Resolving symbol `%s' in library `%s' failed, so I tried `%s', but that "
-"failed also.  Errors are: `%s' and `%s'.\n"
+msgid "Resolving symbol `%s' in library `%s' failed, so I tried `%s', but that failed also.  Errors are: `%s' and `%s'.\n"
 msgstr ""
 
 #: src/main/extractor.c:401
-#, fuzzy, c-format
+#, c-format
 msgid "Loading `%s' plugin failed: %s\n"
 msgstr "Laden des »%s«-Plugins ist fehlgeschlagen: %s\n"
 
 #: src/main/extractor.c:606
-#, fuzzy, c-format
+#, c-format
 msgid "Unloading plugin `%s' failed!\n"
-msgstr "Entladen des »%s«-Erweiterung ist fehlgeschlagen.\n"
+msgstr "Entladen des »%s«-Plugins ist fehlgeschlagen!\n"
 
 #~ msgid "Fatal: could not allocate (%s at %s:%d).\n"
 #~ msgstr "Fatal: Allokieren nicht möglich (%s bei %s:%d).\n"

Modified: Extractor/src/plugins/tarextractor.c
===================================================================
--- Extractor/src/plugins/tarextractor.c        2005-09-22 08:08:33 UTC (rev 2137)
+++ Extractor/src/plugins/tarextractor.c        2005-09-23 03:56:15 UTC (rev 2138)
@@ -20,43 +20,42 @@
 
 #include "platform.h"
 #include "extractor.h"
-#include <zlib.h>
 
 /*
  * Note that this code is not complete!
+ * It will not report correct results for very long member filenames
+ * (> 99 octets) when the archive was made with GNU tar or Solaris tar.
  *
  * References:
  * http://www.mkssoftware.com/docs/man4/tar.4.asp
+ * (does document USTAR format common nowadays,
+ *  but not other extended formats such as the one produced
+ *  by GNU tar 1.13 when very long filenames are met.)
  */
 
-
-static EXTRACTOR_KeywordList * addKeyword(EXTRACTOR_KeywordType type,
-                                         char * keyword,
-                                         EXTRACTOR_KeywordList * next) {
+static EXTRACTOR_KeywordList * appendKeyword(EXTRACTOR_KeywordType type,
+                                            char * keyword,
+                                            EXTRACTOR_KeywordList * last) {
   EXTRACTOR_KeywordList * result;
 
+  if ( (last != NULL) &&
+       (last->next != NULL) )
+    abort();
   if (keyword == NULL)
-    return next;
+    return last;
   if (strlen(keyword) == 0) {
     free(keyword);
-    return next;
+    return last;
   }
   result = malloc(sizeof(EXTRACTOR_KeywordList));
-  result->next = next;
+  result->next = last;
+  result->keywordType = type;
   result->keyword = keyword;
-  result->keywordType = type;
+  if (last != NULL)
+    last->next = result;
   return result;
 }
 
-static char * stndup(const char * str,
-                     size_t n) {
-  char * tmp;
-  tmp = malloc(n+1);
-  tmp[n] = '\0';
-  memcpy(tmp, str, n);
-  return tmp;
-}
-
 typedef struct {
   char name[100];
   char mode[8];
@@ -86,53 +85,124 @@
                         const char * data,
                         size_t size,
                         struct EXTRACTOR_Keywords * prev) {
-  TarHeader * tar;
-  USTarHeader * ustar;
+  const TarHeader * tar;
+  const USTarHeader * ustar;
   size_t pos;
+  const char * mimetype = NULL;
+  struct EXTRACTOR_Keywords * last;
+  
+  last = prev;
+  if (last != NULL)
+    while (last->next != NULL)
+      last = last->next;
 
   if (0 != (size % 512) )
     return prev; /* cannot be tar! */
   if (size < 1024)
-    return prev;
-  size -= 1024; /* last 2 blocks are all zeros */
-  /* fixme: we may want to check that the last
-     1024 bytes are all zeros here... */
+    return prev; /* too short, or somehow truncated */
 
   pos = 0;
   while (pos + sizeof(TarHeader) < size) {
     unsigned long long fsize;
     char buf[13];
+    const char * nul_pos;
+    const char * ustar_prefix = NULL;
+    unsigned int ustar_prefix_length = 0;
+    unsigned int tar_name_length;
+    unsigned int zeropos;
+    int header_is_empty = 1;
 
-    tar = (TarHeader*) &data[pos];
+    if (pos + 1024 < size) {
+      const int * idata = (const int*) data;
+      for (zeropos = 0; zeropos < 1024 / sizeof(int); zeropos++) {
+       if(0 != idata[zeropos]) {
+         header_is_empty = 0;
+         break;
+       }
+      }
+    }
+
+    if (header_is_empty) /* assume the EOF mark was reached */
+      break;
+
+    tar = (const TarHeader*) &data[pos];
     /* fixme: we may want to check the header checksum here... */
+    /* fixme: we attempt to follow MKS document for long file names,
+       but no TAR file was found yet which matched what we understood ! */
     if (pos + sizeof(USTarHeader) < size) {
-      ustar = (USTarHeader*) &data[pos];
+
+      nul_pos = memchr(data + pos, 0, sizeof tar->name);
+      tar_name_length = (0 == nul_pos)
+                     ? sizeof(tar->name)
+                      : (nul_pos - (data + pos));
+
+      ustar = (const USTarHeader*) &data[pos];
+
+      if(0 == mimetype) {
+        if(0 == memcmp(ustar->magic, "ustar  ", 7))
+          mimetype = "application/x-gtar";
+        else
+          mimetype = "application/x-tar";
+      }
+
       if (0 == strncmp("ustar",
-                      &ustar->magic[0],
-                      strlen("ustar")))
-       pos += 512; /* sizeof(USTarHeader); */
-      else
-       pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
+                       &ustar->magic[0],
+                       strlen("ustar"))) {
+        if(0 != *ustar->prefix) {
+           nul_pos = memchr(ustar->prefix, 0, sizeof ustar->prefix);
+
+           ustar_prefix_length = (0 == nul_pos)
+                               ? sizeof ustar->prefix
+                               : nul_pos - ustar->prefix;
+           ustar_prefix = ustar->prefix;
+        }
+      }
+
+      pos += 512; /* V7 Tar, USTar and GNU Tar usual headers take 512 octets */
     } else {
       pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
     }
     memcpy(buf, &tar->filesize[0], 12);
     buf[12] = '\0';
     if (1 != sscanf(buf, "%12llo", &fsize)) /* octal! Yuck yuck! */
-      return prev;
+      break;
     if ( (pos + fsize > size) ||
         (fsize > size) ||
         (pos + fsize < pos) )
-      return prev;
-    prev = addKeyword(EXTRACTOR_FILENAME,
-                     stndup(&tar->name[0],
-                            100),
-                     prev);
+      break;
+
+    if (0 < ustar_prefix_length + tar_name_length) {
+      char * fname = malloc(1 + ustar_prefix_length + tar_name_length);
+
+      if(0 != fname) {
+         if(0 < ustar_prefix_length)
+           memcpy(fname, ustar_prefix, ustar_prefix_length);
+         if(0 < tar_name_length)
+           memcpy(fname + ustar_prefix_length, tar->name, tar_name_length);
+         fname[ustar_prefix_length + tar_name_length]= '\0';
+         last = appendKeyword(EXTRACTOR_FILENAME, fname, last);
+        if (prev == NULL)
+          prev = last;
+      }
+    }
+
     if ( (fsize & 511) != 0)
       fsize = (fsize | 511)+1; /* round up! */
     if (pos + fsize < pos)
-      return prev;
+      break;
     pos += fsize;
   }
+
+  /*
+   * a simple guard would be to clobber mimetype to NULL
+   * whenever something bad happens while reading
+   * (check break instructions just above).
+   */
+  if (NULL != mimetype) {
+    last = appendKeyword(EXTRACTOR_MIMETYPE, strdup(mimetype), last);
+    if (prev == NULL)
+      prev = last;
+  }
+
   return prev;
 }





reply via email to

[Prev in Thread] Current Thread [Next in Thread]