gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r22814 - Extractor/src/main


From: gnunet
Subject: [GNUnet-SVN] r22814 - Extractor/src/main
Date: Sun, 22 Jul 2012 11:38:54 +0200

Author: grothoff
Date: 2012-07-22 11:38:54 +0200 (Sun, 22 Jul 2012)
New Revision: 22814

Modified:
   Extractor/src/main/extractor_datasource.c
   Extractor/src/main/extractor_datasource.h
Log:
datasource hacking


Modified: Extractor/src/main/extractor_datasource.c
===================================================================
--- Extractor/src/main/extractor_datasource.c   2012-07-21 22:34:27 UTC (rev 22813)
+++ Extractor/src/main/extractor_datasource.c   2012-07-22 09:38:54 UTC (rev 22814)
@@ -22,12 +22,24 @@
 
 #if HAVE_LIBBZ2
 #include <bzlib.h>
+#define MIN_BZ2_HEADER 4
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
 #endif
+#endif
 
 #if HAVE_ZLIB
 #include <zlib.h>
+#define MIN_ZLIB_HEADER 12
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
 #endif
+#endif
 
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER -1
+#endif
+
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 0
 #endif
@@ -37,25 +49,13 @@
  */
 #define MAX_READ (4 * 1024 * 1024)
 
+/**
+ * Data is read from the source and shoved into decompressor
+ * in chunks this big.
+ */
+#define COM_CHUNK_SIZE (10 * 1024)
 
-#if HAVE_ZLIB
-#define MIN_ZLIB_HEADER 12
-#endif
-#if HAVE_LIBBZ2
-#define MIN_BZ2_HEADER 4
-#endif
-#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB
-#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
-#endif
-#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2
-#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
-#endif
-#if !defined (MIN_COMPRESSED_HEADER)
-#define MIN_COMPRESSED_HEADER -1
-#endif
 
-#define COMPRESSED_DATA_PROBE_SIZE 3
-
 /**
  * Enum with the various possible types of compression supported.
  */ 
@@ -106,17 +106,17 @@
   uint64_t fsize;
 
   /**
-   * Position within the file or the data buffer
+   * Position of the buffer in the file.
    */ 
   uint64_t fpos;
 
   /**
-    * Position within the buffer.
+   * Position within the buffer.
    */ 
   uint64_t buffer_pos;
 
   /**
-   * Number of bytes in the buffer (<= buffer_size)
+   * Number of valid bytes in the buffer (<= buffer_size)
    */ 
   uint64_t buffer_bytes;
 
@@ -144,12 +144,22 @@
   struct BufferedFileDataSource *bfds;
 
   /**
+   * Decompression target buffer.
+   */
+  char result[COM_CHUNK_SIZE];
+
+  /** 
+   * At which offset in 'result' is 'fpos'?
+   */
+  size_t result_pos;
+
+  /**
    * Size of the source (same as bfds->fsize)
    */ 
   int64_t fsize;
 
   /**
-   * Position within the source
+   * Position within the (decompressed) source
    */ 
   int64_t fpos;
 
@@ -206,7 +216,7 @@
     return -1; /* invalid */
   if (NULL == bfds->buffer)
     {
-      bfds->buffer_bytes = bfds->fsize;
+      bfds->buffer_pos = pos;
       return 0;
     }
 #if WINDOWS
@@ -219,6 +229,7 @@
   if (position < 0)
     return -1;
   bfds->fpos = position;
+  bfds->buffer_pos = 0;
   rd = read (bfds->fd, bfds->buffer, bfds->buffer_size);
   if (rd < 0)
     return -1;
@@ -258,7 +269,7 @@
   memset (result, 0, sizeof (struct BufferedFileDataSource));
   result->data = (NULL != data) ? data : &result[1];
   result->buffer = (NULL != data) ? NULL : &result[1];
-  result->buffer_size = (NULL != data) ? fsize : xtra;
+  result->buffer_size = (NULL != data) ? fsize : xtra; 
   result->fsize = fsize;
   result->fd = fd;
   bfds_pick_next_buffer_at (result, 0);
@@ -297,86 +308,95 @@
   switch (whence)
     {
     case SEEK_CUR:
-      if (NULL != bfds->buffer)
+      if (bfds->fpos + bfds->buffer_pos + pos < 0)
+       return -1;
+      if (bfds->fpos + bfds->buffer_pos + pos > bfds->fsize)
+       return -1;
+      if ( (NULL == bfds->buffer) ||
+          ( (bfds->buffer_pos + pos < pos->buffer_bytes) &&
+            (bfds->buffer_pos + pos >= 0) ) )
        {
-         if (0 != bfds_pick_next_buffer_at (bfds, 
-                                            bfds->fpos + bfds->buffer_pos + pos))
-           return -1;
-         bfds->buffer_pos = 0;
-         return bfds->fpos;
+         bfds->buffer_pos += pos; 
+         return bfds->buffer_pos;
        }
-      bfds->buffer_pos += pos; 
-      return bfds->buffer_pos;
+      if (0 != bfds_pick_next_buffer_at (bfds, 
+                                        bfds->fpos + bfds->buffer_pos + pos))
+       return -1;
+      return bfds->fpos;
+    case SEEK_END:
+      if (pos > 0)
+       return -1;
+      if (bfds->fsize < - pos)
+       return -1;
+      pos = bfds->fsize + pos;
+      /* fall-through! */
     case SEEK_SET:
       if (pos < 0)
        return -1;
-      if (NULL != bfds->buffer)
+      if (pos > bfds->fsize)
+       return -1;
+      if ( (NULL == bfds->buffer) ||
+          ( (bfds->buffer_pos <= pos) &&
+            (bfds->buffer_pos + pos->buffer_bytes > pos) ) )
        {
-         if (0 != bfds_pick_next_buffer_at (bfds, pos))
-           return -1;
-         bfds->buffer_pos = 0;
-         return bfds->fpos;
+         bfds->buffer_pos = pos; 
+         return bfds->buffer_pos;
        }
-      bfds->buffer_pos = pos; 
-      return bfds->buffer_pos;
-    case SEEK_END:
-      if (NULL != bfds->buffer)
-       {
-         if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos))
-           return -1;
-         bfds->buffer_pos = 0;
-         return bfds->fpos;
-       }
-      bfds->buffer_pos = bfds->fsize + pos; 
-      return bfds->buffer_pos;
+      if (0 != bfds_pick_next_buffer_at (bfds, pos))
+       return -1;
+      return bfds->fpos;
     }
   return -1;
 }
 
 
 /**
- * Fills 'buf_ptr' with a chunk of data.
- * Will seek if necessary. Will fail if 'count' exceeds buffer size.
+ * Fills 'buf_ptr' with a chunk of data. Will
+ * fail if 'count' exceeds buffer size.
  *
  * @param bfds bfds
  * @param buf_ptr location to store data 
  * @param count number of bytes to read
- * @return number of bytes (<= count) available at location pointed by buf_ptr
+ * @return number of bytes (<= count) available at location pointed by buf_ptr,
+ *         0 for end of stream, -1 on error
  */ 
 static ssize_t
 bfds_read (struct BufferedFileDataSource *bfds, 
           void *buf_ptr, 
           size_t count)
 {
-  if (count > MAX_READ)
-    return -1;
-  if (count > bfds->buffer_bytes - bfds->buffer_pos)
+  char *cbuf = buf_ptr;
+  uint64_t old_off;
+  size_t avail;
+  size_t ret;
+
+  old_off = bfds->fpos + bfds->buffer_pos + bfds->buffer_bytes; 
+  if (old_off == bfds->fsize)
+    return 0; /* end of stream */
+  ret = 0;
+  while (count > 0)
     {
-      if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos + bfds->buffer_pos, SEEK_SET))
-       return -1;
-      if (NULL != bfds->buffer)
+      if ( (bfds->buffer_bytes == bfds->buffer_pos) &&
+          (0 != bfds_pick_next_buffer_at (bfds, 
+                                          bfds->fpos + bfds->buffer_pos + bfds->buffer_bytes)) )
        {
-         *buf_ptr = &bfds->buffer[bfds->buffer_pos];
-         bfds->buffer_pos += count < bfds->buffer_bytes ? count : bfds->buffer_bytes;
-         return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes);
+         /* revert to original position, invalidate buffer */
+         bfds->fpos = old_off;
+         bfds->buffer_bytes = 0;
+         bfds->buffer_pos = 0;
+         return -1; /* getting more failed */
        }
-      else
-       {
-         int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? count : (bfds->buffer_bytes - bfds->buffer_pos);
-         *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos];
-         bfds->buffer_pos += ret;
-         return ret;
-       }
+      avail = bfds->buffer_bytes - bfds->buffer_pos;
+      if (avail > count)
+       avail = count;
+      if (0 == avail) 
+       abort (); /* must not happen */
+      memcpy (&cbuf[ret], &bfds->data[bfds->buffer_pos], avail);
+      bfds->buffer_pos += avail;
+      count -= avail;
+      ret += avail;
     }
-  else
-    {
-      if (NULL != bfds->buffer)
-       *buf_ptr = &bfds->buffer[bfds->buffer_pos];
-      else
-       *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos];
-      bfds->buffer_pos += count;
-      return count;
-    }
+  return ret;
 }
 
 
@@ -395,7 +415,7 @@
 /**
  * Reset gz-compressed data stream to the beginning.
  *
- * @return 1 on success, 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
  *        -1 on decompressor initialization failure
  */ 
 static int
@@ -422,13 +442,13 @@
 #ifdef ZLIB_VERNUM
       15 + 32
 #else
-      -MAX_WBITS
+      - MAX_WBITS
 #endif
       ))
     {
       return -1;
     }
-  cfs->fpos = cfs->gzip_header_length;
+  cfs->fpos = 0;
   cfs->shm_pos = 0;
   cfs->shm_buf_size = 0;
   return 1;
@@ -438,7 +458,7 @@
 /**
  * Reset bz2-compressed data stream to the beginning.
  *
- * @return 1 on success, 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
  *        -1 on decompressor initialization failure
  */ 
 static int
@@ -455,21 +475,21 @@
  * seeking backward.
  *
  * @param cfs cfs to reset
- * @return 1 on success, , 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
  *        -1 on error
  */
 static int
 cfs_reset_stream (struct CompressedFileSource *cfs)
 {
   switch (cfs->compression_type)
-  {
-  case COMP_TYPE_ZLIB:
-    return cfs_reset_stream_zlib (cfs);
-  case COMP_TYPE_BZ2:
-    return cfs_reset_stream_bz2 (cfs);
-  default:
-    return -1;
-  }
+    {
+    case COMP_TYPE_ZLIB:
+      return cfs_reset_stream_zlib (cfs);
+    case COMP_TYPE_BZ2:
+      return cfs_reset_stream_bz2 (cfs);
+    default:
+      return -1;
+    }
 }
 
 
@@ -480,98 +500,75 @@
  * @param cfs cfs to initialize
  * @param proc callback for metadata
  * @param proc_cls callback cls
- * @return 1 on success, -1 on error
+ * @return 1 on success, 0 to terminate extraction, -1 on error
  */
 static int
 cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
                            EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  /* Process gzip header */
   unsigned int gzip_header_length = 10;
-  unsigned char data[12];
-  int64_t buf_bytes;
-  int len;
-  unsigned char *buf;
-  unsigned char *cptr;
-  
-  if (sizeof (data) > bfds_read (cfs->bfds, data, sizeof (data)))
+  unsigned char hdata[12];
+
+  /* Process gzip header */  
+  if (sizeof (hdata) > bfds_read (cfs->bfds, hdata, sizeof (hdata)))
     return -1;
-  
-  if (0 != (data[3] & 0x4)) /* FEXTRA  set */
-    gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
-      (((unsigned) (data[11] & 0xff)) * 256);
+  if (0 != (hdata[3] & 0x4)) /* FEXTRA  set */
+    gzip_header_length += 2 + (unsigned) (hdata[10] & 0xff) +
+      (((unsigned) (hdata[11] & 0xff)) * 256);
 
-  if (0 != (data[3] & 0x8)) /* FNAME set */
-  {
-    if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
-      return -1;
-    buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
-    if (buf_bytes <= 0)
-      return -1;
-    cptr = buf;
-
-    len = 0;
-    /* stored file name is here */
-    while (len < buf_bytes)
+  if (0 != (hdata[3] & 0x8)) 
     {
-      if ('\0' == *cptr)
-      break;
-      cptr++;
-      len++;
+      /* FNAME set */
+      char fname[1024];
+      char *cptr;
+      size_t len;
+      ssize_t buf_bytes;
+      
+      if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
+       return -1;
+      buf_bytes = bfds_read (cfs->bfds, fname, sizeof (fname));
+      if (buf_bytes <= 0)
+       return -1;
+      if (NULL == (cptr = memchr (fname, 0, buf_bytes)))
+       return -1;
+      len = cptr - fname;
+      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+                    EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+                    fname,
+                    len))
+       return 0; /* done */
+      gzip_header_length += len + 1;
     }
-
-    if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
-                  EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
-                  (const char *) buf,
-                  len))
-      return 0; /* done */
-
-    /* FIXME: check for correctness */
-    //gzip_header_length = (cptr - data) + 1;
-    gzip_header_length += len + 1;
-  }
-
-  if (0 != (data[3] & 0x16)) /* FCOMMENT set */
-  {
-    int64_t buf_bytes;
-    int len;
-    unsigned char *buf;
-    unsigned char *cptr;
-
-    if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
-      return -1;
-    buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
-    if (buf_bytes <= 0)
-      return -1;
-    cptr = buf;
-
-    len = 0;
-    /* stored file name is here */
-    while (len < buf_bytes)
-      {
-       if ('\0' == *cptr)
-         break;
-       cptr++;
-       len++;
-      }
-    
-    if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
-                  EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
-                  (const char *) buf,
-                  len))
-      return 0; /* done */
-
-    /* FIXME: check for correctness */
-    //gzip_header_length = (cptr - data) + 1;
-    gzip_header_length += len + 1;
-  }
-
-  if (data[3] & 0x2) /* FCHRC set */
+  
+  if (0 != (hdata[3] & 0x16)) 
+    {
+      /* FCOMMENT set */
+      char fcomment[1024];
+      char *cptr;
+      ssize_t buf_bytes;
+      size_t len;
+      
+      if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET))
+       return -1;
+      buf_bytes = bfds_read (cfs->bfds, fcomment, sizeof (fcomment));
+      if (buf_bytes <= 0)
+       return -1;
+      if (NULL == (cptr = memchr (fcomment, 0, buf_bytes)))
+       return -1;
+      len = cptr - fcomment;
+      if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+                    EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+                    (const char *) fcomment,
+                    len))
+       return 0; /* done */
+      gzip_header_length += len + 1;
+    }
+  if (0 != (hdata[3] & 0x2)) /* FCHRC set */
     gzip_header_length += 2;
-
   memset (&cfs->strm, 0, sizeof (z_stream));
-
+  
 #ifdef ZLIB_VERNUM
+  /* zlib will take care of its header */
   gzip_header_length = 0;
 #endif
 
@@ -604,7 +601,7 @@
  * @param cfs cfs to initialize
  * @param proc callback for metadata
  * @param proc_cls callback cls
- * @return 1 on success, -1 on error
+ * @return 1 on success, 0 to terminate extraction, -1 on error
  */
 static int
 cfs_init_decompressor (struct CompressedFileSource *cfs, 
@@ -671,12 +668,25 @@
 
 
 /**
+ * Destroy compressed file source.
+ *
+ * @param cfs source to destroy
+ */
+static void
+cfs_destroy (struct CompressedFileSource *cfs)
+{
+  cfs_deinit_decompressor (cfs);
+  free (cfs);
+}
+
+
+/**
  * Allocates and initializes new cfs object.
  *
  * @param bfds data source to use
  * @param fsize size of the source
  * @param compression_type type of compression used
- * @param proc metadata callback
+ * @param proc metadata callback to call with meta data found upon opening
  * @param proc_cls callback cls
  * @return newly allocated cfs on success, NULL on error
  */
@@ -686,7 +696,6 @@
         enum ExtractorCompressionType compression_type, 
         EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
 {
-  int shm_result;
   struct CompressedFileSource *cfs;
 
   if (NULL == (cfs = malloc (sizeof (struct CompressedFileSource))))
@@ -696,93 +705,118 @@
   cfs->bfds = bfds;
   cfs->fsize = fsize;
   cfs->uncompressed_size = -1;
+  if (1 != cfs_init_decompressor (cfs,
+                                 proc, proc_cls))
+    {
+      free (cfs);
+      return NULL;
+    }
   return cfs;
 }
 
 
 /**
- * Data is read from the source and shoved into decompressor
- * in chunks this big.
- */
-#define COM_CHUNK_SIZE (10*1024)
-
-
-/**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data.  Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
  *
  * @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
  */
-static int
-cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read_zlib (struct CompressedFileSource *cfs, 
+              void *data,
+              size_t size)
 {
+  char *dst = data;
   int ret;
-  int64_t rc = preserve;
-  int64_t total = cfs->strm.total_out;
+  size_t rc;
+  ssize_t in;
+  char buf[COM_CHUNK_SIZE];
 
-  if (preserve > 0)
-    memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve);
-
-  while (rc < cfs->shm_size && ret != Z_STREAM_END)
-  {
-    if (cfs->strm.avail_in == 0)
+  if (cfs->fpos == cfs->uncompressed_size)
+    return 0;
+  rc = 0;
+  if (strm.avail_out > 0)
     {
-      int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in, COM_CHUNK_SIZE);
-      if (count <= 0)
-        return 0;
+      /* got left-over decompressed data from previous round! */
+      in = strm.avail_out;
+      if (in > size)
+       in = size;
+      memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
+      cfs->fpos += in;
+      cfs->result_pos += in;
+      rc += in;
+    }
+  ret = Z_OK;
+  while ( (rc < size) && (Z_STREAM_END != ret) )
+    {
+      /* read block from original data source */
+      in = bfds_read (cfs->bfds,
+                     buf, sizeof (buf));
+      if (in <= 0)
+       return -1; /* unexpected EOF */
+      cfs->strm.next_in = buf;
       cfs->strm.avail_in = (uInt) count;
+      cfs->strm.next_out = cfs->result;
+      cfs->strm.avail_out = COM_CHUNK_SIZE;
+      cfs->result_pos = 0;
+      ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
+      if ( (Z_OK != ret) && (Z_STREAM_END != ret) )
+       return -1; /* unexpected error */
+      /* go backwards by the number of bytes left in the buffer */
+      if (-1 == bfds_seek (cfs->bfds, - cfs->strm.avail_in, SEEK_CUR))
+       return -1;
+      /* copy decompressed bytes to target buffer */
+      in = cfs->strm.total_out;
+      if (in > size - rc)
+       in = size - rc;
+      memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
+      cfs->fpos += in;
+      cfs->result_pos += in;
+      rc += in;
     }
-    cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc];
-    cfs->strm.avail_out = cfs->shm_size - rc;
-    ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
-    if (ret != Z_OK && ret != Z_STREAM_END)
-      return 0;
-    rc = cfs->strm.total_out - total;
-  }
-  if (ret == Z_STREAM_END)
-    cfs->uncompressed_size = cfs->strm.total_out;
-  cfs->shm_pos = preserve;
-  cfs->shm_buf_size = rc + preserve;
-  return 1;
+  if (Z_STREAM_END == ret)
+    cfs->uncompressed_size = cfs->fpos;
+  return rc;
 }
 
 
 /**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data.  Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
  *
  * @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
  */
-static int
-cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read_bz2 (struct CompressedFileSource *cfs, 
+             void *data,
+             size_t size)
 {
   return -1;
 }
 
 
 /**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data.  Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
  *
  * @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
  */
-static int64_t
-cfs_read (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read (struct CompressedFileSource *cfs, 
+         void *data,
+         size_t size)
 {
   switch (cfs->compression_type)
     {
@@ -801,72 +835,44 @@
  * requires seeking backwards beyond the boundaries of the buffer, resets the
  * stream and repeats decompression from the beginning to 'position'.
  *
- * @param cfds cfs to seek on
+ * @param cfs cfs to seek on
  * @param position new starting point for the buffer
  * @return new absolute buffer position, -1 on error or EOS
  */
 static int64_t
-cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position)
+cfs_seek (struct CompressedFileSource *cfs, 
+         uint64_t position)
 {
-  int64_t ret;
-
-  if (position > cfs->strm.total_out - cfs->shm_buf_size && position < cfs->strm.total_out)
-  {
-    ret = cfs_read (cfs, cfs->strm.total_out - position);
-    if (ret < 0)
-      return ret;
-    return position;
-  }
-  while (position >= cfs->strm.total_out)
-  {
-    if (0 > (ret = cfs_read (cfs, 0)))
-      return ret;
-    if (ret == 0)
-      return position;
-  }
-  if (position < cfs->strm.total_out && position > cfs->strm.total_out - cfs->shm_buf_size)
-    return cfs->strm.total_out - cfs->shm_buf_size;
-  return -1;
-}
-
-
-/**
- * Moves the buffer to 'position' in uncompressed steam. If position
- * requires seeking backwards beyond the boundaries of the buffer, resets the
- * stream and repeats decompression from the beginning to 'position'.
- *
- * @param cfds cfs to seek on
- * @param position new starting point for the buffer
- * @return new absolute buffer position, -1 on error or EOS
- */
-static int64_t
-cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position)
-{
-  return -1;
-}
-
-
-/**
- * Moves the buffer to 'position' in uncompressed steam. If position
- * requires seeking backwards beyond the boundaries of the buffer, resets the
- * stream and repeats decompression from the beginning to 'position'.
- *
- * @param cfds cfs to seek on
- * @param position new starting point for the buffer
- * @return new absolute buffer position, -1 on error or EOS
- */
-static int64_t
-cfs_seek (struct CompressedFileSource *cfs, int64_t position)
-{
-  switch (cfs->compression_type)
+  int64_t delta;
+  
+  delta = position - cfs->fpos;
+  if (delta < 0)
     {
-    case COMP_TYPE_ZLIB:
-      return cfs_seek_zlib (cfs, position);
-    case COMP_TYPE_BZ2:
-      return cfs_seek_bz2 (cfs, position);
-    default:
-      return -1;
+      if (result_pos >= - delta)
+       {
+         result_pos += delta;
+         delta = 0;
+       }
+      else
+       {
+         if (-1 == cfs_reset_stream (cfs))
+           return -1;
+         delta = position;
+       }
     }
+  while (delta > 0)
+    {
+      char buf[COM_CHUNK_SIZE];
+      size_t max;
+      int64_t ret;
+      
+      max = (sizeof (buf) > delta) ? delta : sizeof (buf);
+      ret = cfs_read (cfs, buf, max);
+      if (-1 == ret)
+       return -1;
+      delta -= ret;      
+    }
+  return cfs->fpos;
 }
 
 
@@ -879,163 +885,239 @@
  * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression
  */
 static enum ExtractorCompressionType
-get_compression_type (const unsigned char *data, 
-                     int fd, 
-                     int64_t fsize)
+get_compression_type (struct BufferedFileDataSource *bfds)
 {
-  void *read_data = NULL;
-  size_t read_data_size = 0;
-  ssize_t read_result;
-  enum ExtractorCompressionType result = COMP_TYPE_INVALID;
+  unsigned char read_data[3];
 
-  if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
-  {
+  if (0 != bfds_seek (bfds, 0, SEEK_SET))
     return COMP_TYPE_INVALID;
-  }
-  if (data == NULL)
-  {
-    int64_t position;
-    read_data_size = COMPRESSED_DATA_PROBE_SIZE;
-    read_data = malloc (read_data_size);
-    if (read_data == NULL)
-      return -1;
-#if WINDOWS
-    position = _lseeki64 (fd, 0, SEEK_CUR);
-#elif HAVE_LSEEK64
-    position = lseek64 (fd, 0, SEEK_CUR);
-#else
-    position = (int64_t) lseek (fd, 0, SEEK_CUR);
-#endif
-    read_result = READ (fd, read_data, read_data_size);
-#if WINDOWS
-    position = _lseeki64 (fd, position, SEEK_SET);
-#elif HAVE_LSEEK64
-    position = lseek64 (fd, position, SEEK_SET);
-#else
-    position = lseek (fd, (off_t) position, SEEK_SET);
-#endif
-    if (read_result != read_data_size)
-    {
-      free (read_data);
-      return COMP_TYPE_UNDEFINED;
-    }
-    data = (const void *) read_data;
-  }
+  if (sizeof (read_data) !=
+      bfds_read (bfds, read_data, sizeof (read_data)))
+    return COMP_TYPE_UNDEFINED;
+
 #if HAVE_ZLIB
-  if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08))
-    result = COMP_TYPE_ZLIB;
+  if ( (bdfs->fsize >= MIN_ZLIB_HEADER) && 
+       (data[0] == 0x1f) && 
+       (data[1] == 0x8b) && 
+       (data[2] == 0x08) )
+    return COMP_TYPE_ZLIB;
 #endif
 #if HAVE_LIBBZ2
-  if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) 
-    result = COMP_TYPE_BZ2;
+  if ( (bdfs->fsize >= MIN_BZ2_HEADER) && 
+       (data[0] == 'B') && 
+       (data[1] == 'Z') && 
+       (data[2] == 'h')) 
+    return COMP_TYPE_BZ2;
 #endif
-  if (read_data != NULL)
-    free (read_data);
-  return result;
+  return COMP_TYPE_INVALID;
 }
 
 
-#if 0
+/**
+ * Handle to a datasource we can use for the plugins.
+ */ 
+struct EXTRACTOR_Datasource
+{
 
-  enum ExtractorCompressionType compression_type = -1;
-  struct CompressedFileSource *cfs = NULL;
-  int fd = -1;
-  struct stat64 fstatbuf;
-  int64_t fsize = 0;
+  /**
+   * Underlying buffered data source.
+   */
+  struct BufferedFileDataSource *bfds;
 
-  /* If data is not given, then we need to read it from the file. Try opening it */
-  if ((data == NULL) &&
-      (filename != NULL) &&
-      (0 == STAT64(filename, &fstatbuf)) &&
-      (!S_ISDIR(fstatbuf.st_mode)) &&
-      (-1 != (fd = file_open (filename,
-             O_RDONLY | O_LARGEFILE))))
-  {
-    /* Empty files are of no interest */
-    fsize = fstatbuf.st_size;
-    if (fsize == 0) 
-    {
-       close(fd);
-       return;
-    }
-  }
+  /**
+   * Compressed file source (NULL if not applicable).
+   */
+  struct CompressedFileSource *cfs;
 
-  /* Data is not given, and we've failed to open the file with data -> exit */
-  if ((fsize == 0) && (data == NULL))
-    return;
-  /* fsize is now size of the data OR size of the file */
-  if (data != NULL)
-    fsize = size;
+  /**
+   * Underlying file descriptor, -1 for none.
+   */
+  int fd;
+};
 
-  errno = 0;
 
-  /* Peek at first few bytes of the file (or of the data), and see if it's compressed. */
-  compression_type = get_compression_type (data, fd, fsize);
-  if (compression_type < 0)
-  {
-    /* errno is set by get_compression_type () */
-    if (fd != -1)
-      close (fd);
-    return;
-  }
-
+/**
+ * Create a datasource from a file on disk.
+ *
+ * @param filename name of the file on disk
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
+ */
+struct EXTRACTOR_Datasource *
+EXTRACTOR_datasource_create_from_file_ (const char *filename,
+                                       EXTRACTOR_MetaDataProcessor proc, 
+                                       void *proc_cls)
+{
   struct BufferedFileDataSource *bfds;
-  bfds = bfds_new (data, fd, fsize);
-  if (bfds == NULL)
-    return;
+  struct EXTRACTOR_Datasource *ds;
+  enum ExtractorCompressionType ct;
+  int fd;
+  struct stat sb;
+  int64_t fsize;
 
-  if (compression_type > 0)
-  {
-    int icr = 0;
-    /* Set up a decompressor.
-     * Will also report compression-related metadata to the caller.
-     */
-    cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls);
-    if (cfs == NULL)
+  if (-1 == (fd = open (filename, O_RDONLY | O_LARGEFILE)))
+    return NULL;
+  if ( (0 != fstat (fd, &sb)) ||
+       (S_ISDIR (fstatbuf.st_mode)) )       
     {
-      if (fd != -1)
-        close (fd);
-      errno = EILSEQ;
-      return;
+      (void) close (fd);
+      return NULL;
     }
-    icr = cfs_init_decompressor (cfs, proc, proc_cls);
-    if (icr < 0)
+  fsize = (int64_t) sb.st_size;
+  if (0 == fsize)
     {
-      if (fd != -1)
-        close (fd);
-      errno = EILSEQ;
-      return;
+      (void) close (fd);
+      return NULL;
     }
-    else if (icr == 0)
+  bfds = bfds_new (NULL, fd, fsize);
+  if (NULL == bfds)
     {
-      if (fd != -1)
-        close (fd);
-      errno = 0;
-      return;
+      (void) close (fd);
+      return NULL;
     }
-  }
+  if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource))))
+    {
+      bfds_delete (bfds);
+      return NULL;
+    }
+  ds->bfds = bfds;
+  ds->fd;
+  ct = get_compression_type (bfds);
+  if ( (COMP_TYPE_ZLIB == ct) ||
+       (COMP_TYPE_BZ2 == ct) )
+    ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls);
+  if (NULL == ds->cfs)
+    {
+      bfds_delete (bfds);
+      free (ds);
+      (void) close (fd);
+      return NULL;
+    }
+  return ds;
+}
 
 
-#endif
+/**
+ * Create a datasource from a buffer in memory.
+ *
+ * @param buf data in memory
+ * @param size number of bytes in 'buf'
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource
+ */
+struct EXTRACTOR_Datasource *
+EXTRACTOR_datasource_create_from_buffer_ (const char *buf,
+                                         size_t size,
+                                         EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+  struct BufferedFileDataSource *bfds;
+  struct EXTRACTOR_Datasource *ds;
+  enum ExtractorCompressionType ct;
 
+  if (0 == size)
+    return NULL;
+  if (NULL == (bfds = bfds_new (buf, -1, size)))
+    return NULL;
+  if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource))))
+    {
+      bfds_delete (bfds);
+      return NULL;
+    }
+  ds->bfds = bfds;
+  ds->fd;
+  ct = get_compression_type (bfds);
+  if ( (COMP_TYPE_ZLIB == ct) ||
+       (COMP_TYPE_BZ2 == ct) )
+    ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls);
+  if (NULL == ds->cfs)
+    {
+      bfds_delete (bfds);
+      free (ds);
+      return NULL;
+    }
+  return ds;
+}
 
 
 /**
  * Destroy a data source.
  *
+ * Destroys the decompressor if one is attached, releases the buffered
+ * source, closes the file descriptor unless it is -1 and finally frees
+ * the handle itself; 'ds' must not be used afterwards.
+ *
- * @param datasource source to destroy
+ * @param ds source to destroy
  */
 void
-EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource)
+EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds)
 {
-  if (cfs != NULL)
-  {
-    cfs_deinit_decompressor (cfs);
-    cfs_delete (cfs);
-  }
-  bfds_delete (bfds);
-  if (-1 != fd)
-    close(fd);
+  if (NULL != ds->cfs)
+    cfs_destroy (ds->cfs);
+  bfds_delete (ds->bfds);
+  if (-1 != ds->fd)
+    (void) close (ds->fd);
+  free (ds);
 }
 
+
+/**
+ * Make 'size' bytes of data from the data source available at 'data'.
+ *
+ * Reads go through the decompressor when one is attached to the
+ * data source, otherwise straight to the buffered source.
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @param data where the data should be copied to
+ * @param size maximum number of bytes requested
+ * @return number of bytes now available in data (can be smaller than 'size'),
+ *         -1 on error
+ */
+ssize_t
+EXTRACTOR_datasource_read_ (void *cls,
+                           void *data,
+                           size_t size)
+{
+  struct EXTRACTOR_Datasource *ds = cls;
+
+  if (NULL != ds->cfs)
+    return cfs_read (ds->cfs, data, size);
+  return bfds_read (ds->bfds, data, size);
+}
+
+
+/**
+ * Seek in the datasource.  Use 'SEEK_CUR' for whence and 'pos' of 0 to
+ * obtain the current position in the file.
+ *
+ * The request is forwarded to the decompressor when one is attached
+ * to the data source, otherwise to the buffered source.
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @param pos position to seek (see 'man lseek')
+ * @param whence how to seek (absolute to start, relative, absolute to end)
+ * @return new absolute position, -1 on error (i.e. desired position
+ *         does not exist)
+ */
+int64_t
+EXTRACTOR_datasource_seek_ (void *cls,
+                           uint64_t pos,
+                           int whence)
+{
+  struct EXTRACTOR_Datasource *ds = cls;
+
+  if (NULL != ds->cfs)
+    return cfs_seek (ds->cfs, pos, whence);
+  return bfds_seek (ds->bfds, pos, whence);
+}
+
+
+/**
+ * Determine the overall size of the data source (after compression).
+ *
+ * The size is obtained by seeking to the end of the data source and
+ * then seeking back to the previous position, so the read offset is
+ * unchanged when this function succeeds.
+ * NOTE(review): relies on the underlying seek implementations
+ * accepting 'SEEK_END'; if they do not, -1 ("unknown") is returned.
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @return overall file size, -1 on error or unknown
+ */
+int64_t
+EXTRACTOR_datasource_get_size_ (void *cls)
+{
+  int64_t cur;
+  int64_t end;
+
+  if (-1 == (cur = EXTRACTOR_datasource_seek_ (cls, 0, SEEK_CUR)))
+    return -1;
+  end = EXTRACTOR_datasource_seek_ (cls, 0, SEEK_END);
+  /* always try to go back, even if the jump to the end failed */
+  if (-1 == EXTRACTOR_datasource_seek_ (cls, (uint64_t) cur, SEEK_SET))
+    return -1;
+  return end;
+}
+
+
 /* end of extractor_datasource.c */

Modified: Extractor/src/main/extractor_datasource.h
===================================================================
--- Extractor/src/main/extractor_datasource.h   2012-07-21 22:34:27 UTC (rev 22813)
+++ Extractor/src/main/extractor_datasource.h   2012-07-22 09:38:54 UTC (rev 22814)
@@ -30,10 +30,13 @@
  * Create a datasource from a file on disk.
  *
  * @param filename name of the file on disk
- * @return handle to the datasource
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
  */
 struct EXTRACTOR_Datasource *
-EXTRACTOR_datasource_create_from_file_ (const char *filename);
+EXTRACTOR_datasource_create_from_file_ (const char *filename,
+                                       EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
 
 
 /**
@@ -41,24 +44,27 @@
  *
  * @param buf data in memory
  * @param size number of bytes in 'buf'
- * @return handle to the datasource
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
  */
 struct EXTRACTOR_Datasource *
 EXTRACTOR_datasource_create_from_buffer_ (const char *buf,
-                                         size_t size);
+                                         size_t size,
+                                         EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
 
 
 /**
  * Destroy a data source.
  *
- * @param datasource source to destroy
+ * @param ds source to destroy
  */
 void
-EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource);
+EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds);
 
 
 /**
- * Make 'size' bytes of data from the data source available at '*data'.
+ * Make 'size' bytes of data from the data source available at 'data'.
  *
  * @param cls must be a 'struct EXTRACTOR_Datasource'
  * @param data where the data should be copied to
@@ -77,12 +83,12 @@
  * obtain the current position in the file.
  * 
  * @param cls must be a 'struct EXTRACTOR_Datasource'
- * @param pos position to seek (see 'man lseek')
+ * @param pos position to seek (see 'man lseek')
  * @param whence how to see (absolute to start, relative, absolute to end)
- * @return new absolute position, UINT64_MAX on error (i.e. desired position
+ * @return new absolute position, -1 on error (i.e. desired position
  *         does not exist)
  */
-uint64_t
+int64_t
 EXTRACTOR_datasource_seek_ (void *cls,
                            uint64_t pos,
                            int whence);
@@ -92,9 +98,9 @@
  * Determine the overall size of the data source (after compression).
  * 
  * @param cls must be a 'struct EXTRACTOR_Datasource'
- * @return overall file size, UINT64_MAX on error (i.e. IPC failure)
+ * @return overall file size, -1 on error or unknown
  */ 
-uint64_t 
+int64_t 
 EXTRACTOR_datasource_get_size_ (void *cls);
 
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]