[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r22814 - Extractor/src/main
From: gnunet
Subject: [GNUnet-SVN] r22814 - Extractor/src/main
Date: Sun, 22 Jul 2012 11:38:54 +0200
Author: grothoff
Date: 2012-07-22 11:38:54 +0200 (Sun, 22 Jul 2012)
New Revision: 22814
Modified:
Extractor/src/main/extractor_datasource.c
Extractor/src/main/extractor_datasource.h
Log:
datasource hacking
Modified: Extractor/src/main/extractor_datasource.c
===================================================================
--- Extractor/src/main/extractor_datasource.c 2012-07-21 22:34:27 UTC (rev 22813)
+++ Extractor/src/main/extractor_datasource.c 2012-07-22 09:38:54 UTC (rev 22814)
@@ -22,12 +22,24 @@
#if HAVE_LIBBZ2
#include <bzlib.h>
+#define MIN_BZ2_HEADER 4
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
#endif
+#endif
#if HAVE_ZLIB
#include <zlib.h>
+#define MIN_ZLIB_HEADER 12
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
#endif
+#endif
+#ifndef MIN_COMPRESSED_HEADER
+#define MIN_COMPRESSED_HEADER -1
+#endif
+
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
@@ -37,25 +49,13 @@
*/
#define MAX_READ (4 * 1024 * 1024)
+/**
+ * Data is read from the source and shoved into decompressor
+ * in chunks this big.
+ */
+#define COM_CHUNK_SIZE (10 * 1024)
-#if HAVE_ZLIB
-#define MIN_ZLIB_HEADER 12
-#endif
-#if HAVE_LIBBZ2
-#define MIN_BZ2_HEADER 4
-#endif
-#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB
-#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
-#endif
-#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2
-#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
-#endif
-#if !defined (MIN_COMPRESSED_HEADER)
-#define MIN_COMPRESSED_HEADER -1
-#endif
-#define COMPRESSED_DATA_PROBE_SIZE 3
-
/**
* Enum with the various possible types of compression supported.
*/
@@ -106,17 +106,17 @@
uint64_t fsize;
/**
- * Position within the file or the data buffer
+ * Position of the buffer in the file.
*/
uint64_t fpos;
/**
- * Position within the buffer.
+ * Position within the buffer.
*/
uint64_t buffer_pos;
/**
- * Number of bytes in the buffer (<= buffer_size)
+ * Number of valid bytes in the buffer (<= buffer_size)
*/
uint64_t buffer_bytes;
@@ -144,12 +144,22 @@
struct BufferedFileDataSource *bfds;
/**
+ * Decompression target buffer.
+ */
+ char result[COM_CHUNK_SIZE];
+
+ /**
+ * At which offset in 'result' is 'fpos'?
+ */
+ size_t result_pos;
+
+ /**
* Size of the source (same as bfds->fsize)
*/
int64_t fsize;
/**
- * Position within the source
+ * Position within the (decompressed) source
*/
int64_t fpos;
@@ -206,7 +216,7 @@
return -1; /* invalid */
if (NULL == bfds->buffer)
{
- bfds->buffer_bytes = bfds->fsize;
+ bfds->buffer_pos = pos;
return 0;
}
#if WINDOWS
@@ -219,6 +229,7 @@
if (position < 0)
return -1;
bfds->fpos = position;
+ bfds->buffer_pos = 0;
rd = read (bfds->fd, bfds->buffer, bfds->buffer_size);
if (rd < 0)
return -1;
@@ -258,7 +269,7 @@
memset (result, 0, sizeof (struct BufferedFileDataSource));
result->data = (NULL != data) ? data : &result[1];
result->buffer = (NULL != data) ? NULL : &result[1];
- result->buffer_size = (NULL != data) ? fsize : xtra;
+ result->buffer_size = (NULL != data) ? fsize : xtra;
result->fsize = fsize;
result->fd = fd;
bfds_pick_next_buffer_at (result, 0);
@@ -297,86 +308,95 @@
switch (whence)
{
case SEEK_CUR:
- if (NULL != bfds->buffer)
+ if (bfds->fpos + bfds->buffer_pos + pos < 0)
+ return -1;
+ if (bfds->fpos + bfds->buffer_pos + pos > bfds->fsize)
+ return -1;
+ if ( (NULL == bfds->buffer) ||
+ ( (bfds->buffer_pos + pos < pos->buffer_bytes) &&
+ (bfds->buffer_pos + pos >= 0) ) )
{
- if (0 != bfds_pick_next_buffer_at (bfds,
- bfds->fpos + bfds->buffer_pos +
pos))
- return -1;
- bfds->buffer_pos = 0;
- return bfds->fpos;
+ bfds->buffer_pos += pos;
+ return bfds->buffer_pos;
}
- bfds->buffer_pos += pos;
- return bfds->buffer_pos;
+ if (0 != bfds_pick_next_buffer_at (bfds,
+ bfds->fpos + bfds->buffer_pos + pos))
+ return -1;
+ return bfds->fpos;
+ case SEEK_END:
+ if (pos > 0)
+ return -1;
+ if (bfds->fsize < - pos)
+ return -1;
+ pos = bfds->fsize + pos;
+ /* fall-through! */
case SEEK_SET:
if (pos < 0)
return -1;
- if (NULL != bfds->buffer)
+ if (pos > bfds->fsize)
+ return -1;
+ if ( (NULL == bfds->buffer) ||
+ ( (bfds->buffer_pos <= pos) &&
+ (bfds->buffer_pos + pos->buffer_bytes > pos) ) )
{
- if (0 != bfds_pick_next_buffer_at (bfds, pos))
- return -1;
- bfds->buffer_pos = 0;
- return bfds->fpos;
+ bfds->buffer_pos = pos;
+ return bfds->buffer_pos;
}
- bfds->buffer_pos = pos;
- return bfds->buffer_pos;
- case SEEK_END:
- if (NULL != bfds->buffer)
- {
- if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos))
- return -1;
- bfds->buffer_pos = 0;
- return bfds->fpos;
- }
- bfds->buffer_pos = bfds->fsize + pos;
- return bfds->buffer_pos;
+ if (0 != bfds_pick_next_buffer_at (bfds, pos))
+ return -1;
+ return bfds->fpos;
}
return -1;
}
/**
- * Fills 'buf_ptr' with a chunk of data.
- * Will seek if necessary. Will fail if 'count' exceeds buffer size.
+ * Fills 'buf_ptr' with a chunk of data. Will
+ * fail if 'count' exceeds buffer size.
*
* @param bfds bfds
* @param buf_ptr location to store data
* @param count number of bytes to read
- * @return number of bytes (<= count) available at location pointed by buf_ptr
+ * @return number of bytes (<= count) available at location pointed by buf_ptr,
+ * 0 for end of stream, -1 on error
*/
static ssize_t
bfds_read (struct BufferedFileDataSource *bfds,
void *buf_ptr,
size_t count)
{
- if (count > MAX_READ)
- return -1;
- if (count > bfds->buffer_bytes - bfds->buffer_pos)
+ char *cbuf = buf_ptr;
+ uint64_t old_off;
+ size_t avail;
+ size_t ret;
+
+ old_off = bfds->fpos + bfds->buffer_pos + bfds->buffer_bytes;
+ if (old_off == bfds->fsize)
+ return 0; /* end of stream */
+ ret = 0;
+ while (count > 0)
{
- if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos +
bfds->buffer_pos, SEEK_SET))
- return -1;
- if (NULL != bfds->buffer)
+ if ( (bfds->buffer_bytes == bfds->buffer_pos) &&
+ (0 != bfds_pick_next_buffer_at (bfds,
+ bfds->fpos + bfds->buffer_pos +
bfds->buffer_bytes)) )
{
- *buf_ptr = &bfds->buffer[bfds->buffer_pos];
- bfds->buffer_pos += count < bfds->buffer_bytes ? count :
bfds->buffer_bytes;
- return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes);
+ /* revert to original position, invalidate buffer */
+ bfds->fpos = old_off;
+ bfds->buffer_bytes = 0;
+ bfds->buffer_pos = 0;
+ return -1; /* getting more failed */
}
- else
- {
- int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? count
: (bfds->buffer_bytes - bfds->buffer_pos);
- *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos];
- bfds->buffer_pos += ret;
- return ret;
- }
+ avail = bfds->buffer_bytes - bfds->buffer_pos;
+ if (avail > count)
+ avail = count;
+ if (0 == avail)
+ abort (); /* must not happen */
+ memcpy (&cbuf[ret], &bfds->data[bfds->buffer_pos], avail);
+ bfds->buffer_pos += avail;
+ count -= avail;
+ ret += avail;
}
- else
- {
- if (NULL != bfds->buffer)
- *buf_ptr = &bfds->buffer[bfds->buffer_pos];
- else
- *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos];
- bfds->buffer_pos += count;
- return count;
- }
+ return ret;
}
@@ -395,7 +415,7 @@
/**
* Reset gz-compressed data stream to the beginning.
*
- * @return 1 on success, 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
* -1 on decompressor initialization failure
*/
static int
@@ -422,13 +442,13 @@
#ifdef ZLIB_VERNUM
15 + 32
#else
- -MAX_WBITS
+ - MAX_WBITS
#endif
))
{
return -1;
}
- cfs->fpos = cfs->gzip_header_length;
+ cfs->fpos = 0;
cfs->shm_pos = 0;
cfs->shm_buf_size = 0;
return 1;
@@ -438,7 +458,7 @@
/**
* Reset bz2-compressed data stream to the beginning.
*
- * @return 1 on success, 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
* -1 on decompressor initialization failure
*/
static int
@@ -455,21 +475,21 @@
* seeking backward.
*
* @param cfs cfs to reset
- * @return 1 on success, , 0 if we failed to seek,
+ * @return 1 on success, 0 to terminate extraction,
* -1 on error
*/
static int
cfs_reset_stream (struct CompressedFileSource *cfs)
{
switch (cfs->compression_type)
- {
- case COMP_TYPE_ZLIB:
- return cfs_reset_stream_zlib (cfs);
- case COMP_TYPE_BZ2:
- return cfs_reset_stream_bz2 (cfs);
- default:
- return -1;
- }
+ {
+ case COMP_TYPE_ZLIB:
+ return cfs_reset_stream_zlib (cfs);
+ case COMP_TYPE_BZ2:
+ return cfs_reset_stream_bz2 (cfs);
+ default:
+ return -1;
+ }
}
@@ -480,98 +500,75 @@
* @param cfs cfs to initialize
* @param proc callback for metadata
* @param proc_cls callback cls
- * @return 1 on success, -1 on error
+ * @return 1 on success, 0 to terminate extraction, -1 on error
*/
static int
cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- /* Process gzip header */
unsigned int gzip_header_length = 10;
- unsigned char data[12];
- int64_t buf_bytes;
- int len;
- unsigned char *buf;
- unsigned char *cptr;
-
- if (sizeof (data) > bfds_read (cfs->bfds, data, sizeof (data)))
+ unsigned char hdata[12];
+
+ /* Process gzip header */
+ if (sizeof (hdata) > bfds_read (cfs->bfds, hdata, sizeof (hdata)))
return -1;
-
- if (0 != (data[3] & 0x4)) /* FEXTRA set */
- gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
- (((unsigned) (data[11] & 0xff)) * 256);
+ if (0 != (hdata[3] & 0x4)) /* FEXTRA set */
+ gzip_header_length += 2 + (unsigned) (hdata[10] & 0xff) +
+ (((unsigned) (hdata[11] & 0xff)) * 256);
- if (0 != (data[3] & 0x8)) /* FNAME set */
- {
- if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
- return -1;
- buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
- if (buf_bytes <= 0)
- return -1;
- cptr = buf;
-
- len = 0;
- /* stored file name is here */
- while (len < buf_bytes)
+ if (0 != (hdata[3] & 0x8))
{
- if ('\0' == *cptr)
- break;
- cptr++;
- len++;
+ /* FNAME set */
+ char fname[1024];
+ char *cptr;
+ size_t len;
+ ssize_t buf_bytes;
+
+ if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
+ return -1;
+ buf_bytes = bfds_read (cfs->bfds, fname, sizeof (fname));
+ if (buf_bytes <= 0)
+ return -1;
+ if (NULL == (cptr = memchr (fname, 0, buf_bytes)))
+ return -1;
+ len = cptr - fname;
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ fname,
+ len))
+ return 0; /* done */
+ gzip_header_length += len + 1;
}
-
- if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
- EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
- (const char *) buf,
- len))
- return 0; /* done */
-
- /* FIXME: check for correctness */
- //gzip_header_length = (cptr - data) + 1;
- gzip_header_length += len + 1;
- }
-
- if (0 != (data[3] & 0x16)) /* FCOMMENT set */
- {
- int64_t buf_bytes;
- int len;
- unsigned char *buf;
- unsigned char *cptr;
-
- if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
- return -1;
- buf_bytes = bfds_read (cfs->bfds, &buf, 1024);
- if (buf_bytes <= 0)
- return -1;
- cptr = buf;
-
- len = 0;
- /* stored file name is here */
- while (len < buf_bytes)
- {
- if ('\0' == *cptr)
- break;
- cptr++;
- len++;
- }
-
- if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
- EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
- (const char *) buf,
- len))
- return 0; /* done */
-
- /* FIXME: check for correctness */
- //gzip_header_length = (cptr - data) + 1;
- gzip_header_length += len + 1;
- }
-
- if (data[3] & 0x2) /* FCHRC set */
+
+ if (0 != (hdata[3] & 0x16))
+ {
+ /* FCOMMENT set */
+ char fcomment[1024];
+ char *cptr;
+ ssize_t buf_bytes;
+ size_t len;
+
+ if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length,
SEEK_SET))
+ return -1;
+ buf_bytes = bfds_read (cfs->bfds, fcomment, sizeof (fcomment));
+ if (buf_bytes <= 0)
+ return -1;
+ if (NULL == (cptr = memchr (fcomment, 0, buf_bytes)))
+ return -1;
+ len = cptr - fcomment;
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ (const char *) fcomment,
+ len))
+ return 0; /* done */
+ gzip_header_length += len + 1;
+ }
+ if (0 != (hdata[3] & 0x2)) /* FCHRC set */
gzip_header_length += 2;
-
memset (&cfs->strm, 0, sizeof (z_stream));
-
+
#ifdef ZLIB_VERNUM
+ /* zlib will take care of its header */
gzip_header_length = 0;
#endif
@@ -604,7 +601,7 @@
* @param cfs cfs to initialize
* @param proc callback for metadata
* @param proc_cls callback cls
- * @return 1 on success, -1 on error
+ * @return 1 on success, 0 to terminate extraction, -1 on error
*/
static int
cfs_init_decompressor (struct CompressedFileSource *cfs,
@@ -671,12 +668,25 @@
/**
+ * Destroy compressed file source.
+ *
+ * @param cfs source to destroy
+ */
+static void
+cfs_destroy (struct CompressedFileSource *cfs)
+{
+ cfs_deinit_decompressor (cfs);
+ free (cfs);
+}
+
+
+/**
* Allocates and initializes new cfs object.
*
* @param bfds data source to use
* @param fsize size of the source
* @param compression_type type of compression used
- * @param proc metadata callback
+ * @param proc metadata callback to call with meta data found upon opening
* @param proc_cls callback cls
* @return newly allocated cfs on success, NULL on error
*/
@@ -686,7 +696,6 @@
enum ExtractorCompressionType compression_type,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- int shm_result;
struct CompressedFileSource *cfs;
if (NULL == (cfs = malloc (sizeof (struct CompressedFileSource))))
@@ -696,93 +705,118 @@
cfs->bfds = bfds;
cfs->fsize = fsize;
cfs->uncompressed_size = -1;
+ if (1 != cfs_init_decompressor (cfs,
+ proc, proc_cls))
+ {
+ free (cfs);
+ return NULL;
+ }
return cfs;
}
/**
- * Data is read from the source and shoved into decompressor
- * in chunks this big.
- */
-#define COM_CHUNK_SIZE (10*1024)
-
-
-/**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data. Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
*
* @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
*/
-static int
-cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read_zlib (struct CompressedFileSource *cfs,
+ void *data,
+ size_t size)
{
+ char *dst = data;
int ret;
- int64_t rc = preserve;
- int64_t total = cfs->strm.total_out;
+ size_t rc;
+ ssize_t in;
+ char buf[COM_CHUNK_SIZE];
- if (preserve > 0)
- memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve);
-
- while (rc < cfs->shm_size && ret != Z_STREAM_END)
- {
- if (cfs->strm.avail_in == 0)
+ if (cfs->fpos == cfs->uncompressed_size)
+ return 0;
+ rc = 0;
+ if (strm.avail_out > 0)
{
- int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in,
COM_CHUNK_SIZE);
- if (count <= 0)
- return 0;
+ /* got left-over decompressed data from previous round! */
+ in = strm.avail_out;
+ if (in > size)
+ in = size;
+ memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
+ cfs->fpos += in;
+ cfs->result_pos += in;
+ rc += in;
+ }
+ ret = Z_OK;
+ while ( (rc < size) && (Z_STREAM_END != ret) )
+ {
+ /* read block from original data source */
+ in = bfds_read (cfs->bfds,
+ buf, sizeof (buf));
+ if (in <= 0)
+ return -1; /* unexpected EOF */
+ cfs->strm.next_in = buf;
cfs->strm.avail_in = (uInt) count;
+ cfs->strm.next_out = cfs->result;
+ cfs->strm.avail_out = COM_CHUNK_SIZE;
+ cfs->result_pos = 0;
+ ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
+ if ( (Z_OK != ret) && (Z_STREAM_END != ret) )
+ return -1; /* unexpected error */
+ /* go backwards by the number of bytes left in the buffer */
+ if (-1 == bfds_seek (cfs->bfds, - cfs->strm.avail_in, SEEK_CUR))
+ return -1;
+ /* copy decompressed bytes to target buffer */
+ in = cfs->strm.total_out;
+ if (in > size - rc)
+ in = size - rc;
+ memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
+ cfs->fpos += in;
+ cfs->result_pos += in;
+ rc += in;
}
- cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc];
- cfs->strm.avail_out = cfs->shm_size - rc;
- ret = inflate (&cfs->strm, Z_SYNC_FLUSH);
- if (ret != Z_OK && ret != Z_STREAM_END)
- return 0;
- rc = cfs->strm.total_out - total;
- }
- if (ret == Z_STREAM_END)
- cfs->uncompressed_size = cfs->strm.total_out;
- cfs->shm_pos = preserve;
- cfs->shm_buf_size = rc + preserve;
- return 1;
+ if (Z_STREAM_END == ret)
+ cfs->uncompressed_size = cfs->fpos;
+ return rc;
}
/**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data. Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
*
* @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
*/
-static int
-cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read_bz2 (struct CompressedFileSource *cfs,
+ void *data,
+ size_t size)
{
return -1;
}
/**
- * Re-fills shm with new uncompressed data, preserving the last
- * 'preserve' bytes of existing data as the first 'preserve' bytes
- * of the new data.
- * Does the actual decompression. Will set uncompressed_size on
- * the end of compressed stream.
+ * Fills 'data' with new uncompressed data. Does the actual
+ * decompression. Will set uncompressed_size on the end of compressed
+ * stream.
*
* @param cfds cfs to read from
- * @param preserve number of bytes to preserve (0 to discard all old data)
- * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error
+ * @param data where to copy the data
+ * @param size number of bytes available in data
+ * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error
*/
-static int64_t
-cfs_read (struct CompressedFileSource *cfs, int64_t preserve)
+static ssize_t
+cfs_read (struct CompressedFileSource *cfs,
+ void *data,
+ size_t size)
{
switch (cfs->compression_type)
{
@@ -801,72 +835,44 @@
* requires seeking backwards beyond the boundaries of the buffer, resets the
* stream and repeats decompression from the beginning to 'position'.
*
- * @param cfds cfs to seek on
+ * @param cfs cfs to seek on
* @param position new starting point for the buffer
* @return new absolute buffer position, -1 on error or EOS
*/
static int64_t
-cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position)
+cfs_seek (struct CompressedFileSource *cfs,
+ uint64_t position)
{
- int64_t ret;
-
- if (position > cfs->strm.total_out - cfs->shm_buf_size && position <
cfs->strm.total_out)
- {
- ret = cfs_read (cfs, cfs->strm.total_out - position);
- if (ret < 0)
- return ret;
- return position;
- }
- while (position >= cfs->strm.total_out)
- {
- if (0 > (ret = cfs_read (cfs, 0)))
- return ret;
- if (ret == 0)
- return position;
- }
- if (position < cfs->strm.total_out && position > cfs->strm.total_out -
cfs->shm_buf_size)
- return cfs->strm.total_out - cfs->shm_buf_size;
- return -1;
-}
-
-
-/**
- * Moves the buffer to 'position' in uncompressed steam. If position
- * requires seeking backwards beyond the boundaries of the buffer, resets the
- * stream and repeats decompression from the beginning to 'position'.
- *
- * @param cfds cfs to seek on
- * @param position new starting point for the buffer
- * @return new absolute buffer position, -1 on error or EOS
- */
-static int64_t
-cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position)
-{
- return -1;
-}
-
-
-/**
- * Moves the buffer to 'position' in uncompressed steam. If position
- * requires seeking backwards beyond the boundaries of the buffer, resets the
- * stream and repeats decompression from the beginning to 'position'.
- *
- * @param cfds cfs to seek on
- * @param position new starting point for the buffer
- * @return new absolute buffer position, -1 on error or EOS
- */
-static int64_t
-cfs_seek (struct CompressedFileSource *cfs, int64_t position)
-{
- switch (cfs->compression_type)
+ int64_t delta;
+
+ delta = position - cfs->fpos;
+ if (delta < 0)
{
- case COMP_TYPE_ZLIB:
- return cfs_seek_zlib (cfs, position);
- case COMP_TYPE_BZ2:
- return cfs_seek_bz2 (cfs, position);
- default:
- return -1;
+ if (result_pos >= - delta)
+ {
+ result_pos += delta;
+ delta = 0;
+ }
+ else
+ {
+ if (-1 == cfs_reset_stream (cfs))
+ return -1;
+ delta = position;
+ }
}
+ while (delta > 0)
+ {
+ char buf[COM_CHUNK_SIZE];
+ size_t max;
+ int64_t ret;
+
+ max = (sizeof (buf) > delta) ? delta : sizeof (buf);
+ ret = cfs_read (cfs, buf, max);
+ if (-1 == ret)
+ return -1;
+ delta -= ret;
+ }
+ return cfs->fpos;
}
@@ -879,163 +885,239 @@
* @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression
*/
static enum ExtractorCompressionType
-get_compression_type (const unsigned char *data,
- int fd,
- int64_t fsize)
+get_compression_type (struct BufferedFileDataSource *bfds)
{
- void *read_data = NULL;
- size_t read_data_size = 0;
- ssize_t read_result;
- enum ExtractorCompressionType result = COMP_TYPE_INVALID;
+ unsigned char read_data[3];
- if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
- {
+ if (0 != bfds_seek (bfds, 0, SEEK_SET))
return COMP_TYPE_INVALID;
- }
- if (data == NULL)
- {
- int64_t position;
- read_data_size = COMPRESSED_DATA_PROBE_SIZE;
- read_data = malloc (read_data_size);
- if (read_data == NULL)
- return -1;
-#if WINDOWS
- position = _lseeki64 (fd, 0, SEEK_CUR);
-#elif HAVE_LSEEK64
- position = lseek64 (fd, 0, SEEK_CUR);
-#else
- position = (int64_t) lseek (fd, 0, SEEK_CUR);
-#endif
- read_result = READ (fd, read_data, read_data_size);
-#if WINDOWS
- position = _lseeki64 (fd, position, SEEK_SET);
-#elif HAVE_LSEEK64
- position = lseek64 (fd, position, SEEK_SET);
-#else
- position = lseek (fd, (off_t) position, SEEK_SET);
-#endif
- if (read_result != read_data_size)
- {
- free (read_data);
- return COMP_TYPE_UNDEFINED;
- }
- data = (const void *) read_data;
- }
+ if (sizeof (read_data) !=
+ bfds_read (bfds, read_data, sizeof (read_data)))
+ return COMP_TYPE_UNDEFINED;
+
#if HAVE_ZLIB
- if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) &&
(data[2] == 0x08))
- result = COMP_TYPE_ZLIB;
+ if ( (bdfs->fsize >= MIN_ZLIB_HEADER) &&
+ (data[0] == 0x1f) &&
+ (data[1] == 0x8b) &&
+ (data[2] == 0x08) )
+ return COMP_TYPE_ZLIB;
#endif
#if HAVE_LIBBZ2
- if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') &&
(data[2] == 'h'))
- result = COMP_TYPE_BZ2;
+ if ( (bdfs->fsize >= MIN_BZ2_HEADER) &&
+ (data[0] == 'B') &&
+ (data[1] == 'Z') &&
+ (data[2] == 'h'))
+ return COMP_TYPE_BZ2;
#endif
- if (read_data != NULL)
- free (read_data);
- return result;
+ return COMP_TYPE_INVALID;
}
-#if 0
+/**
+ * Handle to a datasource we can use for the plugins.
+ */
+struct EXTRACTOR_Datasource
+{
- enum ExtractorCompressionType compression_type = -1;
- struct CompressedFileSource *cfs = NULL;
- int fd = -1;
- struct stat64 fstatbuf;
- int64_t fsize = 0;
+ /**
+ * Underlying buffered data source.
+ */
+ struct BufferedFileDataSource *bfds;
- /* If data is not given, then we need to read it from the file. Try opening it */
- if ((data == NULL) &&
- (filename != NULL) &&
- (0 == STAT64(filename, &fstatbuf)) &&
- (!S_ISDIR(fstatbuf.st_mode)) &&
- (-1 != (fd = file_open (filename,
- O_RDONLY | O_LARGEFILE))))
- {
- /* Empty files are of no interest */
- fsize = fstatbuf.st_size;
- if (fsize == 0)
- {
- close(fd);
- return;
- }
- }
+ /**
+ * Compressed file source (NULL if not applicable).
+ */
+ struct CompressedFileSource *cfs;
- /* Data is not given, and we've failed to open the file with data -> exit */
- if ((fsize == 0) && (data == NULL))
- return;
- /* fsize is now size of the data OR size of the file */
- if (data != NULL)
- fsize = size;
+ /**
+ * Underlying file descriptor, -1 for none.
+ */
+ int fd;
+};
- errno = 0;
- /* Peek at first few bytes of the file (or of the data), and see if it's compressed. */
- compression_type = get_compression_type (data, fd, fsize);
- if (compression_type < 0)
- {
- /* errno is set by get_compression_type () */
- if (fd != -1)
- close (fd);
- return;
- }
-
+/**
+ * Create a datasource from a file on disk.
+ *
+ * @param filename name of the file on disk
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
+ */
+struct EXTRACTOR_Datasource *
+EXTRACTOR_datasource_create_from_file_ (const char *filename,
+ EXTRACTOR_MetaDataProcessor proc,
+ void *proc_cls)
+{
struct BufferedFileDataSource *bfds;
- bfds = bfds_new (data, fd, fsize);
- if (bfds == NULL)
- return;
+ struct EXTRACTOR_Datasource *ds;
+ enum ExtractorCompressionType ct;
+ int fd;
+ struct stat sb;
+ int64_t fsize;
- if (compression_type > 0)
- {
- int icr = 0;
- /* Set up a decompressor.
- * Will also report compression-related metadata to the caller.
- */
- cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls);
- if (cfs == NULL)
+ if (-1 == (fd = open (filename, O_RDONLY | O_LARGEFILE)))
+ return NULL;
+ if ( (0 != fstat (fd, &sb)) ||
+ (S_ISDIR (fstatbuf.st_mode)) )
{
- if (fd != -1)
- close (fd);
- errno = EILSEQ;
- return;
+ (void) close (fd);
+ return NULL;
}
- icr = cfs_init_decompressor (cfs, proc, proc_cls);
- if (icr < 0)
+ fsize = (int64_t) sb.st_size;
+ if (0 == fsize)
{
- if (fd != -1)
- close (fd);
- errno = EILSEQ;
- return;
+ (void) close (fd);
+ return NULL;
}
- else if (icr == 0)
+ bfds = bfds_new (NULL, fd, fsize);
+ if (NULL == bfds)
{
- if (fd != -1)
- close (fd);
- errno = 0;
- return;
+ (void) close (fd);
+ return NULL;
}
- }
+ if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource))))
+ {
+ bfds_delete (bfds);
+ return NULL;
+ }
+ ds->bfds = bfds;
+ ds->fd;
+ ct = get_compression_type (bfds);
+ if ( (COMP_TYPE_ZLIB == ct) ||
+ (COMP_TYPE_BZ2 == ct) )
+ ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls);
+ if (NULL == ds->cfs)
+ {
+ bfds_delete (bfds);
+ free (ds);
+ (void) close (fd);
+ return NULL;
+ }
+ return ds;
+}
-#endif
+/**
+ * Create a datasource from a buffer in memory.
+ *
+ * @param buf data in memory
+ * @param size number of bytes in 'buf'
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource
+ */
+struct EXTRACTOR_Datasource *
+EXTRACTOR_datasource_create_from_buffer_ (const char *buf,
+ size_t size,
+ EXTRACTOR_MetaDataProcessor proc,
void *proc_cls)
+{
+ struct BufferedFileDataSource *bfds;
+ struct EXTRACTOR_Datasource *ds;
+ enum ExtractorCompressionType ct;
+ if (0 == size)
+ return NULL;
+ if (NULL == (bfds = bfds_new (buf, -1, size)))
+ return NULL;
+ if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource))))
+ {
+ bfds_delete (bfds);
+ return NULL;
+ }
+ ds->bfds = bfds;
+ ds->fd;
+ ct = get_compression_type (bfds);
+ if ( (COMP_TYPE_ZLIB == ct) ||
+ (COMP_TYPE_BZ2 == ct) )
+ ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls);
+ if (NULL == ds->cfs)
+ {
+ bfds_delete (bfds);
+ free (ds);
+ return NULL;
+ }
+ return ds;
+}
/**
* Destroy a data source.
*
- * @param datasource source to destroy
+ * @param ds source to destroy
*/
void
-EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource)
+EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds)
{
- if (cfs != NULL)
- {
- cfs_deinit_decompressor (cfs);
- cfs_delete (cfs);
- }
- bfds_delete (bfds);
- if (-1 != fd)
- close(fd);
+ if (NULL != ds->cfs)
+ cfs_destroy (ds->cfs);
+ bfds_delete (ds->bfds);
+ if (-1 != ds->fd)
+ (void) close (ds->fd);
+ free (ds);
}
+
+/**
+ * Make 'size' bytes of data from the data source available at 'data'.
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @param data where the data should be copied to
+ * @param size maximum number of bytes requested
+ * @return number of bytes now available in data (can be smaller than 'size'),
+ * -1 on error
+ */
+ssize_t
+EXTRACTOR_datasource_read_ (void *cls,
+ void *data,
+ size_t size)
+{
+ struct EXTRACTOR_Datasource *ds = cls;
+
+ if (NULL != ds->cfs)
+ return cfs_read (ds->cfs, data, size);
+ return bdfs_read (ds->bdfs, data, size);
+}
+
+
+/**
+ * Seek in the datasource. Use 'SEEK_CUR' for whence and 'pos' of 0 to
+ * obtain the current position in the file.
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @param pos position to seek (see 'man lseek')
+ * @param whence how to see (absolute to start, relative, absolute to end)
+ * @return new absolute position, UINT64_MAX on error (i.e. desired position
+ * does not exist)
+ */
+int64_t
+EXTRACTOR_datasource_seek_ (void *cls,
+ uint64_t pos,
+ int whence)
+{
+ struct EXTRACTOR_Datasource *ds = cls;
+
+ if (NULL != ds->cfs)
+ return cfs_seek (ds->cfs, pos, whence);
+ return bdfs_seek (ds->bdfs, pos, whence);
+}
+
+
+/**
+ * Determine the overall size of the data source (after compression).
+ *
+ * @param cls must be a 'struct EXTRACTOR_Datasource'
+ * @return overall file size, UINT64_MAX on error or unknown
+ */
+int64_t
+EXTRACTOR_datasource_get_size_ (void *cls)
+{
+ struct EXTRACTOR_Datasource *ds = cls;
+
+ if (NULL != ds->cfs)
+ return cfs_seek (ds->cfs, pos, whence);
+ return bdfs_seek (ds->bdfs, pos, whence);
+}
+
+
/* end of extractor_datasource.c */
Modified: Extractor/src/main/extractor_datasource.h
===================================================================
--- Extractor/src/main/extractor_datasource.h 2012-07-21 22:34:27 UTC (rev 22813)
+++ Extractor/src/main/extractor_datasource.h 2012-07-22 09:38:54 UTC (rev 22814)
@@ -30,10 +30,13 @@
* Create a datasource from a file on disk.
*
* @param filename name of the file on disk
- * @return handle to the datasource
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
*/
struct EXTRACTOR_Datasource *
-EXTRACTOR_datasource_create_from_file_ (const char *filename);
+EXTRACTOR_datasource_create_from_file_ (const char *filename,
+ EXTRACTOR_MetaDataProcessor proc, void
*proc_cls);
/**
@@ -41,24 +44,27 @@
*
* @param buf data in memory
* @param size number of bytes in 'buf'
- * @return handle to the datasource
+ * @param proc metadata callback to call with meta data found upon opening
+ * @param proc_cls callback cls
+ * @return handle to the datasource, NULL on error
*/
struct EXTRACTOR_Datasource *
EXTRACTOR_datasource_create_from_buffer_ (const char *buf,
- size_t size);
+ size_t size,
+ EXTRACTOR_MetaDataProcessor proc,
void *proc_cls);
/**
* Destroy a data source.
*
- * @param datasource source to destroy
+ * @param ds source to destroy
*/
void
-EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource);
+EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds);
/**
- * Make 'size' bytes of data from the data source available at '*data'.
+ * Make 'size' bytes of data from the data source available at 'data'.
*
* @param cls must be a 'struct EXTRACTOR_Datasource'
* @param data where the data should be copied to
@@ -77,12 +83,12 @@
* obtain the current position in the file.
*
* @param cls must be a 'struct EXTRACTOR_Datasource'
- * @param pos position to seek (see 'man lseek')
+ * @param pos position to seek (see 'man lseek')o
* @param whence how to see (absolute to start, relative, absolute to end)
- * @return new absolute position, UINT64_MAX on error (i.e. desired position
+ * @return new absolute position, -1 on error (i.e. desired position
* does not exist)
*/
-uint64_t
+int64_t
EXTRACTOR_datasource_seek_ (void *cls,
uint64_t pos,
int whence);
@@ -92,9 +98,9 @@
* Determine the overall size of the data source (after compression).
*
* @param cls must be a 'struct EXTRACTOR_Datasource'
- * @return overall file size, UINT64_MAX on error (i.e. IPC failure)
+ * @return overall file size, -1 on error or unknown
*/
-uint64_t
+int64_t
EXTRACTOR_datasource_get_size_ (void *cls);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r22814 - Extractor/src/main,
gnunet <=