[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r20783 - in Extractor: . src/include src/main src/plugins
From: gnunet
Subject: [GNUnet-SVN] r20783 - in Extractor: . src/include src/main src/plugins
Date: Tue, 27 Mar 2012 15:05:17 +0200
Author: grothoff
Date: 2012-03-27 15:05:17 +0200 (Tue, 27 Mar 2012)
New Revision: 20783
Removed:
Extractor/src/plugins/id3v23_extractor.c
Extractor/src/plugins/id3v24_extractor.c
Modified:
Extractor/AUTHORS
Extractor/ChangeLog
Extractor/configure.ac
Extractor/src/include/extractor.h
Extractor/src/main/extractor.c
Extractor/src/main/extractor_plugins.c
Extractor/src/main/extractor_plugins.h
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/id3_extractor.c
Extractor/src/plugins/id3v2_extractor.c
Extractor/src/plugins/mp3_extractor.c
Extractor/src/plugins/template_extractor.c
Log:
LRN is refactoring the plugin API, and hell breaks loose
Modified: Extractor/AUTHORS
===================================================================
--- Extractor/AUTHORS 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/AUTHORS 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,6 +1,7 @@
Core Team:
Christian Grothoff <address@hidden>
Nils Durner <address@hidden>
+LRN <address@hidden>
Formats:
html - core team with code from libhtmlparse 0.1.13,
http://msalem.translator.cx/libhtmlparse.html
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/ChangeLog 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,3 +1,8 @@
+Tue Mar 27 15:04:00 CEST 2012
+ Refactoring plugin API to allow seeks to arbitrary positions in the
+ file (breaks existing plugins, so the current version will not
+ work). -LRN
+
Sun Jan 29 17:27:08 CET 2012
Documented recently discovered issues with pthreads and
out-of-process plugin executions in the manual. -CG
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/configure.ac 2012-03-27 13:05:17 UTC (rev 20783)
@@ -101,6 +101,8 @@
if test "x$mingw32_ws2" = "xno" -a "x$mingw64_ws2" = "xno"; then
AC_MSG_ERROR([libextractor requires Winsock2])
fi
+ # Sufficiently new Windows XP
+ CFLAGS="-D__MSVCRT_VERSION__=0x0601 $CFLAGS"
AC_MSG_CHECKING(for PlibC)
plibc=0
@@ -136,6 +138,8 @@
if test $plibc -ne 1;
then
AC_MSG_ERROR([libextractor requires PlibC])
+ else
+ LIBS="$LIBS -lplibc"
fi
LDFLAGS="$LDFLAGS -Wl,-no-undefined -Wl,--export-all-symbols"
@@ -336,7 +340,7 @@
AC_FUNC_ERROR_AT_LINE
AC_SEARCH_LIBS(dlopen, dl)
AC_SEARCH_LIBS(shm_open, rt)
-AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove
memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen
strndup ftruncate shm_open shm_unlink])
+AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove
memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen
strndup ftruncate shm_open shm_unlink lseek64])
LE_LIB_LIBS=$LIBS
LIBS=$LIBSOLD
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/include/extractor.h 2012-03-27 13:05:17 UTC (rev 20783)
@@ -392,13 +392,7 @@
* @param options options for this plugin; can be NULL
* @return 0 if all calls to proc returned 0, otherwise 1
*/
-typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
- size_t datasize,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options);
-
/**
* Linked list of extractor plugins. An application builds this list
* by telling libextractor to load various keyword-extraction
@@ -407,7 +401,14 @@
*/
struct EXTRACTOR_PluginList;
+typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin,
+ EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
+typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList
*plugin);
+typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList
*plugin);
+
+
+
/**
* Load the default set of plugins. The default can be changed
* by setting the LIBEXTRACTOR_LIBRARIES environment variable;
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/main/extractor.c 2012-03-27 13:05:17 UTC (rev 20783)
@@ -23,7 +23,7 @@
#include "extractor.h"
#include <dirent.h>
#include <sys/types.h>
-#ifndef WINDOWS
+#if !WINDOWS
#include <sys/wait.h>
#include <sys/shm.h>
#endif
@@ -59,118 +59,54 @@
*/
#define MAX_MIME_LEN 256
+#define MAX_SHM_NAME 255
+
/**
* Set to 1 to get failure info,
* 2 for actual debug info.
*/
#define DEBUG 1
+#define MESSAGE_INIT_STATE 0x01
+#define MESSAGE_UPDATED_SHM 0x02
+#define MESSAGE_DONE 0x03
+#define MESSAGE_SEEK 0x04
+#define MESSAGE_META 0x05
+#define MESSAGE_DISCARD_STATE 0x06
/**
- * Stop the child process of this plugin.
+ * Header used for our IPC replies. A header
+ * with all fields being zero is used to indicate
+ * the end of the stream.
*/
-static void
-stop_process (struct EXTRACTOR_PluginList *plugin)
+struct IpcHeader
{
- int status;
-#ifdef WINDOWS
- HANDLE process;
-#endif
+ enum EXTRACTOR_MetaType meta_type;
+ enum EXTRACTOR_MetaFormat meta_format;
+ size_t data_len;
+ size_t mime_len;
+};
-#if DEBUG
-#ifndef WINDOWS
- if (plugin->cpid == -1)
-#else
- if (plugin->hProcess == INVALID_HANDLE_VALUE)
-#endif
- fprintf (stderr,
- "Plugin `%s' choked on this input\n",
- plugin->short_libname);
-#endif
-#ifndef WINDOWS
- if ( (plugin->cpid == -1) ||
- (plugin->cpid == 0) )
- return;
- kill (plugin->cpid, SIGKILL);
- waitpid (plugin->cpid, &status, 0);
- plugin->cpid = -1;
- close (plugin->cpipe_out);
- fclose (plugin->cpipe_in);
-#else
- if (plugin->hProcess == INVALID_HANDLE_VALUE ||
- plugin->hProcess == NULL)
- return;
- TerminateProcess (plugin->hProcess, 0);
- CloseHandle (plugin->hProcess);
- plugin->hProcess = INVALID_HANDLE_VALUE;
- close (plugin->cpipe_out);
- fclose (plugin->cpipe_in);
-#endif
- plugin->cpipe_out = -1;
- plugin->cpipe_in = NULL;
-}
-
-
-/**
- * Remove a plugin from a list.
- *
- * @param prev the current list of plugins
- * @param library the name of the plugin to remove
- * @return the reduced list, unchanged if the plugin was not loaded
- */
-struct EXTRACTOR_PluginList *
-EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
- const char * library)
+#if !WINDOWS
+int
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
{
- struct EXTRACTOR_PluginList *pos;
- struct EXTRACTOR_PluginList *first;
-
- pos = prev;
- first = prev;
- while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
- {
- prev = pos;
- pos = pos->next;
- }
- if (pos != NULL)
- {
- /* found, close library */
- if (first == pos)
- first = pos->next;
- else
- prev->next = pos->next;
- /* found */
- stop_process (pos);
- free (pos->short_libname);
- free (pos->libname);
- free (pos->plugin_options);
- if (NULL != pos->libraryHandle)
- lt_dlclose (pos->libraryHandle);
- free (pos);
- }
-#if DEBUG
- else
- fprintf(stderr,
- "Unloading plugin `%s' failed!\n",
- library);
-#endif
- return first;
+ if (plugin->shm_id != -1)
+ close (plugin->shm_id);
+ plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
+ return plugin->shm_id;
}
-
-
-/**
- * Remove all plugins from the given list (destroys the list).
- *
- * @param plugin the list of plugins
- */
-void
-EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
+#else
+HANDLE
+plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
{
- while (plugins != NULL)
- plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname);
+ if (plugin->map_handle != 0)
+ CloseHandle (plugin->map_handle);
+ plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
+ return plugin->map_handle;
}
+#endif
-
static int
write_all (int fd,
const void *buf,
@@ -187,45 +123,10 @@
return -1;
off += ret;
}
- return 0;
+ return size;
}
-
-static int
-read_all (int fd,
- void *buf,
- size_t size)
-{
- char *data = buf;
- size_t off = 0;
- ssize_t ret;
-
- while (off < size)
- {
- ret = read (fd, &data[off], size - off);
- if (ret <= 0)
- return -1;
- off += ret;
- }
- return 0;
-}
-
-
/**
- * Header used for our IPC replies. A header
- * with all fields being zero is used to indicate
- * the end of the stream.
- */
-struct IpcHeader
-{
- enum EXTRACTOR_MetaType type;
- enum EXTRACTOR_MetaFormat format;
- size_t data_len;
- size_t mime_len;
-};
-
-
-/**
* Function called by a plugin in a child process. Transmits
* the meta data back to the parent process.
*
@@ -254,6 +155,8 @@
int *cpipe_out = cls;
struct IpcHeader hdr;
size_t mime_len;
+ unsigned char meta_byte = MESSAGE_META;
+ unsigned char zero_byte = 0;
if (data_mime_type == NULL)
mime_len = 0;
@@ -261,23 +164,19 @@
mime_len = strlen (data_mime_type) + 1;
if (mime_len > MAX_MIME_LEN)
mime_len = MAX_MIME_LEN;
- hdr.type = type;
- hdr.format = format;
+ hdr.meta_type = type;
+ hdr.meta_format = format;
hdr.data_len = data_len;
hdr.mime_len = mime_len;
- if ( (hdr.type == 0) &&
- (hdr.format == 0) &&
- (hdr.data_len == 0) &&
- (hdr.mime_len == 0) )
- return 0; /* better skip this one, would signal termination... */
- if ( (0 != write_all (*cpipe_out, &hdr, sizeof(hdr))) ||
- (0 != write_all (*cpipe_out, data_mime_type, mime_len)) ||
- (0 != write_all (*cpipe_out, data, data_len)) )
- return 1;
+ if ((1 != write_all (*cpipe_out, &meta_byte, 1)) ||
+ (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) ||
+ (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) ||
+ (1 != write_all (*cpipe_out, &zero_byte, 1)) ||
+ (data_len != write_all (*cpipe_out, data, data_len)))
+ return 1;
return 0;
}
-
/**
* 'main' function of the child process. Reads shm-filenames from
* 'in' (line-by-line) and writes meta data blocks to 'out'. The meta
@@ -288,23 +187,20 @@
* @param out stream to write to
*/
static void
-process_requests (struct EXTRACTOR_PluginList *plugin,
- int in,
- int out)
+process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out)
{
- char hfn[256];
- char tfn[256];
- char sze[256];
- size_t hfn_len;
- size_t tfn_len;
- size_t sze_len;
- char *fn;
- FILE *fin;
- void *ptr;
- int shmid;
+ int read_result1, read_result2, read_result3;
+ unsigned char code;
+ int64_t fsize = -1;
+ int64_t position = 0;
+ void *shm_ptr = NULL;
+ size_t shm_size = 0;
+ char *shm_name = NULL;
+ size_t shm_name_len;
+
+ int extract_reply;
+
struct IpcHeader hdr;
- size_t size;
- int want_tail;
int do_break;
#ifdef WINDOWS
HANDLE map;
@@ -312,167 +208,556 @@
#endif
if (plugin == NULL)
- {
- close (in);
- close (out);
- return;
- }
+ {
+ close (in);
+ close (out);
+ return;
+ }
if (0 != plugin_load (plugin))
- {
- close (in);
- close (out);
+ {
+ close (in);
+ close (out);
#if DEBUG
- fprintf (stderr,
- "Plugin `%s' failed to load!\n",
- plugin->short_libname);
+ fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
#endif
- return;
- }
- want_tail = 0;
- if ( (plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials,
- "want-tail")) )
- {
- want_tail = 1;
- }
- if ( (plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials,
- "close-stderr")) )
- {
- close (2);
- }
- if ( (plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials,
- "close-stdout")) )
- {
- close (1);
- }
+ return;
+ }
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "close-stderr")))
+ close (2);
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "close-stdout")))
+ close (1);
memset (&hdr, 0, sizeof (hdr));
- fin = fdopen (in, "r");
- if (fin == NULL)
+ do_break = 0;
+ while (!do_break)
+ {
+ read_result1 = read (in, &code, 1);
+ if (read_result1 <= 0)
+ break;
+ switch (code)
{
- close (in);
- close (out);
- return;
- }
- while (NULL != fgets (hfn, sizeof(hfn), fin))
- {
- hfn_len = strlen (hfn);
- if (hfn_len <= 1)
- break;
- ptr = NULL;
- hfn[--hfn_len] = '\0'; /* kill newline */
- if (NULL == fgets (tfn, sizeof(tfn), fin))
- break;
- if ('!' != tfn[0])
- break;
- tfn_len = strlen (tfn);
- tfn[--tfn_len] = '\0'; /* kill newline */
- if ( (want_tail) &&
- (tfn_len > 1) )
- {
- fn = &tfn[1];
- }
- else
- {
- fn = hfn;
- }
- if (NULL == fgets (sze, sizeof(sze), fin))
- break;
- if ('s' != sze[0])
- break;
- sze_len = strlen (sze);
- sze[--sze_len] = '\0'; /* kill newline */
- size = strtol (&sze[1], NULL, 10);
- if (size == LONG_MIN || size == LONG_MAX || size == 0)
+ case MESSAGE_INIT_STATE:
+ read_result2 = read (in, &fsize, sizeof (int64_t));
+ read_result3 = read (in, &shm_name_len, sizeof (size_t));
+ if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof
(size_t)) ||
+ shm_name_len > MAX_SHM_NAME || fsize <= 0)
+ {
+ do_break = 1;
break;
- do_break = 0;
-#ifndef WINDOWS
- if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) &&
- (SIZE_MAX != (size = lseek (shmid, 0, SEEK_END))) &&
- (NULL != (ptr = mmap (NULL, size, PROT_READ, MAP_SHARED, shmid, 0)))
&&
- (ptr != (void*) -1) )
+ }
+ if (shm_name != NULL)
+ free (shm_name);
+ shm_name = malloc (shm_name_len);
+ if (shm_name == NULL)
+ {
+ do_break = 1;
+ break;
+ }
+ read_result2 = read (in, shm_name, shm_name_len);
+ if (read_result2 < shm_name_len)
+ {
+ do_break = 1;
+ break;
+ }
+ shm_name[shm_name_len - 1] = '\0';
+#if !WINDOWS
+ if (shm_ptr != NULL)
+ munmap (shm_ptr, shm_size);
+ if (-1 == plugin_open_shm (plugin, shm_name))
+ {
+ do_break = 1;
+ break;
+ }
#else
- /* Despite the obvious, this must be READWRITE, not READONLY */
- map = OpenFileMapping (PAGE_READWRITE, FALSE, fn);
- ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0);
- if (ptr != NULL)
+ if (shm_ptr != NULL)
+ UnmapViewOfFile (shm_ptr);
+ if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
{
- if (0 == VirtualQuery (ptr, &mi, sizeof (mi)) || mi.RegionSize < size)
- {
- UnmapViewOfFile (ptr);
- ptr = NULL;
- }
+ do_break = 1;
+ break;
}
- if (ptr != NULL)
#endif
- {
- if ( ( (plugin->extractMethod != NULL) &&
- (0 != plugin->extractMethod (ptr,
- size,
- &transmit_reply,
- &out,
- plugin->plugin_options)) ) ||
- (0 != write_all (out, &hdr, sizeof(hdr))) )
- do_break = 1;
- }
-#ifndef WINDOWS
- if ( (ptr != NULL) &&
- (ptr != (void*) -1) )
- munmap (ptr, size);
- if (-1 != shmid)
- close (shmid);
+ plugin->fsize = fsize;
+ plugin->init_state_method (plugin);
+ break;
+ case MESSAGE_DISCARD_STATE:
+ plugin->discard_state_method (plugin);
+#if !WINDOWS
+ if (shm_ptr != NULL && shm_size > 0)
+ munmap (shm_ptr, shm_size);
+ if (plugin->shm_id != -1)
+ close (plugin->shm_id);
+ plugin->shm_id = -1;
+ shm_size = 0;
#else
- if (ptr != NULL && ptr != (void*) -1)
- UnmapViewOfFile (ptr);
- if (map != NULL)
- CloseHandle (map);
+ if (shm_ptr != NULL)
+ UnmapViewOfFile (shm_ptr);
+ if (plugin->map_handle != 0)
+ CloseHandle (plugin->map_handle);
+ plugin->map_handle = 0;
#endif
- if (do_break)
- break;
- if ( (plugin->specials != NULL) &&
- (NULL != strstr (plugin->specials,
- "force-kill")) )
- {
- /* we're required to die after each file since this
- plugin only supports a single file at a time */
- _exit (0);
- }
+ shm_ptr = NULL;
+ break;
+ case MESSAGE_UPDATED_SHM:
+ read_result2 = read (in, &position, sizeof (int64_t));
+ read_result3 = read (in, &shm_size, sizeof (size_t));
+ if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof
(size_t)) ||
+ position < 0 || fsize <= 0 || position >= fsize)
+ {
+ do_break = 1;
+ break;
+ }
+ /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
+#if !WINDOWS
+ if ((-1 == plugin->shm_id) ||
+ (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED,
plugin->shm_id, 0))) ||
+ (shm_ptr == (void *) -1))
+ {
+ do_break = 1;
+ break;
+ }
+#else
+ if ((plugin->map_handle == 0) ||
+ (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ,
0, 0, 0))))
+ {
+ do_break = 1;
+ break;
+ }
+#endif
+ plugin->position = position;
+ plugin->shm_ptr = shm_ptr;
+ plugin->map_size = shm_size;
+ /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object
+ * completely transparently, and the underlying code would return bytes from
+ * the memory map, or would block and wait for a seek to happen.
+ * That, however, requires somewhat different architecture, and even more wrapping
+ * and hand-holding. It's easier to make plugins aware of the fact that they work
+ * with discrete in-memory buffers with expensive seeking, not continuous files.
+ */
+ extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
+#if !WINDOWS
+ if ((shm_ptr != NULL) &&
+ (shm_ptr != (void*) -1) )
+ munmap (shm_ptr, shm_size);
+#else
+ if (shm_ptr != NULL)
+ UnmapViewOfFile (shm_ptr);
+#endif
+ if (extract_reply == 1)
+ {
+ unsigned char done_byte = MESSAGE_DONE;
+ if (write (out, &done_byte, 1) != 1)
+ {
+ do_break = 1;
+ break;
+ }
+ if ((plugin->specials != NULL) &&
+ (NULL != strstr (plugin->specials, "force-kill")))
+ {
+ /* we're required to die after each file since this
+ plugin only supports a single file at a time */
+#if !WINDOWS
+ fsync (out);
+#else
+ _commit (out);
+#endif
+ _exit (0);
+ }
+ }
+ else
+ {
+ unsigned char seek_byte = MESSAGE_SEEK;
+ if (write (out, &seek_byte, 1) != 1)
+ {
+ do_break = 1;
+ break;
+ }
+ if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof
(int64_t))
+ {
+ do_break = 1;
+ break;
+ }
+ }
+ break;
}
- fclose (fin);
+ }
+ close (in);
close (out);
}
+#if !WINDOWS
-#ifdef WINDOWS
+/**
+ * Start the process for the given plugin.
+ */
static void
-write_plugin_data (int fd, const struct EXTRACTOR_PluginList *plugin)
+start_process (struct EXTRACTOR_PluginList *plugin)
{
- size_t i;
- DWORD len;
- char *str;
+ int p1[2];
+ int p2[2];
+ pid_t pid;
+ int status;
- i = strlen (plugin->libname) + 1;
- write (fd, &i, sizeof (size_t));
- write (fd, plugin->libname, i);
- i = strlen (plugin->short_libname) + 1;
- write (fd, &i, sizeof (size_t));
- write (fd, plugin->short_libname, i);
- if (plugin->plugin_options != NULL)
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ if (-1 != plugin->cpid && 0 != plugin->cpid)
+ return;
+ break;
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (0 != plugin->cpid)
+ return;
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ return;
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return;
+ break;
+ }
+
+ plugin->cpid = -1;
+ if (0 != pipe (p1))
+ {
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
+ if (0 != pipe (p2))
+ {
+ close (p1[0]);
+ close (p1[1]);
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
+ pid = fork ();
+ plugin->cpid = pid;
+ if (pid == -1)
+ {
+ close (p1[0]);
+ close (p1[1]);
+ close (p2[0]);
+ close (p2[1]);
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
+ if (pid == 0)
+ {
+ close (p1[1]);
+ close (p2[0]);
+ process_requests (plugin, p1[0], p2[1]);
+ _exit (0);
+ }
+ close (p1[0]);
+ close (p2[1]);
+ plugin->cpipe_in = fdopen (p1[1], "w");
+ if (plugin->cpipe_in == NULL)
+ {
+ perror ("fdopen");
+ (void) kill (plugin->cpid, SIGKILL);
+ waitpid (plugin->cpid, &status, 0);
+ close (p1[1]);
+ close (p2[0]);
+ plugin->cpid = -1;
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
+ plugin->cpipe_out = p2[0];
+}
+
+/**
+ * Stop the child process of this plugin.
+ */
+static void
+stop_process (struct EXTRACTOR_PluginList *plugin)
+{
+ int status;
+
+#if DEBUG
+ if (plugin->cpid == -1)
+ fprintf (stderr,
+ "Plugin `%s' choked on this input\n",
+ plugin->short_libname);
+#endif
+ if ( (plugin->cpid == -1) ||
+ (plugin->cpid == 0) )
+ return;
+ kill (plugin->cpid, SIGKILL);
+ waitpid (plugin->cpid, &status, 0);
+ plugin->cpid = -1;
+ close (plugin->cpipe_out);
+ fclose (plugin->cpipe_in);
+ plugin->cpipe_out = -1;
+ plugin->cpipe_in = NULL;
+
+ if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+
+ plugin->seek_request = -1;
+}
+
+static int
+write_plugin_data (const struct EXTRACTOR_PluginList *plugin)
+{
+ /* only does anything on Windows */
+ return 0;
+}
+
+#define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...)
+#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf,
size)
+
+#else /* WINDOWS */
+
+#ifndef PIPE_BUF
+#define PIPE_BUF 512
+#endif
+
+/* Copyright Bob Byrnes <byrnes <at> curl.com>
+ http://permalink.gmane.org/gmane.os.cygwin.patches/2121
+*/
+/* Create a pipe, and return handles to the read and write ends,
+ just like CreatePipe, but ensure that the write end permits
+ FILE_READ_ATTRIBUTES access, on later versions of win32 where
+ this is supported. This access is needed by NtQueryInformationFile,
+ which is used to implement select and nonblocking writes.
+ Note that the return value is either NO_ERROR or GetLastError,
+ unlike CreatePipe, which returns a bool for success or failure. */
+static int
+create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr,
+ LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize,
+ DWORD dwReadMode, DWORD dwWriteMode)
+{
+ /* Default to error. */
+ *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE;
+
+ HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE;
+
+ /* Ensure that there is enough pipe buffer space for atomic writes. */
+ if (psize < PIPE_BUF)
+ psize = PIPE_BUF;
+
+ char pipename[MAX_PATH];
+
+ /* Retry CreateNamedPipe as long as the pipe name is in use.
+ * Retrying will probably never be necessary, but we want
+ * to be as robust as possible. */
+ while (1)
+ {
+ static volatile LONG pipe_unique_id;
+
+ snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld",
+ getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id));
+ /* Use CreateNamedPipe instead of CreatePipe, because the latter
+ * returns a write handle that does not permit FILE_READ_ATTRIBUTES
+ * access, on versions of win32 earlier than WinXP SP2.
+ * CreatePipe also stupidly creates a full duplex pipe, which is
+ * a waste, since only a single direction is actually used.
+ * It's important to only allow a single instance, to ensure that
+ * the pipe was not created earlier by some other process, even if
+ * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE
+ * because that is only available for Win2k SP2 and WinXP. */
+ read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode,
PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */
+ psize, /* output buffer size */
+ psize, /* input buffer size */
+ NMPWAIT_USE_DEFAULT_WAIT, sa_ptr);
+
+ if (read_pipe != INVALID_HANDLE_VALUE)
{
- i = strlen (plugin->plugin_options) + 1;
- str = plugin->plugin_options;
+ break;
}
- else
+
+ DWORD err = GetLastError ();
+
+ switch (err)
{
- i = 0;
+ case ERROR_PIPE_BUSY:
+ /* The pipe is already open with compatible parameters.
+ * Pick a new name and retry. */
+ continue;
+ case ERROR_ACCESS_DENIED:
+ /* The pipe is already open with incompatible parameters.
+ * Pick a new name and retry. */
+ continue;
+ case ERROR_CALL_NOT_IMPLEMENTED:
+ /* We are on an older Win9x platform without named pipes.
+ * Return an anonymous pipe as the best approximation. */
+ if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize))
+ {
+ return 0;
+ }
+ err = GetLastError ();
+ return err;
+ default:
+ return err;
}
- write (fd, &i, sizeof (size_t));
- if (i > 0)
- write (fd, str, i);
+ /* NOTREACHED */
+ }
+
+ /* Open the named pipe for writing.
+ * Be sure to permit FILE_READ_ATTRIBUTES access. */
+ write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0,
/* share mode */
+ sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and
attributes */
+ 0); /* handle to template file */
+
+ if (write_pipe == INVALID_HANDLE_VALUE)
+ {
+ /* Failure. */
+ DWORD err = GetLastError ();
+
+ CloseHandle (read_pipe);
+ return err;
+ }
+
+ /* Success. */
+ *read_pipe_ptr = read_pipe;
+ *write_pipe_ptr = write_pipe;
+ return 0;
}
+static int
+write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size,
unsigned char **old_buf)
+{
+ DWORD written;
+ BOOL bresult;
+ DWORD err;
+
+ if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE))
+ return -1;
+
+ ResetEvent (ov->hEvent);
+
+ if (*old_buf != NULL)
+ free (*old_buf);
+
+ *old_buf = malloc (size);
+ if (*old_buf == NULL)
+ return -1;
+ memcpy (*old_buf, buf, size);
+ written = 0;
+ ov->Offset = 0;
+ ov->OffsetHigh = 0;
+ ov->Pointer = 0;
+ ov->Internal = 0;
+ ov->InternalHigh = 0;
+ bresult = WriteFile (h, *old_buf, size, &written, ov);
+
+ if (bresult == TRUE)
+ {
+ SetEvent (ov->hEvent);
+ free (*old_buf);
+ *old_buf = NULL;
+ return written;
+ }
+
+ err = GetLastError ();
+ if (err == ERROR_IO_PENDING)
+ return size;
+ SetEvent (ov->hEvent);
+ *old_buf = NULL;
+ SetLastError (err);
+ return -1;
+}
+
+static int
+print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt,
...)
+{
+ va_list va;
+ va_list vacp;
+ size_t size;
+ char *print_buf;
+ int result;
+
+ va_start (va, fmt);
+ va_copy (vacp, va);
+ size = VSNPRINTF (NULL, 0, fmt, vacp) + 1;
+ va_end (vacp);
+ if (size <= 0)
+ {
+ va_end (va);
+ return size;
+ }
+
+ print_buf = malloc (size);
+ if (print_buf == NULL)
+ return -1;
+ VSNPRINTF (print_buf, size, fmt, va);
+ va_end (va);
+
+ result = write_to_pipe (h, ov, print_buf, size, buf);
+ free (buf);
+ return result;
+}
+
+#define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in,
&plug->ov_write, &plug->ov_write_buffer, fmt, ...)
+#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in,
&plug->ov_write, buf, size, &plug->ov_write_buffer)
+
+static int
+write_plugin_data (struct EXTRACTOR_PluginList *plugin)
+{
+ size_t libname_len, shortname_len, opts_len;
+ DWORD len;
+ char *str;
+ size_t total_len = 0;
+ unsigned char *buf, *ptr;
+
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ break;
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ return 0;
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return 0;
+ break;
+ }
+
+ libname_len = strlen (plugin->libname) + 1;
+ total_len += sizeof (size_t) + libname_len;
+ shortname_len = strlen (plugin->short_libname) + 1;
+ total_len += sizeof (size_t) + shortname_len;
+ if (plugin->plugin_options != NULL)
+ {
+ opts_len = strlen (plugin->plugin_options) + 1;
+ total_len += opts_len;
+ }
+ else
+ {
+ opts_len = 0;
+ }
+ total_len += sizeof (size_t);
+
+ buf = malloc (total_len);
+ if (buf == NULL)
+ return -1;
+ ptr = buf;
+ memcpy (ptr, &libname_len, sizeof (size_t));
+ ptr += sizeof (size_t);
+ memcpy (ptr, plugin->libname, libname_len);
+ ptr += libname_len;
+ memcpy (ptr, &shortname_len, sizeof (size_t));
+ ptr += sizeof (size_t);
+ memcpy (ptr, plugin->short_libname, shortname_len);
+ ptr += shortname_len;
+ memcpy (ptr, &opts_len, sizeof (size_t));
+ ptr += sizeof (size_t);
+ if (opts_len > 0)
+ {
+ memcpy (ptr, plugin->plugin_options, opts_len);
+ ptr += opts_len;
+ }
+ if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf,
total_len, &plugin->ov_write_buffer))
+ {
+ free (buf);
+ return -1;
+ }
+ free (buf);
+ return 0;
+}
+
static struct EXTRACTOR_PluginList *
read_plugin_data (int fd)
{
@@ -485,183 +770,112 @@
read (fd, &i, sizeof (size_t));
ret->libname = malloc (i);
if (ret->libname == NULL)
- {
- free (ret);
- return NULL;
- }
+ {
+ free (ret);
+ return NULL;
+ }
read (fd, ret->libname, i);
+ ret->libname[i - 1] = '\0';
read (fd, &i, sizeof (size_t));
ret->short_libname = malloc (i);
if (ret->short_libname == NULL)
- {
- free (ret->libname);
- free (ret);
- return NULL;
- }
+ {
+ free (ret->libname);
+ free (ret);
+ return NULL;
+ }
read (fd, ret->short_libname, i);
+ ret->short_libname[i - 1] = '\0';
read (fd, &i, sizeof (size_t));
if (i == 0)
- {
- ret->plugin_options = NULL;
- }
+ {
+ ret->plugin_options = NULL;
+ }
else
+ {
+ ret->plugin_options = malloc (i);
+ if (ret->plugin_options == NULL)
{
- ret->plugin_options = malloc (i);
- if (ret->plugin_options == NULL)
- {
- free (ret->short_libname);
- free (ret->libname);
- free (ret);
- return NULL;
- }
- read (fd, ret->plugin_options, i);
+ free (ret->short_libname);
+ free (ret->libname);
+ free (ret);
+ return NULL;
}
+ read (fd, ret->plugin_options, i);
+ ret->plugin_options[i - 1] = '\0';
+ }
return ret;
}
-
-void CALLBACK
-RundllEntryPoint (HWND hwnd,
- HINSTANCE hinst,
- LPSTR lpszCmdLine,
- int nCmdShow)
-{
- intptr_t in_h;
- intptr_t out_h;
- int in, out;
-
- sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h);
- in = _open_osfhandle (in_h, _O_RDONLY);
- out = _open_osfhandle (out_h, 0);
- setmode (in, _O_BINARY);
- setmode (out, _O_BINARY);
- process_requests (read_plugin_data (in),
- in, out);
-}
-
-void CALLBACK
-RundllEntryPointA (HWND hwnd,
- HINSTANCE hinst,
- LPSTR lpszCmdLine,
- int nCmdShow)
-{
- return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow);
-}
-#endif
-
-
/**
* Start the process for the given plugin.
*/
static void
start_process (struct EXTRACTOR_PluginList *plugin)
{
-#if !WINDOWS
- int p1[2];
- int p2[2];
- pid_t pid;
- int status;
-
- plugin->cpid = -1;
- if (0 != pipe (p1))
- {
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
- }
- if (0 != pipe (p2))
- {
- close (p1[0]);
- close (p1[1]);
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
- }
- pid = fork ();
- plugin->cpid = pid;
- if (pid == -1)
- {
- close (p1[0]);
- close (p1[1]);
- close (p2[0]);
- close (p2[1]);
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
- }
- if (pid == 0)
- {
- close (p1[1]);
- close (p2[0]);
- process_requests (plugin, p1[0], p2[1]);
- _exit (0);
- }
- close (p1[0]);
- close (p2[1]);
- plugin->cpipe_in = fdopen (p1[1], "w");
- if (plugin->cpipe_in == NULL)
- {
- perror ("fdopen");
- (void) kill (plugin->cpid, SIGKILL);
- waitpid (plugin->cpid, &status, 0);
- close (p1[1]);
- close (p2[0]);
- plugin->cpid = -1;
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
- }
- plugin->cpipe_out = p2[0];
-#else
- int p1[2];
- int p2[2];
+ HANDLE p1[2];
+ HANDLE p2[2];
STARTUPINFO startup;
PROCESS_INFORMATION proc;
char cmd[MAX_PATH + 1];
char arg1[10], arg2[10];
- HANDLE p10_os = INVALID_HANDLE_VALUE, p21_os = INVALID_HANDLE_VALUE;
HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE;
+ SECURITY_ATTRIBUTES sa;
- plugin->hProcess = NULL;
- if (0 != _pipe (p1, 0, _O_BINARY | _O_NOINHERIT))
- {
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0)
return;
- }
- if (0 != _pipe (p2, 0, _O_BINARY | _O_NOINHERIT))
- {
- close (p1[0]);
- close (p1[1]);
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ break;
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (plugin->hProcess != 0)
return;
- }
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ return;
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return;
+ break;
+ }
- memset (&startup, 0, sizeof (STARTUPINFO));
+ sa.nLength = sizeof (sa);
+ sa.lpSecurityDescriptor = NULL;
+ sa.bInheritHandle = FALSE;
- p10_os = (HANDLE) _get_osfhandle (p1[0]);
- p21_os = (HANDLE) _get_osfhandle (p2[1]);
+ plugin->hProcess = NULL;
- if (p10_os == INVALID_HANDLE_VALUE || p21_os == INVALID_HANDLE_VALUE)
+ if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024,
FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
{
- close (p1[0]);
- close (p1[1]);
- close (p2[0]);
- close (p2[1]);
plugin->flags = EXTRACTOR_OPTION_DISABLED;
return;
}
+ if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024,
FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
+ {
+ CloseHandle (p1[0]);
+ CloseHandle (p1[1]);
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
- if (!DuplicateHandle (GetCurrentProcess (), p10_os, GetCurrentProcess (),
+ memset (&startup, 0, sizeof (STARTUPINFO));
+
+ if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (),
&p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)
- || !DuplicateHandle (GetCurrentProcess (), p21_os, GetCurrentProcess (),
+ || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (),
&p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS))
{
if (p10_os_inh != INVALID_HANDLE_VALUE)
CloseHandle (p10_os_inh);
if (p21_os_inh != INVALID_HANDLE_VALUE)
CloseHandle (p21_os_inh);
- close (p1[0]);
- close (p1[1]);
- close (p2[0]);
- close (p2[1]);
+ CloseHandle (p1[0]);
+ CloseHandle (p1[1]);
+ CloseHandle (p2[0]);
+ CloseHandle (p2[1]);
plugin->flags = EXTRACTOR_OPTION_DISABLED;
return;
}
@@ -675,190 +889,588 @@
CloseHandle (proc.hThread);
}
else
- {
- close (p1[0]);
- close (p1[1]);
- close (p2[0]);
- close (p2[1]);
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
- }
- close (p1[0]);
- close (p2[1]);
+ {
+ CloseHandle (p1[0]);
+ CloseHandle (p1[1]);
+ CloseHandle (p2[0]);
+ CloseHandle (p2[1]);
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+ return;
+ }
+ CloseHandle (p1[0]);
+ CloseHandle (p2[1]);
CloseHandle (p10_os_inh);
CloseHandle (p21_os_inh);
- write_plugin_data (p1[1], plugin);
+ plugin->cpipe_in = p1[1];
+ plugin->cpipe_out = p2[0];
- plugin->cpipe_in = fdopen (p1[1], "w");
- if (plugin->cpipe_in == NULL)
+ memset (&plugin->ov_read, 0, sizeof (OVERLAPPED));
+ memset (&plugin->ov_write, 0, sizeof (OVERLAPPED));
+
+ plugin->ov_write_buffer = NULL;
+
+ plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
+ plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
+}
+
+/**
+ * Stop the child process of this plugin.
+ */
+static void
+stop_process (struct EXTRACTOR_PluginList *plugin)
+{
+ int status;
+ HANDLE process;
+
+#if DEBUG
+ if (plugin->hProcess == INVALID_HANDLE_VALUE)
+ fprintf (stderr,
+ "Plugin `%s' choked on this input\n",
+ plugin->short_libname);
+#endif
+ if (plugin->hProcess == INVALID_HANDLE_VALUE ||
+ plugin->hProcess == NULL)
+ return;
+ TerminateProcess (plugin->hProcess, 0);
+ CloseHandle (plugin->hProcess);
+ plugin->hProcess = INVALID_HANDLE_VALUE;
+ CloseHandle (plugin->cpipe_out);
+ CloseHandle (plugin->cpipe_in);
+ plugin->cpipe_out = INVALID_HANDLE_VALUE;
+ plugin->cpipe_in = INVALID_HANDLE_VALUE;
+ CloseHandle (plugin->ov_read.hEvent);
+ CloseHandle (plugin->ov_write.hEvent);
+ if (plugin->ov_write_buffer != NULL)
+ {
+ free (plugin->ov_write_buffer);
+ plugin->ov_write_buffer = NULL;
+ }
+
+ if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
+ plugin->flags = EXTRACTOR_OPTION_DISABLED;
+
+ plugin->seek_request = -1;
+}
+
+#endif /* WINDOWS */
+
+/**
+ * Remove a plugin from a list.
+ *
+ * @param prev the current list of plugins
+ * @param library the name of the plugin to remove
+ * @return the reduced list, unchanged if the plugin was not loaded
+ */
+struct EXTRACTOR_PluginList *
+EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
+ const char * library)
+{
+ struct EXTRACTOR_PluginList *pos;
+ struct EXTRACTOR_PluginList *first;
+
+ pos = prev;
+ first = prev;
+ while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
{
- perror ("fdopen");
- TerminateProcess (plugin->hProcess, 0);
- WaitForSingleObject (plugin->hProcess, INFINITE);
- CloseHandle (plugin->hProcess);
- close (p1[1]);
- close (p2[0]);
- plugin->hProcess = INVALID_HANDLE_VALUE;
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return;
+ prev = pos;
+ pos = pos->next;
}
- plugin->cpipe_out = p2[0];
+ if (pos != NULL)
+ {
+ /* found, close library */
+ if (first == pos)
+ first = pos->next;
+ else
+ prev->next = pos->next;
+ /* found */
+ stop_process (pos);
+ free (pos->short_libname);
+ free (pos->libname);
+ free (pos->plugin_options);
+ if (NULL != pos->libraryHandle)
+ lt_dlclose (pos->libraryHandle);
+ free (pos);
+ }
+#if DEBUG
+ else
+ fprintf(stderr,
+ "Unloading plugin `%s' failed!\n",
+ library);
#endif
+ return first;
}
/**
- * Extract meta data using the given plugin, running the
- * actual code of the plugin out-of-process.
+ * Remove all plugins from the given list (destroys the list).
*
- * @param plugin which plugin to call
- * @param size size of the file mapped by shmfn or tshmfn
- * @param shmfn file name of the shared memory segment
- * @param tshmfn file name of the shared memory segment for the end of the data
- * @param proc function to call on the meta data
+ * @param plugin the list of plugins
+ */
+void
+EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
+{
+ while (plugins != NULL)
+ plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname);
+}
+
+
+
+/**
+ * Open a file
+ */
+static int file_open(const char *filename, int oflag, ...)
+{
+ int mode;
+ const char *fn;
+#ifdef MINGW
+ char szFile[_MAX_PATH + 1];
+ long lRet;
+
+ if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
+ {
+ errno = ENOENT;
+ SetLastError(lRet);
+ return -1;
+ }
+ fn = szFile;
+#else
+ fn = filename;
+#endif
+ mode = 0;
+#ifdef MINGW
+ /* Set binary mode */
+ mode |= O_BINARY;
+#endif
+ return OPEN(fn, oflag, mode);
+}
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+
+#if HAVE_ZLIB
+#define MIN_ZLIB_HEADER 12
+#endif
+#if HAVE_LIBBZ2
+#define MIN_BZ2_HEADER 4
+#endif
+#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB
+#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
+#endif
+#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2
+#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
+#endif
+#if !defined (MIN_COMPRESSED_HEADER)
+#define MIN_COMPRESSED_HEADER -1
+#endif
+
+#define COMPRESSED_DATA_PROBE_SIZE 3
+
+/**
+ * Try to decompress compressed data
+ *
+ * @param data data to decompress, or NULL (if fd is not -1)
+ * @param fd file to read data from, or -1 (if data is not NULL)
+ * @param fsize size of data (if data is not NULL) or size of fd file (if fd is not -1)
+ * @param compression_type type of compression, as returned by get_compression_type ()
+ * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it.
+ * @param buffer_size a pointer to buffer size
+ * @param proc callback for metadata
* @param proc_cls cls for proc
- * @return 0 if proc did not return non-zero
+ * @return 0 on success, anything else on error
*/
static int
-extract_oop (struct EXTRACTOR_PluginList *plugin,
- size_t size,
- const char *shmfn,
- const char *tshmfn,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls)
+try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int
compression_type, void **buffer, size_t *buffer_size,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- struct IpcHeader hdr;
- char mimetype[MAX_MIME_LEN + 1];
- char *data;
+ unsigned char *new_buffer;
+ ssize_t read_result;
-#ifndef WINDOWS
- if (plugin->cpid == -1)
-#else
- if (plugin->hProcess == INVALID_HANDLE_VALUE)
+ unsigned char *buf;
+ unsigned char *rbuf;
+ size_t dsize;
+#if HAVE_ZLIB
+ z_stream strm;
+ int ret;
+ size_t pos;
#endif
- return 0;
- if (0 >= fprintf (plugin->cpipe_in,
- "%s\n",
- shmfn))
+#if HAVE_LIBBZ2
+ bz_stream bstrm;
+ int bret;
+ size_t bpos;
+#endif
+
+ if (fd != -1)
+ {
+ if (fsize > *buffer_size)
{
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
-#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
-#endif
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
+ /* Read the rest of the file. Can't de-compress it partially anyway */
+ /* Memory mapping is not useful here, because memory mapping ALSO takes up
+ * memory (even more than a buffer, since it might be aligned), and
+ * because we need to read every byte anyway (lazy on-demand reads into
+ * memory provided by memory mapping won't help).
+ */
+ new_buffer = realloc (*buffer, fsize);
+ if (new_buffer == NULL)
+ {
+ free (*buffer);
+ return -1;
+ }
+ read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size);
+ if (read_result != fsize - *buffer_size)
+ {
+ free (*buffer);
+ return -1;
+ }
+ *buffer = new_buffer;
+ *buffer_size = fsize;
}
- if (0 >= fprintf (plugin->cpipe_in,
- "!%s\n",
- (tshmfn != NULL) ? tshmfn : ""))
+ data = (const unsigned char *) new_buffer;
+ }
+
+#if HAVE_ZLIB
+ if (compression_type == 1)
+ {
+ /* Process gzip header */
+ unsigned int gzip_header_length = 10;
+
+ if (data[3] & 0x4) /* FEXTRA set */
+ gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
+ (((unsigned) (data[11] & 0xff)) * 256);
+
+ if (data[3] & 0x8) /* FNAME set */
{
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
-#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
-#endif
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
+ const unsigned char *cptr = data + gzip_header_length;
+
+ /* stored file name is here */
+ while ((cptr - data) < fsize)
+ {
+ if ('\0' == *cptr)
+ break;
+ cptr++;
+ }
+
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ (const char *) (data + gzip_header_length),
+ cptr - (data + gzip_header_length)))
+ return 0; /* done */
+
+ gzip_header_length = (cptr - data) + 1;
}
- if (0 >= fprintf (plugin->cpipe_in,
- "s%lu\n",
- size))
+
+ if (data[3] & 0x16) /* FCOMMENT set */
{
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
-#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
+ const unsigned char * cptr = data + gzip_header_length;
+
+ /* stored comment is here */
+ while (cptr < data + fsize)
+ {
+ if ('\0' == *cptr)
+ break;
+ cptr ++;
+ }
+
+ if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
+ EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
+ (const char *) (data + gzip_header_length),
+ cptr - (data + gzip_header_length)))
+ return 0; /* done */
+
+ gzip_header_length = (cptr - data) + 1;
+ }
+
+ if (data[3] & 0x2) /* FCHRC set */
+ gzip_header_length += 2;
+
+ memset (&strm, 0, sizeof (z_stream));
+
+#ifdef ZLIB_VERNUM
+ gzip_header_length = 0;
#endif
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
+
+ if (fsize > gzip_header_length)
+ {
+ strm.next_in = (Bytef *) data + gzip_header_length;
+ strm.avail_in = fsize - gzip_header_length;
}
- fflush (plugin->cpipe_in);
- while (1)
+ else
{
- if (0 != read_all (plugin->cpipe_out,
- &hdr,
- sizeof(hdr)))
- {
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
+ strm.next_in = (Bytef *) data;
+ strm.avail_in = 0;
+ }
+ strm.total_in = 0;
+ strm.zalloc = NULL;
+ strm.zfree = NULL;
+ strm.opaque = NULL;
+
+ /*
+ * note: maybe plain inflateInit(&strm) is adequate,
+ * it looks more backward-compatible also ;
+ *
+ * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
+ * there might be a better check.
+ */
+ if (Z_OK == inflateInit2 (&strm,
+#ifdef ZLIB_VERNUM
+ 15 + 32
#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
+ -MAX_WBITS
#endif
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
- }
- if ( (hdr.type == 0) &&
- (hdr.format == 0) &&
- (hdr.data_len == 0) &&
- (hdr.mime_len == 0) )
- break;
- if (hdr.mime_len > MAX_MIME_LEN)
- {
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
-#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
+ ))
+ {
+ pos = 0;
+ dsize = 2 * fsize;
+ if ( (dsize > MAX_DECOMPRESS) ||
+ (dsize < fsize) )
+ dsize = MAX_DECOMPRESS;
+ buf = malloc (dsize);
+
+ if (buf != NULL)
+ {
+ strm.next_out = (Bytef *) buf;
+ strm.avail_out = dsize;
+
+ do
+ {
+ ret = inflate (&strm, Z_SYNC_FLUSH);
+ if (ret == Z_OK)
+ {
+ if (dsize == MAX_DECOMPRESS)
+ break;
+
+ pos += strm.total_out;
+ strm.total_out = 0;
+ dsize *= 2;
+
+ if (dsize > MAX_DECOMPRESS)
+ dsize = MAX_DECOMPRESS;
+
+ rbuf = realloc (buf, dsize);
+ if (rbuf == NULL)
+ {
+ free (buf);
+ buf = NULL;
+ break;
+ }
+
+ buf = rbuf;
+ strm.next_out = (Bytef *) &buf[pos];
+ strm.avail_out = dsize - pos;
+ }
+ else if (ret != Z_STREAM_END)
+ {
+ /* error */
+ free (buf);
+ buf = NULL;
+ }
+ } while ((buf != NULL) && (ret != Z_STREAM_END));
+
+ dsize = pos + strm.total_out;
+ if ((dsize == 0) && (buf != NULL))
+ {
+ free (buf);
+ buf = NULL;
+ }
+ }
+
+ inflateEnd (&strm);
+
+ if (fd != -1)
+ if (*buffer != NULL)
+ free (*buffer);
+
+ if (buf == NULL)
+ {
+ return -1;
+ }
+ else
+ {
+ *buffer = buf;
+ *buffer_size = dsize;
+ return 0;
+ }
+ }
+ }
#endif
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
- }
- data = malloc (hdr.data_len);
- if (data == NULL)
- {
- stop_process (plugin);
- return 1;
- }
- if ( (0 != (read_all (plugin->cpipe_out,
- mimetype,
- hdr.mime_len))) ||
- (0 != (read_all (plugin->cpipe_out,
- data,
- hdr.data_len))) )
- {
- stop_process (plugin);
-#ifndef WINDOWS
- plugin->cpid = -1;
-#else
- plugin->hProcess = INVALID_HANDLE_VALUE;
+
+#if HAVE_LIBBZ2
+ if (compression_type == 2)
+ {
+ memset(&bstrm, 0, sizeof (bz_stream));
+ bstrm.next_in = (char *) data;
+ bstrm.avail_in = fsize;
+ bstrm.total_in_lo32 = 0;
+ bstrm.total_in_hi32 = 0;
+ bstrm.bzalloc = NULL;
+ bstrm.bzfree = NULL;
+ bstrm.opaque = NULL;
+ if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0))
+ {
+ bpos = 0;
+ dsize = 2 * fsize;
+ if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) )
+ dsize = MAX_DECOMPRESS;
+ buf = malloc (dsize);
+
+ if (buf != NULL)
+ {
+ bstrm.next_out = (char *) buf;
+ bstrm.avail_out = dsize;
+
+ do
+ {
+ bret = BZ2_bzDecompress (&bstrm);
+ if (bret == Z_OK)
+ {
+ if (dsize == MAX_DECOMPRESS)
+ break;
+ bpos += bstrm.total_out_lo32;
+ bstrm.total_out_lo32 = 0;
+
+ dsize *= 2;
+ if (dsize > MAX_DECOMPRESS)
+ dsize = MAX_DECOMPRESS;
+
+ rbuf = realloc(buf, dsize);
+ if (rbuf == NULL)
+ {
+ free (buf);
+ buf = NULL;
+ break;
+ }
+
+ buf = rbuf;
+ bstrm.next_out = (char*) &buf[bpos];
+ bstrm.avail_out = dsize - bpos;
+ }
+ else if (bret != BZ_STREAM_END)
+ {
+ /* error */
+ free (buf);
+ buf = NULL;
+ }
+ } while ((buf != NULL) && (bret != BZ_STREAM_END));
+
+ dsize = bpos + bstrm.total_out_lo32;
+ if ((dsize == 0) && (buf != NULL))
+ {
+ free (buf);
+ buf = NULL;
+ }
+ }
+
+ BZ2_bzDecompressEnd (&bstrm);
+
+ if (fd != -1)
+ if (*buffer != NULL)
+ free (*buffer);
+
+ if (buf == NULL)
+ {
+ return -1;
+ }
+ else
+ {
+ *buffer = buf;
+ *buffer_size = dsize;
+ return 0;
+ }
+ }
+ }
#endif
- free (data);
- if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
- plugin->flags = EXTRACTOR_OPTION_DISABLED;
- return 0;
- }
- mimetype[hdr.mime_len] = '\0';
- if ( (proc != NULL) &&
- (0 != proc (proc_cls,
- plugin->short_libname,
- hdr.type,
- hdr.format,
- mimetype,
- data,
- hdr.data_len)) )
- proc = NULL;
- free (data);
+ return -1;
+}
+
+/**
+ * Detect if we have compressed data on our hands.
+ *
+ * @param data pointer to a data buffer or NULL (in case fd is not -1)
+ * @param fd a file to read data from, or -1 (if data is not NULL)
+ * @param fsize size of data (if data is not NULL) or of file (if fd is not -1)
+ * @param buffer will receive a pointer to the data that this function read
+ * @param buffer_size will receive size of the buffer
+ * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression
+ */
+static int
+get_compression_type (const unsigned char *data, int fd, int64_t fsize, void
**buffer, size_t *buffer_size)
+{
+ void *read_data = NULL;
+ size_t read_data_size = 0;
+ ssize_t read_result;
+
+ if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
+ {
+ *buffer = NULL;
+ return 0;
+ }
+ if (data == NULL)
+ {
+ read_data_size = COMPRESSED_DATA_PROBE_SIZE;
+ read_data = malloc (read_data_size);
+ if (read_data == NULL)
+ return -1;
+ read_result = READ (fd, read_data, read_data_size);
+ if (read_result != read_data_size)
+ {
+ free (read_data);
+ return -1;
}
- if (NULL == proc)
+ *buffer = read_data;
+ *buffer_size = read_data_size;
+ data = (const void *) read_data;
+ }
+#if HAVE_ZLIB
+ if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) &&
(data[2] == 0x08))
return 1;
+#endif
+#if HAVE_LIBBZ2
+ if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') &&
(data[2] == 'h'))
+ return 2;
+#endif
return 0;
-}
+}
+#if WINDOWS
/**
* Setup a shared memory segment.
*
+ * @param ptr set to the location of the map segment
+ * @param map where to store the map handle
+ * @param fn name of the mapping
+ * @param fn_size size available in fn
+ * @param size number of bytes to allocated for the mapping
+ * @return 0 on success
+ */
+static int
+make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
+{
+ const char *tpath = "Local\\";
+ snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+ (unsigned int) RANDOM());
+ *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
size, fn);
+ *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
+ if (*ptr == NULL)
+ {
+ CloseHandle (*map);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+destroy_shm_w32 (void *ptr, HANDLE map)
+{
+ UnmapViewOfFile (ptr);
+ CloseHandle (map);
+}
+
+#else
+
+/**
+ * Setup a shared memory segment.
+ *
* @param ptr set to the location of the shm segment
* @param shmid where to store the shm ID
* @param fn name of the shared segment
@@ -867,22 +1479,10 @@
* @return 0 on success
*/
static int
-make_shm (int is_tail,
- void **ptr,
-#ifndef WINDOWS
- int *shmid,
-#else
- HANDLE *map,
-#endif
- char *fn,
- size_t fn_size,
- size_t size)
+make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
{
const char *tpath;
-#ifdef WINDOWS
- tpath = "Local\\";
-#elif SOMEBSD
- const char *tpath;
+#if SOMEBSD
/* this works on FreeBSD, not sure about others... */
tpath = getenv ("TMPDIR");
if (tpath == NULL)
@@ -890,578 +1490,606 @@
#else
tpath = "/"; /* Linux */
#endif
- snprintf (fn,
- fn_size,
- "%slibextractor-%sshm-%u-%u",
- tpath,
- (is_tail) ? "t" : "",
- getpid(),
- (unsigned int) RANDOM());
-#ifndef WINDOWS
+ snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
+ (unsigned int) RANDOM());
*shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
*ptr = NULL;
- if (-1 == (*shmid))
- return 1;
- if ( (0 != ftruncate (*shmid, size)) ||
- (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0)))
||
- (*ptr == (void*) -1) )
+ if (-1 == *shmid)
+ return 1;
+ if ((0 != ftruncate (*shmid, size)) ||
+ (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0)))
||
+ (*ptr == (void*) -1) )
+ {
+ close (*shmid);
+ *shmid = -1;
+ shm_unlink (fn);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
+{
+ if (NULL != ptr)
+ munmap (ptr, size);
+ if (shm_id != -1)
+ close (shm_id);
+ shm_unlink (shm_name);
+}
+#endif
+
+
+static void
+init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name,
int64_t fsize)
+{
+ int write_result;
+ int init_state_size;
+ unsigned char *init_state;
+ int t;
+ size_t shm_name_len = strlen (shm_name) + 1;
+ init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t);
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ init_state = malloc (init_state_size);
+ if (init_state == NULL)
{
- close (*shmid);
- *shmid = -1;
- shm_unlink (fn);
- return 1;
+ stop_process (plugin);
+ return;
}
- return 0;
-#else
- *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0,
size, fn);
- *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
- if (*ptr == NULL)
+ t = 0;
+ init_state[t] = MESSAGE_INIT_STATE;
+ t += 1;
+ memcpy (&init_state[t], &fsize, sizeof (int64_t));
+ t += sizeof (int64_t);
+ memcpy (&init_state[t], &shm_name_len, sizeof (size_t));
+ t += sizeof (size_t);
+ memcpy (&init_state[t], shm_name, shm_name_len);
+ t += shm_name_len;
+ write_result = plugin_write (plugin, init_state, init_state_size);
+ free (init_state);
+ if (write_result < init_state_size)
{
- CloseHandle (*map);
- return 1;
+ stop_process (plugin);
+ return;
}
- return 0;
-#endif
+ plugin->seek_request = 0;
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ plugin_open_shm (plugin, shm_name);
+ plugin->fsize = fsize;
+ plugin->init_state_method (plugin);
+ plugin->seek_request = 0;
+ return;
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return;
+ break;
+ }
}
+static void
+discard_plugin_state (struct EXTRACTOR_PluginList *plugin)
+{
+ int write_result;
+ unsigned char discard_state = MESSAGE_DISCARD_STATE;
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ /* This is somewhat clumsy, but it's the only stop-indicating
+ * non-W32/POSIX-specific field i could think of...
+ */
+ if (plugin->cpipe_out != -1)
+ {
+ write_result = plugin_write (plugin, &discard_state, 1);
+ if (write_result < 1)
+ {
+ stop_process (plugin);
+ return;
+ }
+ }
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ plugin->discard_state_method (plugin);
+ return;
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return;
+ break;
+ }
+}
-/**
- * Extract keywords using the given set of plugins.
- *
- * @param plugins the list of plugins to use
- * @param data data to process, never NULL
- * @param size number of bytes in data, ignored if data is NULL
- * @param tdata end of file data, or NULL
- * @param tsize number of bytes in tdata
- * @param proc function to call for each meta data item found
- * @param proc_cls cls argument to proc
- */
+static int
+give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position,
size_t map_size)
+{
+ int write_result;
+ int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t);
+ unsigned char updated_shm[updated_shm_size];
+ int t = 0;
+ updated_shm[t] = MESSAGE_UPDATED_SHM;
+ t += 1;
+ memcpy (&updated_shm[t], &position, sizeof (int64_t));
+ t += sizeof (int64_t);
+ memcpy (&updated_shm[t], &map_size, sizeof (size_t));
+ t += sizeof (size_t);
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (plugin->seek_request < 0)
+ return 0;
+ write_result = plugin_write (plugin, updated_shm, updated_shm_size);
+ if (write_result < updated_shm_size)
+ {
+ stop_process (plugin);
+ return 0;
+ }
+ return 1;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ plugin->position = position;
+ plugin->map_size = map_size;
+ return 0;
+ case EXTRACTOR_OPTION_DISABLED:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
static void
-extract (struct EXTRACTOR_PluginList *plugins,
- const char * data,
- size_t size,
- const char * tdata,
- size_t tsize,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls)
+ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position,
void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- struct EXTRACTOR_PluginList *ppos;
- enum EXTRACTOR_Options flags;
- void *ptr;
- void *tptr;
- char fn[255];
- char tfn[255];
- int want_shm;
- int want_tail;
-#ifndef WINDOWS
- int shmid;
- int tshmid;
+ int extract_reply;
+ switch (plugin->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ return;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ if (plugin->seek_request >= 0)
+ {
+ plugin->shm_ptr = shm_ptr;
+ extract_reply = plugin->extract_method (plugin, proc, proc_cls);
+ if (extract_reply == 1)
+ plugin->seek_request = -1;
+ }
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ return;
+ break;
+ }
+}
+
+#if !WINDOWS
+int
+plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t
size)
+{
+ ssize_t read_result;
+ size_t read_count = 0;
+ while (read_count < size)
+ {
+ read_result = read (plugin->cpipe_out, &buf[read_count], size -
read_count);
+ if (read_result <= 0)
+ return read_result;
+ read_count += read_result;
+ }
+ return read_count;
+}
#else
- HANDLE map;
- HANDLE tmap;
+int
+plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t
size)
+{
+ DWORD bytes_read;
+ BOOL bresult;
+ size_t read_count = 0;
+ while (read_count < size)
+ {
+ bresult = ReadFile (plugin->cpipe_out, &buf[read_count], size -
read_count, &bytes_read, NULL);
+ if (!bresult)
+ return -1;
+ read_count += bytes_read;
+ }
+ return read_count;
+}
#endif
- want_shm = 0;
- ppos = plugins;
- while (NULL != ppos)
- {
- switch (ppos->flags)
- {
- case EXTRACTOR_OPTION_DEFAULT_POLICY:
-#ifndef WINDOWS
- if ( (0 == ppos->cpid) ||
- (-1 == ppos->cpid) )
-#else
- if (ppos->hProcess == NULL || ppos->hProcess == INVALID_HANDLE_VALUE)
-#endif
- start_process (ppos);
- want_shm = 1;
- break;
- case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
-#ifndef WINDOWS
- if (0 == ppos->cpid)
-#else
- if (ppos->hProcess == NULL)
-#endif
- start_process (ppos);
- want_shm = 1;
- break;
- case EXTRACTOR_OPTION_IN_PROCESS:
- break;
- case EXTRACTOR_OPTION_DISABLED:
- break;
- }
- ppos = ppos->next;
+static int
+receive_reply (struct EXTRACTOR_PluginList *plugin,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int read_result;
+ unsigned char code;
+ int must_read = 1;
+
+ int64_t seek_position;
+ struct IpcHeader hdr;
+ char *mime_type;
+ char *data;
+
+ while (must_read)
+ {
+ read_result = plugin_read (plugin, &code, 1);
+ if (read_result < 1)
+ return -1;
+ switch (code)
+ {
+ case MESSAGE_DONE: /* Done */
+ plugin->seek_request = -1;
+ must_read = 0;
+ break;
+ case MESSAGE_SEEK: /* Seek */
+ read_result = plugin_read (plugin, (unsigned char *) &seek_position,
sizeof (int64_t));
+ if (read_result < sizeof (int64_t))
+ return -1;
+ plugin->seek_request = seek_position;
+ must_read = 0;
+ break;
+ case MESSAGE_META: /* Meta */
+ read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr));
+ if (read_result < sizeof (hdr)) /* FIXME: check hdr for sanity */
+ return -1;
+ mime_type = malloc (hdr.mime_len + 1);
+ if (mime_type == NULL)
+ return -1;
+ read_result = plugin_read (plugin, (unsigned char *) mime_type,
hdr.mime_len);
+ if (read_result < hdr.mime_len)
+ return -1;
+ mime_type[hdr.mime_len] = '\0';
+ data = malloc (hdr.data_len);
+ if (data == NULL)
+ {
+ free (mime_type);
+ return -1;
+ }
+ read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len);
+ if (read_result < hdr.data_len)
+ {
+ free (mime_type);
+ free (data);
+ return -1;
+ }
+ read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type,
hdr.meta_format, mime_type, data, hdr.data_len);
+ free (mime_type);
+ free (data);
+ if (read_result != 0)
+ return 1;
+ break;
+ default:
+ return -1;
}
- ptr = NULL;
- tptr = NULL;
- if (want_shm)
+ }
+ return 0;
+}
+
+#if !WINDOWS
+static int
+wait_for_reply (struct EXTRACTOR_PluginList *plugins,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int ready;
+ int result;
+ struct timeval tv;
+ fd_set to_check;
+ int highest = 0;
+ int read_result;
+ struct EXTRACTOR_PluginList *ppos;
+
+ FD_ZERO (&to_check);
+
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
+ switch (ppos->flags)
{
- if (size > MAX_READ)
- size = MAX_READ;
- if (0 == make_shm (0,
- &ptr,
-#ifndef WINDOWS
- &shmid,
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (ppos->seek_request == -1)
+ continue;
+ FD_SET (ppos->cpipe_out, &to_check);
+ if (highest < ppos->cpipe_out)
+ highest = ppos->cpipe_out;
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ break;
+ }
+ }
+
+ tv.tv_sec = 10;
+ tv.tv_usec = 0;
+ ready = select (highest + 1, &to_check, NULL, NULL, &tv);
+ if (ready <= 0)
+ /* an error or timeout -> something's wrong or all plugins hung up */
+ return -1;
+
+ result = 0;
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
+ switch (ppos->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (ppos->seek_request == -1)
+ continue;
+ if (FD_ISSET (ppos->cpipe_out, &to_check))
+ {
+ read_result = receive_reply (ppos, proc, proc_cls);
+ if (read_result < 0)
+ {
+ stop_process (ppos);
+ }
+ result += 1;
+ }
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ break;
+ }
+ }
+ return result;
+}
#else
- &map,
-#endif
- fn, sizeof(fn), size))
- {
- memcpy (ptr, data, size);
- if ( (tdata != NULL) &&
- (0 == make_shm (1,
- &tptr,
-#ifndef WINDOWS
- &tshmid,
-#else
- &tmap,
-#endif
- tfn, sizeof(tfn), tsize)) )
- {
- memcpy (tptr, tdata, tsize);
- }
- else
- {
- tptr = NULL;
- }
- }
- else
- {
- want_shm = 0;
- }
+static int
+wait_for_reply (struct EXTRACTOR_PluginList *plugins,
EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int result;
+ DWORD ms;
+ DWORD first_ready;
+ DWORD dwresult;
+ DWORD bytes_read;
+ BOOL bresult;
+ int i;
+ HANDLE events[MAXIMUM_WAIT_OBJECTS];
+
+
+ struct EXTRACTOR_PluginList *ppos;
+
+ i = 0;
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
+ if (i == MAXIMUM_WAIT_OBJECTS)
+ return -1;
+ if (ppos->seek_request == -1)
+ continue;
+ switch (ppos->flags)
+ {
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0)
+ {
+ ResetEvent (ppos->ov_read.hEvent);
+ bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read,
&ppos->ov_read);
+ if (bresult == TRUE)
+ {
+ SetEvent (ppos->ov_read.hEvent);
+ }
+ else
+ {
+ DWORD err = GetLastError ();
+ if (err != ERROR_IO_PENDING)
+ SetEvent (ppos->ov_read.hEvent);
+ }
+ }
+ events[i] = ppos->ov_read.hEvent;
+ i++;
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ break;
}
- ppos = plugins;
- while (NULL != ppos)
+ }
+
+ ms = 10000;
+ first_ready = WaitForMultipleObjects (i, events, FALSE, ms);
+ if (first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED)
+ /* an error or timeout -> something's wrong or all plugins hung up */
+ return -1;
+
+ i = 0;
+ result = 0;
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ {
+ int read_result;
+ switch (ppos->flags)
{
- flags = ppos->flags;
- if (! want_shm)
- flags = EXTRACTOR_OPTION_IN_PROCESS;
- switch (flags)
- {
- case EXTRACTOR_OPTION_DEFAULT_POLICY:
- if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn,
- (tptr != NULL) ? tfn : NULL,
- proc, proc_cls))
- {
- ppos = NULL;
- break;
- }
-#ifndef WINDOWS
- if (ppos->cpid == -1)
-#else
- if (ppos->hProcess == INVALID_HANDLE_VALUE)
+ case EXTRACTOR_OPTION_DEFAULT_POLICY:
+ case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+ if (ppos->seek_request == -1)
+ continue;
+ if (i < first_ready)
+ {
+ i += 1;
+ continue;
+ }
+ dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0);
+ read_result = 0;
+ if (dwresult == WAIT_OBJECT_0)
+ {
+ read_result = receive_reply (ppos, proc, proc_cls);
+ result += 1;
+ }
+ if (dwresult == WAIT_FAILED || read_result < 0)
+ {
+ stop_process (ppos);
+ if (dwresult == WAIT_FAILED)
+ result += 1;
+ }
+ i++;
+ break;
+ case EXTRACTOR_OPTION_IN_PROCESS:
+ break;
+ case EXTRACTOR_OPTION_DISABLED:
+ break;
+ }
+ }
+ return result;
+}
+
#endif
- {
- start_process (ppos);
- if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn,
- (tptr != NULL) ? tfn : NULL,
- proc, proc_cls))
- {
- ppos = NULL;
- break;
- }
- }
- break;
- case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
- if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn,
- (tptr != NULL) ? tfn : NULL,
- proc, proc_cls))
- {
- ppos = NULL;
- break;
- }
- break;
- case EXTRACTOR_OPTION_IN_PROCESS:
- want_tail = ( (ppos->specials != NULL) &&
- (NULL != strstr (ppos->specials,
- "want-tail")));
- if (NULL == ppos->extractMethod)
- plugin_load (ppos);
- if ( ( (ppos->specials == NULL) ||
- (NULL == strstr (ppos->specials,
- "oop-only")) ) )
- {
- if (want_tail)
- {
- if ( (NULL != ppos->extractMethod) &&
- (tdata != NULL) &&
- (0 != ppos->extractMethod (tdata,
- tsize,
- proc,
- proc_cls,
- ppos->plugin_options)) )
- {
- ppos = NULL;
- break;
- }
- }
- else
- {
- if ( (NULL != ppos->extractMethod) &&
- (0 != ppos->extractMethod (data,
- size,
- proc,
- proc_cls,
- ppos->plugin_options)) )
- {
- ppos = NULL;
- break;
- }
- }
- }
- break;
- case EXTRACTOR_OPTION_DISABLED:
- break;
- }
- if (ppos == NULL)
- break;
- ppos = ppos->next;
+
+/**
+ * Find the smallest seek request among the enabled plugins that is greater
+ * than 0 and not below current_position, then reposition fd to that offset.
+ *
+ * @param plugins list of plugins whose seek_request fields are inspected
+ * @param fd descriptor of the file being processed
+ * @param fsize size of the file; used as the starting value of the minimum
+ * @param current_position seek requests below this offset are ignored
+ * @return the offset fd was asked to move to, or -1 if no enabled plugin
+ *         requested an offset smaller than fsize (or, in the plain lseek
+ *         fallback, if the offset is INT_MAX or larger)
+ */
+static int64_t
+seek_to_new_position (struct EXTRACTOR_PluginList *plugins,
+                      int fd,
+                      int64_t fsize,
+                      int64_t current_position)
+{
+  int64_t min_pos = fsize;
+  struct EXTRACTOR_PluginList *ppos;
+  for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+  {
+    switch (ppos->flags)
+    {
+    case EXTRACTOR_OPTION_DEFAULT_POLICY:
+    case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
+    case EXTRACTOR_OPTION_IN_PROCESS:
+      if (ppos->seek_request > 0 && ppos->seek_request >= current_position &&
+          ppos->seek_request <= min_pos)
+        min_pos = ppos->seek_request;
+      break;
+    case EXTRACTOR_OPTION_DISABLED:
+      break;
 }
- if (want_shm)
- {
-#ifndef WINDOWS
- if (NULL != ptr)
- munmap (ptr, size);
- if (shmid != -1)
- close (shmid);
- shm_unlink (fn);
- if (NULL != tptr)
- {
- munmap (tptr, tsize);
- shm_unlink (tfn);
- if (tshmid != -1)
- close (tshmid);
- }
+  }
+  /* min_pos still at (or beyond) fsize: nobody asked for data inside the file */
+  if (min_pos >= fsize)
+    return -1;
+#if WINDOWS
+  _lseeki64 (fd, min_pos, SEEK_SET);
+#elif HAVE_SEEK64
+  /* 64-bit seek is available; the guarded lseek below is the fallback
+     for platforms without it (the test used to be inverted) */
+  lseek64 (fd, min_pos, SEEK_SET);
 #else
- UnmapViewOfFile (ptr);
- CloseHandle (map);
- if (tptr != NULL)
- {
- UnmapViewOfFile (tptr);
- CloseHandle (tmap);
- }
+  if (min_pos >= INT_MAX)
+    return -1;
+  lseek (fd, (ssize_t) min_pos, SEEK_SET);
 #endif
- }
+  return min_pos;
 }
+/**
+ * Call plugin_load () for a plugin whose flags select in-process
+ * execution; plugins with any other policy are left untouched.
+ *
+ * @param plugin plugin list entry to examine
+ */
+static void
+load_in_process_plugin (struct EXTRACTOR_PluginList *plugin)
+{
+  if (EXTRACTOR_OPTION_IN_PROCESS != plugin->flags)
+    return;
+  plugin_load (plugin);
+}
/**
- * If the given data is compressed using gzip or bzip2, decompress
- * it. Run 'extract' on the decompressed contents (or the original
- * contents if they were not compressed).
+ * Extract keywords using the given set of plugins.
*
* @param plugins the list of plugins to use
- * @param data data to process, never NULL
- * @param size number of bytes in data
- * @param tdata end of file data, or NULL
- * @param tsize number of bytes in tdata
+ * @param data data to process, or NULL if fd is not -1
+ * @param fd file to read data from, or -1 if data is not NULL
+ * @param fsize size of data or size of file
+ * @param buffer a buffer with data already read from the file (if fd != -1)
+ * @param buffer_size size of buffer
* @param proc function to call for each meta data item found
* @param proc_cls cls argument to proc
*/
static void
-decompress_and_extract (struct EXTRACTOR_PluginList *plugins,
- const unsigned char * data,
- size_t size,
- const char * tdata,
- size_t tsize,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls) {
- unsigned char * buf;
- unsigned char * rbuf;
- size_t dsize;
-#if HAVE_ZLIB
- z_stream strm;
- int ret;
- size_t pos;
+do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd,
int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor
proc, void *proc_cls)
+{
+ int shm_result;
+ unsigned char *shm_ptr;
+#if !WINDOWS
+ int shm_id;
+#else
+ HANDLE map_handle;
#endif
-#if HAVE_LIBBZ2
- bz_stream bstrm;
- int bret;
- size_t bpos;
+ char shm_name[MAX_SHM_NAME + 1];
+
+ struct EXTRACTOR_PluginList *ppos;
+
+ int64_t position = 0;
+ size_t map_size;
+ ssize_t read_result;
+ int kill_plugins = 0;
+
+ map_size = (fd == -1) ? fsize : MAX_READ;
+
+ /* Make a shared memory object. Even if we're running in-process. Simpler
that way */
+#if !WINDOWS
+ shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name,
MAX_SHM_NAME,
+ map_size);
+#else
+ shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name,
MAX_SHM_NAME,
+ map_size);
#endif
+ if (shm_result != 0)
+ return;
- buf = NULL;
- dsize = 0;
-#if HAVE_ZLIB
- /* try gzip decompression first */
- if ( (size >= 12) &&
- (data[0] == 0x1f) &&
- (data[1] == 0x8b) &&
- (data[2] == 0x08) )
+ /* This three-loops-instead-of-one construction is intended to increase
parallelism */
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ start_process (ppos);
+
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ load_in_process_plugin (ppos);
+
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ write_plugin_data (ppos);
+
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ init_plugin_state (ppos, shm_name, fsize);
+
+ while (1)
+ {
+ int plugins_not_ready = 0;
+ if (fd != -1)
{
- /* Process gzip header */
- unsigned int gzip_header_length = 10;
-
- if (data[3] & 0x4) /* FEXTRA set */
- gzip_header_length += 2 + (unsigned) (data[10] & 0xff)
- + (((unsigned) (data[11] & 0xff)) * 256);
-
- if (data[3] & 0x8) /* FNAME set */
- {
- const unsigned char * cptr = data + gzip_header_length;
- /* stored file name is here */
- while (cptr < data + size)
- {
- if ('\0' == *cptr)
- break;
- cptr++;
- }
- if (0 != proc (proc_cls,
- "<zlib>",
- EXTRACTOR_METATYPE_FILENAME,
- EXTRACTOR_METAFORMAT_C_STRING,
- "text/plain",
- (const char*) (data + gzip_header_length),
- cptr - (data + gzip_header_length)))
- return; /* done */
- gzip_header_length = (cptr - data) + 1;
- }
- if (data[3] & 0x16) /* FCOMMENT set */
- {
- const unsigned char * cptr = data + gzip_header_length;
- /* stored comment is here */
- while (cptr < data + size)
- {
- if('\0' == *cptr)
- break;
- cptr ++;
- }
- if (0 != proc (proc_cls,
- "<zlib>",
- EXTRACTOR_METATYPE_COMMENT,
- EXTRACTOR_METAFORMAT_C_STRING,
- "text/plain",
- (const char*) (data + gzip_header_length),
- cptr - (data + gzip_header_length)))
- return; /* done */
- gzip_header_length = (cptr - data) + 1;
- }
- if(data[3] & 0x2) /* FCHRC set */
- gzip_header_length += 2;
- memset(&strm,
- 0,
- sizeof(z_stream));
-#ifdef ZLIB_VERNUM
- gzip_header_length = 0;
-#endif
- if (size > gzip_header_length)
- {
- strm.next_in = (Bytef*) data + gzip_header_length;
- strm.avail_in = size - gzip_header_length;
- }
+ /* fill the shared buffer with data from the file */
+ if (buffer_size > 0)
+ memcpy (shm_ptr, buffer, buffer_size);
+ read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size);
+ if (read_result <= 0)
+ break;
else
- {
- strm.next_in = (Bytef*) data;
- strm.avail_in = 0;
- }
- strm.total_in = 0;
- strm.zalloc = NULL;
- strm.zfree = NULL;
- strm.opaque = NULL;
-
- /*
- * note: maybe plain inflateInit(&strm) is adequate,
- * it looks more backward-compatible also ;
- *
- * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
- * there might be a better check.
- */
- if (Z_OK == inflateInit2(&strm,
-#ifdef ZLIB_VERNUM
- 15 + 32
-#else
- -MAX_WBITS
-#endif
- )) {
- dsize = 2 * size;
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
- buf = malloc(dsize);
- pos = 0;
- if (buf == NULL)
- {
- inflateEnd(&strm);
- }
- else
- {
- strm.next_out = (Bytef*) buf;
- strm.avail_out = dsize;
- do
- {
- ret = inflate(&strm,
- Z_SYNC_FLUSH);
- if (ret == Z_OK)
- {
- if (dsize == MAX_DECOMPRESS)
- break;
- pos += strm.total_out;
- strm.total_out = 0;
- dsize *= 2;
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
- rbuf = realloc(buf, dsize);
- if (rbuf == NULL)
- {
- free (buf);
- buf = NULL;
- break;
- }
- buf = rbuf;
- strm.next_out = (Bytef*) &buf[pos];
- strm.avail_out = dsize - pos;
- }
- else if (ret != Z_STREAM_END)
- {
- /* error */
- free(buf);
- buf = NULL;
- }
- } while ( (buf != NULL) &&
- (ret != Z_STREAM_END) );
- dsize = pos + strm.total_out;
- inflateEnd(&strm);
- if ( (dsize == 0) &&
- (buf != NULL) )
- {
- free(buf);
- buf = NULL;
- }
- }
- }
+ map_size = read_result + buffer_size;
+ if (buffer_size > 0)
+ buffer_size = 0;
}
-#endif
-
-#if HAVE_LIBBZ2
- if ( (size >= 4) &&
- (data[0] == 'B') &&
- (data[1] == 'Z') &&
- (data[2] == 'h') )
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ plugins_not_ready += give_shm_to_plugin (ppos, position, map_size);
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls);
+ while (plugins_not_ready > 0 && !kill_plugins)
{
- /* now try bz2 decompression */
- memset(&bstrm,
- 0,
- sizeof(bz_stream));
- bstrm.next_in = (char*) data;
- bstrm.avail_in = size;
- bstrm.total_in_lo32 = 0;
- bstrm.total_in_hi32 = 0;
- bstrm.bzalloc = NULL;
- bstrm.bzfree = NULL;
- bstrm.opaque = NULL;
- if ( (buf == NULL) &&
- (BZ_OK == BZ2_bzDecompressInit(&bstrm,
- 0,
- 0)) )
- {
- dsize = 2 * size;
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
- buf = malloc(dsize);
- bpos = 0;
- if (buf == NULL)
- {
- BZ2_bzDecompressEnd(&bstrm);
- }
- else
- {
- bstrm.next_out = (char*) buf;
- bstrm.avail_out = dsize;
- do {
- bret = BZ2_bzDecompress(&bstrm);
- if (bret == Z_OK)
- {
- if (dsize == MAX_DECOMPRESS)
- break;
- bpos += bstrm.total_out_lo32;
- bstrm.total_out_lo32 = 0;
- dsize *= 2;
- if (dsize > MAX_DECOMPRESS)
- dsize = MAX_DECOMPRESS;
- rbuf = realloc(buf, dsize);
- if (rbuf == NULL)
- {
- free (buf);
- buf = NULL;
- break;
- }
- buf = rbuf;
- bstrm.next_out = (char*) &buf[bpos];
- bstrm.avail_out = dsize - bpos;
- }
- else if (bret != BZ_STREAM_END)
- {
- /* error */
- free(buf);
- buf = NULL;
- }
- } while ( (buf != NULL) &&
- (bret != BZ_STREAM_END) );
- dsize = bpos + bstrm.total_out_lo32;
- BZ2_bzDecompressEnd(&bstrm);
- if ( (dsize == 0) &&
- (buf != NULL) )
- {
- free(buf);
- buf = NULL;
- }
- }
- }
+ int ready = wait_for_reply (plugins, proc, proc_cls);
+ if (ready <= 0)
+ kill_plugins = 1;
+ plugins_not_ready -= ready;
}
-#endif
- if (buf != NULL)
+ if (kill_plugins)
+ break;
+ if (fd != -1)
{
- data = buf;
- size = dsize;
+ position += map_size;
+ position = seek_to_new_position (plugins, fd, fsize, position);
+ if (position < 0)
+ break;
}
- extract (plugins,
- (const char*) data,
- size,
- tdata,
- tsize,
- proc,
- proc_cls);
- if (buf != NULL)
- free(buf);
- errno = 0; /* kill transient errors */
-}
+ else
+ break;
+ }
+ if (kill_plugins)
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ stop_process (ppos);
+ for (ppos = plugins; NULL != ppos; ppos = ppos->next)
+ discard_plugin_state (ppos);
-/**
- * Open a file
- */
-static int file_open(const char *filename, int oflag, ...)
-{
- int mode;
- const char *fn;
-#ifdef MINGW
- char szFile[_MAX_PATH + 1];
- long lRet;
-
- if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
- {
- errno = ENOENT;
- SetLastError(lRet);
- return -1;
- }
- fn = szFile;
+#if WINDOWS
+ destroy_shm_w32 (shm_ptr, map_handle);
#else
- fn = filename;
+ destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
#endif
- mode = 0;
-#ifdef MINGW
- /* Set binary mode */
- mode |= O_BINARY;
-#endif
- return OPEN(fn, oflag, mode);
}
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 0
-#endif
-
-
/**
* Extract keywords from a file using the given set of plugins.
* If needed, opens the file and loads its data (via mmap). Then
@@ -1478,93 +2106,152 @@
*/
void
EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
- const char *filename,
- const void *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls)
+ const char *filename,
+ const void *data,
+ size_t size,
+ EXTRACTOR_MetaDataProcessor proc,
+ void *proc_cls)
{
- int fd;
- void * buffer;
- void * tbuffer;
- struct stat fstatbuf;
- size_t fsize;
- size_t tsize;
- int eno;
- off_t offset;
- long pg;
-#ifdef WINDOWS
- SYSTEM_INFO sys;
-#endif
+ int fd = -1;
+ struct stat64 fstatbuf;
+ int64_t fsize = 0;
+ int memory_only = 1;
+ int compression_type = -1;
+ void *buffer = NULL;
+ size_t buffer_size;
+ int decompression_result;
- fd = -1;
- buffer = NULL;
- if ( (data == NULL) &&
- (filename != NULL) &&
- (0 == STAT(filename, &fstatbuf)) &&
- (!S_ISDIR(fstatbuf.st_mode)) &&
- (-1 != (fd = file_open (filename,
- O_RDONLY | O_LARGEFILE))) )
- {
- fsize = (fstatbuf.st_size > 0xFFFFFFFF) ? 0xFFFFFFFF : fstatbuf.st_size;
- if (fsize == 0)
- {
- close(fd);
- return;
- }
- if (fsize > MAX_READ)
- fsize = MAX_READ;
- buffer = MMAP(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0);
- if ( (buffer == NULL) || (buffer == (void *) -1) )
- {
- eno = errno;
- close(fd);
- errno = eno;
- return;
- }
+ /* If data is not given, then we need to read it from the file. Try opening
it */
+ if ((data == NULL) &&
+ (filename != NULL) &&
+ (0 == STAT64(filename, &fstatbuf)) &&
+ (!S_ISDIR(fstatbuf.st_mode)) &&
+ (-1 != (fd = file_open (filename,
+ O_RDONLY | O_LARGEFILE))))
+ {
+ /* Empty files are of no interest */
+ fsize = fstatbuf.st_size;
+ if (fsize == 0)
+ {
+ close(fd);
+ return;
}
- if ( (buffer == NULL) &&
- (data == NULL) )
+ /* File is too big -> can't read it into memory */
+ if (fsize > MAX_READ)
+ memory_only = 0;
+ }
+
+ /* Data is not given, and we've failed to open the file with data -> exit */
+ if ((fsize == 0) && (data == NULL))
return;
- /* for footer extraction */
- tsize = 0;
- tbuffer = NULL;
- if ( (data == NULL) &&
- (fstatbuf.st_size > fsize) &&
- (fstatbuf.st_size > MAX_READ) )
+ /* fsize is now size of the data OR size of the file */
+ if (data != NULL)
+ fsize = size;
+
+ errno = 0;
+ /* Peek at first few bytes of the file (or of the data), and see if it's
compressed.
+ * If data is NULL, buffer is allocated by the function and holds the first
few bytes
+ * of the file, buffer_size is set too.
+ */
+ compression_type = get_compression_type (data, fd, fsize, &buffer,
&buffer_size);
+ if (compression_type < 0)
+ {
+ /* errno is set by get_compression_type () */
+ if (fd != -1)
+ close (fd);
+ return;
+ }
+ if (compression_type > 0)
+ {
+ /* Don't assume that MAX_DECOMPRESS < MAX_READ */
+ if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ))
{
- pg = SYSCONF (_SC_PAGE_SIZE);
- if ( (pg > 0) &&
- (pg < MAX_READ) )
- {
- offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg;
- if (offset < fstatbuf.st_size)
- {
- tsize = fstatbuf.st_size - offset;
- tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset);
- if ( (tbuffer == NULL) || (tbuffer == (void *) -1) )
- {
- tsize = 0;
- tbuffer = NULL;
- }
- }
- }
+ /* File or data is too big to be decompressed in-memory (the only kind of
decompression we do) */
+ errno = EFBIG;
+ if (fd != -1)
+ close (fd);
+ if (buffer != NULL)
+ free (buffer);
+ return;
}
- decompress_and_extract (plugins,
- buffer != NULL ? buffer : data,
- buffer != NULL ? fsize : size,
- tbuffer,
- tsize,
- proc,
- proc_cls);
+ /* Decompress data (or file contents + what we've read so far). Either way
it writes a new
+ * pointer to buffer, sets buffer_size, and frees the old buffer (if it
wasn't NULL).
+ * In case of failure it cleans up the buffer after itself.
+ * Will also report compression-related metadata to the caller.
+ */
+ decompression_result = try_to_decompress (data, fd, fsize,
compression_type, &buffer, &buffer_size, proc, proc_cls);
+ if (decompression_result != 0)
+ {
+ /* Buffer is taken care of already */
+ close (fd);
+ errno = EILSEQ;
+ return;
+ }
+ else
+ {
+ close (fd);
+ fd = -1;
+ }
+ }
+
+ /* Now we either have a non-NULL data of fsize bytes
+ * OR a valid fd to read from and a small buffer of buffer_size bytes
+ * OR an invalid fd and a big buffer of buffer_size bytes
+ * Simplify this situation a bit:
+ */
+ if ((data == NULL) && (fd == -1) && (buffer_size > 0))
+ {
+ data = (const void *) buffer;
+ fsize = buffer_size;
+ }
+
+ /* Now we either have a non-NULL data of fsize bytes
+ * OR a valid fd to read from and a small buffer of buffer_size bytes
+ * and we might need to free the buffer later in either case
+ */
+
+ /* do_extract () might set errno itself, but from our point of view
everything is OK */
+ errno = 0;
+
+ do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls);
+
if (buffer != NULL)
- MUNMAP (buffer, fsize);
- if (tbuffer != NULL)
- MUNMAP (tbuffer, tsize);
+ free (buffer);
if (-1 != fd)
- close(fd);
+ close(fd);
}
+
+#if WINDOWS
+/**
+ * W32 entry point that serves plugin requests over a pair of inherited
+ * handles (presumably invoked through rundll32 -- confirm against
+ * start_process ()).  The command line carries two decimal handle values:
+ * the first is opened read-only as the request channel, the second as the
+ * reply channel.  Both are switched to binary mode before being handed to
+ * process_requests ().
+ *
+ * @param hwnd unused
+ * @param hinst unused
+ * @param lpszCmdLine command line holding the two handle values
+ * @param nCmdShow unused
+ */
+void CALLBACK
+RundllEntryPoint (HWND hwnd,
+                  HINSTANCE hinst,
+                  LPSTR lpszCmdLine,
+                  int nCmdShow)
+{
+  unsigned long in_raw;
+  unsigned long out_raw;
+  int in;
+  int out;
+
+  /* "%lu" stores an unsigned long, which is narrower than intptr_t on
+     64-bit W32; scan into matching temporaries and convert afterwards
+     instead of writing through a mismatched pointer */
+  if (2 != sscanf (lpszCmdLine, "%lu %lu", &in_raw, &out_raw))
+    return;
+  in = _open_osfhandle ((intptr_t) in_raw, _O_RDONLY);
+  out = _open_osfhandle ((intptr_t) out_raw, 0);
+  if ( (-1 == in) || (-1 == out) )
+  {
+    /* do not hand invalid descriptors to the request loop */
+    if (-1 != in)
+      _close (in);
+    if (-1 != out)
+      _close (out);
+    return;
+  }
+  setmode (in, _O_BINARY);
+  setmode (out, _O_BINARY);
+  process_requests (read_plugin_data (in),
+                    in, out);
+}
+
+/**
+ * ANSI-suffixed alias of RundllEntryPoint (); forwards all arguments
+ * unchanged.
+ *
+ * @param hwnd passed through
+ * @param hinst passed through
+ * @param lpszCmdLine passed through
+ * @param nCmdShow passed through
+ */
+void CALLBACK
+RundllEntryPointA (HWND hwnd,
+                   HINSTANCE hinst,
+                   LPSTR lpszCmdLine,
+                   int nCmdShow)
+{
+  /* a void function must not return an expression in C, so just call */
+  RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow);
+}
+#endif
+
/**
* Initialize gettext and libltdl (and W32 if needed).
*/
@@ -1579,12 +2266,12 @@
if (err > 0) {
#if DEBUG
fprintf(stderr,
- _("Initialization of plugin mechanism failed: %s!\n"),
- lt_dlerror());
+ _("Initialization of plugin mechanism failed: %s!\n"),
+ lt_dlerror());
#endif
return;
}
-#ifdef MINGW
+#if WINDOWS
plibc_init("GNU", PACKAGE);
#endif
}
@@ -1594,12 +2281,10 @@
* Deinit.
*/
void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() {
-#ifdef MINGW
+#if WINDOWS
plibc_shutdown();
#endif
lt_dlexit ();
}
-
-
/* end of extractor.c */
Modified: Extractor/src/main/extractor_plugins.c
===================================================================
--- Extractor/src/main/extractor_plugins.c 2012-03-27 12:46:29 UTC (rev
20782)
+++ Extractor/src/main/extractor_plugins.c 2012-03-27 13:05:17 UTC (rev
20783)
@@ -204,15 +204,24 @@
plugin->flags = EXTRACTOR_OPTION_DISABLED;
return -1;
}
- plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle,
- "_EXTRACTOR_%s_extract",
+ plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle,
+
"_EXTRACTOR_%s_extract_method",
plugin->libname,
&plugin->specials);
- if (plugin->extractMethod == NULL)
+ plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle,
+
"_EXTRACTOR_%s_init_state_method",
+ plugin->libname,
+ &plugin->specials);
+ plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle,
+
"_EXTRACTOR_%s_discard_state_method",
+ plugin->libname,
+ &plugin->specials);
+ if (plugin->extract_method == NULL || plugin->init_state_method == NULL ||
+ plugin->discard_state_method == NULL)
{
#if DEBUG
fprintf (stderr,
- "Resolving `extract' method of plugin `%s' failed: %s\n",
+ "Resolving `extract', 'init_state' or 'discard_state' method(s)
of plugin `%s' failed: %s\n",
plugin->short_libname,
lt_dlerror ());
#endif
@@ -243,8 +252,15 @@
enum EXTRACTOR_Options flags)
{
struct EXTRACTOR_PluginList *result;
+ struct EXTRACTOR_PluginList *i;
char *libname;
+ for (i = prev; i != NULL; i = i->next)
+ {
+ if (strcmp (i->short_libname, library) == 0)
+ return prev;
+ }
+
libname = find_plugin (library);
if (libname == NULL)
{
Modified: Extractor/src/main/extractor_plugins.h
===================================================================
--- Extractor/src/main/extractor_plugins.h 2012-03-27 12:46:29 UTC (rev
20782)
+++ Extractor/src/main/extractor_plugins.h 2012-03-27 13:05:17 UTC (rev
20783)
@@ -64,7 +64,9 @@
/**
* Pointer to the function used for meta data extraction.
*/
- EXTRACTOR_ExtractMethod extractMethod;
+ EXTRACTOR_extract_method extract_method;
+ EXTRACTOR_init_state_method init_state_method;
+ EXTRACTOR_discard_state_method discard_state_method;
/**
* Options for the plugin.
@@ -84,26 +86,72 @@
enum EXTRACTOR_Options flags;
/**
- * Process ID of the child process for this plugin. 0 for
- * none.
+ * Process ID of the child process for this plugin. 0 for none.
*/
-#ifndef WINDOWS
+#if !WINDOWS
int cpid;
#else
HANDLE hProcess;
#endif
/**
- * Pipe used to send information about shared memory segments to
- * the child process. NULL if not initialized.
+ * Pipe used to communicate information to the plugin child process.
+ * NULL if not initialized.
*/
+#if !WINDOWS
FILE *cpipe_in;
+#else
+ HANDLE cpipe_in;
+#endif
/**
+ * A position this plugin wants us to seek to. -1 if it's finished.
+ * Starts at 0;
+ */
+ int64_t seek_request;
+
+#if !WINDOWS
+ int shm_id;
+#else
+ HANDLE map_handle;
+#endif
+
+ void *state;
+
+ int64_t fsize;
+
+ int64_t position;
+
+ unsigned char *shm_ptr;
+
+ size_t map_size;
+
+ /**
* Pipe used to read information about extracted meta data from
- * the child process. -1 if not initialized.
+ * the plugin child process. -1 if not initialized.
*/
+#if !WINDOWS
int cpipe_out;
+#else
+ HANDLE cpipe_out;
+#endif
+
+#if WINDOWS
+ /**
+ * A structure for overlapped reads on W32.
+ */
+ OVERLAPPED ov_read;
+
+ /**
+ * A structure for overlapped writes on W32.
+ */
+ OVERLAPPED ov_write;
+
+ /**
+ * A write buffer for overlapped writes on W32
+ */
+ unsigned char *ov_write_buffer;
+#endif
};
/**
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/Makefile.am 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,4 +1,4 @@
-INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common
+INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common
-I$(top_srcdir)/src/main
# install plugins under:
plugindir = $(libdir)/@RPLUGINDIR@
@@ -11,184 +11,24 @@
SUBDIRS = .
-if HAVE_FFMPEG
- thumbffmpeg=libextractor_thumbnailffmpeg.la
-endif
-
-if HAVE_LIBRPM
- rpm=libextractor_rpm.la
-endif
-
-if HAVE_GLIB
-if WITH_GSF
- ole2=libextractor_ole2.la
-endif
-if HAVE_GTK
- thumbgtk=libextractor_thumbnailgtk.la
-endif
-endif
-
-if HAVE_QT
- thumbqt=libextractor_thumbnailqt.la
- qtflags=-lQtGui -lQtCore -lpthread
-else
-if HAVE_QT4
- thumbqt=libextractor_thumbnailqt.la
- qtflags=-lQtGui4 -lQtCore4
-endif
-endif
-
-if HAVE_QT_SVG
- svgflags = -lQtSvg
-else
-if HAVE_QT_SVG4
- svgflags = -lQtSvg4
-endif
-endif
-
-if HAVE_CXX
-if HAVE_EXIV2
- exiv2=libextractor_exiv2.la
-endif
-if HAVE_POPPLER
- pdf=libextractor_pdf.la
-endif
-endif
-
-if HAVE_MPEG2
- mpeg = libextractor_mpeg.la
-endif
-
-if HAVE_VORBISFILE
- ogg = libextractor_ogg.la
-endif
-
-if HAVE_FLAC
- flac = libextractor_flac.la
-endif
-
-if NEED_VORBIS
- vorbisflag = -lvorbis
-endif
-
-if NEED_OGG
- flacoggflag = -logg
-endif
-
plugin_LTLIBRARIES = \
- libextractor_applefile.la \
- libextractor_asf.la \
- libextractor_deb.la \
- libextractor_dvi.la \
- libextractor_elf.la \
- $(exiv2) \
- $(flac) \
- libextractor_flv.la \
- libextractor_gif.la \
- libextractor_html.la \
libextractor_id3.la \
libextractor_id3v2.la \
- libextractor_id3v23.la \
- libextractor_id3v24.la \
- libextractor_it.la \
- libextractor_jpeg.la \
- libextractor_man.la \
- libextractor_mime.la \
- libextractor_mkv.la \
- libextractor_mp3.la \
- $(mpeg) \
- libextractor_nsf.la \
- libextractor_nsfe.la \
- libextractor_odf.la \
- $(ogg) \
- $(ole2) \
- $(pdf) \
- libextractor_png.la \
- libextractor_ps.la \
- libextractor_qt.la \
- libextractor_real.la \
- libextractor_riff.la \
- $(rpm) \
- libextractor_s3m.la \
- libextractor_sid.la \
- libextractor_tar.la \
- $(thumbgtk) \
- $(thumbqt) \
- $(thumbffmpeg) \
- libextractor_tiff.la \
- libextractor_wav.la \
- libextractor_xm.la \
- libextractor_zip.la
+ libextractor_mp3.la
-libextractor_applefile_la_SOURCES = \
- applefile_extractor.c
-libextractor_applefile_la_LDFLAGS = \
+libextractor_mp3_la_SOURCES = \
+ mp3_extractor.c
+libextractor_mp3_la_LDFLAGS = \
$(PLUGINFLAGS)
-libextractor_applefile_la_LIBADD = \
+libextractor_mp3_la_LIBADD = \
$(top_builddir)/src/common/libextractor_common.la \
$(LE_LIBINTL)
-libextractor_asf_la_SOURCES = \
- asf_extractor.c
-libextractor_asf_la_LDFLAGS = \
- $(top_builddir)/src/common/libextractor_common.la \
+libextractor_ebml_la_SOURCES = \
+ ebml_extractor.c
+libextractor_ebml_la_LDFLAGS = \
$(PLUGINFLAGS)
-libextractor_deb_la_SOURCES = \
- deb_extractor.c
-libextractor_deb_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_deb_la_LIBADD = \
- -lz
-
-libextractor_dvi_la_SOURCES = \
- dvi_extractor.c
-libextractor_dvi_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_elf_la_SOURCES = \
- elf_extractor.c
-libextractor_elf_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_elf_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_exiv2_la_SOURCES = \
- exiv2_extractor.cc
-libextractor_exiv2_la_LDFLAGS = \
- $(XTRA_CPPLIBS) $(PLUGINFLAGS)
-libextractor_exiv2_la_LIBADD = \
- -lexiv2
-
-libextractor_flac_la_SOURCES = \
- flac_extractor.c
-libextractor_flac_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_flac_la_LIBADD = \
- -lFLAC $(flacoggflag) \
- $(LE_LIBINTL)
-
-libextractor_flv_la_SOURCES = \
- flv_extractor.c
-libextractor_flv_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_flv_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_gif_la_SOURCES = \
- gif_extractor.c
-libextractor_gif_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_gif_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_html_la_SOURCES = \
- html_extractor.c
-libextractor_html_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_html_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
libextractor_id3_la_SOURCES = \
id3_extractor.c
libextractor_id3_la_LDFLAGS = \
@@ -204,211 +44,4 @@
libextractor_id3v2_la_LIBADD = \
$(top_builddir)/src/common/libextractor_common.la
-libextractor_id3v23_la_SOURCES = \
- id3v23_extractor.c
-libextractor_id3v23_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_id3v23_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_id3v24_la_SOURCES = \
- id3v24_extractor.c
-libextractor_id3v24_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_id3v24_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_it_la_SOURCES = \
- it_extractor.c
-libextractor_it_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_jpeg_la_SOURCES = \
- jpeg_extractor.c
-libextractor_jpeg_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_jpeg_la_LIBADD = \
- $(LE_LIBINTL)
-
-libextractor_man_la_SOURCES = \
- man_extractor.c
-libextractor_man_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_man_la_LIBADD = \
- $(LE_LIBINTL)
-
-libextractor_mime_la_SOURCES = \
- mime_extractor.c
-libextractor_mime_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_mkv_la_SOURCES = \
- mkv_extractor.c
-libextractor_mkv_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_mp3_la_SOURCES = \
- mp3_extractor.c
-libextractor_mp3_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_mp3_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la \
- $(LE_LIBINTL)
-
-libextractor_mpeg_la_SOURCES = \
- mpeg_extractor.c
-libextractor_mpeg_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_mpeg_la_LIBADD = \
- -lmpeg2
-
-libextractor_nsf_la_SOURCES = \
- nsf_extractor.c
-libextractor_nsf_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_nsfe_la_SOURCES = \
- nsfe_extractor.c
-libextractor_nsfe_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_odf_la_SOURCES = \
- odf_extractor.c
-libextractor_odf_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_odf_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la \
- -lz
-
-libextractor_ogg_la_SOURCES = \
- ogg_extractor.c
-libextractor_ogg_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_ogg_la_LIBADD = \
- -lvorbisfile $(vorbisflag) -logg
-
-libextractor_ole2_la_SOURCES = \
- ole2_extractor.c
-libextractor_ole2_la_CFLAGS = \
- $(GSF_CFLAGS)
-libextractor_ole2_la_LIBADD = \
- $(LIBADD) $(GSF_LIBS) \
- $(top_builddir)/src/common/libextractor_common.la
-libextractor_ole2_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_pdf_la_SOURCES = \
- pdf_extractor.cc
-libextractor_pdf_la_LDFLAGS = \
- $(XTRA_CPPLIBS) $(PLUGINFLAGS)
-libextractor_pdf_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la \
- -lpoppler
-
-libextractor_png_la_SOURCES = \
- png_extractor.c
-libextractor_png_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_png_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la \
- -lz
-
-libextractor_ps_la_SOURCES = \
- ps_extractor.c
-libextractor_ps_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_qt_la_SOURCES = \
- qt_extractor.c
-libextractor_qt_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_qt_la_LIBADD = \
- -lz -lm
-
-libextractor_real_la_SOURCES = \
- real_extractor.c
-libextractor_real_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_riff_la_SOURCES = \
- riff_extractor.c
-libextractor_riff_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_riff_la_LIBADD = \
- $(LE_LIBINTL) \
- -lm
-
-libextractor_rpm_la_SOURCES = \
- rpm_extractor.c
-libextractor_rpm_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_rpm_la_LIBADD = \
- -lrpm
-
-libextractor_s3m_la_SOURCES = \
- s3m_extractor.c
-libextractor_s3m_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_sid_la_SOURCES = \
- sid_extractor.c
-libextractor_sid_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_tar_la_SOURCES = \
- tar_extractor.c
-libextractor_tar_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_thumbnailffmpeg_la_SOURCES = \
- thumbnailffmpeg_extractor.c
-libextractor_thumbnailffmpeg_la_LIBADD = \
- -lavformat -lavcodec -lswscale -lavutil -lz -lbz2
-libextractor_thumbnailffmpeg_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_thumbnailgtk_la_CFLAGS = \
- $(GLIB_CFLAGS) $(GTK_CFLAGS)
-libextractor_thumbnailgtk_la_LIBADD = \
- $(LIBADD) -lgobject-2.0 @GTK_LIBS@
-libextractor_thumbnailgtk_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_thumbnailgtk_la_SOURCES = \
- thumbnailgtk_extractor.c
-
-libextractor_thumbnailqt_la_SOURCES = \
- thumbnailqt_extractor.cc
-libextractor_thumbnailqt_la_LDFLAGS = \
- $(QT_LDFLAGS) \
- $(PLUGINFLAGS)
-libextractor_thumbnailqt_la_LIBADD = \
- $(qtflags) $(svgflags)
-libextractor_thumbnailqt_la_CPPFLAGS = \
- $(QT_CPPFLAGS) \
- $(QT_CFLAGS) $(QT_SVG_CFLAGS)
-
-libextractor_tiff_la_SOURCES = \
- tiff_extractor.c
-libextractor_tiff_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_tiff_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la
-
-libextractor_wav_la_SOURCES = \
- wav_extractor.c
-libextractor_wav_la_LDFLAGS = \
- $(PLUGINFLAGS)
-libextractor_wav_la_LIBADD = \
- $(LE_LIBINTL)
-
-libextractor_xm_la_SOURCES = \
- xm_extractor.c
-libextractor_xm_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
-libextractor_zip_la_SOURCES = \
- zip_extractor.c
-libextractor_zip_la_LDFLAGS = \
- $(PLUGINFLAGS)
-
EXTRA_DIST = template_extractor.c
Modified: Extractor/src/plugins/id3_extractor.c
===================================================================
--- Extractor/src/plugins/id3_extractor.c 2012-03-27 12:46:29 UTC (rev
20782)
+++ Extractor/src/plugins/id3_extractor.c 2012-03-27 13:05:17 UTC (rev
20783)
@@ -29,6 +29,8 @@
#include <unistd.h>
#include <stdlib.h>
+#include "extractor_plugins.h"
+
typedef struct
{
char *title;
@@ -199,6 +201,46 @@
#define OK 0
#define INVALID_ID3 1
+struct id3_state /* per-plugin ID3 parsing state, kept in plugin->state */
+{
+ int state; /* current step; compared against the enum ID3State values */
+ id3tag info; /* tag fields; string members are freed when the state is discarded */
+};
+
+enum ID3State /* values stored in id3_state.state */
+{
+ ID3_INVALID = -1, /* extraction gives up: seek_request is set to -1 */
+ ID3_SEEKING_TO_TAIL = 0, /* initial state; targets the last 128 bytes of the file */
+ ID3_READING_TAIL = 1 /* presumably: tail is mapped and being parsed -- TODO confirm */
+};
+
+/**
+ * Attach a freshly zeroed id3_state to the plugin and put it into the
+ * ID3_SEEKING_TO_TAIL state.  If the allocation fails, plugin->state is
+ * left as NULL.
+ *
+ * @param plugin plugin list entry that receives the new state
+ */
+void
+EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct id3_state *fresh = calloc (1, sizeof (struct id3_state));
+
+  plugin->state = fresh;
+  if (NULL == fresh)
+    return;
+  fresh->state = ID3_SEEKING_TO_TAIL;
+}
+
+/**
+ * Release the id3_state attached to the plugin (if any), including the
+ * title, year, album, artist and comment strings it holds, and reset
+ * plugin->state to NULL.
+ *
+ * @param plugin plugin list entry whose state is discarded
+ */
+void
+EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+  struct id3_state *old = plugin->state;
+
+  plugin->state = NULL;
+  if (NULL == old)
+    return;
+  /* free (NULL) is a no-op, so fields that were never set need no check */
+  free (old->info.title);
+  free (old->info.year);
+  free (old->info.album);
+  free (old->info.artist);
+  free (old->info.comment);
+  free (old);
+}
+
static void
trim (char *k)
{
@@ -209,14 +251,14 @@
}
static int
-get_id3 (const char *data, size_t size, id3tag * id3)
+get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3)
{
const char *pos;
if (size < 128)
return INVALID_ID3;
- pos = &data[size - 128];
+ pos = &data[offset];
if (0 != strncmp ("TAG", pos, 3))
return INVALID_ID3;
pos += 3;
@@ -253,49 +295,82 @@
}
-#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)))) goto FINISH; } while (0)
+#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) return 1; } while (0)
-const char *
-EXTRACTOR_id3_options ()
+int
+EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
+ EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
{
- return "want-tail";
-}
+ int64_t file_position;
+ int64_t file_size;
+ int64_t offset = 0;
+ int64_t size;
+ struct id3_state *state;
+ char *data;
+
+ char track[16];
+ if (plugin == NULL || plugin->state == NULL)
+ return 1;
-int
-EXTRACTOR_id3_extract (const char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
-{
- id3tag info;
- char track[16];
- int ret;
+ state = plugin->state;
+ file_position = plugin->position;
+ file_size = plugin->fsize;
+ size = plugin->map_size;
+ data = (char *) plugin->shm_ptr;
- ret = 0;
- if (OK != get_id3 (data, size, &info))
- return 0;
- ADD (info.title, EXTRACTOR_METATYPE_TITLE);
- ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
- ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
- ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
- ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
- ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
- if (info.track_number != 0)
+ if (plugin->seek_request < 0)
+ return 1;
+ if (file_position - plugin->seek_request > 0)
+ {
+ plugin->seek_request = -1;
+ return 1;
+ }
+ if (plugin->seek_request - file_position < size)
+ offset = plugin->seek_request - file_position;
+
+ while (1)
+ {
+ switch (state->state)
{
- snprintf(track,
- sizeof(track), "%u", info.track_number);
- ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
+ case ID3_INVALID:
+ plugin->seek_request = -1;
+ return 1;
+ case ID3_SEEKING_TO_TAIL:
+ offset = file_size - 128 - file_position;
+ if (offset > size)
+ {
+ state->state = ID3_READING_TAIL;
+ plugin->seek_request = file_position + offset;
+ return 0;
+ }
+ else if (offset < 0)
+ {
+ state->state = ID3_INVALID;
+ break;
+ }
+ state->state = ID3_READING_TAIL;
+ break;
+ case ID3_READING_TAIL:
+ if (OK != get_id3 (data, offset, size - offset, &state->info))
+ return 1;
+ ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
+ ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
+ ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
+ ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
+ ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
+ ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
+ if (state->info.track_number != 0)
+ {
+ snprintf(track,
+ sizeof(track), "%u", state->info.track_number);
+ ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
+ }
+ state->state = ID3_INVALID;
}
-FINISH:
- if (info.title != NULL) free (info.title);
- if (info.year != NULL) free (info.year);
- if (info.album != NULL) free (info.album);
- if (info.artist != NULL) free (info.artist);
- if (info.comment != NULL) free (info.comment);
- return ret;
+ }
+ return 1;
}
/* end of id3_extractor.c */
Deleted: Extractor/src/plugins/id3v23_extractor.c
===================================================================
--- Extractor/src/plugins/id3v23_extractor.c 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/id3v23_extractor.c 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,420 +0,0 @@
-/*
- This file is part of libextractor.
- (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
-
- libextractor is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your
- option) any later version.
-
- libextractor is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with libextractor; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
-
- */
-#define DEBUG_EXTRACT_ID3v23 0
-
-#include "platform.h"
-#include "extractor.h"
-#include <string.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#ifndef MINGW
-#include <sys/mman.h>
-#endif
-
-#include "convert.h"
-
-enum Id3v23Fmt
- {
- T, /* simple, 0-terminated string, prefixed by encoding */
- U, /* 0-terminated ASCII string, no encoding */
- UL, /* unsync'ed lyrics */
- SL, /* sync'ed lyrics */
- L, /* string with language prefix */
- I /* image */
- };
-
-typedef struct
-{
- const char *text;
- enum EXTRACTOR_MetaType type;
- enum Id3v23Fmt fmt;
-} Matches;
-
-static Matches tmap[] = {
- {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
- {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
- {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
- {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
- {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
- /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
- /* TDLY */
- {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
- {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
- {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
- /* TIME */
- {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
- {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
- {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
- /* TKEY */
- {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
- {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
- {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
- {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
- {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
- {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
- {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
- {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
- {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
- {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
- {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
- {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
- {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
- {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
- {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
- {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
- /* TRDA */
- {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
- /* TRSO */
- {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
- {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
- /* TSSE */
- {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
- {"WCOM", EXTRACTOR_METATYPE_URL, U},
- {"WCOP", EXTRACTOR_METATYPE_URL, U},
- {"WOAF", EXTRACTOR_METATYPE_URL, U},
- {"WOAS", EXTRACTOR_METATYPE_URL, U},
- {"WORS", EXTRACTOR_METATYPE_URL, U},
- {"WPAY", EXTRACTOR_METATYPE_URL, U},
- {"WPUB", EXTRACTOR_METATYPE_URL, U},
- {"WXXX", EXTRACTOR_METATYPE_URL, T},
- {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
- /* ... */
- {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
- {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
- {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
- /* ... */
- {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
- /* ... */
- {"LINK", EXTRACTOR_METATYPE_URL, U},
- /* ... */
- {"USER", EXTRACTOR_METATYPE_LICENSE, T},
- /* ... */
- {NULL, 0, T}
-};
-
-
-/* mimetype = audio/mpeg */
-int
-EXTRACTOR_id3v23_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
-{
- int unsync;
- int extendedHdr;
- int experimental;
- uint32_t tsize;
- uint32_t pos;
- uint32_t ehdrSize;
- uint32_t padding;
- uint32_t csize;
- int i;
- uint16_t flags;
- char *mime;
- enum EXTRACTOR_MetaType type;
- size_t off;
- int obo;
-
- if ((size < 16) ||
- (data[0] != 0x49) ||
- (data[1] != 0x44) ||
- (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
- return 0;
- unsync = (data[5] & 0x80) > 0;
- if (unsync)
- return 0; /* not supported */
- extendedHdr = (data[5] & 0x40) > 0;
- experimental = (data[5] & 0x20) > 0;
- if (experimental)
- return 0;
- tsize = (((data[6] & 0x7F) << 21) |
- ((data[7] & 0x7F) << 14) |
- ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
- if (tsize + 10 > size)
- return 0;
- pos = 10;
- padding = 0;
- if (extendedHdr)
- {
- ehdrSize = (((data[10]) << 24) |
- ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
-
- padding = (((data[15]) << 24) |
- ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
- pos += 4 + ehdrSize;
- if (padding < tsize)
- tsize -= padding;
- else
- return 0;
- }
-
-
- while (pos < tsize)
- {
- if (pos + 10 > tsize)
- return 0;
- csize =
- (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
- data[pos + 7];
- if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
- (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
- break;
- flags = (data[pos + 8] << 8) + data[pos + 9];
- if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
- ((flags & 0x40) > 0) /* encrypted, not supported */ )
- {
- pos += 10 + csize;
- continue;
- }
- i = 0;
- while (tmap[i].text != NULL)
- {
- if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
- {
- char *word;
- if ((flags & 0x20) > 0)
- {
- /* "group" identifier, skip a byte */
- pos++;
- csize--;
- }
- switch (tmap[i].fmt)
- {
- case T:
- /* this byte describes the encoding
- try to convert strings to UTF-8
- if it fails, then forget it */
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "ISO-8859-1");
- break;
- }
- break;
- case U:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
- csize, "ISO-8859-1");
- break;
- case UL:
- if (csize < 6)
- return 0; /* malformed */
- /* find end of description */
- off = 14;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "ISO-8859-1");
- break;
- }
- break;
- case SL:
- if (csize < 7)
- return 0; /* malformed */
- /* find end of description */
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "ISO-8859-1");
- break;
- }
- break;
- case L:
- if (csize < 5)
- return 0; /* malformed */
- /* find end of description */
- obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */
- if (csize < 6)
- obo = 0;
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
- csize - 4 - obo, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
- csize - 4 - obo, "UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
- csize - 4 - obo, "ISO-8859-1");
- break;
- }
- break;
- case I:
- if (csize < 2)
- return 0; /* malformed */
- /* find end of mime type */
- off = 11;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- mime = strdup ((const char*) &data[pos + 11]);
-
- switch (data[pos+off])
- {
- case 0x03:
- case 0x04:
- type = EXTRACTOR_METATYPE_COVER_PICTURE;
- break;
- case 0x07:
- case 0x08:
- case 0x09:
- case 0x0A:
- case 0x0B:
- case 0x0C:
- type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
- break;
- case 0x0D:
- case 0x0E:
- case 0x0F:
- type = EXTRACTOR_METATYPE_EVENT_PICTURE;
- break;
- case 0x14:
- type = EXTRACTOR_METATYPE_LOGO;
- type = EXTRACTOR_METATYPE_LOGO;
- break;
- default:
- type = EXTRACTOR_METATYPE_PICTURE;
- break;
- }
- off++;
-
- /* find end of description */
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- {
- if (mime != NULL)
- free (mime);
- return 0; /* malformed */
- }
- off++;
- if ( (mime != NULL) &&
- (0 == strcasecmp ("-->",
- mime)) )
- {
- /* not supported */
- }
- else
- {
- if (0 != proc (proc_cls,
- "id3v23",
- type,
- EXTRACTOR_METAFORMAT_BINARY,
- mime,
- (const char*) &data[pos + off],
- csize + 6 - off))
- {
- if (mime != NULL)
- free (mime);
- return 1;
- }
- }
- if (mime != NULL)
- free (mime);
- word = NULL;
- break;
- default:
- return 0;
- }
- if ((word != NULL) && (strlen (word) > 0))
- {
- if (0 != proc (proc_cls,
- "id3v23",
- tmap[i].type,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- word,
- strlen(word)+1))
- {
- free (word);
- return 1;
- }
- }
- if (word != NULL)
- free (word);
- break;
- }
- i++;
- }
- pos += 10 + csize;
- }
- return 0;
-}
-
-/* end of id3v23_extractor.c */
Deleted: Extractor/src/plugins/id3v24_extractor.c
===================================================================
--- Extractor/src/plugins/id3v24_extractor.c 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/id3v24_extractor.c 2012-03-27 13:05:17 UTC (rev 20783)
@@ -1,455 +0,0 @@
-/*
- This file is part of libextractor.
- (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
-
- libextractor is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your
- option) any later version.
-
- libextractor is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with libextractor; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA.
-
- */
-#define DEBUG_EXTRACT_ID3v24 0
-
-#include "platform.h"
-#include "extractor.h"
-#include <string.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#ifndef MINGW
-#include <sys/mman.h>
-#endif
-
-#include "convert.h"
-
-enum Id3v24Fmt
- {
- T, /* simple, 0-terminated string, prefixed by encoding */
- U, /* 0-terminated ASCII string, no encoding */
- UL, /* unsync'ed lyrics */
- SL, /* sync'ed lyrics */
- L, /* string with language prefix */
- I /* image */
- };
-
-typedef struct
-{
- const char *text;
- enum EXTRACTOR_MetaType type;
- enum Id3v24Fmt fmt;
-} Matches;
-
-static Matches tmap[] = {
- {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
- {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
- {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
- {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
- {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
- /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
- /* TDLY */
- {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
- {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
- {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
- /* TIME, deprecated in 24 */
- {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
- {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
- {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
- /* TKEY */
- {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
- {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
- {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
- {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
- {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
- {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
- {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
- /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
- {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
- {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
- {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
- {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
- {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
- {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
- {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
- {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
- /* TRDA, deprecated in 24 */
- {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
- /* TRSO */
- /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
- {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
- /* TSSE */
- /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
- {"WCOM", EXTRACTOR_METATYPE_URL, U},
- {"WCOP", EXTRACTOR_METATYPE_URL, U},
- {"WOAF", EXTRACTOR_METATYPE_URL, U},
- {"WOAS", EXTRACTOR_METATYPE_URL, U},
- {"WORS", EXTRACTOR_METATYPE_URL, U},
- {"WPAY", EXTRACTOR_METATYPE_URL, U},
- {"WPUB", EXTRACTOR_METATYPE_URL, U},
- {"WXXX", EXTRACTOR_METATYPE_URL, T},
- /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
- /* ... */
- {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
- {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
- {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
- /* ... */
- {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
- /* ... */
- {"LINK", EXTRACTOR_METATYPE_URL, U},
- /* ... */
- {"USER", EXTRACTOR_METATYPE_LICENSE, T},
- /* ... */
- /* new frames in 24 */
- /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
- {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
- /* TDRC, TDRL, TDTG */
- {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
- {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
- {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
- {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
- {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
- {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
- {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
- {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
- {NULL, 0, T}
-};
-
-
-/* mimetype = audio/mpeg */
-int
-EXTRACTOR_id3v24_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
-{
- int unsync;
- int extendedHdr;
- int experimental;
- uint32_t tsize;
- uint32_t pos;
- uint32_t ehdrSize;
- uint32_t csize;
- int i;
- uint16_t flags;
- char *mime;
- enum EXTRACTOR_MetaType type;
- size_t off;
-
- if ((size < 16) ||
- (data[0] != 0x49) ||
- (data[1] != 0x44) ||
- (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
- return 0;
- unsync = (data[5] & 0x80) > 0;
- if (unsync)
- return 0; /* not supported */
- extendedHdr = (data[5] & 0x40) > 0;
- experimental = (data[5] & 0x20) > 0;
- if (experimental)
- return 0;
- /* footer = (data[5] & 0x10) > 0; */
- tsize = (((data[6] & 0x7F) << 21) |
- ((data[7] & 0x7F) << 14) |
- ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
- if (tsize + 10 > size)
- return 0;
- pos = 10;
- if (extendedHdr)
- {
- ehdrSize = (((data[10] & 0x7F) << 21) |
- ((data[11] & 0x7F) << 14) |
- ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
- pos += 4 + ehdrSize;
- if (ehdrSize > tsize)
- return 0;
- }
- while (pos < tsize)
- {
- if (pos + 10 > tsize)
- return 0;
- csize =
- (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
- data[pos + 7];
- if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
- (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
- break;
- flags = (data[pos + 8] << 8) + data[pos + 9];
- if (((flags & 0x08) > 0) /* compressed, not yet supported */ ||
- ((flags & 0x04) > 0) /* encrypted, not supported */ ||
- ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
- {
- pos += 10 + csize;
- continue;
- }
- i = 0;
- while (tmap[i].text != NULL)
- {
- if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
- {
- char *word;
- if ((flags & 0x40) > 0)
- {
- /* "group" identifier, skip a byte */
- pos++;
- csize--;
- }
-
- switch (tmap[i].fmt)
- {
- case T:
- /* this byte describes the encoding
- try to convert strings to UTF-8
- if it fails, then forget it */
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "UTF-16");
- break;
- case 0x02:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "UTF-16BE");
- break;
- case 0x03:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "UTF-8");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
- csize - 1, "ISO-8859-1");
- break;
- }
- break;
- case U:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
- csize, "ISO-8859-1");
- break;
- case UL:
- if (csize < 6)
- return 0; /* malformed */
- /* find end of description */
- off = 14;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "UTF-16");
- break;
- case 0x02:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "UTF-16BE");
- break;
- case 0x03:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "UTF-8");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
- csize - off, "ISO-8859-1");
- break;
- }
- break;
- case SL:
- if (csize < 7)
- return 0; /* malformed */
- /* find end of description */
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "UTF-16");
- break;
- case 0x02:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "UTF-16BE");
- break;
- case 0x03:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "UTF-8");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
- csize - 6, "ISO-8859-1");
- break;
- }
- break;
- case L:
- if (csize < 5)
- return 0; /* malformed */
- /* find end of description */
- switch (data[pos + 10])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
- csize - 4, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
- csize - 4, "UTF-16");
- break;
- case 0x02:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
- csize - 4, "UTF-16BE");
- break;
- case 0x03:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
- csize - 4, "UTF-8");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
- csize - 4, "ISO-8859-1");
- break;
- }
- break;
- case I:
- if (csize < 2)
- return 0; /* malformed */
- /* find end of mime type */
- off = 11;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- mime = strdup ((const char*) &data[pos + 11]);
-
- switch (data[pos+off])
- {
- case 0x03:
- case 0x04:
- type = EXTRACTOR_METATYPE_COVER_PICTURE;
- break;
- case 0x07:
- case 0x08:
- case 0x09:
- case 0x0A:
- case 0x0B:
- case 0x0C:
- type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
- break;
- case 0x0D:
- case 0x0E:
- case 0x0F:
- type = EXTRACTOR_METATYPE_EVENT_PICTURE;
- break;
- case 0x14:
- type = EXTRACTOR_METATYPE_LOGO;
- type = EXTRACTOR_METATYPE_LOGO;
- break;
- default:
- type = EXTRACTOR_METATYPE_PICTURE;
- break;
- }
- off++;
-
- /* find end of description */
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- {
- if (mime != NULL)
- free (mime);
- return 0; /* malformed */
- }
- off++;
- if ( (mime != NULL) &&
- (0 == strcasecmp ("-->",
- mime)) )
- {
- /* not supported */
- }
- else
- {
- if (0 != proc (proc_cls,
- "id3v24",
- type,
- EXTRACTOR_METAFORMAT_BINARY,
- mime,
- (const char*) &data[pos + off],
- csize + 6 - off))
- {
- if (mime != NULL)
- free (mime);
- return 1;
- }
- }
- if (mime != NULL)
- free (mime);
- word = NULL;
- break;
- default:
- return 0;
- }
- if ((word != NULL) && (strlen (word) > 0))
- {
- if (0 != proc (proc_cls,
- "id3v24",
- tmap[i].type,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- word,
- strlen(word)+1))
- {
- free (word);
- return 1;
- }
- }
- if (word != NULL)
- free (word);
- break;
- }
- i++;
- }
- pos += 10 + csize;
- }
- return 0;
-}
-
-/* end of id3v24_extractor.c */
Modified: Extractor/src/plugins/id3v2_extractor.c
===================================================================
--- Extractor/src/plugins/id3v2_extractor.c 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/id3v2_extractor.c 2012-03-27 13:05:17 UTC (rev 20783)
@@ -26,6 +26,8 @@
#endif
#include "convert.h"
+#include "extractor_plugins.h"
+
#define DEBUG_EXTRACT_ID3v2 0
enum Id3v2Fmt
@@ -47,314 +49,723 @@
static Matches tmap[] = {
/* skipping UFI */
- {"TT1", EXTRACTOR_METATYPE_SECTION, T},
- {"TT2", EXTRACTOR_METATYPE_TITLE, T},
- {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
- {"TP1", EXTRACTOR_METATYPE_ARTIST, T},
- {"TP2", EXTRACTOR_METATYPE_PERFORMER, T},
- {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T},
- {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T},
- {"TCM", EXTRACTOR_METATYPE_COMPOSER, T},
- {"TXT", EXTRACTOR_METATYPE_WRITER, T},
- {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T},
- {"TCO", EXTRACTOR_METATYPE_GENRE, T},
- {"TAL", EXTRACTOR_METATYPE_ALBUM, T},
- {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T},
- {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
- {"TRC", EXTRACTOR_METATYPE_ISRC, T},
- {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
+ {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
+ {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
+ {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
+ {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
+ {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
+ {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
+ {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
+ {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
+ {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
+ {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
+ {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
+ {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
+ {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+ {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+ {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
+ {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
/*
FIXME: these two and TYE should be combined into
the actual publication date (if TRD is missing)
- {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE},
- {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+ {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+ {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
*/
- {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T},
- {"TMT", EXTRACTOR_METATYPE_SOURCE, T},
- {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
- {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
- {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T},
- {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T},
- {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T},
- {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
- {"TOF", EXTRACTOR_METATYPE_FILENAME, T},
- {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as
unit */
- {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
+ {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
+ {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
+ {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+ {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+ {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
+ {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
+ {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
+ {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
+ {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
+ {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as
unit */
+ {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
/* skipping TDY, TKE */
- {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
- {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
- {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
- {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
+ {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+ {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+ {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+ {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
/* skipping TXX */
- {"WAF", EXTRACTOR_METATYPE_URL, U},
- {"WAR", EXTRACTOR_METATYPE_URL, U},
- {"WAS", EXTRACTOR_METATYPE_URL, U},
- {"WCM", EXTRACTOR_METATYPE_URL, U},
- {"WCP", EXTRACTOR_METATYPE_RIGHTS, U},
- {"WCB", EXTRACTOR_METATYPE_URL, U},
+ {"WAF ", EXTRACTOR_METATYPE_URL, U},
+ {"WAR ", EXTRACTOR_METATYPE_URL, U},
+ {"WAS ", EXTRACTOR_METATYPE_URL, U},
+ {"WCM ", EXTRACTOR_METATYPE_URL, U},
+ {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
+ {"WCB ", EXTRACTOR_METATYPE_URL, U},
/* skipping WXX */
- {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+ {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
/* skipping MCI */
/* skipping ETC */
/* skipping MLL */
/* skipping STC */
- {"ULT", EXTRACTOR_METATYPE_LYRICS, UL},
- {"SLT", EXTRACTOR_METATYPE_LYRICS, SL},
- {"COM", EXTRACTOR_METATYPE_COMMENT, L},
+ {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
+ {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
+ {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
/* skipping RVA */
/* skipping EQU */
/* skipping REV */
- {"PIC", EXTRACTOR_METATYPE_PICTURE, I},
+ {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
/* skipping GEN */
- /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
- /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
+ /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
+ /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
/* skipping BUF */
/* skipping CRM */
/* skipping CRA */
- /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */
+ /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
+
+
+ {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
+ {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+ {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
+ {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
+ {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
+ {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
+ /* TDLY */
+ {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
+ {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
+ {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+ /* TIME, idv23 only */
+ {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
+ {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
+ {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
+ /* TKEY */
+ {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
+ {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as
unit */
+ {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
+ {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+ {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+ {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+ {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
+ {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
+ {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
+ {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
+ {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
+ {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
+ {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
+ {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+ {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
+ {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+ /* TRDA, idv23 only */
+ {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
+ /* TRSO */
+ {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
+ {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
+ /* TSSE */
+ {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
+ {"WCOM", EXTRACTOR_METATYPE_URL, U},
+ {"WCOP", EXTRACTOR_METATYPE_URL, U},
+ {"WOAF", EXTRACTOR_METATYPE_URL, U},
+ {"WOAS", EXTRACTOR_METATYPE_URL, U},
+ {"WORS", EXTRACTOR_METATYPE_URL, U},
+ {"WPAY", EXTRACTOR_METATYPE_URL, U},
+ {"WPUB", EXTRACTOR_METATYPE_URL, U},
+ {"WXXX", EXTRACTOR_METATYPE_URL, T},
+ {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
+ /* ... */
+ {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
+ {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
+ {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
+ /* ... */
+ {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
+ /* ... */
+ {"LINK", EXTRACTOR_METATYPE_URL, U},
+ /* ... */
+ {"USER", EXTRACTOR_METATYPE_LICENSE, T},
+ /* ... */
+
+ /* new frames in id3v24 */
+ /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
+ {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
+ /* TDRC, TDRL, TDTG */
+ {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+ {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
+ {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
+ {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
+ {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
+ {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
+ {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
+ {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
+
{NULL, 0, T},
};
+struct id3v2_state
+{
+ int state;
+ unsigned int tsize;
+ size_t csize;
+ char id[4];
+ int32_t ti;
+ char ver;
+ char extended_header;
+ uint16_t frame_flags;
+ char *mime;
+};
-/* mimetype = audio/mpeg */
-int
-EXTRACTOR_id3v2_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
+enum ID3v2State
{
- unsigned int tsize;
- unsigned int pos;
+ ID3V2_INVALID = -1,
+ ID3V2_READING_HEADER = 0,
+ ID3V2_READING_FRAME_HEADER,
+ ID3V23_READING_EXTENDED_HEADER,
+ ID3V24_READING_EXTENDED_HEADER,
+ ID3V2_READING_FRAME
+};
+
+void
+EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ struct id3v2_state *state;
+ state = plugin->state = malloc (sizeof (struct id3v2_state));
+ if (state == NULL)
+ return;
+ memset (state, 0, sizeof (struct id3v2_state));
+ state->state = ID3V2_READING_HEADER;
+ state->ti = -1;
+ state->mime = NULL;
+}
+
+void
+EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ struct id3v2_state *state = plugin->state;
+ if (state != NULL)
+ {
+ if (state->mime != NULL)
+ free (state->mime);
+ free (state);
+ }
+ plugin->state = NULL;
+}
+
+static int
+find_type (const char *id, size_t len)
+{
+ int i;
+ for (i = 0; tmap[i].text != NULL; i++)
+ if (0 == strncmp (tmap[i].text, id, len))
+ return i;
+ return -1;
+}
+
+int
+EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
+ EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int64_t file_position;
+ int64_t file_size;
+ int64_t offset = 0;
+ int64_t size;
+ struct id3v2_state *state;
+ unsigned char *data;
+ char *word = NULL;
unsigned int off;
enum EXTRACTOR_MetaType type;
- const char *mime;
+ unsigned char picture_type;
- if ((size < 16) ||
- (data[0] != 0x49) ||
- (data[1] != 0x44) ||
- (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
- return 0;
- /* unsync: (data[5] & 0x80) > 0; */
- tsize = (((data[6] & 0x7F) << 21) |
- ((data[7] & 0x7F) << 14) |
- ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
+ if (plugin == NULL || plugin->state == NULL)
+ return 1;
- if (tsize + 10 > size)
- return 0;
- pos = 10;
- while (pos < tsize)
+ state = plugin->state;
+ file_position = plugin->position;
+ file_size = plugin->fsize;
+ size = plugin->map_size;
+ data = plugin->shm_ptr;
+
+ if (plugin->seek_request < 0)
+ return 1;
+ if (file_position - plugin->seek_request > 0)
+ {
+ plugin->seek_request = -1;
+ return 1;
+ }
+ if (plugin->seek_request - file_position < size)
+ offset = plugin->seek_request - file_position;
+
+ while (1)
+ {
+ switch (state->state)
{
- size_t csize;
- int i;
+ case ID3V2_INVALID:
+ plugin->seek_request = -1;
+ return 1;
+ case ID3V2_READING_HEADER:
+ /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
+ * Q: Where is an ID3v2 tag located in an MP3 file?
+ * A: It is most likely located at the beginning of the file. Look for the
+ * marker "ID3" in the first 3 bytes of the file. If it's not there, it
+ * could be at the end of the file (if the tag is ID3v2.4). Look for the
+ * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the
+ * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags
+ * in the actual MPEG stream, on an MPEG frame boundary. Almost nobody does
+ * this.
+ * Parsing of such tags will not be completely correct, because we can't
+ * seek backwards. We will have to seek to file_size - chunk_size instead
+ * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
+ * size, even though plugins often make assumptions about chunk size being large
+ * enough to make one atomic read without seeking, if offset == 0) and search
+ * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before
+ * it (or 10 bytes before the end of file, if id3v1 is not there; not sure
+ * about APETAGs; we should probably just scan byte-by-byte from the end of file,
+ * until we hit 3DI, or reach the offset == 0), and use it to set offset to the
+ * start of ID3v24 header, adjust the following file_position check and data
+ * indices (use offset), and otherwise proceed as normal (maybe file size checks
+ * along the way will have to be adjusted by -1, or made ">" instead of ">=";
+ * these problems do not arise for tags at the beginning of the file, since
+ * audio itself is usually at least 1-byte long; when the tag is at the end of
+ * file, these checks will have to be 100% correct).
+ * If there are two tags (at the beginning and at the end of the file),
+ * a SEEK in the one at the beginning of the file can be used to seek to the
+ * one at the end.
+ */
+ /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that
+ * tells the parser to augment id3v1 values with the values from id3v2 (if this
+ * flag is not set, id3v2 parser must discard id3v1 data).
+ * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
+ */
+ if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] !=
0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) &&
(data[3] != 0x04))/* || (data[4] != 0x00) minor versions are
backward-compatible*/)
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ state->ver = data[3];
+ if (state->ver == 0x02)
+ {
+ state->extended_header = 0;
+ }
+ else if ((state->ver == 0x03) || (state->ver == 0x04))
+ {
+ if ((data[5] & 0x80) > 0)
+ {
+ /* unsync is not supported in id3v23 or id3v24*/
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ state->extended_header = (data[5] & 0x40) > 0;
+ if ((data[5] & 0x20) > 0)
+ {
+ /* experimental is not supported in id3v23 or id3v24*/
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ }
+ state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) |
((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
+ if (state->tsize + 10 > file_size)
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ offset = 10;
+ if (state->ver == 0x03 && state->extended_header)
+ state->state = ID3V23_READING_EXTENDED_HEADER;
+ else if (state->ver == 0x04 && state->extended_header)
+ state->state = ID3V24_READING_EXTENDED_HEADER;
+ else
+ state->state = ID3V2_READING_FRAME_HEADER;
+ break;
+ case ID3V23_READING_EXTENDED_HEADER:
+ if (offset + 9 >= size)
+ {
+ if (offset == 0)
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ plugin->seek_request = file_position + offset;
+ return 0;
+ }
+ if (state->ver == 0x03 && state->extended_header)
+ {
+ uint32_t padding, extended_header_size;
+ extended_header_size = (((data[offset]) << 24) | ((data[offset + 1])
<< 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
+ padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) |
((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
+ if (data[offset + 4] == 0 && data[offset + 5] == 0)
+ /* Skip the CRC32 byte after extended header */
+ offset += 1;
+ offset += 4 + extended_header_size;
+ if (padding < state->tsize)
+ state->tsize -= padding;
+ else
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ }
+ break;
+ case ID3V24_READING_EXTENDED_HEADER:
+ if (offset + 6 >= size)
+ {
+ if (offset == 0)
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ plugin->seek_request = file_position + offset;
+ return 0;
+ }
+ if ( (state->ver == 0x04) && (state->extended_header))
+ {
+ uint32_t extended_header_size;
- if (pos + 7 > tsize)
+ extended_header_size = (((data[offset]) << 24) |
+ ((data[offset + 1]) << 16) |
+ ((data[offset + 2]) << 8) |
+ ((data[offset + 3]) << 0));
+ offset += 4 + extended_header_size;
+ }
+ break;
+ case ID3V2_READING_FRAME_HEADER:
+ if (file_position + offset > state->tsize ||
+ ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize)
||
+ (((state->ver == 0x03) || (state->ver == 0x04))&& file_position +
offset + 10 >= state->tsize))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ if (((state->ver == 0x02) && (offset + 6 >= size)) ||
+ (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >=
size)))
+ {
+ plugin->seek_request = file_position + offset;
return 0;
- csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5];
- if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0))
+ }
+ if (state->ver == 0x02)
+ {
+ memcpy (state->id, &data[offset], 3);
+ state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) +
data[offset + 5];
+ if ((file_position + offset + 6 + state->csize > file_size) ||
(state->csize > file_size) || (state->csize == 0))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ offset += 6;
+ state->frame_flags = 0;
+ }
+ else if ((state->ver == 0x03) || (state->ver == 0x04))
+ {
+ memcpy (state->id, &data[offset], 4);
+ if (state->ver == 0x03)
+ state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) +
(data[offset + 6] << 8) + data[offset + 7];
+ else if (state->ver == 0x04)
+ state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset +
5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] &
0x7F) << 00);
+ if ((file_position + offset + 10 + state->csize > file_size) ||
(state->csize > file_size) || (state->csize == 0))
+ {
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
+ if (state->ver == 0x03)
+ {
+ if (((state->frame_flags & 0x80) > 0) /* compressed, not yet
supported */ ||
+ ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
+ {
+ /* Skip to next frame header */
+ offset += 10 + state->csize;
+ break;
+ }
+ }
+ else if (state->ver == 0x04)
+ {
+ if (((state->frame_flags & 0x08) > 0) /* compressed, not yet
supported */ ||
+ ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */
||
+ ((state->frame_flags & 0x02) > 0) /* unsynchronization, not
supported */)
+ {
+ /* Skip to next frame header */
+ offset += 10 + state->csize;
+ break;
+ }
+ if ((state->frame_flags & 0x01) > 0)
+ {
+ /* Skip data length indicator */
+ state->csize -= 4;
+ offset += 4;
+ }
+ }
+ offset += 10;
+ }
+
+ state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ?
3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
+ if (state->ti == -1)
+ {
+ offset += state->csize;
break;
- i = 0;
- while (tmap[i].text != NULL)
+ }
+ state->state = ID3V2_READING_FRAME;
+ break;
+ case ID3V2_READING_FRAME:
+ if (offset == 0 && state->csize > size)
+ {
+ /* frame size is larger than the size of one data chunk we get at a time */
+ offset += state->csize;
+ state->state = ID3V2_READING_FRAME_HEADER;
+ break;
+ }
+ if (offset + state->csize > size)
+ {
+ plugin->seek_request = file_position + offset;
+ return 0;
+ }
+ word = NULL;
+ if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
+ ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
+ {
+ /* "group" identifier, skip a byte */
+ offset++;
+ state->csize--;
+ }
+ switch (tmap[state->ti].fmt)
+ {
+ case T:
+ if (data[offset] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ state->csize - 1, "ISO-8859-1");
+ else if (data[offset] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ state->csize - 1, "UCS-2");
+ else if ((state->ver == 0x04) && (data[offset] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ state->csize - 1, "UTF-16BE");
+ else if ((state->ver == 0x04) && (data[offset] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ state->csize - 1, "UTF-8");
+ else
+ /* bad encoding byte, try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 1],
+ state->csize - 1, "ISO-8859-1");
+ break;
+ case U:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
+ state->csize, "ISO-8859-1");
+ break;
+ case UL:
+ if (state->csize < 6)
{
- if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3))
- {
- char *word;
- /* this byte describes the encoding
- try to convert strings to UTF-8
- if it fails, then forget it */
- switch (tmap[i].fmt)
- {
- case T:
- switch (data[pos + 6])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 7],
- csize - 1,
"ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 7],
- csize - 1,
"UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 7],
- csize - 1,
"ISO-8859-1");
- break;
- }
- break;
- case U:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 6],
- csize, "ISO-8859-1");
- break;
- case UL:
- if (csize < 6)
- return 0; /* malformed */
- /* find end of description */
- off = 10;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- switch (data[pos + 6])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + off],
- csize - off,
"ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + off],
- csize - off,
"UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + off],
- csize - off,
"ISO-8859-1");
- break;
- }
- break;
- case SL:
- if (csize < 7)
- return 0; /* malformed */
- /* find end of description */
- switch (data[pos + 6])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 12],
- csize - 6,
"ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 12],
- csize - 6,
"UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 12],
- csize - 6,
"ISO-8859-1");
- break;
- }
- break;
- case L:
- if (csize < 5)
- return 0; /* malformed */
- /* find end of description */
- switch (data[pos + 6])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 10],
- csize - 4,
"ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 10],
- csize - 4,
"UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *)
&data[pos + 10],
- csize - 4,
"ISO-8859-1");
- break;
- }
- break;
- case I:
- if (csize < 6)
- return 0; /* malformed */
- /* find end of description */
- off = 12;
- while ( (off < size) &&
- (off - pos < csize) &&
- (data[pos + off] == '\0') )
- off++;
- if ( (off >= csize) ||
- (data[pos+off] != '\0') )
- return 0; /* malformed */
- off++;
- switch (data[pos+11])
- {
- case 0x03:
- case 0x04:
- type = EXTRACTOR_METATYPE_COVER_PICTURE;
- break;
- case 0x07:
- case 0x08:
- case 0x09:
- case 0x0A:
- case 0x0B:
- case 0x0C:
- type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
- break;
- case 0x0D:
- case 0x0E:
- case 0x0F:
- type = EXTRACTOR_METATYPE_EVENT_PICTURE;
- break;
- case 0x14:
- type = EXTRACTOR_METATYPE_LOGO;
- type = EXTRACTOR_METATYPE_LOGO;
- break;
- default:
- type = EXTRACTOR_METATYPE_PICTURE;
- break;
- }
- if (0 == strncasecmp ("PNG",
- (const char*) &data[pos + 7], 3))
- mime = "image/png";
- else if (0 == strncasecmp ("JPG",
- (const char*) &data[pos + 7], 3))
- mime = "image/jpeg";
- else
- mime = NULL;
- if (0 == strncasecmp ("-->",
- (const char*) &data[pos + 7], 3))
- {
- /* not supported */
- }
- else
- {
- if (0 != proc (proc_cls,
- "id3v2",
- type,
- EXTRACTOR_METAFORMAT_BINARY,
- mime,
- (const char*) &data[pos + off],
- csize + 6 - off))
- return 1;
- }
- word = NULL;
- break;
- default:
- return 0;
- }
- if ((word != NULL) && (strlen (word) > 0))
- {
- if (0 != proc (proc_cls,
- "id3v2",
- tmap[i].type,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- word,
- strlen(word)+1))
- {
- free (word);
- return 1;
- }
- }
- if (word != NULL)
- free (word);
- break;
- }
- i++;
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
}
- pos += 6 + csize;
+ /* find end of description */
+ off = 4;
+ while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ off++;
+ if ((off >= state->csize) || (data[offset + off] != '\0'))
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ off++;
+ if (data[offset] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "ISO-8859-1");
+ else if (data[offset] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UCS-2");
+ else if ((state->ver == 0x04) && (data[offset] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UTF-16BE");
+ else if ((state->ver == 0x04) && (data[offset] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UTF-8");
+ else
+ /* bad encoding byte, try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "ISO-8859-1");
+ break;
+ case SL:
+ if (state->csize < 7)
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ if (data[offset] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ state->csize - 6, "ISO-8859-1");
+ else if (data[offset] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ state->csize - 6, "UCS-2");
+ else if ((state->ver == 0x04) && (data[offset] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ state->csize - 6, "UTF-16BE");
+ else if ((state->ver == 0x04) && (data[offset] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ state->csize - 6, "UTF-8");
+ else
+ /* bad encoding byte, try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ 6],
+ state->csize - 6, "ISO-8859-1");
+ break;
+ case L:
+ if (state->csize < 5)
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ /* find end of description */
+ off = 4;
+ while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ off++;
+ if ((off >= state->csize) || (data[offset + off] != '\0'))
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ off++;
+
+ if (data[offset] == 0x00)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "ISO-8859-1");
+ else if (data[offset] == 0x01)
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UCS-2");
+ else if ((state->ver == 0x04) && (data[offset] == 0x02))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UTF-1offBE");
+ else if ((state->ver == 0x04) && (data[offset] == 0x03))
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "UTF-8");
+ else
+ /* bad encoding byte, try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset
+ off],
+ state->csize - off, "ISO-8859-1");
+ break;
+ case I:
+ if ( ( (state->ver == 0x02) &&
+ (state->csize < 7) ) ||
+ ( ( (state->ver == 0x03) ||
+ (state->ver == 0x04)) && (state->csize < 5)) )
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ if (state->mime != NULL)
+ free (state->mime);
+ state->mime = NULL;
+ if (state->ver == 0x02)
+ {
+ off = 5;
+ picture_type = data[offset + 5];
+ }
+ else if ((state->ver == 0x03) || (state->ver == 0x04))
+ {
+ off = 1;
+ while ((off < size) && (off < offset + state->csize) && (data[offset
+ off] != '\0') )
+ off++;
+ if ((off >= state->csize) || (data[offset + off] != '\0'))
+ {
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ state->mime = malloc (off);
+ memcpy (state->mime, &data[offset + 1], off - 1);
+ state->mime[off - 1] = '\0';
+ off += 1;
+ picture_type = data[offset];
+ off += 1;
+ }
+ /* find end of description */
+ while ((off < size) && (off < offset + state->csize) && (data[offset +
off] != '\0'))
+ off++;
+ if ((off >= state->csize) || (data[offset + off] != '\0'))
+ {
+ free (state->mime);
+ state->mime = NULL;
+ /* malformed */
+ state->state = ID3V2_INVALID;
+ break;
+ }
+ off++;
+ switch (picture_type)
+ {
+ case 0x03:
+ case 0x04:
+ type = EXTRACTOR_METATYPE_COVER_PICTURE;
+ break;
+ case 0x07:
+ case 0x08:
+ case 0x09:
+ case 0x0A:
+ case 0x0B:
+ case 0x0C:
+ type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
+ break;
+ case 0x0D:
+ case 0x0E:
+ case 0x0F:
+ type = EXTRACTOR_METATYPE_EVENT_PICTURE;
+ break;
+ case 0x14:
+ type = EXTRACTOR_METATYPE_LOGO;
+ type = EXTRACTOR_METATYPE_LOGO;
+ break;
+ default:
+ type = EXTRACTOR_METATYPE_PICTURE;
+ break;
+ }
+ if (state->ver == 0x02)
+ {
+ if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
+ state->mime = strdup ("image/png");
+ else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1],
3))
+ state->mime = strdup ("image/jpeg");
+ else
+ state->mime = NULL;
+ }
+ else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr
(state->mime, '/') == NULL))
+ {
+ size_t mime_len = strlen (state->mime);
+ char *type_mime = malloc (mime_len + 6 + 1);
+ snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
+ free (state->mime);
+ state->mime = type_mime;
+ }
+ if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->")))
+ {
+ /* not supported */
+ free (state->mime);
+ state->mime = NULL;
+ }
+ else
+ {
+ if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY,
state->mime, (const char*) &data[offset + off], state->csize - off))
+ {
+ if (state->mime != NULL)
+ free (state->mime);
+ state->mime = NULL;
+ return 1;
+ }
+ if (state->mime != NULL)
+ free (state->mime);
+ state->mime = NULL;
+ }
+ word = NULL;
+ break;
+ default:
+ return 1;
+ }
+ if ((word != NULL) && (strlen (word) > 0))
+ {
+ if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type,
EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
+ {
+ free (word);
+ return 1;
+ }
+ }
+ if (word != NULL)
+ free (word);
+ offset = offset + state->csize;
+ state->state = ID3V2_READING_FRAME_HEADER;
+ break;
}
- return 0;
+ }
+ return 1;
}
/* end of id3v2_extractor.c */
Modified: Extractor/src/plugins/mp3_extractor.c
===================================================================
--- Extractor/src/plugins/mp3_extractor.c 2012-03-27 12:46:29 UTC (rev 20782)
+++ Extractor/src/plugins/mp3_extractor.c 2012-03-27 13:05:17 UTC (rev 20783)
@@ -36,8 +36,41 @@
#include <unistd.h>
#include <stdlib.h>
-#define MAX_MP3_SCAN_DEEP 16768
-const int max_frames_scan = 1024;
+#include "extractor_plugins.h"
+
+#if WINDOWS
+#include <sys/param.h> /* #define BYTE_ORDER */
+#endif
+#ifndef __BYTE_ORDER
+#ifdef _BYTE_ORDER
+#define __BYTE_ORDER _BYTE_ORDER
+#else
+#ifdef BYTE_ORDER
+#define __BYTE_ORDER BYTE_ORDER
+#endif
+#endif
+#endif
+#ifndef __BIG_ENDIAN
+#ifdef _BIG_ENDIAN
+#define __BIG_ENDIAN _BIG_ENDIAN
+#else
+#ifdef BIG_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#endif
+#endif
+#endif
+#ifndef __LITTLE_ENDIAN
+#ifdef _LITTLE_ENDIAN
+#define __LITTLE_ENDIAN _LITTLE_ENDIAN
+#else
+#ifdef LITTLE_ENDIAN
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#endif
+#endif
+#endif
+
+#define LARGEST_FRAME_SIZE 8065
+
enum
{ MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 };
@@ -45,6 +78,11 @@
{ LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 };
#define MPA_SYNC_MASK ((unsigned int) 0xFFE00000)
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define MPA_SYNC_MASK_MEM ((unsigned int) 0xFFE00000)
+#else
+#define MPA_SYNC_MASK_MEM ((unsigned int) 0x0000E0FF)
+#endif
#define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000)
#define MPA_VERSION_MASK ((unsigned int) 0x00080000)
#define MPA_LAYER_MASK ((unsigned int) 0x3)
@@ -106,169 +144,274 @@
#define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t,
EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
-/* mimetype = audio/mpeg */
-int
-EXTRACTOR_mp3_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
+struct mp3_state
{
- unsigned int header;
- int counter = 0;
+ int state; /* current parser state, one of enum MP3State */
+
+ uint32_t header; /* candidate frame header, built from 4 buffer bytes, first byte most significant */
+ int sample_rate; /* sample rate looked up in freq_table for the most recent valid frame */
+ char mpeg_ver; /* MPEG_V1 / MPEG_V2 / MPEG_V25 of the most recent valid frame */
+ char layer; /* LAYER_1 / LAYER_2 / LAYER_3 of the most recent valid frame */
+ char vbr_flag; /* set to 1 once a frame's kbps differs from the running average */
+ int ch; /* channel mode bits of the most recent valid frame; indexes channel_modes */
+ char copyright_flag; /* copyright header bit of the most recent valid frame */
+ char original_flag; /* original header bit of the most recent valid frame */
+ int avg_bps; /* running sum of per-frame kbps; divided by number_of_valid_frames when reporting */
+ int bitrate; /* bitrate (1000 * bitrate_table entry) of the most recent valid frame */
+
+ int64_t number_of_frames; /* candidate headers examined, valid or not */
+ int64_t number_of_valid_frames; /* candidates that passed every header check */
+};
+
+enum MP3State
+{
+ MP3_LOOKING_FOR_FRAME = 0, /* scanning the mapped buffer for a frame sync pattern */
+ MP3_READING_FRAME = 1, /* decoding the candidate header stored in state->header */
+};
+
+void
+EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ struct mp3_state *state;
+ state = plugin->state = malloc (sizeof (struct mp3_state));
+ if (state == NULL)
+ return;
+ state->header = 0;
+ state->sample_rate = 0;
+ state->number_of_frames = 0;
+ state->number_of_valid_frames = 0;
+ state->mpeg_ver = 0;
+ state->layer = 0;
+ state->vbr_flag = 0;
+ state->ch = 0;
+ state->copyright_flag = 0;
+ state->original_flag = 0;
+ state->avg_bps = 0;
+ state->bitrate = 0;
+ state->state = 0;
+}
+
+void
+EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ if (plugin->state != NULL)
+ {
+ free (plugin->state);
+ }
+ plugin->state = NULL;
+}
+
+static int
+calculate_frame_statistics_and_maybe_report_it (struct EXTRACTOR_PluginList
*plugin,
+ struct mp3_state *state, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int length;
+ char format[512];
+
+ if (((double) state->number_of_valid_frames / (double)
state->number_of_frames) < 0.5 ||
+ state->number_of_valid_frames < 2)
+ /* Unlikely to be an mp3 file */
+ return 0;
+ ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
+ state->avg_bps = state->avg_bps / state->number_of_valid_frames;
+ if (state->sample_rate > 0)
+ length = 1152 * state->number_of_valid_frames / state->sample_rate;
+ else if (state->avg_bps > 0 || state->bitrate > 0)
+ length = plugin->fsize / (state->avg_bps ? state->avg_bps : state->bitrate
? state->bitrate : 1) / 125;
+ else
+ length = 0;
+
+ ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION);
+ snprintf (format,
+ sizeof (format),
+ "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
+ mpeg_versions[state->mpeg_ver - 1],
+ layer_names[state->layer - 1],
+ state->avg_bps,
+ state->vbr_flag ? _("VBR") : _("CBR"),
+ state->sample_rate,
+ channel_modes[state->ch],
+ state->copyright_flag ? _("copyright") : _("no copyright"),
+ state->original_flag ? _("original") : _("copy") );
+
+ ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
+ snprintf (format,
+ sizeof (format), "%dm%02d",
+ length / 60, length % 60);
+ ADDR (format, EXTRACTOR_METATYPE_DURATION);
+ return 0;
+}
+
+int
+EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin,
+ EXTRACTOR_MetaDataProcessor proc,
+ void *proc_cls)
+{
+ int64_t file_position;
+ int64_t file_size;
+ size_t offset = 0;
+ size_t size;
+ unsigned char *data;
+ struct mp3_state *state;
+
+ size_t frames_found_in_this_round = 0;
+ int start_anew = 0;
+
char mpeg_ver = 0;
char layer = 0;
int idx_num = 0;
int bitrate = 0; /*used for each frame */
- int avg_bps = 0; /*average bitrate */
- int vbr_flag = 0;
int copyright_flag = 0;
int original_flag = 0;
- int length = 0;
int sample_rate = 0;
int ch = 0;
int frame_size;
- int frames = 0;
- size_t pos = 0;
- char format[512];
- do
+ if (plugin == NULL || plugin->state == NULL)
+ return 1;
+
+ state = plugin->state;
+ file_position = plugin->position;
+ file_size = plugin->fsize;
+ size = plugin->map_size;
+ data = plugin->shm_ptr;
+
+ if (plugin->seek_request < 0)
+ return 1;
+ if (file_position - plugin->seek_request > 0)
+ {
+ plugin->seek_request = -1;
+ return 1;
+ }
+ if (plugin->seek_request - file_position < size)
+ offset = plugin->seek_request - file_position;
+
+ while (1)
+ {
+ switch (state->state)
{
- /* seek for frame start */
- if (pos + sizeof (header) > size)
+ case MP3_LOOKING_FOR_FRAME:
+ /* Look for a frame header */
+ while (offset + sizeof (state->header) < size && (((*((uint32_t *)
&data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM))
+ offset += 1;
+ if (offset + sizeof (state->header) >= size)
+ {
+ /* Alternative: (frames_found_in_this_round < (size /
LARGEST_FRAME_SIZE / 2)) is too generous */
+ if ((file_position == 0 && ((double) state->number_of_valid_frames /
(double) state->number_of_frames) < 0.5) ||
+ file_position + offset + sizeof (state->header) >= file_size)
{
- return 0;
- } /*unable to find header */
- header = (data[pos] << 24) | (data[pos+1] << 16) |
- (data[pos+2] << 8) | data[pos+3];
- if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
- break; /*found header sync */
- pos++;
- counter++; /*next try */
- }
- while (counter < MAX_MP3_SCAN_DEEP);
- if (counter >= MAX_MP3_SCAN_DEEP)
- return 0;
-
- do
- { /*ok, now we found a mp3 frame header */
- frames++;
- switch (header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
- {
- case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
- mpeg_ver = MPEG_V1;
- break;
- case (MPA_LAST_SYNC_BIT_MASK):
- mpeg_ver = MPEG_V2;
- break;
- case 0:
- mpeg_ver = MPEG_V25;
- break;
- case (MPA_VERSION_MASK):
- default:
- return 0;
+ calculate_frame_statistics_and_maybe_report_it (plugin, state, proc,
proc_cls);
+ return 1;
}
- switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
- {
- case (0x1 << MPA_LAYER_SHIFT):
- layer = LAYER_3;
- break;
- case (0x2 << MPA_LAYER_SHIFT):
- layer = LAYER_2;
- break;
- case (0x3 << MPA_LAYER_SHIFT):
- layer = LAYER_1;
- break;
- case 0x0:
- default:
- return 0;
- }
+ plugin->seek_request = file_position + offset;
+ return 0;
+ }
+ state->header = (data[offset] << 24) | (data[offset + 1] << 16) |
+ (data[offset + 2] << 8) | data[offset + 3];
+ if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
+ {
+ state->state = MP3_READING_FRAME;
+ break;
+ }
+ break;
+ case MP3_READING_FRAME:
+ state->number_of_frames += 1;
+ start_anew = 0;
+ switch (state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
+ {
+ case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
+ mpeg_ver = MPEG_V1;
+ break;
+ case (MPA_LAST_SYNC_BIT_MASK):
+ mpeg_ver = MPEG_V2;
+ break;
+ case 0:
+ mpeg_ver = MPEG_V25;
+ break;
+ case (MPA_VERSION_MASK):
+ default:
+ state->state = MP3_LOOKING_FOR_FRAME;
+ offset += 1;
+ start_anew = 1;
+ }
+ if (start_anew)
+ break;
+ switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
+ {
+ case (0x1 << MPA_LAYER_SHIFT):
+ layer = LAYER_3;
+ break;
+ case (0x2 << MPA_LAYER_SHIFT):
+ layer = LAYER_2;
+ break;
+ case (0x3 << MPA_LAYER_SHIFT):
+ layer = LAYER_1;
+ break;
+ case 0x0:
+ default:
+ state->state = MP3_LOOKING_FOR_FRAME;
+ offset += 1;
+ start_anew = 1;
+ }
+ if (start_anew)
+ break;
if (mpeg_ver < MPEG_V25)
idx_num = (mpeg_ver - 1) * 3 + layer - 1;
else
idx_num = 2 + layer;
- bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) &
+ bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) &
MPA_BITRATE_MASK][idx_num];
if (bitrate < 0)
- {
- frames--;
- break;
- } /*error in header */
- sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) &
+ {
+ /*error in header */
+ state->state = MP3_LOOKING_FOR_FRAME;
+ offset += 1;
+ break;
+ }
+ sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) &
MPA_FREQ_MASK][mpeg_ver - 1];
- if (sample_rate < 0)
- {
- frames--;
- break;
- } /*error in header */
- ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
- copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1;
- original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1;
- frame_size =
- 144 * bitrate / (sample_rate ? sample_rate : 1) +
- ((header >> MPA_PADDING_SHIFT) & 0x1);
+ if (sample_rate <= 0)
+ {
+ /*error in header */
+ state->state = MP3_LOOKING_FOR_FRAME;
+ offset += 1;
+ break;
+ }
+ ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
+ copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1;
+ original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1;
+ if (layer == LAYER_1)
+ frame_size = (12 * bitrate / sample_rate + ((state->header >>
MPA_PADDING_SHIFT) & 0x1)) * 4;
+ else
+ frame_size = 144 * bitrate / sample_rate + ((state->header >>
MPA_PADDING_SHIFT) & 0x1);
if (frame_size <= 0)
- {
- /* Technically, bitrate can be 0. However, but this particular
- * extractor is incapable of correctly processing 0-bitrate files
- * anyway. And bitrate == 0 might also mean that this is just a
- * random binary sequence, which is far more likely to be true.
- *
- * amatus suggests to use a different algorithm and parse significant
- * part of the file, then count the number of correct mpeg frames.
- * If the the percentage of correct frames is below a threshold,
- * then this is not an mpeg file at all.
- */
- frames -= 1;
- break;
- }
- avg_bps += bitrate / 1000;
+ {
+ /*error in header */
+ state->state = MP3_LOOKING_FOR_FRAME;
+ offset += 1;
+ break;
+ }
- pos += frame_size - 4;
- if (frames > max_frames_scan)
- break; /*optimization */
- if (avg_bps / frames != bitrate / 1000)
- vbr_flag = 1;
- if (pos + sizeof (header) > size)
- break; /* EOF */
- header = (data[pos] << 24) | (data[pos+1] << 16) |
- (data[pos+2] << 8) | data[pos+3];
- }
- while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK);
+ /* Only save data from valid frames in the state */
+ state->avg_bps += bitrate / 1000;
+ state->sample_rate = sample_rate;
+ state->mpeg_ver = mpeg_ver;
+ state->layer = layer;
+ state->ch = ch;
+ state->copyright_flag = copyright_flag;
+ state->original_flag = original_flag;
+ state->bitrate = bitrate;
- if (frames < 2)
- return 0; /*no valid frames */
- ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
- avg_bps = avg_bps / frames;
- if (max_frames_scan)
- { /*if not all frames scaned */
- length =
- size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
+ frames_found_in_this_round += 1;
+ state->number_of_valid_frames += 1;
+ if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
+ state->vbr_flag = 1;
+ offset += frame_size;
+ state->state = MP3_LOOKING_FOR_FRAME;
+ break;
}
- else
- {
- length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
- }
-
- ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION);
- snprintf (format,
- sizeof(format),
- "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
- mpeg_versions[mpeg_ver-1],
- layer_names[layer-1],
- avg_bps,
- vbr_flag ? _("VBR") : _("CBR"),
- sample_rate,
- channel_modes[ch],
- copyright_flag ? _("copyright") : _("no copyright"),
- original_flag ? _("original") : _("copy") );
-
- ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
- snprintf (format,
- sizeof (format), "%dm%02d",
- length / 60, length % 60);
- ADDR (format, EXTRACTOR_METATYPE_DURATION);
- return 0;
+ }
+ return 1;
}
/* end of mp3_extractor.c */
Modified: Extractor/src/plugins/template_extractor.c
===================================================================
--- Extractor/src/plugins/template_extractor.c 2012-03-27 12:46:29 UTC (rev
20782)
+++ Extractor/src/plugins/template_extractor.c 2012-03-27 13:05:17 UTC (rev
20783)
@@ -21,21 +21,113 @@
#include "platform.h"
#include "extractor.h"
-int
-EXTRACTOR_template_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
+#include "extractor_plugins.h"
+
+struct template_state
{
- if (0 != proc (proc_cls,
- "template",
- EXTRACTOR_METATYPE_RESERVED,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- "foo",
- strlen ("foo")+1))
+ int state;
+
+ /* more state fields here
+ * all variables that should survive more than one atomic read
+ * from the "file" are to be placed here.
+ */
+};
+
+enum TemplateState
+{
+ TEMPLATE_INVALID = -1,
+ TEMPLATE_LOOKING_FOR_FOO = 0,
+ TEMPLATE_READING_FOO,
+ TEMPLATE_READING_BAR,
+ TEMPLATE_SEEKING_TO_ZOOL
+};
+
+void
+EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ struct template_state *state;
+ state = plugin->state = malloc (sizeof (struct template_state));
+ if (state == NULL)
+ return;
+ state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */
+ /* initialize other fields to their "uninitialized" values or defaults */
+}
+
+void
+EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin)
+{
+ if (plugin->state != NULL)
+ {
+ /* free other state fields that are heap-allocated */
+ free (plugin->state);
+ }
+ plugin->state = NULL;
+}
+
+int
+EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin,
+ EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
+{
+ int64_t file_position;
+ int64_t file_size;
+ size_t offset = 0;
+ size_t size;
+ unsigned char *data;
+ unsigned char *ff;
+ struct template_state *state; /* this plugin's own state, allocated by the init_state method */
+
+ /* temporary variables are declared here */
+
+ if (plugin == NULL || plugin->state == NULL)
return 1;
- /* insert more here */
- return 0;
+
+ /* for easier access (and conforms better with the old plugins var names) */
+ state = plugin->state;
+ file_position = plugin->position;
+ file_size = plugin->fsize;
+ size = plugin->map_size;
+ data = plugin->shm_ptr;
+
+ /* sanity checks */
+ if (plugin->seek_request < 0)
+ return 1;
+ if (file_position - plugin->seek_request > 0)
+ {
+ plugin->seek_request = -1;
+ return 1;
+ }
+ if (plugin->seek_request - file_position < size)
+ offset = plugin->seek_request - file_position;
+
+ while (1)
+ {
+ switch (state->state)
+ {
+ case TEMPLATE_INVALID:
+ plugin->seek_request = -1;
+ return 1;
+ case TEMPLATE_LOOKING_FOR_FOO:
+ /* Find FOO in data buffer.
+ * If found, set offset to its position and set state to
TEMPLATE_READING_FOO
+ * If not found, set seek_request to file_position + offset and return 1
+ * (but it's better to give up as early as possible, to avoid reading
the whole
+ * file byte-by-byte).
+ */
+ break;
+ case TEMPLATE_READING_FOO:
+ /* See if offset + sizeof(foo) < size, otherwise set seek_request to
file_position + offset and return 1;
+ * If file_position is 0, and size is still too small, give up.
+ * Read FOO, maybe increase offset to reflect that (depends on the
parser logic).
+ * Either process FOO right here, or jump to another state (see ebml
plugin for an example of complex
+ * state-jumps).
+ * If FOO says you need to seek somewhere - set offset to seek_target -
file_position and set the
+ * next state (next state will check that offset < size; all states that
do reading should do that,
+ * and also check for EOF).
+ */
+ /* ... */
+ break;
+ }
+ }
+ /* Should not reach this */
+ return 1;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r20783 - in Extractor: . src/include src/main src/plugins,
gnunet <=