[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2684 - in Extractor: . m4 src/include src/main src/plugins
From: grothoff
Subject: [GNUnet-SVN] r2684 - in Extractor: . m4 src/include src/main src/plugins src/plugins/hash src/plugins/ole2
Date: Fri, 28 Apr 2006 21:49:15 -0700 (PDT)
Author: grothoff
Date: 2006-04-28 21:49:06 -0700 (Fri, 28 Apr 2006)
New Revision: 2684
Added:
Extractor/m4/abi-gsf.m4
Extractor/src/plugins/ole2/README
Removed:
Extractor/src/plugins/wordleaker/
Modified:
Extractor/ChangeLog
Extractor/configure.ac
Extractor/src/include/extractor.h
Extractor/src/main/extractor.c
Extractor/src/plugins/Makefile.am
Extractor/src/plugins/hash/rmd160extractor.c
Extractor/src/plugins/ole2/Makefile.am
Extractor/src/plugins/ole2/ole2extractor.c
Log:
integrating wordleaker into ole2 plugin, switching to libgsf
Modified: Extractor/ChangeLog
===================================================================
--- Extractor/ChangeLog 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/ChangeLog 2006-04-29 04:49:06 UTC (rev 2684)
@@ -1,3 +1,7 @@
+Fri Apr 28 22:26:43 PDT 2006
+ Integrated wordleaker into OLE2 plugin. Changed OLE2 plugin to use
+ libgsf (new dependency!).
+
Fri Apr 28 16:18:26 PDT 2006
Fixing some i18n issues. Specifically, EXTRACTOR_getKeywordTypeAsString
will now never return the translated version of the keyword type
Modified: Extractor/configure.ac
===================================================================
--- Extractor/configure.ac 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/configure.ac 2006-04-29 04:49:06 UTC (rev 2684)
@@ -313,6 +313,7 @@
AM_CONDITIONAL(HAVE_EXIV2, test x$exiv2 != x0)
AC_DEFINE_UNQUOTED([HAVE_EXIV2], $exiv2, [We use EXIV2])
+ABI_GSF
AC_SUBST(CPPFLAGS)
AC_SUBST(LDFLAGS)
@@ -358,9 +359,9 @@
AC_MSG_NOTICE([NOTICE: printable plugins enabled])
fi
-if test "x$without_glib" = "xtrue"
+if test "x$have_gsf" != "xtrue"
then
- AC_MSG_NOTICE([NOTICE: glib not used, no OLE2 (MS Office) support])
+ AC_MSG_NOTICE([NOTICE: libgsf not found, no OLE2 (MS Office) support])
fi
if test "x$without_gtk" = "xtrue"
Added: Extractor/m4/abi-gsf.m4
===================================================================
--- Extractor/m4/abi-gsf.m4 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/m4/abi-gsf.m4 2006-04-29 04:49:06 UTC (rev 2684)
@@ -0,0 +1,78 @@
+# start: abi/ac-helpers/abi-gsf.m4
+#
+# Copyright (C) 2005 Christian Neumair
+#
+# This file is free software; you may copy and/or distribute it with
+# or without modifications, as long as this notice is preserved.
+# This software is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY, to the extent permitted by law; without even
+# the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE.
+#
+# The above license applies to THIS FILE ONLY, the GNUnet code
+# itself may be copied and distributed under the terms of the GNU
+# GPL, see COPYING for more details
+#
+# Usage: ABI_GSF
+
+# Check for gsf
+
+AC_DEFUN([ABI_GSF], [
+
+test_gsf=true
+have_gsf=false
+
+test_gsf_gnome=true
+have_gsf_gnome=false
+
+AC_ARG_ENABLE(gsf,[ --disable-gsf Turn off gsf], [
+ if test "x$enableval" = "xno"; then
+ test_gsf=false
+ fi
+])
+
+AC_ARG_ENABLE(gsf-gnome,[ --disable-gnome Turn off gsf-gnome], [
+ if test "x$enableval" = "xno"; then
+ test_gsf_gnome=false
+ fi
+])
+
+if test "x$test_gsf" = "xtrue" ; then
+ PKG_CHECK_MODULES(GSF,[libgsf-1 >= 1.10], [
+ have_gsf=true
+ GSF_CFLAGS="$GSF_CFLAGS -DHAVE_GSF"
+ ],
+ [
+ have_gsf=false
+ ])
+fi
+
+if test "x$have_gsf" = "xtrue" -a "x$test_gsf_gnome" = "xtrue" ; then
+ PKG_CHECK_MODULES(GSF_GNOME, [libgsf-gnome-1 >= 1.10], [
+ have_gsf_gnome=true
+ GSF_GNOME_CFLAGS="$GSF_GNOME_CFLAGS -DHAVE_GSF_GNOME"
+ ],
+ [
+ have_gsf_gnome=false
+ ])
+fi
+
+AC_SUBST(GSF_CFLAGS)
+AC_SUBST(GSF_LIBS)
+
+AC_SUBST(GSF_GNOME_CFLAGS)
+AC_SUBST(GSF_GNOME_LIBS)
+
+AM_CONDITIONAL(WITH_GSF, test "x$have_gsf" = "xtrue")
+AM_CONDITIONAL(WITH_GSF_GNOME, test "x$have_gsf_gnome" = "xtrue")
+
+if test "x$have_gsf_gnome" = "xtrue" ; then
+ abi_gsf_message="yes, with GNOME support"
+else if test "x$have_gsf" = "xtrue" ; then
+ abi_gsf_message="yes, without GNOME support"
+else
+ abi_gsf_message="no"
+fi
+fi
+
+])
Modified: Extractor/src/include/extractor.h
===================================================================
--- Extractor/src/include/extractor.h 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/include/extractor.h 2006-04-29 04:49:06 UTC (rev 2684)
@@ -152,6 +152,14 @@
EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
EXTRACTOR_REVISION_HISTORY = 100,
EXTRACTOR_LOWERCASE = 101,
+ EXTRACTOR_COMPANY = 102,
+ EXTRACTOR_GENERATOR = 103,
+ EXTRACTOR_CHARACTER_SET = 104,
+ EXTRACTOR_LINE_COUNT = 105,
+ EXTRACTOR_PARAGRAPH_COUNT = 106,
+ EXTRACTOR_EDITING_CYCLES = 107,
+ EXTRACTOR_SCALE = 108,
+ EXTRACTOR_MANAGER = 109,
} EXTRACTOR_KeywordType;
/**
Modified: Extractor/src/main/extractor.c
===================================================================
--- Extractor/src/main/extractor.c 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/main/extractor.c 2006-04-29 04:49:06 UTC (rev 2684)
@@ -41,113 +41,121 @@
* The sources of keywords as strings.
*/
static const char *keywordTypes[] = {
- gettext_noop("unknown"),
+ gettext_noop("unknown"), /* 0 */
gettext_noop("filename"),
gettext_noop("mimetype"),
gettext_noop("title"),
gettext_noop("author"),
- gettext_noop("artist"),
+ gettext_noop("artist"), /* 5 */
gettext_noop("description"),
gettext_noop("comment"),
gettext_noop("date"),
gettext_noop("publisher"),
- gettext_noop("language"),
+ gettext_noop("language"), /* 10 */
gettext_noop("album"),
gettext_noop("genre"),
gettext_noop("location"),
gettext_noop("version"),
- gettext_noop("organization"),
+ gettext_noop("organization"), /* 15 */
gettext_noop("copyright"),
gettext_noop("subject"),
gettext_noop("keywords"),
gettext_noop("contributor"),
- gettext_noop("resource-type"),
+ gettext_noop("resource-type"), /* 20 */
gettext_noop("format"),
gettext_noop("resource-identifier"),
gettext_noop("source"),
gettext_noop("relation"),
- gettext_noop("coverage"),
+ gettext_noop("coverage"), /* 25 */
gettext_noop("software"),
gettext_noop("disclaimer"),
gettext_noop("warning"),
gettext_noop("translated"),
- gettext_noop("creation date"),
+ gettext_noop("creation date"), /* 30 */
gettext_noop("modification date"),
gettext_noop("creator"),
gettext_noop("producer"),
gettext_noop("page count"),
- gettext_noop("page orientation"),
+ gettext_noop("page orientation"), /* 35 */
gettext_noop("paper size"),
gettext_noop("used fonts"),
gettext_noop("page order"),
gettext_noop("created for"),
- gettext_noop("magnification"),
+ gettext_noop("magnification"), /* 40 */
gettext_noop("release"),
gettext_noop("group"),
gettext_noop("size"),
gettext_noop("summary"),
- gettext_noop("packager"),
+ gettext_noop("packager"), /* 45 */
gettext_noop("vendor"),
gettext_noop("license"),
gettext_noop("distribution"),
gettext_noop("build-host"),
- gettext_noop("os"),
+ gettext_noop("operating system"), /* 50 */
gettext_noop("dependency"),
gettext_noop("MD4"),
gettext_noop("MD5"),
gettext_noop("SHA-0"),
- gettext_noop("SHA-1"),
+ gettext_noop("SHA-1"), /* 55 */
gettext_noop("RipeMD160"),
gettext_noop("resolution"),
gettext_noop("category"),
gettext_noop("book title"),
- gettext_noop("priority"),
+ gettext_noop("priority"), /* 60 */
gettext_noop("conflicts"),
gettext_noop("replaces"),
gettext_noop("provides"),
gettext_noop("conductor"),
- gettext_noop("interpreter"),
+ gettext_noop("interpreter"), /* 65 */
gettext_noop("owner"),
gettext_noop("lyrics"),
gettext_noop("media type"),
gettext_noop("contact"),
- gettext_noop("binary thumbnail data"),
+ gettext_noop("binary thumbnail data"), /* 70 */
gettext_noop("publication date"),
gettext_noop("camera make"),
gettext_noop("camera model"),
gettext_noop("exposure"),
- gettext_noop("aperture"),
+ gettext_noop("aperture"), /* 75 */
gettext_noop("exposure bias"),
gettext_noop("flash"),
gettext_noop("flash bias"),
gettext_noop("focal length"),
- gettext_noop("focal length (35mm equivalent)"),
+ gettext_noop("focal length (35mm equivalent)"), /* 80 */
gettext_noop("iso speed"),
gettext_noop("exposure mode"),
gettext_noop("metering mode"),
gettext_noop("macro mode"),
- gettext_noop("image quality"),
+ gettext_noop("image quality"), /* 85 */
gettext_noop("white balance"),
gettext_noop("orientation"),
gettext_noop("template"),
gettext_noop("split"),
- gettext_noop("product version"),
+ gettext_noop("product version"), /* 90 */
gettext_noop("last saved by"),
gettext_noop("last printed"),
gettext_noop("word count"),
gettext_noop("character count"),
- gettext_noop("total editing time"),
+ gettext_noop("total editing time"), /* 95 */
gettext_noop("thumbnails"),
gettext_noop("security"),
gettext_noop("created by software"),
gettext_noop("modified by software"),
- gettext_noop("revision history"),
+ gettext_noop("revision history"), /* 100 */
gettext_noop("lower case conversion"),
+ gettext_noop("company"),
+ gettext_noop("generator"),
+ gettext_noop("character set"),
+ gettext_noop("line count"), /* 105 */
+ gettext_noop("paragraph count"),
+ gettext_noop("editing cycles"),
+ gettext_noop("scale"),
+ gettext_noop("manager"),
NULL,
};
/* the number of keyword types (for bounds-checking) */
-#define HIGHEST_TYPE_NUMBER 102
+#define HIGHEST_TYPE_NUMBER 110
#ifdef HAVE_LIBOGG
#if HAVE_VORBIS
@@ -211,7 +219,6 @@
libextractor_mpeg:\
libextractor_elf:\
libextractor_oo:\
-libextractor_word:\
libextractor_asf"
#define DEFAULT_LIBRARIES EXSO OLESO OGGSO QTSO DEFSO
Modified: Extractor/src/plugins/Makefile.am
===================================================================
--- Extractor/src/plugins/Makefile.am 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/plugins/Makefile.am 2006-04-29 04:49:06 UTC (rev 2684)
@@ -1,7 +1,9 @@
include Makefile-plugins.am
if HAVE_GLIB
-oledir=ole2
+if WITH_GSF
+ oledir=ole2
+endif
if HAVE_GTK
thumbdir=thumbnail
endif
@@ -15,6 +17,7 @@
exiv2dir=exiv2
endif
+
if HAVE_XPDF
xpdfdir=pdf
else
@@ -23,7 +26,7 @@
# toggle for development
# SUBDIRS = .
-SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir)
$(exiv2dir) wordleaker
+SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir)
$(exiv2dir)
if HAVE_VORBISFILE
Modified: Extractor/src/plugins/hash/rmd160extractor.c
===================================================================
--- Extractor/src/plugins/hash/rmd160extractor.c 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/plugins/hash/rmd160extractor.c 2006-04-29 04:49:06 UTC (rev 2684)
@@ -619,10 +619,11 @@
#define rmd160_new() rmd160_copy(NULL,NULL)
-struct EXTRACTOR_Keywords * libextractor_hash_rmd160_extract(const char *
filename,
- char * data,
- size_t size,
- struct
EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_hash_rmd160_extract(const char * filename,
+ const unsigned char * data,
+ size_t size,
+ struct EXTRACTOR_Keywords * prev) {
unsigned char bin_buffer[MAX_DIGEST_BIN_BYTES];
char hash[8 * MAX_DIGEST_BIN_BYTES];
char buf[16];
Modified: Extractor/src/plugins/ole2/Makefile.am
===================================================================
--- Extractor/src/plugins/ole2/Makefile.am 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/plugins/ole2/Makefile.am 2006-04-29 04:49:06 UTC (rev 2684)
@@ -4,12 +4,11 @@
plugin_LTLIBRARIES = \
libextractor_ole2.la
-AM_CFLAGS = $(GLIB_CFLAGS)
-
libextractor_ole2_la_CFLAGS = \
- $(GLIB_CFLAGS)
+ $(GSF_CFLAGS)
libextractor_ole2_la_LIBADD = \
- $(LIBADD) $(GLIB_LIBS) -lgobject-2.0 \
+ $(LIBADD) $(GSF_LIBS) \
+ $(top_builddir)/src/plugins/libconvert.la \
$(top_builddir)/src/main/libextractor.la
libextractor_ole2_la_LDFLAGS = \
$(PLUGINFLAGS) $(retaincommand)
Added: Extractor/src/plugins/ole2/README
===================================================================
--- Extractor/src/plugins/ole2/README 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/plugins/ole2/README 2006-04-29 04:49:06 UTC (rev 2684)
@@ -0,0 +1,25 @@
+WordLeaker v.0.1 (c) 2005
+ by Madelman (http://elligre.tk/madelman/)
+
+Shows information about a Word file.
+It can show all the summary and the revision history of the file.
+
+It should be portable but, for now, it doesn't work in Linux. I haven't had
+the time to debug it but I'll do when I can.
+
+There are a lot of things that don't work yet, if you want to help contact me.
+
+Copyright and License
+=====================
+
+WordLeaker v.0.1 (c) 2005 by Madelman (address@hidden)
+
+WordLeaker program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
Modified: Extractor/src/plugins/ole2/ole2extractor.c
===================================================================
--- Extractor/src/plugins/ole2/ole2extractor.c 2006-04-29 01:07:14 UTC (rev 2683)
+++ Extractor/src/plugins/ole2/ole2extractor.c 2006-04-29 04:49:06 UTC (rev 2684)
@@ -1,6 +1,6 @@
/*
This file is part of libextractor.
- (C) 2004,2005 Vidyut Samanta and Christian Grothoff
+ (C) 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -17,1218 +17,31 @@
Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
- Most of the code in this directory comes from
- libgsf 1.10.1 (Licensed under GPL/LGPL).
-
- libgsf -- The G Structured File Library
+ This code makes extensive use of libgsf
+ -- the Gnome Structured File Library
Copyright (C) 2002-2004 Jody Goldberg (address@hidden)
+ Part of this code was borrowed from wordleaker.cpp. See also
+ the README file in this directory.
*/
#include "platform.h"
#include "extractor.h"
+#include "../convert.h"
+
#include <glib-object.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
+#include <gsf/gsf-utils.h>
+#include <gsf/gsf-input-memory.h>
+#include <gsf/gsf-infile.h>
+#include <gsf/gsf-infile-msole.h>
+#include <gsf/gsf-msole-utils.h>
+
#define DEBUG_OLE2 0
-#if DEBUG_OLE2
-#define d(code) do { code } while (0)
-#define warning printf
-#else
-#define d(code)
- static void warning(const char * format, ...) {}
-#endif
-
-#undef g_return_val_if_fail
-#define g_return_val_if_fail(a,b) if (! (a)) return (b);
-
-/* *********************** formerly gsf-input.c ************* */
-
-typedef struct GsfInput {
- off_t size;
- off_t cur_offset;
- char * name;
- const unsigned char * buf;
- int needs_free;
-} GsfInput;
-
-
-static void
-gsf_input_init (GsfInput * input)
-{
- input->size = 0;
- input->cur_offset = 0;
- input->name = NULL;
- input->buf = NULL;
-}
-
-/**
- * gsf_input_memory_new:
- * @buf: The input bytes
- * @length: The length of @buf
- * @needs_free: Whether you want this memory to be free'd at object destruction
- *
- * Returns: A new #GsfInputMemory
- */
-static GsfInput *
-gsf_input_new (const unsigned char * buf,
- off_t length,
- int needs_free)
-{
- GsfInput *mem = malloc(sizeof(GsfInput));
- if (mem == NULL)
- return NULL;
- gsf_input_init(mem);
- mem->buf = buf;
- mem->size = length;
- mem->needs_free = needs_free;
- return mem;
-}
-
-static void
-gsf_input_finalize (GsfInput * input)
-{
- if (input->name != NULL) {
- free (input->name);
- input->name = NULL;
- }
- if ( (input->buf) && input->needs_free)
- free((void*) input->buf);
- free(input);
-}
-
-/**
- * gsf_input_set_name :
- * @input :
- * @name :
- *
- * protected.
- *
- * Returns : TRUE if the assignment was ok.
- **/
-static int
-gsf_input_set_name (GsfInput *input, char const *name)
-{
- char *buf;
-
- g_return_val_if_fail (input != NULL, 0);
-
- buf = strdup (name);
- if (input->name != NULL)
- free (input->name);
- input->name = buf;
- return 1;
-}
-
-
-
-static GsfInput *
-gsf_input_dup (GsfInput *src)
-{
- GsfInput * dst = malloc(sizeof(GsfInput));
- if (dst == NULL)
- return NULL;
- gsf_input_init(dst);
- dst->buf = src->buf;
- dst->needs_free = 0;
- dst->size = src->size;
- if (src->name != NULL)
- gsf_input_set_name (dst, src->name);
- dst->cur_offset = src->cur_offset;
- return dst;
-}
-
-static const unsigned char *
-gsf_input_read (GsfInput * mem, size_t num_bytes, unsigned char *
optional_buffer)
-{
- const unsigned char *src = mem->buf;
- if (src == NULL)
- return NULL;
- if (optional_buffer) {
- memcpy (optional_buffer, src + mem->cur_offset, num_bytes);
- mem->cur_offset += num_bytes;
-
- return optional_buffer;
- } else {
- const unsigned char * ret = src + mem->cur_offset;
- mem->cur_offset += num_bytes;
- return ret;
- }
-}
-
-/**
- * gsf_input_size :
- * @input : The input
- *
- * Looks up and caches the number of bytes in the input
- *
- * Returns : the size or -1 on error
- **/
-static off_t
-gsf_input_size (GsfInput *input)
-{
- g_return_val_if_fail (input != NULL, -1);
- return input->size;
-}
-
-/**
- * gsf_input_seek :
- * @input :
- * @offset :
- * @whence :
- *
- * Returns TRUE on error.
- **/
-static int
-gsf_input_seek (GsfInput *input, off_t offset, int whence)
-{
- off_t pos = offset;
-
- g_return_val_if_fail (input != NULL, 1);
-
- switch (whence) {
- case SEEK_SET : break;
- case SEEK_CUR : pos += input->cur_offset; break;
- case SEEK_END : pos += input->size; break;
- default : return 1;
- }
-
- if (pos < 0 || pos > input->size)
- return 1;
-
- /*
- * If we go nowhere, just return. This in particular handles null
- * seeks for streams with no seek method.
- */
- if (pos == input->cur_offset)
- return 0;
-
- input->cur_offset = pos;
- return 0;
-}
-
-
-
-
-/* ******************** formerly gsf-utils.c **************** */
-
-
-/* Do this the ugly way so that we don't have to worry about alignment */
-#define GSF_LE_GET_GUINT8(p) (*(guint8 const *)(p))
-#define GSF_LE_GET_GUINT16(p) \
- (guint16)((((guint8 const *)(p))[0] << 0) | \
- (((guint8 const *)(p))[1] << 8))
-#define GSF_LE_GET_GUINT32(p) \
- (guint32)((((guint8 const *)(p))[0] << 0) | \
- (((guint8 const *)(p))[1] << 8) | \
- (((guint8 const *)(p))[2] << 16) | \
- (((guint8 const *)(p))[3] << 24))
-
-#define GSF_LE_GET_GUINT64(p) (gsf_le_get_guint64 (p))
-#define GSF_LE_GET_GINT64(p) ((gint64)GSF_LE_GET_GUINT64(p))
-#define GSF_LE_GET_GINT8(p) ((gint8)GSF_LE_GET_GUINT8(p))
-#define GSF_LE_GET_GINT16(p) ((gint16)GSF_LE_GET_GUINT16(p))
-#define GSF_LE_GET_GINT32(p) ((gint32)GSF_LE_GET_GUINT32(p))
-#define GSF_LE_GET_FLOAT(p) (gsf_le_get_float (p))
-#define GSF_LE_GET_DOUBLE(p) (gsf_le_get_double (p))
-#define GSF_LE_SET_GUINT8(p, dat) \
- (*((guint8 *)(p)) = ((dat) & 0xff))
-#define GSF_LE_SET_GUINT16(p, dat) \
- ((*((guint8 *)(p) + 0) = ((dat) & 0xff)),\
- (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff))
-#define GSF_LE_SET_GUINT32(p, dat) \
- ((*((guint8 *)(p) + 0) = ((dat)) & 0xff), \
- (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff), \
- (*((guint8 *)(p) + 2) = ((dat) >> 16) & 0xff), \
- (*((guint8 *)(p) + 3) = ((dat) >> 24) & 0xff))
-#define GSF_LE_SET_GINT8(p,dat) GSF_LE_SET_GUINT8((p),(dat))
-#define GSF_LE_SET_GINT16(p,dat) GSF_LE_SET_GUINT16((p),(dat))
-#define GSF_LE_SET_GINT32(p,dat) GSF_LE_SET_GUINT32((p),(dat))
-
-
-/*
- * Glib gets this wrong, really. ARM's floating point format is a weird
- * mixture.
- */
-#define G_ARMFLOAT_ENDIAN 56781234
-#if defined(__arm__) && !defined(__vfp__) && (G_BYTE_ORDER == G_LITTLE_ENDIAN)
-#define G_FLOAT_BYTE_ORDER G_ARMFLOAT_ENDIAN
-#else
-#define G_FLOAT_BYTE_ORDER G_BYTE_ORDER
-#endif
-
-static guint64
-gsf_le_get_guint64 (void const *p)
-{
-#if G_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (guint64) == 8) {
- guint64 li;
- int i;
- guint8 *t = (guint8 *)&li;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (li);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return li;
- } else {
- g_error ("Big endian machine, but weird size of guint64");
- }
-#elif G_BYTE_ORDER == G_LITTLE_ENDIAN
- if (sizeof (guint64) == 8) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- guint64 data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of guint64");
- }
-#else
-#error "Byte order not recognised -- out of luck"
-#endif
-}
-
-static float
-gsf_le_get_float (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (float) == 4) {
- float f;
- int i;
- guint8 *t = (guint8 *)&f;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (f);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return f;
- } else {
- g_error ("Big endian machine, but weird size of floats");
- }
-#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER ==
G_ARMFLOAT_ENDIAN)
- if (sizeof (float) == 4) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- float data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of floats");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-static double
-gsf_le_get_double (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
- double data;
- memcpy ((char *)&data + 4, p, 4);
- memcpy ((char *)&data, (const char *)p + 4, 4);
- return data;
-#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (double) == 8) {
- double d;
- int i;
- guint8 *t = (guint8 *)&d;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (d);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return d;
- } else {
- g_error ("Big endian machine, but weird size of doubles");
- }
-#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
- if (sizeof (double) == 8) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- double data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of doubles");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-/**
- * gsf_iconv_close : A utility wrapper to safely close an iconv handle
- * @handle :
- **/
-static void
-gsf_iconv_close (GIConv handle)
-{
- if (handle != NULL && handle != ((GIConv)-1))
- g_iconv_close (handle);
-}
-
-
-/* ***************************** formerly gsf-infile-msole.c
********************* */
-
-#define OLE_HEADER_SIZE 0x200 /* independent of big block
size size */
-#define OLE_HEADER_SIGNATURE 0x00
-#define OLE_HEADER_CLSID 0x08 /* See ReadClassStg */
-#define OLE_HEADER_MINOR_VER 0x18 /* 0x33 and 0x3e have been seen */
-#define OLE_HEADER_MAJOR_VER 0x1a /* 0x3 been seen in wild */
-#define OLE_HEADER_BYTE_ORDER 0x1c /* 0xfe 0xff == Intel Little Endian */
-#define OLE_HEADER_BB_SHIFT 0x1e
-#define OLE_HEADER_SB_SHIFT 0x20
-/* 0x22..0x27 reserved == 0 */
-#define OLE_HEADER_CSECTDIR 0x28
-#define OLE_HEADER_NUM_BAT 0x2c
-#define OLE_HEADER_DIRENT_START 0x30
-/* 0x34..0x37 transacting signature must be 0 */
-#define OLE_HEADER_THRESHOLD 0x38
-#define OLE_HEADER_SBAT_START 0x3c
-#define OLE_HEADER_NUM_SBAT 0x40
-#define OLE_HEADER_METABAT_BLOCK 0x44
-#define OLE_HEADER_NUM_METABAT 0x48
-#define OLE_HEADER_START_BAT 0x4c
-#define BAT_INDEX_SIZE 4
-#define OLE_HEADER_METABAT_SIZE ((OLE_HEADER_SIZE -
OLE_HEADER_START_BAT) / BAT_INDEX_SIZE)
-
-#define DIRENT_MAX_NAME_SIZE 0x40
-#define DIRENT_DETAILS_SIZE 0x40
-#define DIRENT_SIZE (DIRENT_MAX_NAME_SIZE + DIRENT_DETAILS_SIZE)
-#define DIRENT_NAME_LEN 0x40 /* length in bytes incl 0
terminator */
-#define DIRENT_TYPE 0x42
-#define DIRENT_COLOUR 0x43
-#define DIRENT_PREV 0x44
-#define DIRENT_NEXT 0x48
-#define DIRENT_CHILD 0x4c
-#define DIRENT_CLSID 0x50 /* only for dirs */
-#define DIRENT_USERFLAGS 0x60 /* only for dirs */
-#define DIRENT_CREATE_TIME 0x64 /* for files */
-#define DIRENT_MODIFY_TIME 0x6c /* for files */
-#define DIRENT_FIRSTBLOCK 0x74
-#define DIRENT_FILE_SIZE 0x78
-/* 0x7c..0x7f reserved == 0 */
-
-#define DIRENT_TYPE_INVALID 0
-#define DIRENT_TYPE_DIR 1
-#define DIRENT_TYPE_FILE 2
-#define DIRENT_TYPE_LOCKBYTES 3 /* ? */
-#define DIRENT_TYPE_PROPERTY 4 /* ? */
-#define DIRENT_TYPE_ROOTDIR 5
-#define DIRENT_MAGIC_END 0xffffffff
-
-/* flags in the block allocation list to denote special blocks */
-#define BAT_MAGIC_UNUSED 0xffffffff /* -1 */
-#define BAT_MAGIC_END_OF_CHAIN 0xfffffffe /* -2 */
-#define BAT_MAGIC_BAT 0xfffffffd /* a bat block, -3 */
-#define BAT_MAGIC_METABAT 0xfffffffc /* a metabat block -4 */
-
-
-
-
-typedef struct {
- guint32 *block;
- guint32 num_blocks;
-} MSOleBAT;
-
-typedef struct {
- char *name;
- char *collation_name;
- int index;
- size_t size;
- gboolean use_sb;
- guint32 first_block;
- gboolean is_directory;
- GList *children;
- unsigned char clsid[16]; /* 16 byte GUID used by some apps */
-} MSOleDirent;
-
-typedef struct {
- struct {
- MSOleBAT bat;
- unsigned shift;
- unsigned filter;
- size_t size;
- } bb, sb;
- off_t max_block;
- guint32 threshold; /* transition between small and big blocks */
- guint32 sbat_start, num_sbat;
-
- MSOleDirent *root_dir;
- struct GsfInput *sb_file;
-
- int ref_count;
-} MSOleInfo;
-
-typedef struct GsfInfileMSOle {
- off_t size;
- off_t cur_offset;
- struct GsfInput *input;
- MSOleInfo *info;
- MSOleDirent *dirent;
- MSOleBAT bat;
- off_t cur_block;
-
- struct {
- guint8 *buf;
- size_t buf_size;
- } stream;
-} GsfInfileMSOle;
-
-/* utility macros */
-#define OLE_BIG_BLOCK(index, ole) ((index) >> ole->info->bb.shift)
-
-static struct GsfInput *gsf_infile_msole_new_child (GsfInfileMSOle *parent,
- MSOleDirent *dirent);
-
-/**
- * ole_get_block :
- * @ole : the infile
- * @block :
- * @buffer : optionally NULL
- *
- * Read a block of data from the underlying input.
- * Be really anal.
- **/
-static const guint8 *
-ole_get_block (const GsfInfileMSOle *ole, guint32 block, guint8 *buffer)
-{
- g_return_val_if_fail (block < ole->info->max_block, NULL);
-
- /* OLE_HEADER_SIZE is fixed at 512, but the sector containing the
- * header is padded out to bb.size (sector size) when bb.size > 512. */
- if (gsf_input_seek (ole->input,
- (off_t)(MAX (OLE_HEADER_SIZE, ole->info->bb.size) + (block <<
ole->info->bb.shift)),
- SEEK_SET) < 0)
- return NULL;
-
- return gsf_input_read (ole->input, ole->info->bb.size, buffer);
-}
-
-/**
- * ole_make_bat :
- * @metabat : a meta bat to connect to the raw blocks (small or large)
- * @size_guess : An optional guess as to how many blocks are in the file
- * @block : The first block in the list.
- * @res : where to store the result.
- *
- * Walk the linked list of the supplied block allocation table and build up a
- * table for the list starting in @block.
- *
- * Returns TRUE on error.
- */
-static gboolean
-ole_make_bat (MSOleBAT const *metabat, size_t size_guess, guint32 block,
- MSOleBAT *res)
-{
- /* NOTE : Only use size as a suggestion, sometimes it is wrong */
- GArray *bat = g_array_sized_new (FALSE, FALSE,
- sizeof (guint32), size_guess);
-
- guint8 *used = (guint8*)g_alloca (1 + metabat->num_blocks / 8);
- memset (used, 0, 1 + metabat->num_blocks / 8);
-
- if (block < metabat->num_blocks)
- do {
- /* Catch cycles in the bat list */
- g_return_val_if_fail (0 == (used[block/8] & (1 <<
(block & 0x7))), TRUE);
- used[block/8] |= 1 << (block & 0x7);
-
- g_array_append_val (bat, block);
- block = metabat->block [block];
- } while (block < metabat->num_blocks);
-
- res->block = NULL;
-
- res->num_blocks = bat->len;
- res->block = (guint32 *) (gpointer) g_array_free (bat, FALSE);
-
- if (block != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
- g_warning ("This OLE2 file is invalid.\n"
- "The Block Allocation Table for one of the streams
had %x instead of a terminator (%x).\n"
- "We might still be able to extract some data, but
you'll want to check the file.",
- block, BAT_MAGIC_END_OF_CHAIN);
-#endif
- }
-
- return FALSE;
-}
-
-static void
-ols_bat_release (MSOleBAT *bat)
-{
- if (bat->block != NULL) {
- g_free (bat->block);
- bat->block = NULL;
- bat->num_blocks = 0;
- }
-}
-
-/**
- * ole_info_read_metabat :
- * @ole :
- * @bats :
- *
- * A small utility routine to read a set of references to bat blocks
- * either from the OLE header, or a meta-bat block.
- *
- * Returns a pointer to the element after the last position filled.
- **/
-static guint32 *
-ole_info_read_metabat (GsfInfileMSOle *ole, guint32 *bats, guint32 max,
- guint32 const *metabat, guint32 const *metabat_end)
-{
- guint8 const *bat, *end;
-
- for (; metabat < metabat_end; metabat++) {
- bat = ole_get_block (ole, *metabat, NULL);
- if (bat == NULL)
- return NULL;
- end = bat + ole->info->bb.size;
- for ( ; bat < end ; bat += BAT_INDEX_SIZE, bats++) {
- *bats = GSF_LE_GET_GUINT32 (bat);
- g_return_val_if_fail (*bats < max ||
- *bats >= BAT_MAGIC_METABAT, NULL);
- }
- }
- return bats;
-}
-
-/**
- * gsf_ole_get_guint32s :
- * @dst :
- * @src :
- * @num_bytes :
- *
- * Copy some some raw data into an array of guint32.
- **/
-static void
-gsf_ole_get_guint32s (guint32 *dst, guint8 const *src, int num_bytes)
-{
- for (; (num_bytes -= BAT_INDEX_SIZE) >= 0 ; src += BAT_INDEX_SIZE)
- *dst++ = GSF_LE_GET_GUINT32 (src);
-}
-
-static struct GsfInput *
-ole_info_get_sb_file (GsfInfileMSOle *parent)
-{
- MSOleBAT meta_sbat;
-
- if (parent->info->sb_file != NULL)
- return parent->info->sb_file;
-
- parent->info->sb_file = gsf_infile_msole_new_child (parent,
- parent->info->root_dir);
-
- if (NULL == parent->info->sb_file)
- return NULL;
-
- g_return_val_if_fail (parent->info->sb.bat.block == NULL, NULL);
-
- if (ole_make_bat (&parent->info->bb.bat,
- parent->info->num_sbat,
- parent->info->sbat_start,
- &meta_sbat)) {
- return NULL;
- }
-
- parent->info->sb.bat.num_blocks = meta_sbat.num_blocks *
(parent->info->bb.size / BAT_INDEX_SIZE);
- parent->info->sb.bat.block = g_new0 (guint32,
parent->info->sb.bat.num_blocks);
- ole_info_read_metabat (parent, parent->info->sb.bat.block,
- parent->info->sb.bat.num_blocks,
- meta_sbat.block, meta_sbat.block + meta_sbat.num_blocks);
- ols_bat_release (&meta_sbat);
-
- return parent->info->sb_file;
-}
-
-static gint
-ole_dirent_cmp (const MSOleDirent *a, const MSOleDirent *b)
-{
- g_return_val_if_fail (a, 0);
- g_return_val_if_fail (b, 0);
-
- g_return_val_if_fail (a->collation_name, 0);
- g_return_val_if_fail (b->collation_name, 0);
-
- return strcmp (b->collation_name, a->collation_name);
-}
-
-/**
- * ole_dirent_new :
- * @ole :
- * @entry :
- * @parent : optional
- *
- * Parse dirent number @entry and recursively handle its siblings and children.
- **/
-static MSOleDirent *
-ole_dirent_new (GsfInfileMSOle *ole, guint32 entry, MSOleDirent *parent)
-{
- MSOleDirent *dirent;
- guint32 block, next, prev, child, size;
- guint8 const *data;
- guint8 type;
- guint16 name_len;
-
- if (entry >= DIRENT_MAGIC_END)
- return NULL;
-
- block = OLE_BIG_BLOCK (entry * DIRENT_SIZE, ole);
-
- g_return_val_if_fail (block < ole->bat.num_blocks, NULL);
- data = ole_get_block (ole, ole->bat.block [block], NULL);
- if (data == NULL)
- return NULL;
- data += (DIRENT_SIZE * entry) % ole->info->bb.size;
-
- type = GSF_LE_GET_GUINT8 (data + DIRENT_TYPE);
- if (type != DIRENT_TYPE_DIR &&
- type != DIRENT_TYPE_FILE &&
- type != DIRENT_TYPE_ROOTDIR) {
-#if 0
- g_warning ("Unknown stream type 0x%x", type);
-#endif
- return NULL;
- }
-
- /* It looks like directory (and root directory) sizes are sometimes
bogus */
- size = GSF_LE_GET_GUINT32 (data + DIRENT_FILE_SIZE);
- g_return_val_if_fail (type == DIRENT_TYPE_DIR || type ==
DIRENT_TYPE_ROOTDIR ||
- size <= (guint32)gsf_input_size(ole->input),
NULL);
-
- dirent = g_new0 (MSOleDirent, 1);
- dirent->index = entry;
- dirent->size = size;
- /* Store the class id which is 16 byte identifier used by some apps */
- memcpy(dirent->clsid, data + DIRENT_CLSID, sizeof(dirent->clsid));
-
- /* root dir is always big block */
- dirent->use_sb = parent && (size < ole->info->threshold);
- dirent->first_block = (GSF_LE_GET_GUINT32 (data + DIRENT_FIRSTBLOCK));
- dirent->is_directory = (type != DIRENT_TYPE_FILE);
- dirent->children = NULL;
- prev = GSF_LE_GET_GUINT32 (data + DIRENT_PREV);
- next = GSF_LE_GET_GUINT32 (data + DIRENT_NEXT);
- child = GSF_LE_GET_GUINT32 (data + DIRENT_CHILD);
- name_len = GSF_LE_GET_GUINT16 (data + DIRENT_NAME_LEN);
- dirent->name = NULL;
- if (0 < name_len && name_len <= DIRENT_MAX_NAME_SIZE) {
- gunichar2 uni_name [DIRENT_MAX_NAME_SIZE+1];
- gchar const *end;
- int i;
-
- /* address@hidden
- * Sometimes, rarely, people store the stream name as ascii
- * rather than utf16. Do a validation first just in case.
- */
- if (!g_utf8_validate ((const char*) data, -1, &end) ||
- ((guint8 const *)end - data + 1) != name_len) {
- /* be wary about endianness */
- for (i = 0 ; i < name_len ; i += 2)
- uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
- uni_name [i/2] = 0;
-
- dirent->name = g_utf16_to_utf8 (uni_name, -1, NULL,
NULL, NULL);
- } else
- dirent->name = g_strndup ((gchar *)data,
(gsize)((guint8 const *)end - data + 1));
- }
- /* be really anal in the face of screwups */
- if (dirent->name == NULL)
- dirent->name = g_strdup ("");
- dirent->collation_name = g_utf8_collate_key (dirent->name, -1);
-
- if (parent != NULL)
- parent->children = g_list_insert_sorted (parent->children,
- dirent, (GCompareFunc)ole_dirent_cmp);
-
- /* NOTE : These links are a tree, not a linked list */
- if (prev != entry)
- ole_dirent_new (ole, prev, parent);
- if (next != entry)
- ole_dirent_new (ole, next, parent);
-
- if (dirent->is_directory)
- ole_dirent_new (ole, child, dirent);
- return dirent;
-}
-
-static void
-ole_dirent_free (MSOleDirent *dirent)
-{
- GList *tmp;
- g_return_if_fail (dirent != NULL);
-
- g_free (dirent->name);
- g_free (dirent->collation_name);
-
- for (tmp = dirent->children; tmp; tmp = tmp->next)
- ole_dirent_free ((MSOleDirent *)tmp->data);
- g_list_free (dirent->children);
- g_free (dirent);
-}
-
-/*****************************************************************************/
-
-static void
-ole_info_unref (MSOleInfo *info)
-{
- if (info->ref_count-- != 1)
- return;
-
- ols_bat_release (&info->bb.bat);
- ols_bat_release (&info->sb.bat);
- if (info->root_dir != NULL) {
- ole_dirent_free (info->root_dir);
- info->root_dir = NULL;
- }
- if (info->sb_file != NULL) {
- gsf_input_finalize(info->sb_file);
- info->sb_file = NULL;
- }
- g_free (info);
-}
-
-static MSOleInfo *
-ole_info_ref (MSOleInfo *info)
-{
- info->ref_count++;
- return info;
-}
-
-static void
-gsf_infile_msole_init (GsfInfileMSOle * ole)
-{
- ole->cur_offset = 0;
- ole->size = 0;
- ole->input = NULL;
- ole->info = NULL;
- ole->bat.block = NULL;
- ole->bat.num_blocks = 0;
- ole->cur_block = BAT_MAGIC_UNUSED;
- ole->stream.buf = NULL;
- ole->stream.buf_size = 0;
-}
-
-static void
-gsf_infile_msole_finalize (GsfInfileMSOle * ole)
-{
- if (ole->input != NULL) {
- gsf_input_finalize(ole->input);
- ole->input = NULL;
- }
- if (ole->info != NULL) {
- ole_info_unref (ole->info);
- ole->info = NULL;
- }
- ols_bat_release (&ole->bat);
-
- g_free (ole->stream.buf);
- free(ole);
-}
-
-/**
- * ole_dup :
- * @src :
- *
- * Utility routine to _partially_ replicate a file. It does NOT copy the bat
- * blocks, or init the dirent.
- *
- * Return value: the partial duplicate.
- **/
-static GsfInfileMSOle *
-ole_dup (GsfInfileMSOle const * src)
-{
- GsfInfileMSOle *dst;
- struct GsfInput *input;
-
- g_return_val_if_fail (src != NULL, NULL);
-
- dst = malloc(sizeof(GsfInfileMSOle));
- if (dst == NULL)
- return NULL;
- gsf_infile_msole_init(dst);
- input = gsf_input_dup (src->input);
- if (input == NULL) {
- gsf_infile_msole_finalize(dst);
- return NULL;
- }
- dst->input = input;
- dst->info = ole_info_ref (src->info);
-
- /* buf and buf_size are initialized to NULL */
-
- return dst;
-}
-
-/**
- * ole_init_info :
- * @ole :
- *
- * Read an OLE header and do some sanity checking
- * along the way.
- *
- * Return value: TRUE on error
- **/
-static gboolean
-ole_init_info (GsfInfileMSOle *ole)
-{
- static guint8 const signature[] =
- { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
- guint8 const *header, *tmp;
- guint32 *metabat = NULL;
- MSOleInfo *info;
- guint32 bb_shift, sb_shift, num_bat, num_metabat, last, dirent_start;
- guint32 metabat_block, *ptr;
-
- /* check the header */
- if (gsf_input_seek (ole->input, (off_t) 0, SEEK_SET) ||
- NULL == (header = gsf_input_read (ole->input, OLE_HEADER_SIZE,
NULL)) ||
- 0 != memcmp (header, signature, sizeof (signature))) {
- return TRUE;
- }
-
- bb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_BB_SHIFT);
- sb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_SB_SHIFT);
- num_bat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_BAT);
- dirent_start = GSF_LE_GET_GUINT32 (header + OLE_HEADER_DIRENT_START);
- metabat_block = GSF_LE_GET_GUINT32 (header + OLE_HEADER_METABAT_BLOCK);
- num_metabat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_METABAT);
-
- /* Some sanity checks
- * 1) There should always be at least 1 BAT block
- * 2) It makes no sense to have a block larger than 2^31 for now.
- * Maybe relax this later, but not much.
- */
- if (6 > bb_shift || bb_shift >= 31 || sb_shift > bb_shift) {
- return TRUE;
- }
-
- info = g_new0 (MSOleInfo, 1);
- ole->info = info;
-
- info->ref_count = 1;
- info->bb.shift = bb_shift;
- info->bb.size = 1 << info->bb.shift;
- info->bb.filter = info->bb.size - 1;
- info->sb.shift = sb_shift;
- info->sb.size = 1 << info->sb.shift;
- info->sb.filter = info->sb.size - 1;
- info->threshold = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_THRESHOLD);
- info->sbat_start = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_SBAT_START);
- info->num_sbat = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_NUM_SBAT);
- info->max_block = (gsf_input_size (ole->input) - OLE_HEADER_SIZE)
/ info->bb.size;
- info->sb_file = NULL;
-
- if (info->num_sbat == 0 && info->sbat_start != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
- g_warning ("There is are not supposed to be any blocks in the
small block allocation table, yet there is a link to some. Ignoring it.");
-#endif
- }
-
- /* very rough heuristic, just in case */
- if (num_bat < info->max_block) {
- info->bb.bat.num_blocks = num_bat * (info->bb.size /
BAT_INDEX_SIZE);
- info->bb.bat.block = g_new0 (guint32,
info->bb.bat.num_blocks);
-
- metabat = (guint32 *)g_alloca (MAX (info->bb.size,
OLE_HEADER_SIZE));
-
- /* Reading the elements invalidates this memory, make copy */
- gsf_ole_get_guint32s (metabat, header + OLE_HEADER_START_BAT,
- OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
- last = num_bat;
- if (last > OLE_HEADER_METABAT_SIZE)
- last = OLE_HEADER_METABAT_SIZE;
-
- ptr = ole_info_read_metabat (ole, info->bb.bat.block,
- info->bb.bat.num_blocks, metabat, metabat + last);
- num_bat -= last;
- } else
- ptr = NULL;
-
- last = (info->bb.size - BAT_INDEX_SIZE) / BAT_INDEX_SIZE;
- while (ptr != NULL && num_metabat-- > 0) {
- tmp = ole_get_block (ole, metabat_block, NULL);
- if (tmp == NULL) {
- ptr = NULL;
- break;
- }
-
- /* Reading the elements invalidates this memory, make copy */
- gsf_ole_get_guint32s (metabat, tmp, (int)info->bb.size);
-
- if (num_metabat == 0) {
- if (last < num_bat) {
- /* there should be less that a full metabat
block
- * remaining */
- ptr = NULL;
- break;
- }
- last = num_bat;
- } else if (num_metabat > 0) {
- metabat_block = metabat[last];
- num_bat -= last;
- }
-
- ptr = ole_info_read_metabat (ole, ptr,
- info->bb.bat.num_blocks, metabat, metabat + last);
- }
-
- if (ptr == NULL) {
- return TRUE;
- }
-
- /* Read the directory's bat, we do not know the size */
- if (ole_make_bat (&info->bb.bat, 0, dirent_start, &ole->bat)) {
- return TRUE;
- }
-
- /* Read the directory */
- ole->dirent = info->root_dir = ole_dirent_new (ole, 0, NULL);
- if (ole->dirent == NULL) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-static guint8 const *
-gsf_infile_msole_read (GsfInfileMSOle *ole, size_t num_bytes, guint8 *buffer)
-{
- off_t first_block, last_block, raw_block, offset, i;
- guint8 const *data;
- guint8 *ptr;
- size_t count;
-
- /* small block files are preload */
- if (ole->dirent != NULL && ole->dirent->use_sb) {
- if (buffer != NULL) {
- memcpy (buffer, ole->stream.buf + ole->cur_offset,
num_bytes);
- ole->cur_offset += num_bytes;
- return buffer;
- }
- data = ole->stream.buf + ole->cur_offset;
- ole->cur_offset += num_bytes;
- return data;
- }
-
- /* GsfInput guarantees that num_bytes > 0 */
- first_block = OLE_BIG_BLOCK (ole->cur_offset, ole);
- last_block = OLE_BIG_BLOCK (ole->cur_offset + num_bytes - 1, ole);
- offset = ole->cur_offset & ole->info->bb.filter;
-
- /* optimization : are all the raw blocks contiguous */
- i = first_block;
- raw_block = ole->bat.block [i];
- while (++i <= last_block && ++raw_block == ole->bat.block [i])
- ;
- if (i > last_block) {
- /* optimization don't seek if we don't need to */
- if (ole->cur_block != first_block) {
- if (gsf_input_seek (ole->input,
- (off_t)(MAX (OLE_HEADER_SIZE,
ole->info->bb.size) + (ole->bat.block [first_block] << ole->info->bb.shift) +
offset),
- SEEK_SET) < 0)
- return NULL;
- }
- ole->cur_block = last_block;
- return gsf_input_read (ole->input,
- num_bytes,
- (unsigned char*) buffer);
- }
-
- /* damn, we need to copy it block by block */
- if (buffer == NULL) {
- if (ole->stream.buf_size < num_bytes) {
- if (ole->stream.buf != NULL)
- g_free (ole->stream.buf);
- ole->stream.buf_size = num_bytes;
- ole->stream.buf = g_new (guint8, num_bytes);
- }
- buffer = ole->stream.buf;
- }
-
- ptr = buffer;
- for (i = first_block ; i <= last_block ; i++ , ptr += count, num_bytes
-= count) {
- count = ole->info->bb.size - offset;
- if (count > num_bytes)
- count = num_bytes;
- data = ole_get_block (ole, ole->bat.block [i], NULL);
- if (data == NULL)
- return NULL;
-
- /* TODO : this could be optimized to avoid the copy */
- memcpy (ptr, data + offset, count);
- offset = 0;
- }
- ole->cur_block = BAT_MAGIC_UNUSED;
- ole->cur_offset += num_bytes;
- return buffer;
-}
-
-static struct GsfInput *
-gsf_infile_msole_new_child (GsfInfileMSOle *parent,
- MSOleDirent *dirent)
-{
- GsfInfileMSOle * child;
- MSOleInfo *info;
- MSOleBAT const *metabat;
- struct GsfInput *sb_file = NULL;
- size_t size_guess;
- char * buf;
-
-
- if ( (dirent->index != 0) &&
- (dirent->is_directory) ) {
- /* be wary. It seems as if some implementations pretend that
the
- * directories contain data */
- return gsf_input_new((const unsigned char*) "",
- (off_t) 0,
- 0);
- }
- child = ole_dup (parent);
- if (child == NULL)
- return NULL;
- child->dirent = dirent;
- child->size = (off_t) dirent->size;
-
- info = parent->info;
-
- if (dirent->use_sb) { /* build the bat */
- metabat = &info->sb.bat;
- size_guess = dirent->size >> info->sb.shift;
- sb_file = ole_info_get_sb_file (parent);
- } else {
- metabat = &info->bb.bat;
- size_guess = dirent->size >> info->bb.shift;
- }
- if (ole_make_bat (metabat, size_guess + 1, dirent->first_block,
&child->bat)) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
-
- if (dirent->use_sb) {
- unsigned i;
- guint8 const *data;
-
- if (sb_file == NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
-
- child->stream.buf_size = info->threshold;
- child->stream.buf = g_new (guint8, info->threshold);
-
- for (i = 0 ; i < child->bat.num_blocks; i++)
- if (gsf_input_seek (sb_file,
- (off_t)(child->bat.block [i] <<
info->sb.shift), SEEK_SET) < 0 ||
- (data = gsf_input_read (sb_file,
- info->sb.size,
- child->stream.buf + (i << info->sb.shift))) ==
NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- }
- buf = malloc(child->size);
- if (buf == NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- if (NULL == gsf_infile_msole_read(child,
- child->size,
- (guint8*) buf)) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- gsf_infile_msole_finalize(child);
- return gsf_input_new((const unsigned char*) buf,
- (off_t) dirent->size,
- 1);
-}
-
-
-static struct GsfInput *
-gsf_infile_msole_child_by_index (GsfInfileMSOle * ole, int target)
-{
- GList *p;
-
- for (p = ole->dirent->children; p != NULL ; p = p->next)
- if (target-- <= 0)
- return gsf_infile_msole_new_child (ole,
- (MSOleDirent *)p->data);
- return NULL;
-}
-
-static char const *
-gsf_infile_msole_name_by_index (GsfInfileMSOle * ole, int target)
-{
- GList *p;
-
- for (p = ole->dirent->children; p != NULL ; p = p->next)
- if (target-- <= 0)
- return ((MSOleDirent *)p->data)->name;
- return NULL;
-}
-
-static int
-gsf_infile_msole_num_children (GsfInfileMSOle * ole)
-{
- g_return_val_if_fail (ole->dirent != NULL, -1);
-
- if (!ole->dirent->is_directory)
- return -1;
- return g_list_length (ole->dirent->children);
-}
-
-
-/**
- * gsf_infile_msole_new :
- * @source :
- *
- * Opens the root directory of an MS OLE file.
- * NOTE : adds a reference to @source
- *
- * Returns : the new ole file handler
- **/
-static GsfInfileMSOle *
-gsf_infile_msole_new (struct GsfInput *source)
-{
- GsfInfileMSOle * ole;
-
- ole = malloc(sizeof(GsfInfileMSOle));
- if (ole == NULL)
- return NULL;
- gsf_infile_msole_init(ole);
- ole->input = source;
- ole->size = (off_t) 0;
-
- if (ole_init_info (ole)) {
- gsf_infile_msole_finalize(ole);
- return NULL;
- }
-
- return ole;
-}
-
-
-
-
-
-
/* ******************************** main extraction code
************************ */
/* using libgobject, needs init! */
@@ -1240,21 +53,27 @@
addKeyword(EXTRACTOR_KeywordList *oldhead,
const char *phrase,
EXTRACTOR_KeywordType type) {
- EXTRACTOR_KeywordList * keyword;
-
- if (strlen(phrase) == 0)
- return oldhead;
- if (0 == strcmp(phrase, "\"\""))
- return oldhead;
- if (0 == strcmp(phrase, "\" \""))
- return oldhead;
- if (0 == strcmp(phrase, " "))
- return oldhead;
- keyword = (EXTRACTOR_KeywordList*) malloc(sizeof(EXTRACTOR_KeywordList));
- keyword->next = oldhead;
- keyword->keyword = strdup(phrase);
- keyword->keywordType = type;
- return keyword;
+ EXTRACTOR_KeywordList * keyword;
+
+ if (strlen(phrase) == 0)
+ return oldhead;
+ if (0 == strcmp(phrase, "\"\""))
+ return oldhead;
+ if (0 == strcmp(phrase, "\" \""))
+ return oldhead;
+ if (0 == strcmp(phrase, " "))
+ return oldhead;
+ keyword = malloc(sizeof(EXTRACTOR_KeywordList));
+ if (keyword == NULL)
+ return oldhead; /* out of memory: leave the list unchanged */
+ keyword->keyword = strdup(phrase);
+ if (keyword->keyword == NULL) {
+ free(keyword); /* do not link a node without a keyword string */
+ return oldhead;
+ }
+ keyword->next = oldhead;
+ keyword->keywordType = type;
+ return keyword;
}
@@ -1273,123 +86,7 @@
0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
};
-typedef enum {
- GSF_MSOLE_META_DATA_COMPONENT,
- GSF_MSOLE_META_DATA_DOCUMENT,
- GSF_MSOLE_META_DATA_USER
-} GsfMSOleMetaDataType;
-
-typedef enum {
- LE_VT_EMPTY = 0,
- LE_VT_NULL = 1,
- LE_VT_I2 = 2,
- LE_VT_I4 = 3,
- LE_VT_R4 = 4,
- LE_VT_R8 = 5,
- LE_VT_CY = 6,
- LE_VT_DATE = 7,
- LE_VT_BSTR = 8,
- LE_VT_DISPATCH = 9,
- LE_VT_ERROR = 10,
- LE_VT_BOOL = 11,
- LE_VT_VARIANT = 12,
- LE_VT_UNKNOWN = 13,
- LE_VT_DECIMAL = 14,
- LE_VT_I1 = 16,
- LE_VT_UI1 = 17,
- LE_VT_UI2 = 18,
- LE_VT_UI4 = 19,
- LE_VT_I8 = 20,
- LE_VT_UI8 = 21,
- LE_VT_INT = 22,
- LE_VT_UINT = 23,
- LE_VT_VOID = 24,
- LE_VT_HRESULT = 25,
- LE_VT_PTR = 26,
- LE_VT_SAFEARRAY = 27,
- LE_VT_CARRAY = 28,
- LE_VT_USERDEFINED = 29,
- LE_VT_LPSTR = 30,
- LE_VT_LPWSTR = 31,
- LE_VT_FILETIME = 64,
- LE_VT_BLOB = 65,
- LE_VT_STREAM = 66,
- LE_VT_STORAGE = 67,
- LE_VT_STREAMED_OBJECT = 68,
- LE_VT_STORED_OBJECT = 69,
- LE_VT_BLOB_OBJECT = 70,
- LE_VT_CF = 71,
- LE_VT_CLSID = 72,
- LE_VT_VECTOR = 0x1000
-} GsfMSOleVariantType;
-
typedef struct {
- char const *name;
- guint32 id;
- GsfMSOleVariantType prefered_type;
-} GsfMSOleMetaDataPropMap;
-
-typedef struct {
- guint32 id;
- off_t offset;
-} GsfMSOleMetaDataProp;
-
-typedef struct {
- GsfMSOleMetaDataType type;
- off_t offset;
- guint32 size, num_props;
- GIConv iconv_handle;
- unsigned char_size;
- GHashTable *dict;
-} GsfMSOleMetaDataSection;
-
-static GsfMSOleMetaDataPropMap const document_props[] = {
- { "Category", 2, LE_VT_LPSTR },
- { "PresentationFormat", 3, LE_VT_LPSTR },
- { "NumBytes", 4, LE_VT_I4 },
- { "NumLines", 5, LE_VT_I4 },
- { "NumParagraphs", 6, LE_VT_I4 },
- { "NumSlides", 7, LE_VT_I4 },
- { "NumNotes", 8, LE_VT_I4 },
- { "NumHiddenSlides", 9, LE_VT_I4 },
- { "NumMMClips", 10, LE_VT_I4 },
- { "Scale", 11, LE_VT_BOOL },
- { "HeadingPairs", 12, LE_VT_VECTOR | LE_VT_VARIANT },
- { "DocumentParts", 13, LE_VT_VECTOR | LE_VT_LPSTR },
- { "Manager", 14, LE_VT_LPSTR },
- { "Company", 15, LE_VT_LPSTR },
- { "LinksDirty", 16, LE_VT_BOOL }
-};
-
-static GsfMSOleMetaDataPropMap const component_props[] = {
- { "Title", 2, LE_VT_LPSTR },
- { "Subject", 3, LE_VT_LPSTR },
- { "Author", 4, LE_VT_LPSTR },
- { "Keywords", 5, LE_VT_LPSTR },
- { "Comments", 6, LE_VT_LPSTR },
- { "Template", 7, LE_VT_LPSTR },
- { "LastSavedBy", 8, LE_VT_LPSTR },
- { "RevisionNumber", 9, LE_VT_LPSTR },
- { "TotalEditingTime", 10, LE_VT_FILETIME },
- { "LastPrinted", 11, LE_VT_FILETIME },
- { "CreateTime", 12, LE_VT_FILETIME },
- { "LastSavedTime", 13, LE_VT_FILETIME },
- { "NumPages", 14, LE_VT_I4 },
- { "NumWords", 15, LE_VT_I4 },
- { "NumCharacters", 16, LE_VT_I4 },
- { "Thumbnail", 17, LE_VT_CF },
- { "AppName", 18, LE_VT_LPSTR },
- { "Security", 19, LE_VT_I4 }
-};
-
-static GsfMSOleMetaDataPropMap const common_props[] = {
- { "Dictionary", 0, 0, /* magic */},
- { "CodePage", 1, LE_VT_UI2 },
- { "LOCALE_SYSTEM_DEFAULT", 0x80000000, LE_VT_UI4},
- { "CASE_SENSITIVE", 0x80000003, LE_VT_UI4},
-};
-
-typedef struct {
char * text;
EXTRACTOR_KeywordType type;
} Matches;
@@ -1398,8 +95,8 @@
{ "Title", EXTRACTOR_TITLE },
{ "PresentationFormat", EXTRACTOR_FORMAT },
{ "Category", EXTRACTOR_DESCRIPTION },
- { "Manager", EXTRACTOR_CREATED_FOR },
- { "Company", EXTRACTOR_ORGANIZATION },
+ { "Manager", EXTRACTOR_MANAGER },
+ { "Company", EXTRACTOR_COMPANY },
{ "Subject", EXTRACTOR_SUBJECT },
{ "Author", EXTRACTOR_AUTHOR },
{ "Keywords", EXTRACTOR_KEYWORDS },
@@ -1412,709 +109,98 @@
{ "NumBytes", EXTRACTOR_SIZE },
{ "CreatedTime", EXTRACTOR_CREATION_DATE },
{ "LastSavedTime" , EXTRACTOR_MODIFICATION_DATE },
+ { "gsf:company", EXTRACTOR_COMPANY },
+ /* { "gsf:security", EXTRACTOR_SECURITY }, */
+ { "gsf:character-count", EXTRACTOR_CHARACTER_COUNT },
+ { "gsf:page-count", EXTRACTOR_PAGE_COUNT },
+ { "gsf:line-count", EXTRACTOR_LINE_COUNT },
+ { "gsf:word-count", EXTRACTOR_WORD_COUNT },
+ { "gsf:paragraph-count", EXTRACTOR_PARAGRAPH_COUNT },
+ { "gsf:last-saved-by", EXTRACTOR_LAST_SAVED_BY },
+ /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */
+ { "gsf:manager", EXTRACTOR_MANAGER },
+ { "dc:title", EXTRACTOR_TITLE },
+ { "dc:creator", EXTRACTOR_CREATOR },
+ { "dc:date", EXTRACTOR_DATE },
+ { "dc:subject", EXTRACTOR_SUBJECT },
+ { "dc:keywords", EXTRACTOR_KEYWORDS },
+ { "dc:last-printed", EXTRACTOR_LAST_PRINTED },
+ { "dc:description", EXTRACTOR_DESCRIPTION },
+ { "meta:creation-date", EXTRACTOR_CREATION_DATE },
+ /* { "meta:editing-duration", EXTRACTOR_TOTAL_EDITING_TIME }, // encoding? */
+ { "meta:generator", EXTRACTOR_GENERATOR },
+ { "meta:template", EXTRACTOR_TEMPLATE },
+ /* { "meta:editing-cycles", EXTRACTOR_EDITING_CYCLES }, // usually "FALSE" */
+ /* { "msole:codepage", EXTRACTOR_CHARACTER_SET }, */
{ NULL, 0 },
};
+static void processMetadata(gpointer key,
+ gpointer value,
+ gpointer user_data) {
+ struct EXTRACTOR_Keywords ** pprev = user_data;
+ const char * type = key;
+ const GsfDocProp * prop = value;
+ const GValue * gval;
+ char * contents;
+ int pos;
-static char const *
-msole_prop_id_to_gsf (GsfMSOleMetaDataSection *section, guint32 id)
-{
- char const *res = NULL;
- GsfMSOleMetaDataPropMap const *map = NULL;
- unsigned i = 0;
-
- if (section->dict != NULL) {
- if (id & 0x1000000) {
- id &= ~0x1000000;
- d (printf ("LINKED "););
- }
-
- res = g_hash_table_lookup (section->dict, GINT_TO_POINTER (id));
-
- if (res != NULL) {
- d (printf (res););
- return res;
- }
+ if ( (key == NULL) ||
+ (value == NULL) )
+ return;
+ gval = gsf_doc_prop_get_val(prop);
+
+ if (G_VALUE_TYPE(gval) == G_TYPE_STRING) {
+ contents = strdup(g_value_get_string(gval));
+ } else {
+ /* convert other formats? */
+ contents = g_strdup_value_contents(gval);
}
-
- if (section->type == GSF_MSOLE_META_DATA_COMPONENT) {
- map = component_props;
- i = G_N_ELEMENTS (component_props);
- } else if (section->type == GSF_MSOLE_META_DATA_DOCUMENT) {
- map = document_props;
- i = G_N_ELEMENTS (document_props);
+ if (contents == NULL) /* allocation failed: check before strlen() dereferences it */
+ return;
+ if ( (strlen(contents) > 0) &&
+ (contents[strlen(contents)-1] == '\n') )
+ contents[strlen(contents)-1] = '\0';
+ pos = 0;
+ while (tmap[pos].text != NULL) {
+ if (0 == strcmp(tmap[pos].text,
+ type))
+ break;
+ pos++;
}
- while (i-- > 0)
- if (map[i].id == id) {
- d (printf (map[i].name););
- return map[i].name;
- }
-
- map = common_props;
- i = G_N_ELEMENTS (common_props);
- while (i-- > 0)
- if (map[i].id == id) {
- d (printf (map[i].name););
- return map[i].name;
- }
-
- d (printf ("_UNKNOWN_(0x%x %d)", id, id););
-
- return NULL;
-}
-
-static GValue *
-msole_prop_parse(GsfMSOleMetaDataSection *section,
- guint32 type,
- guint8 const **data,
- guint8 const *data_end)
-{
- GValue *res;
- char *str;
- guint32 len;
- gboolean const is_vector = type & LE_VT_VECTOR;
- GError * error;
-
- g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in
a prop set */
-
- type &= 0xfff;
-
- if (is_vector) {
- unsigned i, n;
-
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
- n = GSF_LE_GET_GUINT32 (*data);
- *data += 4;
-
- d (printf (" array with %d elem\n", n););
- for (i = 0 ; i < n ; i++) {
- GValue *v;
- d (printf ("\t[%d] ", i););
- v = msole_prop_parse (section, type, data, data_end);
- if (v) {
- /* FIXME: do something with it. */
- if (G_IS_VALUE (v))
- g_value_unset (v);
- g_free (v);
- }
- }
- return NULL;
- }
-
- res = g_new0 (GValue, 1);
- switch (type) {
- case LE_VT_EMPTY : d (puts ("VT_EMPTY"););
- /* value::unset == empty */
- break;
-
- case LE_VT_NULL : d (puts ("VT_NULL"););
- /* value::unset == null too :-) do we need to distinguish ? */
- break;
-
- case LE_VT_I2 : d (puts ("VT_I2"););
- g_return_val_if_fail (*data + 2 <= data_end, NULL);
- g_value_init (res, G_TYPE_INT);
- g_value_set_int (res, GSF_LE_GET_GINT16 (*data));
- *data += 2;
- break;
-
- case LE_VT_I4 : d (puts ("VT_I4"););
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
- g_value_init (res, G_TYPE_INT);
- g_value_set_int (res, GSF_LE_GET_GINT32 (*data));
- *data += 4;
- break;
-
- case LE_VT_R4 : d (puts ("VT_R4"););
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
- g_value_init (res, G_TYPE_FLOAT);
- g_value_set_float (res, GSF_LE_GET_FLOAT (*data));
- *data += 4;
- break;
-
- case LE_VT_R8 : d (puts ("VT_R8"););
- g_return_val_if_fail (*data + 8 <= data_end, NULL);
- g_value_init (res, G_TYPE_DOUBLE);
- g_value_set_double (res, GSF_LE_GET_DOUBLE (*data));
- *data += 8;
- break;
-
- case LE_VT_CY : d (puts ("VT_CY"););
- /* 8-byte two's complement integer (scaled by 10,000) */
- /* CHEAT : just store as an int64 for now */
- g_return_val_if_fail (*data + 8 <= data_end, NULL);
- g_value_init (res, G_TYPE_INT64);
- g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
- break;
-
- case LE_VT_DATE : d (puts ("VT_DATE"););
- break;
-
- case LE_VT_BSTR : d (puts ("VT_BSTR"););
- break;
-
- case LE_VT_DISPATCH : d (puts ("VT_DISPATCH"););
- break;
-
- case LE_VT_BOOL : d (puts ("VT_BOOL"););
- g_return_val_if_fail (*data + 1 <= data_end, NULL);
- g_value_init (res, G_TYPE_BOOLEAN);
- g_value_set_boolean (res, **data ? TRUE : FALSE);
- *data += 1;
- break;
-
- case LE_VT_VARIANT : d (printf ("VT_VARIANT containing a "););
- g_free (res);
- type = GSF_LE_GET_GUINT32 (*data);
- *data += 4;
- return msole_prop_parse (section, type, data, data_end);
-
- case LE_VT_UI1 : d (puts ("VT_UI1"););
- g_return_val_if_fail (*data + 1 <= data_end, NULL);
- g_value_init (res, G_TYPE_UCHAR);
- g_value_set_uchar (res, (guchar)(**data));
- *data += 1;
- break;
-
- case LE_VT_UI2 : d (puts ("VT_UI2"););
- g_return_val_if_fail (*data + 2 <= data_end, NULL);
- g_value_init (res, G_TYPE_UINT);
- g_value_set_uint (res, GSF_LE_GET_GUINT16 (*data));
- *data += 2;
- break;
-
- case LE_VT_UI4 : d (puts ("VT_UI4"););
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
- g_value_init (res, G_TYPE_UINT);
- *data += 4;
- d (printf ("%u\n", GSF_LE_GET_GUINT32 (*data)););
- break;
-
- case LE_VT_I8 : d (puts ("VT_I8"););
- g_return_val_if_fail (*data + 8 <= data_end, NULL);
- g_value_init (res, G_TYPE_INT64);
- g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
- *data += 8;
- break;
-
- case LE_VT_UI8 : d (puts ("VT_UI8"););
- g_return_val_if_fail (*data + 8 <= data_end, NULL);
- g_value_init (res, G_TYPE_UINT64);
- g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data));
- *data += 8;
- break;
-
- case LE_VT_LPSTR : d (puts ("VT_LPSTR"););
- /*
- * This is the representation of many strings. It is stored in
- * the same representation as VT_BSTR. Note that the serialized
- * representation of VP_LPSTR has a preceding byte count, whereas
- * the in-memory representation does not.
- */
- /* be anal and safe */
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
- len = GSF_LE_GET_GUINT32 (*data);
-
- g_return_val_if_fail (len < 0x10000, NULL);
- g_return_val_if_fail (*data + 4 + len*section->char_size <= data_end,
NULL);
-
- error = NULL;
- d (gsf_mem_dump (*data + 4, len * section->char_size););
- str = g_convert_with_iconv ((char*) *data + 4,
- len * section->char_size,
- section->iconv_handle, NULL, NULL, &error);
-
- g_value_init (res, G_TYPE_STRING);
- if (NULL != str) {
- g_value_set_string (res, str);
- g_free (str);
- } else if (NULL != error) {
- g_warning ("error: %s", error->message);
- g_error_free (error);
- } else {
- // g_warning ("unknown error converting string property, using blank");
- }
- *data += 4 + len * section->char_size;
- break;
-
- case LE_VT_LPWSTR : d (puts ("VT_LPWSTR"););
- /*
- * A counted and null-terminated Unicode string; a DWORD character
- * count (where the count includes the terminating null) followed
- * by that many Unicode (16-bit) characters. Note that the count
- * is character count, not byte count.
- */
- /* be anal and safe */
- g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
- len = GSF_LE_GET_GUINT32 (*data);
-
- g_return_val_if_fail (len < 0x10000, NULL);
- g_return_val_if_fail (*data + 4 + len <= data_end, NULL);
-
- error = NULL;
- d (gsf_mem_dump (*data + 4, len*2););
- str = g_convert ((char*) *data + 4,
- len*2,
- "UTF-8",
- "UTF-16LE",
- NULL,
- NULL,
- &error);
-
- g_value_init (res, G_TYPE_STRING);
- if (NULL != str) {
- g_value_set_string (res, str);
- g_free (str);
- } else if (NULL != error) {
- g_warning ("error: %s", error->message);
- g_error_free (error);
- } else {
- g_warning ("unknown error converting string property, using blank");
- }
- *data += 4 + len*2;
- break;
-
- case LE_VT_FILETIME : d (puts ("VT_FILETIME"););
-
- g_return_val_if_fail (*data + 8 <= data_end, NULL);
-
- g_value_init (res, G_TYPE_STRING);
- {
- /* ft * 100ns since Jan 1 1601 */
- guint64 ft = GSF_LE_GET_GUINT64 (*data);
-
- ft /= 10000000; /* convert to seconds */
-#ifdef _MSC_VER
- ft -= 11644473600i64; /* move to Jan 1 1970 */
-#else
- ft -= 11644473600ULL; /* move to Jan 1 1970 */
+ if (tmap[pos].text != NULL)
+ *pprev = addKeyword(*pprev,
+ contents,
+ tmap[pos].type);
+#if DEBUG_OLE2
+ else
+ printf("No match for type `%s'\n",
+ type);
#endif
-
- str = g_strdup(ctime((time_t*)&ft));
-
- g_value_set_string (res, str);
-
- *data += 8;
- break;
- }
- case LE_VT_BLOB : d (puts ("VT_BLOB"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_STREAM : d (puts ("VT_STREAM"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_STORAGE : d (puts ("VT_STORAGE"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_STREAMED_OBJECT: d (puts ("VT_STREAMED_OBJECT"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_STORED_OBJECT : d (puts ("VT_STORED_OBJECT"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_BLOB_OBJECT : d (puts ("VT_BLOB_OBJECT"););
- g_free (res);
- res = NULL;
- break;
- case LE_VT_CF : d (puts ("VT_CF"););
- break;
- case LE_VT_CLSID : d (puts ("VT_CLSID"););
- *data += 16;
- g_free (res);
- res = NULL;
- break;
-
- case LE_VT_ERROR :
- case LE_VT_UNKNOWN :
- case LE_VT_DECIMAL :
- case LE_VT_I1 :
- case LE_VT_INT :
- case LE_VT_UINT :
- case LE_VT_VOID :
- case LE_VT_HRESULT :
- case LE_VT_PTR :
- case LE_VT_SAFEARRAY :
- case LE_VT_CARRAY :
- case LE_VT_USERDEFINED :
- warning ("type %d (0x%x) is not permitted in property sets",
- type, type);
- g_free (res);
- res = NULL;
- break;
-
- default :
- warning ("Unknown property type %d (0x%x)", type, type);
- g_free (res);
- res = NULL;
- };
-
- d ( if (res != NULL && G_IS_VALUE (res)) {
- char *val = g_strdup_value_contents (res);
- d(printf ("%s\n", val););
- g_free (val);
- } else
- puts ("<unparsed>\n");
- );
- return res;
+ free(contents);
}
-static GValue *
-msole_prop_read (struct GsfInput *in,
- GsfMSOleMetaDataSection *section,
- GsfMSOleMetaDataProp *props,
- unsigned i)
-{
- guint32 type;
- guint8 const *data;
- /* TODO : why size-4 ? I must be missing something */
- off_t size = ((i+1) >= section->num_props)
- ? section->size-4 : props[i+1].offset;
- char const *prop_name;
- g_return_val_if_fail (i < section->num_props, NULL);
- g_return_val_if_fail (size >= props[i].offset + 4, NULL);
+static struct EXTRACTOR_Keywords *
+process(GsfInput * in,
+ struct EXTRACTOR_Keywords * prev) {
+ GsfDocMetaData * sections;
+ GError * error;
- size -= props[i].offset; /* includes the type id */
- if (gsf_input_seek (in, section->offset+props[i].offset, SEEK_SET) ||
- NULL == (data = gsf_input_read (in, size, NULL))) {
- warning ("failed to read prop #%d", i);
- return NULL;
+ sections = gsf_doc_meta_data_new();
+ error = gsf_msole_metadata_read(in, sections);
+ if (error == NULL) {
+ gsf_doc_meta_data_foreach(sections,
+ &processMetadata,
+ &prev);
}
-
- type = GSF_LE_GET_GUINT32 (data);
- data += 4;
-
- /* dictionary is magic */
- if (props[i].id == 0) {
- guint32 len, id, i, n;
- gsize gslen;
- char *name;
- guint8 const *start = data;
-
- g_return_val_if_fail (section->dict == NULL, NULL);
-
- section->dict = g_hash_table_new_full (
- g_direct_hash, g_direct_equal,
- NULL, g_free);
-
- n = type;
- for (i = 0 ; i < n ; i++) {
- id = GSF_LE_GET_GUINT32 (data);
- len = GSF_LE_GET_GUINT32 (data + 4);
-
- g_return_val_if_fail (len < 0x10000, NULL);
-
- gslen = 0;
- name = g_convert_with_iconv ((char*) data + 8,
- len * section->char_size,
- section->iconv_handle, &gslen, NULL, NULL);
-
- len = (guint32)gslen;
- data += 8 + len;
-
- d (printf ("\t%u == %s\n", id, name););
- g_hash_table_replace (section->dict,
- GINT_TO_POINTER (id), name);
-
- /* MS documentation blows goats !
- * The docs claim there are padding bytes in the dictionary.
- * Their examples show padding bytes.
- * In reality non-unicode strings do not see to have padding.
- */
- if (section->char_size != 1 && (data - start) % 4)
- data += 4 - ((data - start) % 4);
- }
-
- return NULL;
- }
-
- d (printf ("%u) ", i););
- prop_name = msole_prop_id_to_gsf (section, props[i].id);
-
- d (printf (" @ %x %x = ", (unsigned)props[i].offset, (unsigned)size););
- return msole_prop_parse (section, type, &data, data + size);
-}
-
-static int
-msole_prop_cmp (gconstpointer a, gconstpointer b)
-{
- GsfMSOleMetaDataProp const *prop_a = a ;
- GsfMSOleMetaDataProp const *prop_b = b ;
- return prop_a->offset - prop_b->offset;
-}
-
-/**
- * gsf_msole_iconv_open_codepage_for_import :
- * @to:
- * @codepage :
- *
- * Returns an iconv converter for @codepage -> utf8.
- **/
-static GIConv
-gsf_msole_iconv_open_codepage_for_import(char const *to,
- int codepage)
-{
- GIConv iconv_handle;
-
- g_return_val_if_fail (to != NULL, (GIConv)(-1));
- /* sometimes it is stored as signed short */
- if (codepage == 65001 || codepage == -535) {
- iconv_handle = g_iconv_open (to, "UTF-8");
- if (iconv_handle != (GIConv)(-1))
- return iconv_handle;
- } else if (codepage != 1200 && codepage != 1201) {
- char* src_charset = g_strdup_printf ("CP%d", codepage);
- iconv_handle = g_iconv_open (to, src_charset);
- g_free (src_charset);
- if (iconv_handle != (GIConv)(-1))
- return iconv_handle;
- } else {
- char const *from = (codepage == 1200) ? "UTF-16LE" : "UTF-16BE";
- iconv_handle = g_iconv_open (to, from);
- if (iconv_handle != (GIConv)(-1))
- return iconv_handle;
- }
-
- /* Try aliases. */
- if (codepage == 10000) {
- /* gnu iconv. */
- iconv_handle = g_iconv_open (to, "MACROMAN");
- if (iconv_handle != (GIConv)(-1))
- return iconv_handle;
-
- /* glibc. */
- iconv_handle = g_iconv_open (to, "MACINTOSH");
- if (iconv_handle != (GIConv)(-1))
- return iconv_handle;
- }
-
- warning ("Unable to open an iconv handle from codepage %d -> %s",
- codepage, to);
- return (GIConv)(-1);
-}
-
-/**
- * gsf_msole_iconv_open_for_import :
- * @codepage :
- *
- * Returns an iconv converter for single byte encodings @codepage -> utf8.
- * Attempt to handle the semantics of a specification for multibyte encodings
- * since this is only supposed to be used for single bytes.
- **/
-static GIConv
-gsf_msole_iconv_open_for_import (int codepage)
-{
- return gsf_msole_iconv_open_codepage_for_import ("UTF-8", codepage);
-}
-
-
-
-
-
-static struct EXTRACTOR_Keywords * process(struct GsfInput * in,
- struct EXTRACTOR_Keywords * prev) {
- guint8 const *data = gsf_input_read (in, 28, NULL);
- guint16 version;
- guint32 os, num_sections;
- unsigned i, j;
- GsfMSOleMetaDataSection *sections;
- GsfMSOleMetaDataProp *props;
-
- if (NULL == data)
- return prev;
-
- /* NOTE : high word is the os, low word is the os version
- * 0 = win16
- * 1 = mac
- * 2 = win32
- */
- os = GSF_LE_GET_GUINT16 (data + 6);
-
- version = GSF_LE_GET_GUINT16 (data + 2);
-
- num_sections = GSF_LE_GET_GUINT32 (data + 24);
- if (GSF_LE_GET_GUINT16 (data + 0) != 0xfffe
- || (version != 0 && version != 1)
- || os > 2
- || num_sections > 100) { /* arbitrary sanity check */
- return prev;
- }
-
- /* extract the section info */
- sections = (GsfMSOleMetaDataSection *)g_alloca (sizeof (GsfMSOleMetaDataSection)* num_sections);
- for (i = 0 ; i < num_sections ; i++) {
- data = gsf_input_read (in, 20, NULL);
- if (NULL == data) {
- return prev;
- }
- if (!memcmp (data, component_guid, sizeof (component_guid)))
- sections [i].type = GSF_MSOLE_META_DATA_COMPONENT;
- else if (!memcmp (data, document_guid, sizeof (document_guid)))
- sections [i].type = GSF_MSOLE_META_DATA_DOCUMENT;
- else if (!memcmp (data, user_guid, sizeof (user_guid)))
- sections [i].type = GSF_MSOLE_META_DATA_USER;
- else {
- sections [i].type = GSF_MSOLE_META_DATA_USER;
- warning ("Unknown property section type, treating it as USER");
- }
-
- sections [i].offset = GSF_LE_GET_GUINT32 (data + 16);
-#ifndef NO_DEBUG_OLE_PROPS
- d(printf ("0x%x\n", (guint32)sections [i].offset););
-#endif
- }
- for (i = 0 ; i < num_sections ; i++) {
- if (gsf_input_seek (in, sections[i].offset, SEEK_SET) ||
- NULL == (data = gsf_input_read (in, 8, NULL))) {
- return prev;
- }
-
- sections[i].iconv_handle = (GIConv)-1;
- sections[i].char_size = 1;
- sections[i].dict = NULL;
- sections[i].size = GSF_LE_GET_GUINT32 (data); /* includes header */
- sections[i].num_props = GSF_LE_GET_GUINT32 (data + 4);
- if (sections[i].num_props <= 0)
- continue;
- props = g_new (GsfMSOleMetaDataProp, sections[i].num_props);
- for (j = 0; j < sections[i].num_props; j++) {
- if (NULL == (data = gsf_input_read (in, 8, NULL))) {
- g_free (props);
- return prev;
- }
-
- props [j].id = GSF_LE_GET_GUINT32 (data);
- props [j].offset = GSF_LE_GET_GUINT32 (data + 4);
- }
-
- /* order prop info by offset to facilitate bounds checking */
- qsort (props, sections[i].num_props,
- sizeof (GsfMSOleMetaDataProp),
- msole_prop_cmp);
-
- sections[i].iconv_handle = (GIConv)-1;
- sections[i].char_size = 1;
- for (j = 0; j < sections[i].num_props; j++) /* first codepage */
- if (props[j].id == 1) {
- GValue *v = msole_prop_read (in, sections+i, props, j);
- if (v != NULL) {
- if (G_IS_VALUE (v)) {
- if (G_VALUE_HOLDS_INT (v)) {
- int codepage = g_value_get_int (v);
- sections[i].iconv_handle = gsf_msole_iconv_open_for_import (codepage);
- if (codepage == 1200 || codepage == 1201)
- sections[i].char_size = 2;
- }
- g_value_unset (v);
- }
- g_free (v) ;
- }
- }
- if (sections[i].iconv_handle == (GIConv)-1)
- sections[i].iconv_handle = gsf_msole_iconv_open_for_import (1252);
-
- for (j = 0; j < sections[i].num_props; j++) /* then dictionary */
- if (props[j].id == 0) {
- GValue *v = msole_prop_read (in, sections+i, props, j);
- if (v) {
- if (G_VALUE_TYPE(v) == G_TYPE_STRING) {
- gchar * contents = g_strdup_value_contents(v);
- free(contents);
- } else {
-
- /* FIXME: do something with non-strings... */
- }
- if (G_IS_VALUE (v))
- g_value_unset (v);
- g_free (v);
- }
- }
- for (j = 0; j < sections[i].num_props; j++) /* the rest */
- if (props[j].id > 1) {
- GValue *v = msole_prop_read (in, sections+i, props, j);
- if (v && G_IS_VALUE(v)) {
- gchar * contents = NULL;
- int pc;
- int ipc;
-
- if (G_VALUE_TYPE(v) == G_TYPE_STRING) {
- contents = strdup(g_value_get_string(v));
- } else {
- /* convert other formats? */
- contents = g_strdup_value_contents(v);
- }
- pc = 0;
- if (contents != NULL) {
- for (ipc=strlen(contents)-1;ipc>=0;ipc--)
- if ( (isprint(contents[ipc])) &&
- (! isspace(contents[ipc])) )
- pc++;
- if ( (strlen(contents) > 0) &&
- (contents[strlen(contents)-1] == '\n') )
- contents[strlen(contents)-1] = '\0';
- }
- if (pc > 0) {
- int pos = 0;
- const char * prop
- = msole_prop_id_to_gsf(sections+i, props[j].id);
- if (prop != NULL) {
- while (tmap[pos].text != NULL) {
- if (0 == strcmp(tmap[pos].text,
- prop))
- break;
- pos++;
- }
- if (tmap[pos].text != NULL)
- prev = addKeyword(prev,
- contents,
- tmap[pos].type);
- }
- }
- if (contents != NULL)
- free(contents);
- }
- if (v) {
- if (G_IS_VALUE (v))
- g_value_unset (v);
- g_free (v);
- }
- }
-
- gsf_iconv_close (sections[i].iconv_handle);
- g_free (props);
- if (sections[i].dict != NULL)
- g_hash_table_destroy (sections[i].dict);
- }
- switch (os) {
- case 0:
- prev = addKeyword(prev,
- "Win16",
- EXTRACTOR_OS);
- break;
- case 1:
- prev = addKeyword(prev,
- "MacOS",
- EXTRACTOR_OS);
- break;
- case 2:
- prev = addKeyword(prev,
- "Win32",
- EXTRACTOR_OS);
- break;
- }
+ g_object_unref(G_OBJECT(sections));
return prev;
}
-static struct EXTRACTOR_Keywords * processSO(struct GsfInput * src,
- struct EXTRACTOR_Keywords * prev) {
+static struct EXTRACTOR_Keywords *
+processSO(GsfInput * src,
+ struct EXTRACTOR_Keywords * prev) {
off_t size;
char * buf;
@@ -2161,61 +247,290 @@
return prev;
}
+/* *************** wordleaker stuff *************** */
+
+#define __(a) dgettext("iso-639", a)
+
+static const char * lidToLanguage( unsigned int lid ) {
+ switch ( lid ) {
+ case 0x0400:
+ return _("No Proofing");
+ case 0x0401:
+ return __("Arabic");
+ case 0x0402:
+ return __("Bulgarian");
+ case 0x0403:
+ return __("Catalan");
+ case 0x0404:
+ return _("Traditional Chinese");
+ case 0x0804:
+ return _("Simplified Chinese");
+ case 0x0405:
+ return __("Chechen");
+ case 0x0406:
+ return __("Danish");
+ case 0x0407:
+ return __("German");
+ case 0x0807:
+ return _("Swiss German");
+ case 0x0408:
+ return __("Greek");
+ case 0x0409:
+ return _("U.S. English");
+ case 0x0809:
+ return _("U.K. English");
+ case 0x0c09:
+ return _("Australian English");
+ case 0x040a:
+ return _("Castilian Spanish");
+ case 0x080a:
+ return _("Mexican Spanish");
+ case 0x040b:
+ return __("Finnish");
+ case 0x040c:
+ return __("French");
+ case 0x080c:
+ return _("Belgian French");
+ case 0x0c0c:
+ return _("Canadian French");
+ case 0x100c:
+ return _("Swiss French");
+ case 0x040d:
+ return __("Hebrew");
+ case 0x040e:
+ return __("Hungarian");
+ case 0x040f:
+ return __("Icelandic");
+ case 0x0410:
+ return __("Italian");
+ case 0x0810:
+ return _("Swiss Italian");
+ case 0x0411:
+ return __("Japanese");
+ case 0x0412:
+ return __("Korean");
+ case 0x0413:
+ return __("Dutch");
+ case 0x0813:
+ return _("Belgian Dutch");
+ case 0x0414:
+ return _("Norwegian Bokmal");
+ case 0x0814:
+ return __("Norwegian Nynorsk");
+ case 0x0415:
+ return __("Polish");
+ case 0x0416:
+ return __("Brazilian Portuguese");
+ case 0x0816:
+ return __("Portuguese");
+ case 0x0417:
+ return _("Rhaeto-Romanic");
+ case 0x0418:
+ return __("Romanian");
+ case 0x0419:
+ return __("Russian");
+ case 0x041a:
+ return _("Croato-Serbian (Latin)");
+ case 0x081a:
+ return _("Serbo-Croatian (Cyrillic)");
+ case 0x041b:
+ return __("Slovak");
+ case 0x041c:
+ return __("Albanian");
+ case 0x041d:
+ return __("Swedish");
+ case 0x041e:
+ return __("Thai");
+ case 0x041f:
+ return __("Turkish");
+ case 0x0420:
+ return __("Urdu");
+ case 0x0421:
+ return __("Bahasa");
+ case 0x0422:
+ return __("Ukrainian");
+ case 0x0423:
+ return __("Byelorussian");
+ case 0x0424:
+ return __("Slovenian");
+ case 0x0425:
+ return __("Estonian");
+ case 0x0426:
+ return __("Latvian");
+ case 0x0427:
+ return __("Lithuanian");
+ case 0x0429:
+ return _("Farsi");
+ case 0x042D:
+ return __("Basque");
+ case 0x042F:
+ return __("Macedonian");
+ case 0x0436:
+ return __("Afrikaans");
+ case 0x043E:
+ return __("Malayalam");
+ default:
+ return NULL;
+ }
+}
+
+
+static struct EXTRACTOR_Keywords *
+history_extract(GsfInput * stream,
+ unsigned int lcbSttbSavedBy,
+ unsigned int fcSttbSavedBy,
+ struct EXTRACTOR_Keywords * prev) {
+ unsigned int where = 0;
+ unsigned char * lbuffer;
+ unsigned int i;
+ unsigned int length;
+ char * author;
+ char * filename;
+ char * rbuf;
+ unsigned int nRev;
+
+ // goto offset of revision
+ gsf_input_seek(stream, fcSttbSavedBy, G_SEEK_SET);
+ if (gsf_input_remaining(stream) < lcbSttbSavedBy)
+ return prev;
+ lbuffer = malloc(lcbSttbSavedBy);
+ // read all the revision history
+ gsf_input_read(stream, lcbSttbSavedBy, lbuffer);
+ // there are n strings, so n/2 revisions (author & file)
+ nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2;
+ where = 6;
+ for (i=0; i < nRev; i++) {
+ if (where >= lcbSttbSavedBy)
+ break;
+ length = lbuffer[where++];
+ if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
+ (where + 2 * length + 2 <= where) )
+ break;
+ author = convertToUtf8((const char*) &lbuffer[where],
+ length * 2,
+ "UTF-16BE");
+ where += length * 2 + 1;
+ length = lbuffer[where++];
+ if ( (where + 2 * length >= lcbSttbSavedBy) ||
+ (where + 2 * length + 1 <= where) )
+ break;
+ filename = convertToUtf8((const char*) &lbuffer[where],
+ length * 2,
+ "UTF-16BE");
+ where += length * 2 + 1;
+ rbuf = malloc(strlen(author) + strlen(filename) + 512);
+ snprintf(rbuf, 512 + strlen(author) + strlen(filename),
+ _("Revision #%u: Author '%s' worked on '%s'"),
+ i, author, filename);
+ free(author);
+ free(filename);
+ prev = addKeyword(prev,
+ rbuf,
+ EXTRACTOR_REVISION_HISTORY);
+ free(rbuf);
+ }
+ free(lbuffer);
+ return prev;
+}
+
+
+/* ************** main method *********** */
+
struct EXTRACTOR_Keywords *
libextractor_ole2_extract(const char * filename,
const char * data,
size_t size,
struct EXTRACTOR_Keywords * prev) {
- struct GsfInput *input;
- struct GsfInfileMSOle * infile;
- struct GsfInput * src;
+ GsfInput * input;
+ GsfInfile * infile;
+ GsfInput * src;
+ GError * err = NULL;
const char * name;
- const char * software = 0;
+ const char * software = NULL;
int i;
+ unsigned int lcb;
+ unsigned int fcb;
+ const unsigned char * data512;
+ unsigned int lid;
+ const char * lang;
- input = gsf_input_new((const unsigned char*) data,
- (off_t) size,
- 0);
+ if (size < 512 + 898)
+ return prev; /* can hardly be OLE2 */
+ input = gsf_input_memory_new((const guint8 *) data,
+ (gsf_off_t) size,
+ FALSE);
if (input == NULL)
return prev;
- infile = gsf_infile_msole_new(input);
- if (infile == NULL)
+ infile = gsf_infile_msole_new(input, &err);
+ if (infile == NULL) {
+ g_object_unref(G_OBJECT(input));
return prev;
-
- for (i=0;i<gsf_infile_msole_num_children(infile);i++) {
- name = gsf_infile_msole_name_by_index (infile, i);
+ }
+ lcb = 0;
+ fcb = 0;
+ for (i=0;i<gsf_infile_num_children(infile);i++) {
+ name = gsf_infile_name_by_index (infile, i);
src = NULL;
if (name == NULL)
continue;
if ( (0 == strcmp(name, "\005SummaryInformation"))
|| (0 == strcmp(name, "\005DocumentSummaryInformation")) ) {
- src = gsf_infile_msole_child_by_index (infile, i);
- if (src != NULL)
+ src = gsf_infile_child_by_index (infile, i);
+ if (src != NULL)
prev = process(src,
prev);
}
if (0 == strcmp(name, "SfxDocumentInfo")) {
- src = gsf_infile_msole_child_by_index (infile, i);
+ src = gsf_infile_child_by_index (infile, i);
if (src != NULL)
prev = processSO(src,
prev);
}
if (src != NULL)
- gsf_input_finalize(src);
+ g_object_unref(G_OBJECT(src));
}
- gsf_infile_msole_finalize(infile);
+ data512 = (const unsigned char*) &data[512];
+ lid = data512[6] + (data512[7] << 8);
+ lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16) + (data512[729] << 24);
+ fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16) + (data512[725] << 24);
+ lang = lidToLanguage(lid);
+ if (lang != NULL) {
+ prev = addKeyword(prev,
+ lang,
+ EXTRACTOR_LANGUAGE);
+ }
+ if (lcb >= 6) {
+ for (i=0;i<gsf_infile_num_children(infile);i++) {
+ name = gsf_infile_name_by_index (infile, i);
+ if (name == NULL)
+ continue;
+ if ( (0 == strcmp(name, "1Table")) ||
+ (0 == strcmp(name, "0Table")) ) {
+ src = gsf_infile_child_by_index (infile, i);
+ if (src != NULL) {
+ prev = history_extract(src,
+ lcb,
+ fcb,
+ prev);
+ g_object_unref(G_OBJECT(src));
+ }
+ }
+ }
+ }
+ g_object_unref(G_OBJECT(infile));
+
/*
* Hack to return an appropriate mimetype
*/
software = EXTRACTOR_extractLast(EXTRACTOR_SOFTWARE, prev);
- if(NULL == software) {
+ if (NULL == software) {
/*
* when very puzzled, just look at file magic number
*/
- if( (8 < size)
- && (0 == memcmp(data, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8)) )
+ if ( (8 < size)
+ && (0 == memcmp(data, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8)) )
software = "Microsoft Office";
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r2684 - in Extractor: . m4 src/include src/main src/plugins src/plugins/hash src/plugins/ole2,
grothoff <=