[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r743 - Extractor/src/main
From: grothoff
Subject: [GNUnet-SVN] r743 - Extractor/src/main
Date: Fri, 6 May 2005 04:17:52 -0700 (PDT)
Author: grothoff
Date: 2005-05-06 04:17:48 -0700 (Fri, 06 May 2005)
New Revision: 743
Modified:
Extractor/src/main/Extractor.py
Extractor/src/main/libextractor_python.c
Log:
load-unload
Modified: Extractor/src/main/Extractor.py
===================================================================
--- Extractor/src/main/Extractor.py 2005-05-06 10:39:09 UTC (rev 742)
+++ Extractor/src/main/Extractor.py 2005-05-06 11:17:48 UTC (rev 743)
@@ -1,6 +1,31 @@
"""Extractor.py
-Modul docstring...
+ This file is part of libextractor.
+ (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+
+libextractor is a simple library for keyword extraction. libextractor
+does not support all formats but supports a simple plugging mechanism
+such that you can quickly add extractors for additional formats, even
+without recompiling libextractor. libextractor typically ships with a
+dozen helper-libraries that can be used to obtain keywords from common
+file-types.
+
+libextractor is a part of the GNU project (http://www.gnu.org/).
"""
import _extractor
@@ -13,20 +38,58 @@
class Extractor(object):
"""
+ Main class for extracting meta-data with GNU libextractor.
+
+ You may create multiple instances of Extractor to use
+ different sets of plugins. Initially each Extractor
+ will start with the default set of plugins.
+
+ Use the extract method to obtain keywords from a file.
+
+ Use the load and unload methods to change the list of
+ plugins that should be used.
"""
def __init__(self):
self.__plugins = _extractor.loadDefaultLibraries()
def __del__(self):
- _extractor.removeAll(self.__plugins)
-# def load(plugs):
-# self.__plugins = _extractor.load(self.__plugins, plugs)
-# return None
-# def unload(plugs):
-# self.__plugins = _extractor.unload(self.__plugins, plugs)
-# return None
+ _extractor.removeAll(self.__plugins)
+ def load(self,plugs):
+ """
+ Load certain plugins. Invoke with a string with the names
+ of the plugins that should be loaded. For example,
+
+ 'libextractor_filename:-libextractor_split'
+
+ will prepend the extractor that just adds the filename as a
+ keyword and append (runs last) the extractor that splits
+ keywords at whitespace and punctuation.
+
+ No errors are reported if any of the listed plugins are not
+ found.
+ """
+ # Keep the handle returned by _extractor.load; extract() passes
+ # self.__plugins on to _extractor.extract.
+ self.__plugins = _extractor.load(self.__plugins, plugs)
+ return None
+ def unload(self,plugs):
+ """
+ Unload a plugin. Pass the name of the plugin that is to
+ be unloaded. Only one plugin can be unloaded at a time.
+ For example,
+
+ 'libextractor_pdf'
+
+ unloads the PDF extractor (if loaded). No errors are
+ reported if no matching plugin is found.
+ """
+ # Keep the handle returned by _extractor.unload; extract() passes
+ # self.__plugins on to _extractor.extract.
+ self.__plugins = _extractor.unload(self.__plugins, plugs)
+ return None
def extract(self,filename):
"""Pass a filename to extract keywords.
+
+ This function returns a list of Keyword objects.
+ If the file cannot be opened or cannot be found,
+ the list will be empty. The list can also be empty
+ if no metadata was found for the file.
"""
return _extractor.extract(self.__plugins, filename, Keyword)
Modified: Extractor/src/main/libextractor_python.c
===================================================================
--- Extractor/src/main/libextractor_python.c 2005-05-06 10:39:09 UTC (rev 742)
+++ Extractor/src/main/libextractor_python.c 2005-05-06 11:17:48 UTC (rev 743)
@@ -37,6 +37,42 @@
return Py_None;
}
+/**
+ * Python binding for loading additional libextractor plugins.
+ *
+ * @param self unused
+ * @param args Python argument tuple: the plugin-list handle (a
+ *        PyCObject) followed by a string naming the plugins to load
+ * @return a new PyCObject wrapping the list returned by
+ *         EXTRACTOR_loadConfigLibraries, or NULL (with the Python
+ *         exception set by PyArg_ParseTuple) if the arguments do
+ *         not match the expected "Os" format
+ */
+static PyObject * EXTRACTOR_PY_load(PyObject * self,
+                                    PyObject * args) {
+  PyObject * py_exts;
+  char * name;
+  EXTRACTOR_ExtractorList * plugins;
+
+  /* Propagate the argument error instead of continuing: on failure
+     py_exts and name are left uninitialized. */
+  if (! PyArg_ParseTuple(args,
+                         "Os",
+                         &py_exts,
+                         &name))
+    return NULL;
+
+  plugins =
+    EXTRACTOR_loadConfigLibraries((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts),
+                                  name);
+  return PyCObject_FromVoidPtr(plugins, NULL);
+}
+
+
+/**
+ * Python binding for unloading a single libextractor plugin.
+ *
+ * @param self unused
+ * @param args Python argument tuple: the plugin-list handle (a
+ *        PyCObject) followed by a string naming the plugin to unload
+ * @return a new PyCObject wrapping the list returned by
+ *         EXTRACTOR_removeLibrary, or NULL (with the Python
+ *         exception set by PyArg_ParseTuple) if the arguments do
+ *         not match the expected "Os" format
+ */
+static PyObject * EXTRACTOR_PY_unload(PyObject * self,
+                                      PyObject * args) {
+  PyObject * py_exts;
+  char * name;
+  EXTRACTOR_ExtractorList * plugins;
+
+  /* Propagate the argument error instead of continuing: on failure
+     py_exts and name are left uninitialized. */
+  if (! PyArg_ParseTuple(args,
+                         "Os",
+                         &py_exts,
+                         &name))
+    return NULL;
+
+  plugins =
+    EXTRACTOR_removeLibrary((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts),
+                            name);
+  return PyCObject_FromVoidPtr(plugins, NULL);
+}
+
+
static PyObject * EXTRACTOR_PY_getKeywordTypeAsString(PyObject * self,
PyObject * args) {
unsigned int type;
@@ -101,6 +137,14 @@
EXTRACTOR_PY_removeAll,
METH_VARARGS,
"unload the given set of libextractor plugins (pass plugins as argument)"
},
+ { "load",
+ EXTRACTOR_PY_load,
+ METH_VARARGS,
+ "load the given set of libextractor plugins (pass plugins names as
argument)" },
+ { "unload",
+ EXTRACTOR_PY_unload,
+ METH_VARARGS,
+ "unload the given libextractor plugin (pass plugin name as argument)" },
{ "extract",
EXTRACTOR_PY_extract,
METH_VARARGS,
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r743 - Extractor/src/main,
grothoff <=