gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r743 - Extractor/src/main


From: grothoff
Subject: [GNUnet-SVN] r743 - Extractor/src/main
Date: Fri, 6 May 2005 04:17:52 -0700 (PDT)

Author: grothoff
Date: 2005-05-06 04:17:48 -0700 (Fri, 06 May 2005)
New Revision: 743

Modified:
   Extractor/src/main/Extractor.py
   Extractor/src/main/libextractor_python.c
Log:
load-unload

Modified: Extractor/src/main/Extractor.py
===================================================================
--- Extractor/src/main/Extractor.py     2005-05-06 10:39:09 UTC (rev 742)
+++ Extractor/src/main/Extractor.py     2005-05-06 11:17:48 UTC (rev 743)
@@ -1,6 +1,31 @@
 """Extractor.py
 
-Modul docstring...
+     This file is part of libextractor.
+     (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+
+libextractor is a simple library for keyword extraction.  libextractor
+does not support all formats but supports a simple plugging mechanism
+such that you can quickly add extractors for additional formats, even
+without recompiling libextractor. libextractor typically ships with a
+dozen helper-libraries that can be used to obtain keywords from common
+file-types.  
+
+libextractor is a part of the GNU project (http://www.gnu.org/).     
 """
 
 import _extractor
@@ -13,20 +38,58 @@
 
 class Extractor(object):
     """
+    Main class for extracting meta-data with GNU libextractor.
+
+    You may create multiple instances of Extractor to use
+    different sets of plugins.  Initially each Extractor
+    will start with the default set of plugins.
+
+    Use the extract method to obtain keywords from a file.
+
+    Use the load and unload methods to change the list of
+    plugins that should be used.
     """
     
     def __init__(self):
         self.__plugins = _extractor.loadDefaultLibraries()
     def __del__(self):
-        _extractor.removeAll(self.__plugins)
-#    def load(plugs):
-#        self.__plugins = _extractor.load(self.__plugins, plugs)
-#        return None
-#    def unload(plugs):
-#        self.__plugins = _extractor.unload(self.__plugins, plugs)
-#        return None
+        _extractor.removeAll(self.__plugins)        
+    def load(self,plugs):
+        """
+        Load certain plugins.  Invoke with a string with the names
+        of the plugins that should be loaded.  For example,
+        
+        'libextractor_filename:-libextractor_split'
+
+        will prepend the extractor that just adds the filename as a
+        keyword and append (runs last) the extractor that splits
+        keywords at whitespaces and punctuations.
+
+        No errors are reported if any of the listed plugins are not
+        found.
+        """
+        self.__plugins = _extractor.load(self.__plugins, plugs)
+        return None
+    def unload(self,plugs):
+        """
+        Unload a plugin.  Pass the name of the plugin that is to
+        be unloaded.  Only one plugin can be unloaded at a time.
+        For example,
+
+        'libextractor_pdf'
+
+        unloads the PDF extractor (if loaded).  No errors are
+        reported if no matching plugin is found.
+        """
+        self.__plugins = _extractor.unload(self.__plugins, plugs)
+        return None
     def extract(self,filename):
         """Pass a filename to extract keywords.
+
+        This function returns a list of Keyword objects.
+        If the file cannot be opened or cannot be found,
+        the list will be empty.  The list can also be empty
+        if no metadata was found for the file.
         """
         return _extractor.extract(self.__plugins, filename, Keyword)
 

Modified: Extractor/src/main/libextractor_python.c
===================================================================
--- Extractor/src/main/libextractor_python.c    2005-05-06 10:39:09 UTC (rev 742)
+++ Extractor/src/main/libextractor_python.c    2005-05-06 11:17:48 UTC (rev 743)
@@ -37,6 +37,42 @@
   return Py_None;
 }
 
+static PyObject * EXTRACTOR_PY_load(PyObject * self,
+                                   PyObject * args) {
+  PyObject * py_exts;
+  char * name;
+  EXTRACTOR_ExtractorList * plugins;
+
+  PyArg_ParseTuple(args, 
+                  "Os", 
+                  &py_exts,
+                  &name);
+
+  plugins = 
+    EXTRACTOR_loadConfigLibraries((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts),
+                                 name);
+  return PyCObject_FromVoidPtr(plugins, NULL);
+}
+
+
+static PyObject * EXTRACTOR_PY_unload(PyObject * self,
+                                     PyObject * args) {
+  PyObject * py_exts;
+  char * name;
+  EXTRACTOR_ExtractorList * plugins;
+
+  PyArg_ParseTuple(args, 
+                  "Os", 
+                  &py_exts,
+                  &name);
+
+  plugins = 
+    EXTRACTOR_removeLibrary((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts),
+                           name);
+  return PyCObject_FromVoidPtr(plugins, NULL);
+}
+
+
 static PyObject * EXTRACTOR_PY_getKeywordTypeAsString(PyObject * self,
                                                      PyObject * args) {
   unsigned int type;
@@ -101,6 +137,14 @@
     EXTRACTOR_PY_removeAll,  
     METH_VARARGS,
     "unload the given set of libextractor plugins (pass plugins as argument)" },
+  { "load", 
+    EXTRACTOR_PY_load,  
+    METH_VARARGS,
+    "load the given set of libextractor plugins (pass plugins names as argument)" },
+  { "unload", 
+    EXTRACTOR_PY_unload,  
+    METH_VARARGS,
+    "unload the given libextractor plugin (pass plugin name as argument)" },
   { "extract", 
     EXTRACTOR_PY_extract,  
     METH_VARARGS,





reply via email to

[Prev in Thread] Current Thread [Next in Thread]