help-smalltalk
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Help-smalltalk] Regex sub -expressions


From: Mike Anderson
Subject: [Help-smalltalk] Regex sub -expressions
Date: Fri, 27 May 2005 07:44:58 +0000
User-agent: Mozilla Thunderbird 0.7.3 (X11/20040803)


For amusement, I was having a go at the Python Challenge (http://www.pythonchallenge.com/) in Smalltalk. That made me think that our regex implementation really needs to be able to capture sub-expressions, so here is a patch that exposes them.

The reason for passing the C structure to Smalltalk is that it allows the user to access it directly as an optimization, and also to decide that they would prefer the results in something other than a Dictionary of Intervals.

Incidentally, the fact that both regex implementations are in the examples directory suggests that they are not 'official' packages. Could we not elevate one to 'official' status?

Regards to all,

Mike
Common subdirectories: smalltalk-2.1g-orig/examples/.deps and smalltalk-2.1g/examples/.deps
Common subdirectories: smalltalk-2.1g-orig/examples/.libs and smalltalk-2.1g/examples/.libs
diff -u smalltalk-2.1g-orig/examples/re.c smalltalk-2.1g/examples/re.c
--- smalltalk-2.1g-orig/examples/re.c   2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/re.c        2005-05-27 07:22:26.586941880 +0000
@@ -74,6 +74,9 @@
 static int reh_search (OOP srcOOP, OOP patternOOP, int from, int to),
 reh_match (OOP srcOOP, OOP patternOOP, int from, int to);
 
+static struct pre_registers *reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to);
+static void reh_free_registers(struct pre_registers *regs);
+
 static RegexCacheEntry cache[REGEX_CACHE_SIZE];
 
 /* Smalltalk globals */
@@ -252,6 +255,38 @@
   return res + 1;
 }
 
+/* Search helper function */
+struct pre_registers *
+reh_search_full (OOP srcOOP, OOP patternOOP, int from, int to)
+{
+  int res = 0;
+  const char *src;
+  struct pre_pattern_buffer *regex;
+  struct pre_registers *regs;
+  RegexCaching caching;
+
+  caching = lookupRegex (patternOOP, &regex);
+  if (caching != REGEX_CACHE_HIT && compileRegex (patternOOP, regex) != NULL)
+    return NULL;
+
+  /* now search */
+  src = &STRING_OOP_AT (OOP_TO_OBJ (srcOOP), 1);
+  regs = (struct pre_registers *) calloc (1, sizeof (struct pre_registers));
+  res = pre_search (regex, src, to, from - 1, to - from + 1, regs);
+
+  if (caching == REGEX_NOT_CACHED)
+    pre_free_pattern (regex);
+
+  return regs;
+}
+
+void 
+reh_free_registers(struct pre_registers *regs)
+{
+       pre_free_registers(regs);
+       free(regs);
+}
+
 /* Match helper function */
 int
 reh_match (OOP srcOOP, OOP patternOOP, int from, int to)
@@ -280,6 +315,8 @@
 {
   vmProxy = proxy;
   vmProxy->defineCFunc ("reh_search", reh_search);
+  vmProxy->defineCFunc ("reh_search_full", reh_search_full);
+  vmProxy->defineCFunc ("reh_free_registers", reh_free_registers);
   vmProxy->defineCFunc ("reh_match", reh_match);
   vmProxy->defineCFunc ("reh_make_cacheable", reh_make_cacheable);
 
Files smalltalk-2.1g-orig/examples/re.o and smalltalk-2.1g/examples/re.o differ
diff -u smalltalk-2.1g-orig/examples/regex.la smalltalk-2.1g/examples/regex.la
--- smalltalk-2.1g-orig/examples/regex.la       2004-11-02 21:07:15.000000000 +0000
+++ smalltalk-2.1g/examples/regex.la    2005-05-27 07:22:30.489348624 +0000
@@ -32,4 +32,4 @@
 dlpreopen=''
 
 # Directory that this library needs to be installed in:
-libdir='/usr/local/lib/smalltalk'
+libdir='/usr/lib/smalltalk'
diff -u smalltalk-2.1g-orig/examples/regex.st smalltalk-2.1g/examples/regex.st
--- smalltalk-2.1g-orig/examples/regex.st       2003-09-04 05:48:58.000000000 +0000
+++ smalltalk-2.1g/examples/regex.st    2005-05-27 07:25:22.609182432 +0000
@@ -68,7 +68,31 @@
        defineCFunc: 'reh_match'
        withSelectorArgs: 'lengthOfRegexMatch: pattern from: from to: to'
        returning: #int
-       args: #(#selfSmalltalk #smalltalk #int #int)!
+       args: #(#selfSmalltalk #smalltalk #int #int)
+!
+
+CStruct subclass: #CPreRegisters
+        declaration: #( (#allocated #int)
+                                               (#numRegs #int)
+                                               (#beg (#ptr #int))
+                                               (#end (#ptr #int)) )
+        classVariableNames: ''
+        poolDictionaries: ''
+        category: 'Regex'
+!
+
+String
+       defineCFunc: 'reh_search_full'
+       withSelectorArgs: 'searchRegexFull: pattern from: from to: to'
+       returning: CPreRegisters type
+       args: #(#selfSmalltalk #smalltalk #int #int).
+
+String
+       defineCFunc: 'reh_free_registers'
+       withSelectorArgs: 'freeCPreRegisters: regs'
+       returning: #void
+       args: #(#cObject).
+!
 
 "--------------------------------------------------------------------------"
 
@@ -379,5 +403,21 @@
 tokenize
 
     ^self tokenize: '[\n\t ]+' from: 1 to: self size
-! !
+! 
+
+regexSubExprs: pattern from: from to: to
+       | cregs regs |
+       cregs := self searchRegexFull: pattern from: from to: to.
+       [       cregs beg value value >= 0 ifFalse: [ ^nil ].
+               regs := Dictionary new.
+               0 to: cregs numRegs value - 1 do:
+                       [ :i | 
+                       regs at: i put:
+                               ((cregs beg value + i) value + 1 
+                                       to: (cregs end value + i) value + 1) ]. 
+               ^regs ]
+               ensure:
+               [ self freeCPreRegisters: cregs. ].
+!
+!
 

reply via email to

[Prev in Thread] Current Thread [Next in Thread]