emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/trie 1c2790d 038/111: Replaced wildcard searches with m


From: Stefan Monnier
Subject: [elpa] externals/trie 1c2790d 038/111: Replaced wildcard searches with more powerful and efficient regexp searches.
Date: Mon, 14 Dec 2020 11:35:16 -0500 (EST)

branch: externals/trie
commit 1c2790d230742a22dc0b437d3c7ec35f6d4f9b3d
Author: Toby Cubitt <toby-predictive@dr-qubit.org>
Commit: tsc25 <toby-predictive@dr-qubit.org>

    Replaced wildcard searches with more powerful and efficient regexp searches.
---
 trie.el | 1472 +++++++++++++--------------------------------------------------
 1 file changed, 299 insertions(+), 1173 deletions(-)

diff --git a/trie.el b/trie.el
index 8cd03bb..630a40f 100644
--- a/trie.el
+++ b/trie.el
@@ -5,7 +5,7 @@
 ;; Copyright (C) 2008 Toby Cubitt
 
 ;; Author: Toby Cubitt <toby-predictive@dr-qubit.org>
-;; Version: 0.1
+;; Version: 0.2
 ;; Keywords: trie, ternary search tree, completion
 ;; URL: http://www.dr-qubit.org/emacs.php
 
@@ -48,16 +48,14 @@
 ;; association using `trie-insert', retrieve an association using
 ;; `trie-lookup', and map over a trie using `trie-map', `trie-mapc',
 ;; `trie-mapcar', or `trie-mapf'. You can find completions of a prefix
-;; sequence using `trie-complete', search for keys that match a wildcard
-;; pattern using `trie-wildcard-search', or search for keys matching a
-;; regular expression using `trie-regexp-search'. Using `trie-stack', you
-;; can create an object that allows the contents of the trie to be used
-;; like a stack, useful for building other algorithms on top of tries;
+;; sequence using `trie-complete', or search for keys matching a regular
+;; expression using `trie-regexp-search'. Using `trie-stack', you can
+;; create an object that allows the contents of the trie to be used like
+;; a stack, useful for building other algorithms on top of tries;
 ;; `trie-stack-pop' pops elements off the stack one-by-one, in "lexical"
 ;; order, whilst `trie-stack-push' pushes things onto the
-;; stack. Similarly, `trie-complete-stack', `trie-wildcard-stack' and
-;; `trie-regexp-stack' create "lexically-ordered" stacks of query
-;; results.
+;; stack. Similarly, `trie-complete-stack', and `trie-regexp-stack'
+;; create "lexically-ordered" stacks of query results.
 ;;
 ;; Note that there are two uses for a trie: as a lookup table, in which
 ;; case only the presence or absence of a key in the trie is significant,
@@ -151,6 +149,11 @@
 
 ;;; Change Log:
 ;;
+;; Version 0.2
+;; * Replaced wildcard searches with regexp searches, using the tNFA.el tagged
+;;   non-deterministic finite state automata library. This is both more
+;;   general *and* more efficient.
+;;
 ;; Version 0.1
 ;; * Initial release (complete rewrite from scratch of tstree.el!)
 ;; * Ternary search trees are now implemented as a tree of avl trees, which
@@ -180,6 +183,7 @@
 (eval-when-compile (require 'cl))
 (require 'avl-tree)
 (require 'heap)
+(require 'tNFA)
 
 
 
@@ -1043,8 +1047,8 @@ bind any variables with names commencing \"--\"."
              (pushed '())
              ))
            (:constructor
-            trie--wildcard-stack-create
-            (trie pattern
+            trie--regexp-stack-create
+            (trie regexp
              &optional
              reverse
              &aux
@@ -1053,9 +1057,9 @@ bind any variables with names commencing \"--\"."
              (stack-createfun (trie--stack-createfun trie))
              (stack-popfun (trie--stack-popfun trie))
              (stack-emptyfun (trie--stack-emptyfun trie))
-             (repopulatefun 'trie--wildcard-stack-repopulate)
-             (store (trie--wildcard-stack-construct-store
-                     trie pattern reverse))
+             (repopulatefun 'trie--regexp-stack-repopulate)
+             (store (trie--regexp-stack-construct-store
+                     trie regexp reverse))
              (pushed '())
              ))
            (:copier nil))
@@ -1107,7 +1111,8 @@ element stored in the trie.)"
     (if (trie--stack-pushed trie-stack)
        (pop (trie--stack-pushed trie-stack))
       ;; otherwise, pop first element from trie-stack and repopulate it
-      (let ((first (pop (trie--stack-store trie-stack))))
+      (prog1
+         (pop (trie--stack-store trie-stack))
        (setf (trie--stack-store trie-stack)
              (funcall (trie--stack-repopulatefun trie-stack)
                       (trie--stack-store trie-stack)
@@ -1116,8 +1121,7 @@ element stored in the trie.)"
                       (trie--stack-lookupfun trie-stack)
                       (trie--stack-stack-createfun trie-stack)
                       (trie--stack-stack-popfun trie-stack)
-                      (trie--stack-stack-emptyfun trie-stack)))
-       first))))
+                      (trie--stack-stack-emptyfun trie-stack)))))))
 
 
 (defun trie-stack-push (element trie-stack)
@@ -1202,72 +1206,64 @@ element stored in the trie.)"
   `(cond
     ;; filter, maxnum, resultfun
     ((and ,filter ,maxnum ,resultfun)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (aset trie--accumulate 0
-                (cons (funcall ,resultfun seq data)
-                      (aref trie--accumulate 0)))
-          (and (>= (length (aref trie--accumulate 0)) ,maxnum)
-               (throw 'trie-accumulate--done nil))))))
-    ;; filter, maxnum, !resultfun
-    ((and ,filter ,maxnum (not ,resultfun))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (aset trie--accumulate 0
-                (cons (cons seq data)
-                      (aref trie--accumulate 0)))
-          (and (>= (length (aref trie--accumulate 0)) ,maxnum)
-               (throw 'trie-accumulate--done nil))))))
-    ;; filter, !maxnum, resultfun
-    ((and ,filter (not ,maxnum) ,resultfun)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (aset trie--accumulate 0
-                (cons (funcall ,resultfun seq data)
-                      (aref trie--accumulate 0)))))))
-    ;; filter, !maxnum, !resultfun
-    ((and ,filter (not ,maxnum) (not ,resultfun))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (aset trie--accumulate 0
-                (cons (cons seq data)
-                      (aref trie--accumulate 0)))))))
-    ;; !filter, maxnum, resultfun
-    ((and (not ,filter) ,maxnum ,resultfun)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
         (aset trie--accumulate 0
               (cons (funcall ,resultfun seq data)
                     (aref trie--accumulate 0)))
         (and (>= (length (aref trie--accumulate 0)) ,maxnum)
              (throw 'trie-accumulate--done nil)))))
-    ;; !filter, maxnum, !resultfun
-    ((and (not ,filter) ,maxnum (not ,resultfun))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
+    ;; filter, maxnum, !resultfun
+    ((and ,filter ,maxnum (not ,resultfun))
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
         (aset trie--accumulate 0
               (cons (cons seq data)
                     (aref trie--accumulate 0)))
         (and (>= (length (aref trie--accumulate 0)) ,maxnum)
              (throw 'trie-accumulate--done nil)))))
-    ;; !filter, !maxnum, resultfun
-    ((and (not ,filter) (not ,maxnum) ,resultfun)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
+    ;; filter, !maxnum, resultfun
+    ((and ,filter (not ,maxnum) ,resultfun)
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
         (aset trie--accumulate 0
               (cons (funcall ,resultfun seq data)
                     (aref trie--accumulate 0))))))
-    ;; !filter, !maxnum, !resultfun
-    ((and (not ,filter) (not ,maxnum) (not ,resultfun))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
+    ;; filter, !maxnum, !resultfun
+    ((and ,filter (not ,maxnum) (not ,resultfun))
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
         (aset trie--accumulate 0
               (cons (cons seq data)
                     (aref trie--accumulate 0))))))
+    ;; !filter, maxnum, resultfun
+    ((and (not ,filter) ,maxnum ,resultfun)
+     (lambda (seq data)
+       (aset trie--accumulate 0
+            (cons (funcall ,resultfun seq data)
+                  (aref trie--accumulate 0)))
+       (and (>= (length (aref trie--accumulate 0)) ,maxnum)
+           (throw 'trie-accumulate--done nil))))
+    ;; !filter, maxnum, !resultfun
+    ((and (not ,filter) ,maxnum (not ,resultfun))
+     (lambda (seq data)
+       (aset trie--accumulate 0
+            (cons (cons seq data)
+                  (aref trie--accumulate 0)))
+       (and (>= (length (aref trie--accumulate 0)) ,maxnum)
+           (throw 'trie-accumulate--done nil))))
+    ;; !filter, !maxnum, resultfun
+    ((and (not ,filter) (not ,maxnum) ,resultfun)
+     (lambda (seq data)
+       (aset trie--accumulate 0
+            (cons (funcall ,resultfun seq data)
+                  (aref trie--accumulate 0)))))
+    ;; !filter, !maxnum, !resultfun
+    ((and (not ,filter) (not ,maxnum) (not ,resultfun))
+     (lambda (seq data)
+       (aset trie--accumulate 0
+            (cons (cons seq data)
+                  (aref trie--accumulate 0)))))
     ))
 
 
@@ -1277,30 +1273,26 @@ element stored in the trie.)"
   `(cond
     ;; filter, maxnum
     ((and ,filter ,maxnum)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (heap-add trie--accumulate (cons seq data))
-          (and (> (heap-size trie--accumulate) ,maxnum)
-               (heap-delete-root trie--accumulate))))))
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
+        (heap-add trie--accumulate (cons seq data))
+        (and (> (heap-size trie--accumulate) ,maxnum)
+             (heap-delete-root trie--accumulate)))))
     ;; filter, !maxnum
     ((and ,filter (not ,maxnum))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (when (funcall ,filter seq data)
-          (heap-add trie--accumulate (cons seq data))))))
+     (lambda (seq data)
+       (when (funcall ,filter seq data)
+        (heap-add trie--accumulate (cons seq data)))))
     ;; !filter, maxnum
     ((and (not ,filter) ,maxnum)
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (heap-add trie--accumulate (cons seq data))
-        (and (> (heap-size trie--accumulate) ,maxnum)
-             (heap-delete-root trie--accumulate)))))
+     (lambda (seq data)
+       (heap-add trie--accumulate (cons seq data))
+       (and (> (heap-size trie--accumulate) ,maxnum)
+           (heap-delete-root trie--accumulate))))
     ;; !filter, !maxnum
     ((and (not ,filter) (not ,maxnum))
-     (lambda (node seq)
-       (let ((data (trie--node-data node)))
-        (heap-add trie--accumulate (cons seq data)))))))
+     (lambda (seq data)
+       (heap-add trie--accumulate (cons seq data))))))
 
 
 
@@ -1431,18 +1423,23 @@ default key-data cons cell."
 
   ;; accumulate completions
   (let (node)
+    (declare (special accumulator))
     (trie--accumulate-results
      rankfun maxnum reverse filter resultfun accumulator nil
      (mapc (lambda (pfx)
             (setq node (trie--node-find (trie--root trie) pfx
                                         (trie--lookupfun trie)))
             (when node
-              (trie--mapc accumulator (trie--mapfun trie) node pfx
-                          (if maxnum reverse (not reverse)))))
+              (trie--mapc
+               (lambda (node seq)
+                 (funcall accumulator seq (trie--node-data node)))
+               (trie--mapfun trie) node pfx
+               (if maxnum reverse (not reverse)))))
           prefix))
     ))
 
 
+
 (defun trie-complete-stack (trie prefix &optional reverse)
   "Return an object that allows completions of PREFIX to be accessed
 as if they were a stack.
@@ -1511,287 +1508,37 @@ it is better to use one of those instead."
 
 
 ;; ================================================================
-;;                        Wildcard search
-
-(defmacro trie--wildcard-literal-p (el) `(vectorp ,el))
-
-(defmacro trie--wildcard-*-p (el) `(eq ,el ?*))
-
-(defmacro trie--wildcard-?-p (el) `(eq ,el ??))
-
-(defmacro trie--wildcard-group-start-p (el)
-  `(eq (car-safe ,el) ?\())
-
-(defmacro trie--wildcard-group-end-p (el)
-  `(eq (car-safe ,el) ?\)))
-
-(defmacro trie--wildcard-char-alt-p (el)
-  `(and (listp ,el)
-       (listp (cdr ,el))
-       (or (= (length ,el) 1)
-           (not (eq (car (last ,el)) ?^)))))
-
-(defmacro trie--wildcard-neg-char-alt-p (el)
-  `(and (listp ,el)
-       (listp (cdr ,el))
-       (not (= (length ,el) 1))
-       (eq (car (last ,el)) ?^)))
-
-(defmacro trie--wildcard-group-count (el)
-  `(cdr ,el))
-
+;;                        Regexp search
 
-;;; ----------------------------------------------------------------
-;;;            The public wildcard search functions
-
-(defun trie-wildcard-match (pattern sequence cmpfun)
-  "Return t if wildcard PATTERN matches SEQ, nil otherwise.
-CMPFUN is used as the comparison function for comparing elements
-of the sequence against the pattern.
-
-PATTERN must be a sequence (vector, list or string) containing
-either elements of the type used to reference data in the trie,
-or any of the characters `*', `?', `[', `]', `(', `)', `^' or
-`\\'. The meaning and syntax of these special characters follows
-shell-glob syntax:
-
-  *  wildcard
-    Matches zero or more characters. May *only* appear at the end
-    of the pattern.
-
-  ?  wildcard
-    Matches any single character.
-
-  [...]  character alternative
-    Matches any of the listed characters.
-
-  [^...]  negated character alternative
-    Matches any character *other* then those listed.
-
-  []...]  character alternative including `]'
-    Matches any of the listed characters, including `]'.
-
-  [^]...]  negated character alternative including `]'
-    Matches any character other than `]' and any others listed.
-
-  \\  quote literal
-    Causes the next element of the pattern sequence to be treated
-    literally; special characters lose their special meaning, for
-    anything else it has no effect.
-
-  (  start group
-    Starts a grouping construct.
-
-  )  end group
-    Ends a grouping construct.
-
-To include a `]' in a character alternative, place it immediately
-after the opening `[', or the opening `[^' in a negated character
-alternative. To include a `^' in a character alternative, negated
-or otherwise, place it anywhere other than immediately after the
-opening `['. To include a literal `\\' in the pattern, quote it
-with another `\\' (remember that `\\' also has to be quoted
-within elisp strings, so as a string this would be
-\"\\\\\\\\\"). The above syntax descriptions are written in terms
-of strings, but the special characters can be used in *any*
-sequence type. E.g. the character alternative \"[abc]\" would be
-\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a
-vector. The \"characters\" in the alternative can of course be
-any data type that might be stored in the trie, not just actual
-characters.
-
-Grouping constructs have no effect on which SEQUENCE's match the
-PATTERN, but data about which elements matched which group are
-included in the results. When groups are present, the return
-result for a match is a list containing cons cells whose cars and
-cdrs give the start and end indices of the elements that matched
-the corresponding groups, in order."
-  (let ((pat (append pattern nil))  ; convert pattern to list
-       token (idx 0) group-stack groups)
-    (catch 'match
-
-      ;; parse pattern
-      (while (and pat (> (length sequence) 0))
-       (setq pat (trie--wildcard-next-token pat)
-             token (car pat)
-             pat (cdr pat))
-       (cond
-
-        ;; start group (: add current character index to pending groups
-        ((trie--wildcard-group-start-p token)
-         (dotimes (i (trie--wildcard-group-count token))
-           (push idx group-stack)))
-
-        ;; end group ): add current character index to pending groups
-        ((trie--wildcard-group-end-p token)
-         (dotimes (i (trie--wildcard-group-count token))
-           (if (null group-stack)
-               (error "Syntax error in trie wildcard pattern: missing \"(\"")
-             (push (cons (pop group-stack) idx) groups))))
-
-        ;; literal string: compare elements
-        ((trie--wildcard-literal-p token)
-         ;; if literal is longer than remaining string, or literal is at end
-         ;; of pattern and remaining string is too long, match has failed
-         (when (or (> (length token) (length sequence))
-                   (and (null pat) (< (length token) (length sequence))))
-           (throw 'match nil))
-         ;; compare element by element using CMPFUN
-         (dotimes (i (length token))
-           (when (or (funcall cmpfun (elt sequence i) (aref token i))
-                     (funcall cmpfun (aref token i) (elt sequence i)))
-             (throw 'match nil)))
-         (setq sequence (trie--subseq sequence (length token))
-               idx (+ idx (length token))))
-
-        ;; ? wildcard: accept anything
-        ((trie--wildcard-?-p token)
-         (setq sequence (trie--subseq sequence 1)
-               idx (1+ idx)))
-
-        ;; character alternative: check next element matches
-        ((trie--wildcard-char-alt-p token)
-         (while (and token
-                     (or (funcall cmpfun (elt sequence 0) (car token))
-                         (funcall cmpfun (car token) (elt sequence 0))))
-           (setq token (cdr token)))
-         (if token
-             (setq sequence (trie--subseq sequence 1)
-                   idx (1+ idx))
-           (throw 'match nil)))
-
-        ;; negated character alternative: check next element isn't excluded
-        ((trie--wildcard-neg-char-alt-p token)
-         (dolist (c (butlast token))  ; drop final ^
-           (unless (or (funcall cmpfun (elt sequence 0) c)
-                       (funcall cmpfun c (elt sequence 0)))
-             (throw 'match nil))
-           (setq idx (1+ idx))))
-
-        ;; terminal * and possibly ): Houston, we have a match!
-        ((and (trie--wildcard-*-p token)
-              (catch 'not-group
-                (dolist (tok pat)
-                  (unless (eq tok ?\)) (throw 'not-group nil)))
-                t))
-         (setq idx (+ idx (length sequence)))
-         ;; if we have groups, complete them
-         (when pat
-           (while pat
-             (if (null group-stack)
-                 (error "Syntax error in trie wildcard pattern:\
- missing \"(\"")
-               (push (cons (pop group-stack) idx) groups)
-               (setq pat (cdr pat))))
-           (unless (null group-stack)
-             (error "Syntax error in trie wildcard pattern: missing \")\""))
-           (setq groups
-                 (sort groups
-                       (lambda (a b)
-                         (or (< (car a) (car b))
-                             (and (= (car a) (car b))
-                                  (> (cdr a) (cdr b))))))))
-         (throw 'match (or groups t)))
-
-        ;; non-terminal *: not supported for efficiency reasons
-        ((trie--wildcard-*-p token)
-         (error "Syntax error in trie wildcard pattern:\
-non-terminal * wildcards are not supported"))
-
-;;;     ;; * wildcard: oh boy, gonna have to recursively check all possible
-;;;     ;;             search brances
-;;;     ((trie--wildcard-*-p token)
-;;;      (setq sequence (trie--subseq sequence 1))
-;;;      (throw 'match
-;;;             (or (= (length sequence) 0)
-;;;                 (and pat (trie-wildcard-match pat sequence cmpfun))
-;;;                 (trie-wildcard-match pattern sequence cmpfun))))
-        )
-
-       ;; store unparsed pattern for next iteration
-       (setq pattern pat))
-
-      ;; if we got to the end of PATTERN, SEQUENCE matched
-      (if (or pat (> (length sequence) 0)) nil (or groups t))
-      )))
-
-
-
-(defun trie-wildcard-search
-  (trie pattern &optional rankfun maxnum reverse filter resultfun)
-  "Return an alist containing all matches for PATTERN in TRIE
+(defun trie-regexp-search
+  (trie regexp &optional rankfun maxnum reverse filter resultfun type)
+  "Return an alist containing all matches for REGEXP in TRIE
 along with their associated data, in the order defined by
-RANKFUN, defaulting to \"lexical\" order (i.e. the order defined
-by the trie's comparison function). If REVERSE is non-nil, the
+RANKFUN, defauling to \"lexical\" order (i.e. the order defined
+by the trie's comparison function).  If REVERSE is non-nil, the
 completions are sorted in the reverse order. Returns nil if no
 completions are found.
 
-PATTERN must be a sequence (vector, list or string) containing
-either elements of the type used to reference data in the trie,
-or any of the characters `*', `?', `[', `]', `(', `)', `^' or
-`\\'. The meaning and syntax of these special characters follows
-shell-glob syntax:
-
-  *  wildcard
-    Matches zero or more characters. May *only* appear at the end
-    of the pattern.
-
-  ?  wildcard
-    Matches any single character.
-
-  [...]  character alternative
-    Matches any of the listed characters.
-
-  [^...]  negated character alternative
-    Matches any character *other* then those listed.
-
-  []...]  character alternative including `]'
-    Matches any of the listed characters, including `]'.
-
-  [^]...]  negated character alternative including `]'
-    Matches any character other than `]' and any others listed.
-
-  \\  quote literal
-    Causes the next element of the pattern sequence to be treated
-    literally; special characters lose their special meaning, for
-    anything else it has no effect.
-
-  (  start group
-    Starts a grouping construct.
-
-  )  end group
-    Ends a grouping construct.
-
-To include a `]' in a character alternative, place it immediately
-after the opening `[', or the opening `[^' in a negated character
-alternative. To include a `^' in a character alternative, negated
-or otherwise, place it anywhere other than immediately after the
-opening `['. To include a literal `\\' in the pattern, quote it
-with another `\\' (remember that `\\' also has to be quoted
-within elisp strings, so as a string this would be
-\"\\\\\\\\\"). The above syntax descriptions are written in terms
-of strings, but the special characters can be used in *any*
-sequence type. E.g. the character alternative \"[abc]\" would be
-\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a
-vector. The \"characters\" in the alternative can of course be
-any data type that might be stored in the trie, not just actual
-characters.
-
-Grouping constructs have no effect on which keys match the
-pattern, but data about which sequence elements matched which
-group are included in the results. When groups are present, the
-car of an element in the results alist is no longer a straight
-key. Instead, it is a list whose first element is the matching
-key, and the remainder contains cons cells whose cars and cdrs
-give the start and end indices of the elements that matched the
-corresponding groups, in order.
-
-If PATTERN is a string, it must be possible to apply `string' to
-individual elements of the sequences stored in the trie. The
-matches returned in the alist will be sequences of the same type
-as KEY. If PATTERN is a list of pattern sequences, matches for
-all patterns in the list are included in the returned alist. All
-sequences in the list must be of the same type.
+REGEXP is a regular expression, but it need not necessarily be a
+string. It must be a sequence (vector, list of string) whose
+elements are either elements of the same type as elements of the
+trie keys (which behave as literals in the regexp), or any of the
+usual regexp special characters and backslash constructs. If
+REGEXP is a string, it must be possible to apply `string' to
+individual elements of the keys stored in the trie. The matches
+returned in the alist will be sequences of the same type as KEY.
+
+Back-references and non-greedy postfix operators are *not*
+supported, and the matches are always anchored, so `$' and `^'
+lose their special meanings.
+
+If the regexp contains any non-shy grouping constructs, subgroup
+match data is included in the results. In this case, the car of
+each match (as returned by a call to `trie-stack-pop' is no
+longer just a key. Instead, it is a list whose first element is
+the matching key, and whose remaining elements are cons cells
+whose cars and cdrs give the start and end indices of the
+elements that matched the corresponding groups, in order.
 
 The optional integer argument MAXNUM limits the results to the
 first MAXNUM matches. Otherwise, all matches are returned.
@@ -1812,64 +1559,101 @@ RESULTFUN defines a function used to process results 
before
 adding them to the final result list. If specified, it should
 accept two arguments: a key and its associated data. It's return
 value is what gets added to the final result list, instead of the
-default key-data cons cell.
-
-
-Efficiency concerns:
-
-Wildcard searches on tries are very efficient compared to similar
-searches on other data structures. The supported wildcard
-patterns are the subset of shell-glob patterns that can be
-searched efficiently. Note, however, that supplying a list of
-PATTERN's simply finds matches for each pattern independently,
-and sorts the results (removing any duplicates), which for
-closely-related patterns is inefficient. If you want true
-alternation and a less limited pattern syntax, use
-`trie-regexp-search' instead...but you'll have to implement it
-first!."
+default key-data cons cell."
 
   ;; convert trie from print-form if necessary
   (trie-transform-from-read-warn trie)
 
-  ;; wrap prefix in a list if necessary
-  ;; FIXME: the test for a list of patterns, below, will fail if the PATTERN
-  ;;        sequence is a list, and the first element of PATTERN is itself a
-  ;;        list (there might be no easy way to fully fix this...)
-  (if (or (atom pattern)
-         (and (listp pattern) (not (sequencep (car pattern)))))
-      (setq pattern (list pattern))
-    ;; sort list of patterns if sorting completions lexically
-    (when (null rankfun)
-      (setq pattern
-           (sort pattern (trie-construct-sortfun
-                         (trie--comparison-function trie))))))
+  ;; massage rankfun to cope with grouping data
+  ;; FIXME: could skip this if REGEXP contains no grouping constructs
+  (when rankfun
+    (setq rankfun
+         `(lambda (a b)
+            ;; if car of argument contains a key+group list rather than a
+            ;; straight key, remove group list
+            ;; FIXME: the test for straight key, below, will fail if the key
+            ;;        is a list, and the first element of the key is itself a
+            ;;        list (there might be no easy way to fully fix this...)
+            (unless (or (atom (car a))
+                        (and (listp (car a)) (not (sequencep (caar a)))))
+              (setq a (cons (caar a) (cdr a))))
+            (unless (or (atom (car b))
+                        (and (listp (car b)) (not (sequencep (caar b)))))
+              (setq b (cons (caar b) (cdr b))))
+            ;; call rankfun on massaged arguments
+            (,rankfun a b))))
 
-  ;; construct appropriate rankfun for wildcard search
-  (destructuring-bind (rankfun expect-duplicate-results)
-      (trie--wildcard-construct-rankfun trie pattern rankfun reverse)
-    (let ((seq (cond ((stringp (car pattern)) "")
-                    ((listp (car pattern)) ())
-                    (t []))))
-      ;; accumulate pattern matches
-      (declare (special accumulator))
-      (trie--accumulate-results
-       rankfun maxnum reverse filter resultfun
-       accumulator expect-duplicate-results
-       (mapc (lambda (pat)
-              (trie--do-wildcard-search
-               (trie--root trie)
-               seq pat rankfun maxnum reverse
-               0 nil nil
-               (trie--comparison-function trie)
-               (trie--lookupfun trie)
-               (trie--mapfun trie)))
-            ;; convert patterns to lists
-            (mapcar (lambda (pat) (append pat nil)) pattern))))))
-
-
-
-(defun trie-wildcard-stack (trie pattern &optional reverse)
-  "Return an object that allows matches to PATTERN to be accessed
+  ;; accumulate completions
+  (declare (special accumulator))
+  (trie--accumulate-results
+   rankfun maxnum reverse filter resultfun accumulator nil
+   (trie--do-regexp-search
+    (trie--root trie)
+    (tNFA-from-regexp regexp)
+    (cond ((stringp regexp) "") ((listp regexp) ()) (t []))
+    0 (or (and maxnum reverse) (and (not maxnum) (not reverse)))
+    (trie--comparison-function trie)
+    (trie--lookupfun trie)
+    (trie--mapfun trie))))
+
+
+
+(defun trie--do-regexp-search (--trie--regexp-search--node
+                              tNFA seq pos reverse
+                              comparison-function lookupfun mapfun)
+  ;; Search everything below the node --TRIE--REGEXP-SEARCH-NODE for matches
+  ;; to the regexp encoded in tNFA. SEQ is the sequence corresponding to NODE,
+  ;; POS is it's length. REVERSE is the usual query argument, and the
+  ;; remaining arguments are the corresponding trie functions.
+  (declare (special accumulator))
+  (cond
+   ;; data node
+   ((trie--node-data-p --trie--regexp-search--node)
+    (when (tNFA-match-p tNFA)
+      (let ((groups (tNFA-group-data tNFA)))
+       (funcall accumulator
+                (if groups (cons seq groups) seq)
+                (trie--node-data --trie--regexp-search--node)))))
+
+   ;; wildcard transition: map over all nodes in subtree
+   ((tNFA-wildcard-p tNFA)
+    (let (state groups)
+      (funcall mapfun
+              (lambda (node)
+                (if (trie--node-data-p node)
+                    (when (tNFA-match-p tNFA)
+                      (setq groups (tNFA-group-data tNFA))
+                      (funcall accumulator
+                               (if groups (cons seq groups) seq)
+                               (trie--node-data node)))
+                  (when (setq state (tNFA-next-state
+                                     tNFA (trie--node-split node) pos))
+                    (trie--do-regexp-search
+                     node state
+                     (trie--seq-append seq (trie--node-split node))
+                     (1+ pos) reverse comparison-function lookupfun mapfun))))
+              (trie--node-subtree --trie--regexp-search--node)
+              reverse)))
+
+   (t ;; no wildcard transition: loop over all transitions
+    (let (node state)
+      (dolist (chr (sort (tNFA-transitions tNFA)
+                        (if reverse
+                            `(lambda (a b) (,comparison-function b a))
+                          comparison-function)))
+       (when (and (setq node (trie--node-find
+                              --trie--regexp-search--node
+                              (vector chr) lookupfun))
+                  (setq state (tNFA-next-state tNFA chr pos)))
+         (trie--do-regexp-search
+          node state (trie--seq-append seq chr) (1+ pos)
+          reverse comparison-function lookupfun mapfun)))))
+   ))
+
+
+
+(defun trie-regexp-stack  (trie regexp &optional reverse)
+  "Return an object that allows matches to REGEXP to be accessed
 as if they were a stack.
 
 The stack is sorted in \"lexical\" order, i.e. the order defined
@@ -1877,511 +1661,43 @@ by TRIE's comparison function, or in reverse order if 
REVERSE is
 non-nil. Calling `trie-stack-pop' pops the top element (a cons
 cell containing a key and its associated data) from the stack.
 
-PATTERN must be a sequence (vector, list or string) containing
-either elements of the type used to reference data in the trie,
-or any of the characters `*', `?', `[', `]', `(', `)', `^' or
-`\\'. The meaning and syntax of these special characters follows
-shell-glob syntax:
-
-  *  wildcard
-    Matches zero or more characters. May *only* appear at the end
-    of the pattern.
-
-  ?  wildcard
-    Matches any single character.
-
-  [...]  character alternative
-    Matches any of the listed characters.
-
-  [^...]  negated character alternative
-    Matches any character *other* then those listed.
-
-  []...]  character alternative including `]'
-    Matches any of the listed characters, including `]'.
-
-  [^]...]  negated character alternative including `]'
-    Matches any character other than `]' and any others listed.
-
-  \\  quote literal
-    Causes the next element of the pattern sequence to be treated
-    literally; special characters lose their special meaning, for
-    anything else it has no effect.
-
-  (  start group
-    Starts a grouping construct.
-
-  )  end group
-    Ends a grouping construct.
-
-To include a `]' in a character alternative, place it immediately
-after the opening `[', or the opening `[^' in a negated character
-alternative. To include a `^' in a character alternative, negated
-or otherwise, place it anywhere other than immediately after the
-opening `['. To include a literal `\\' in the pattern, quote it
-with another `\\' (remember that `\\' also has to be quoted
-within elisp strings, so as a string this would be
-\"\\\\\\\\\"). The above syntax descriptions are written in terms
-of strings, but the special characters can be used in *any*
-sequence type. E.g. the character alternative \"[abc]\" would be
-\(?[ ?a ?b ?c ?]\) as a list, or [?[ ?a ?b ?c ?]] as a
-vector. The \"characters\" in the alternative can of course be
-any data type that might be stored in the trie, not just actual
-characters.
-
-Grouping constructs have no effect on which keys match the
-pattern, but data about which sequence elements matched which
-group are included in the results. When groups are present, the
-car of a match result (as returned by a call to `trie-stack-pop')
-is no longer a straight key. Instead, it is a list whose first
-element is the matching key, and the remainder contains cons
-cells whose cars and cdrs give the start and end indices of the
-elements that matched the corresponding groups, in order.
-
-If PATTERN is a string, it must be possible to apply `string' to
-individual elements of the sequences stored in the trie. The
-matches returned in the alist will be sequences of the same type
-as KEY. (Support for lists of PATTERN's has not yet been
-implemented.)
-
-
-Efficiency concerns:
+REGEXP is a regular expression, but it need not necessarily be a
+string. It must be a sequence (vector, list of string) whose
+elements are either elements of the same type as elements of the
+trie keys (which behave as literals in the regexp), or any of the
+usual regexp special characters and backslash constructs. If
+REGEXP is a string, it must be possible to apply `string' to
+individual elements of the keys stored in the trie. The matches
+returned in the alist will be sequences of the same type as KEY.
+
+Back-references and non-greedy postfix operators are *not*
+supported, and the matches are always anchored, so `$' and `^'
+lose their special meanings.
+
+If the regexp contains any non-shy grouping constructs, subgroup
+match data is included in the results. In this case, the car of
+each match (as returned by a call to `trie-stack-pop' is no
+longer just a key. Instead, it is a list whose first element is
+the matching key, and whose remaining elements are cons cells
+whose cars and cdrs give the start and end indices of the
+elements that matched the corresponding groups, in order."
 
-Wildcard searches on tries are very efficient compared to similar
-searches on other data structures. The supported wildcard
-patterns are the subset of shell-glob patterns that can be
-searched efficiently. If you want a less limited pattern syntax,
-use `trie-regexp-stack' instead...but you'll have to implement it
-first!."
   ;; convert trie from print-form if necessary
   (trie-transform-from-read-warn trie)
   ;; if stack functions aren't defined for trie type, throw error
   (if (not (functionp (trie--stack-createfun trie)))
       (error "Trie type does not support stack operations")
-    ;; otherwise, create and initialise a stack
-    (trie--wildcard-stack-create trie pattern reverse)))
-
-
-
-;;; ------------------------------------------------------------------
-;;;              Internal functions (do the real work)
-
-(defun trie--wildcard-next-token (pattern &optional cmpfun)
-  ;; Extract first pattern element from PATTERN (a list), and return it consed
-  ;; with remainder of pattern. If CMPFUN is supplied, it is used to sort
-  ;; character alternatives.
-  (when pattern
-    (let ((token (pop pattern)))
-      (cond
-       ;; *: drop any following *'s
-       ((eq token ?*)
-       (while (eq (car pattern) ?*) (pop pattern)))
-
-       ;; [: gobble up to closing ]
-       ((eq token ?\[)
-       ;; character alternatives are stored in lists
-       (setq token ())
-       (cond
-        ;; gobble ] appearing straight after [
-        ((eq (car pattern) ?\]) (push (pop pattern) token))
-        ;; gobble ] appearing straight after [^
-        ((and (eq (car pattern) ?^) (eq (nth 1 pattern) ?\]))
-         (push (pop pattern) token)
-         (push (pop pattern) token)))
-       ;; gobble everything up to closing ]
-       (while (not (eq (car pattern) ?\]))
-         (push (pop pattern) token)
-         (unless pattern
-           (error "Syntax error in trie wildcard pattern: missing \"]\"")))
-       (pop pattern)  ; dump closing ]
-       ;; if CMPFUN was supplied, sort characters in alternative
-       (when cmpfun
-         ;; leave final ^ at end in negated character alternative
-         (if (eq (car (last token)) ?^)
-             (setq token (concat (sort (butlast token) cmpfun) ?^))
-           (setq token (sort token cmpfun)))))
-
-       ;; ?: nothing to gobble
-       ((eq token ??))
-
-       ;; ]: syntax error (always gobbled when parsing [)
-       ((eq token ?\])
-       (error "Syntax error in trie wildcard pattern: missing \"[\""))
-
-       ;; (: gobble any following ('s
-       ((eq token ?\()
-       (let ((i 1))
-         (while (eq (car pattern) ?\()
-           (incf i)
-           (pop pattern))
-         (setq token (cons ?\( i))))
-
-       ;; ): gobble any following )'s
-       ((eq token ?\))
-       (let ((i 1))
-         (while (eq (car pattern) ?\))
-           (incf i)
-           (pop pattern))
-         (setq token (cons ?\) i))))
-
-       ;; anything else, gobble up to first special character
-       (t
-       (push token pattern)
-       (setq token nil)
-       (while (and pattern
-                   (not (or (eq (car pattern) ?\[) (eq (car pattern) ?\])
-                            (eq (car pattern) ?*)  (eq (car pattern) ??)
-                            (eq (car pattern) ?\() (eq (car pattern) ?\)))))
-         ;; \: dump \ and gobble next character
-         (when (eq (car pattern) ?\\)
-           (pop pattern)
-           (unless pattern
-             (error "Syntax error in trie wildcard pattern:\
- missing character after \"\\\"")))
-         (push (pop pattern) token))
-       ;; fixed strings are stored in vectors
-       (setq token (vconcat (nreverse token)))))
-
-      ;; return first token and remaining pattern
-      (list token pattern))))
-
-
-
-;;; ------------------------------------------------------------------
-;;;                      wildcard search
-
-(defun trie--wildcard-construct-rankfun (trie pattern rankfun reverse)
-  ;; construct appropriate rank function for wildcard search, and return a
-  ;; list containing the rankfun and a flags indicating whether to expect
-  ;; duplicate results
-  (let (pattern-contains-groups
-       ;; multiple patterns: need manual lexical sort and duplicate filtering
-       (manual-lexical-sort (> (length pattern) 1))
-       (expect-duplicate-results (> (length pattern) 1)))
-    ;; convert patterns to lists, and check for groups  ; and * wildcards
-    (setq pattern
-         (mapcar
-          (lambda (pat)
-            ;; convert pattern to list
-            (setq pat (append pat nil))
-;;;         (let ((pos (trie--position ?* pat)))
-;;;           ;; if pattern contains multiple *'s, have to filter out
-;;;           ;; duplicate results
-;;;           (setq expect-duplicate-results
-;;;                 (or expect-duplicate-results
-;;;                     (and pos (trie--position
-;;;                               ?* (trie--subseq pat (1+ pos))))))
-;;;           ;; if *'s appear in middle of pattern (other than any group
-;;;           ;; endings at very end), need to sort manually
-;;;           (setq manual-lexical-sort
-;;;                 (or manual-lexical-sort
-;;;                     (and pos
-;;;                          (catch 'not-group-end
-;;;                            (dolist (c (last pat (- (length pat) pos 1)))
-;;;                              (unless (eq c ?\))
-;;;                                (throw 'not-group-end t)))
-;;;                            nil)))))
-            ;; check if pattern contains groups
-            (setq pattern-contains-groups
-                  (or pattern-contains-groups (trie--position ?\( pat)))
-            ;; return pattern as list
-            pat)
-          pattern))
-
-    ;; construct appropriate rankfun
-    (cond
-     ((and rankfun pattern-contains-groups)
-      (setq rankfun
-           `(lambda (a b)
-              ;; if car of argument contains a key+group list rather than
-              ;; a straight key, remove group list
-              ;; FIXME: the test for straight key, below, will fail if the
-              ;;        key is a list, and the first element of the key is
-              ;;        itself a list (there might be no easy way to fully
-              ;;        fix this...)
-              (unless (or (atom (car a))
-                          (and (listp (car a)) (not (sequencep (caar a)))))
-                (setq a (cons (caar a) (cdr a))))
-              (unless (or (atom (car b))
-                          (and (listp (car b)) (not (sequencep (caar b)))))
-                (setq b (cons (caar b) (cdr b))))
-              ;; call rankfun on massaged arguments
-              (,rankfun a b))))
-
-     ((and (null rankfun) manual-lexical-sort (not pattern-contains-groups))
-      (setq rankfun
-           `(lambda (a b)
-              ;; call lexical rank function on keys
-              (,(trie-construct-sortfun
-                 (trie--comparison-function trie)
-                 reverse)
-               (car a) (car b)))))
-
-     ((and (null rankfun) manual-lexical-sort pattern-contains-groups)
-      (setq rankfun
-           `(lambda (a b)
-              ;; extract key from argument, (car of arg if no group data
-              ;; attached to key, otherwise first element of key+group list
-              ;; in car)
-              ;; FIXME: the test for straight key, below, will fail if the
-              ;;        key is a list, and the first element of the key is
-              ;;        itself a list (there might be no easy way to fully
-              ;;        fix this...)
-              (if (and (listp (car a)) (not (sequencep (caar a))))
-                  (setq a (car a))
-                (setq a (caar a)))
-              (if (and (listp (car b)) (not (sequencep (caar b))))
-                  (setq b (car b))
-                (setq b (caar b)))
-              ;; call lexical rank function on extracted keys
-              (,(trie-construct-sortfun
-                 (trie--comparison-function trie)
-                 reverse)
-               a b)))))
-
-    ;; return rankfun and duplicate results flag
-    (list rankfun expect-duplicate-results)))
-
-
-
-(defun trie--do-wildcard-search
-  (node seq pattern rankfun maxnum reverse
-       idx group-stack groups
-       comparison-function lookupfun mapfun)
-  ;; Perform wildcard search for PATTERN starting at NODE which corresponds to
-  ;; sequence SEQ, where IDX characters have already been matched, GROUP-STACK
-  ;; contains any pending group start locations, and GROUPS contains alist of
-  ;; completed groups. Pass the other query parameters in RANKFUN, MAXNUM and
-  ;; REVERSE, and the trie functions in COMPARISON-FUNCTION, LOOKUPFUN and
-  ;; MAPFUN (note that COMPARISON-FUNCTION should be the
-  ;; trie--comparison-function, *not* the trie--cmpfun)
-  (declare (special accumulator))
+    ;; otherwise, create and initialise a regexp stack
+    (trie--regexp-stack-create trie regexp reverse)))
 
-  ;; if pattern is null, accumulate data from current node
-  (if (null pattern)
-      (progn
-       (unless (null group-stack)
-         (error "Syntax error in trie wildcard pattern: missing \")\""))
-       (when (setq node (trie--find-data-node node lookupfun))
-         (setq groups
-               (sort groups
-                     (lambda (a b)
-                       (or (< (car a) (car b))
-                           (and (= (car a) (car b))
-                                (> (cdr a) (cdr b)))))))
-         (funcall accumulator node (if groups (cons seq groups) seq))))
-
-    ;; otherwise, extract first pattern element and act on it
-    (destructuring-bind (token pattern) (trie--wildcard-next-token pattern)
-      (cond
-
-       ;; literal string: descend to corresponding node
-       ((trie--wildcard-literal-p token)
-       ;; find node corresponding to literal string pattern
-       (when (setq node (trie--node-find node token lookupfun))
-         (trie--do-wildcard-search
-          node (trie--seq-concat seq token)
-          pattern rankfun maxnum reverse
-          (+ idx (length token)) group-stack groups
-          comparison-function lookupfun mapfun)))
-
-       ;; start group (: add current character index to pending groups
-       ((trie--wildcard-group-start-p token)
-       (dotimes (i (trie--wildcard-group-count token))
-         (push idx group-stack))
-       (trie--do-wildcard-search
-        node seq pattern rankfun maxnum reverse
-        idx group-stack groups
-        comparison-function lookupfun mapfun))
-
-       ;; end group ): add completed groups to list
-       ((trie--wildcard-group-end-p token)
-       (dotimes (i (trie--wildcard-group-count token))
-         (if (null group-stack)
-             (error "Syntax error in trie wildcard pattern: missing \"(\"")
-           (push (cons (pop group-stack) idx) groups)))
-       (trie--do-wildcard-search
-        node seq pattern rankfun maxnum reverse
-        idx group-stack groups
-        comparison-function lookupfun mapfun))
-
-       ;; terminal *: accumulate everything below current node
-       ((and (null pattern) (trie--wildcard-*-p token))
-       (unless (null group-stack)
-         (error "Syntax error in trie wildcard pattern: missing \")\""))
-       (let ((grps (sort (copy-sequence groups)
-                         (lambda (a b)
-                           (or (< (car a) (car b))
-                               (and (= (car a) (car b))
-                                    (> (cdr a) (cdr b))))))))
-         (trie--mapc
-          (lambda (node seq) (funcall accumulator node (cons seq grps)))
-          mapfun node seq (if maxnum reverse (not reverse)))))
-
-       ;; terminal * and ): accumulate everything below current node and
-       ;;                   close group(s)
-       ((and (trie--wildcard-*-p token)
-            (catch 'not-group
-              (dolist (tok pattern)
-                (unless (eq tok ?\)) (throw 'not-group nil)))
-              t))
-       (trie--mapc
-        (lambda (node seq)
-          (let ((grp-stack group-stack)
-                (grps (copy-sequence groups))
-                (pat pattern))
-            (while (progn
-                     (if (null grp-stack)
-                         (error "Syntax error in trie wildcard\
- pattern: missing \"(\"")
-                       (push (cons (pop grp-stack) (length seq)) grps)
-                       (pop pat))))
-            (unless (null grp-stack)
-              (error "Syntax error in trie wildcard pattern: missing \")\""))
-            (setq grps
-                  (sort grps
-                        (lambda (a b)
-                          (or (< (car a) (car b))
-                              (and (= (car a) (car b)) (> (cdr a) (cdr b)))))))
-            (funcall accumulator node (cons seq grps))))
-        mapfun node seq  ;; trie--mapc arguments
-        (if maxnum reverse (not reverse))))
-
-       ;; non-terminal *: not supported for efficiency reasons
-       ((trie--wildcard-*-p token)
-         (error "Syntax error in trie wildcard pattern:\
-non-terminal * wildcards are not supported"))
-
-;;;        ;; * wildcard: map over all nodes immediately below current one, 
with
-;;;        ;;             and without using up the *
-;;;        ((trie--wildcard-*-p token)
-;;;    (funcall mapfun
-;;;             (lambda (node)
-;;;               ;; skip data nodes (terminal * dealt with above)
-;;;               (unless (trie--node-data-p node)
-;;;                 ;; using up *
-;;;                 (trie--do-wildcard-search
-;;;                  node (trie--seq-append seq (trie--node-split node))
-;;;                  pattern rankfun maxnum reverse
-;;;                  (1+ idx) group-stack groups
-;;;                  comparison-function lookupfun mapfun)
-;;;                 ;; not using up *
-;;;                 (trie--do-wildcard-search
-;;;                  node (trie--seq-append seq (trie--node-split node))
-;;;                  (cons ?* pattern) rankfun maxnum reverse
-;;;                  (1+ idx) group-stack groups
-;;;                  comparison-function lookupfun mapfun)))
-;;;             (trie--node-subtree node)))
-
-       ;; ? wildcard: map over all child nodes
-       ((trie--wildcard-?-p token)
-       (funcall mapfun
-                (lambda (node)
-                  ;; skip data nodes (note: if we wanted to implement a "0
-                  ;; or 1" wildcard, would accumulate these instead)
-                  (unless (trie--node-data-p node)
-                    (trie--do-wildcard-search
-                     node (trie--seq-append seq (trie--node-split node))
-                     pattern rankfun maxnum reverse
-                     (1+ idx) group-stack groups
-                     comparison-function lookupfun mapfun)
-                    ))
-                (trie--node-subtree node)
-                (if maxnum reverse (not reverse))))
-
-       ;; character alternative: descend to corresponding nodes in turn
-       ((trie--wildcard-char-alt-p token)
-       (let (n)
-         (mapc
-          (lambda (c)
-            (when (setq n (funcall lookupfun (trie--node-subtree node)
-                                   (trie--node-create-dummy c)))
-              (trie--do-wildcard-search
-               n (trie--seq-append seq c)
-               pattern rankfun maxnum reverse
-               (1+ idx) group-stack groups
-               comparison-function lookupfun mapfun)))
-          (if rankfun token
-            (sort token (if (or (and maxnum reverse)  ; no xnor in Elisp!
-                                (and (not maxnum) (not reverse)))
-                            (lambda (a b)
-                              (not (funcall comparison-function a b)))
-                          comparison-function))))))
-
-       ;; negated character alternative: map over all child nodes, skipping
-       ;;                                excluded ones
-       ((trie--wildcard-neg-char-alt-p token)
-       (funcall mapfun
-                (lambda (node)
-                  ;; skip data nodes (note: if we wanted to implement a "0 or
-                  ;; 1" wildcard, would need to accumulate these instead)
-                  (unless (or (trie--node-data-p node)
-                              (catch 'excluded
-                                (dolist (c (butlast token))  ; drop final ^
-                                  (when (eq c (trie--node-split node))
-                                    (throw 'excluded t)))))
-                    (trie--do-wildcard-search
-                     node (trie--seq-append seq (trie--node-split node))
-                     pattern rankfun maxnum reverse
-                     (1+ idx) group-stack groups
-                     comparison-function lookupfun mapfun)
-                    ))
-                (trie--node-subtree node)
-                (if maxnum reverse (not reverse))))
-       ))))
-
-
-
-;;; ------------------------------------------------------------------
-;;;                      wildcard stack
-
-;; FIXME: using a defstruct instead of these macros causes *very* weird
-;;        bugs...why?!?!?!!!
-(defmacro trie--wildcard-stack-el-create
-  (seq pattern node idx group-stack groups)
-  `(vector ,seq ,pattern ,node ,idx ,group-stack ,groups))
-
-(defmacro trie--wildcard-stack-el-seq (el) `(aref ,el 0))
-(defmacro trie--wildcard-stack-el-pattern (el) `(aref ,el 1))
-(defmacro trie--wildcard-stack-el-node (el) `(aref ,el 2))
-(defmacro trie--wildcard-stack-el-idx (el) `(aref ,el 3))
-(defmacro trie--wildcard-stack-el-group-stack (el) `(aref ,el 4))
-(defmacro trie--wildcard-stack-el-groups (el) `(aref ,el 5))
-
-;; ;; structure for internal trie-wildcard-stack elements
-;; (defstruct
-;;   (trie--wildcard-stack-el
-;;    (:type vector)
-;;    (:constructor nil)
-;;    (:constructor trie--wildcard-stack-el-create
-;;                 (seq pattern node idx group-stack groups))
-;;    (:copier nil))
-;;   seq pattern node idx group-stack groups)
-
-
-
-(defun trie--wildcard-stack-construct-store
-  (trie pattern &optional reverse)
-  ;; Construct store for wildcard stack based on TRIE.
-  ;; FIXME: the test for a list of patterns, below, will fail if the PATTERN
-  ;;        sequence is a list, and the first element of PATTERN is itself a
-  ;;        list (there might be no easy way to fully fix this...)
-  (unless (or (atom pattern)
-             (and (listp pattern)
-                  (not (sequencep (car pattern)))))
-    (error "Multiple pattern searches are not currently supported by\
- trie-wildcard-stack's"))
-  (let ((comparison-function (trie--comparison-function trie))
-       (seq (cond ((stringp pattern) "") ((listp pattern) ()) (t [])))
-       cmpfun store)
-    (setq cmpfun (if reverse
-                    `(lambda (a b) (,comparison-function b a))
-                  comparison-function)
-         store (list
-                (trie--wildcard-stack-el-create
-                 seq (trie--wildcard-next-token (append pattern nil) cmpfun)
-                 (trie--root trie) 0 nil nil)))
-    (message "init seq: %s" (trie--wildcard-stack-el-seq (car store)))
-    (trie--wildcard-stack-repopulate
+
+(defun trie--regexp-stack-construct-store (trie regexp &optional reverse)
+  ;; Construct store for regexp stack based on TRIE.
+  (let ((seq (cond ((stringp regexp) "") ((listp regexp) ()) (t [])))
+       store)
+    (push (list seq (trie--root trie) (tNFA-from-regexp regexp) 0)
+         store)
+    (trie--regexp-stack-repopulate
      store reverse
      (trie--comparison-function trie)
      (trie--lookupfun trie)
@@ -2390,268 +1706,78 @@ non-terminal * wildcards are not supported"))
      (trie--stack-emptyfun trie))))
 
 
-
-(defun trie--wildcard-stack-repopulate
+(defun trie--regexp-stack-repopulate
   (store reverse comparison-function lookupfun
         stack-createfun stack-popfun stack-emptyfun)
   ;; Recursively push matching children of the node at the head of STORE onto
-  ;; the front of STORE, until a data node is reached. Sort in (reverse)
-  ;; lexical order if REVERSE is nil (non-nil). The remaining arguments should
-  ;; be the corresponding trie functions (note that COMPARISON-FUNCTION should
-  ;; be the trie--comparison-function, *not* the trie--cmpfun)
-  (let (seq pattern token node idx group-stack groups cmpfun)
-    (setq cmpfun (if reverse
-                    `(lambda (a b) (,comparison-function b a))
-                  comparison-function))
-    (catch 'done
-      (while t
-       ;; nothing to do if stack is empty
-       (unless store (throw 'done nil))
-       ;; wildcard stack elements (other than the final matches, which are
-       ;; of course cons cells containing matching keys and their
-       ;; associated data) are lists containing: the sequence corresponding
-       ;; to the stack element, the index of the last matched character,
-       ;; the remaining pattern to search for, and the node at which to
-       ;; start searching
-       (setq seq         (trie--wildcard-stack-el-seq (car store))
-             pattern     (trie--wildcard-stack-el-pattern (car store))
-             node        (trie--wildcard-stack-el-node (car store))
-             idx         (trie--wildcard-stack-el-idx (car store))
-             group-stack (trie--wildcard-stack-el-group-stack (car store))
-             groups      (trie--wildcard-stack-el-groups (car store))
-             token   (nth 0 pattern)
-             pattern (nth 1 pattern)
-             store (cdr store))
-       (cond
-
-        ;; empty pattern: look for data node
-        ((null token)
-         (unless (null group-stack)
-           (error "Syntax error in trie wildcard pattern: missing \")\""))
-         ;; if we find one, push match onto stack and we're done
-         (when (setq node (trie--find-data-node node lookupfun))
-           (setq groups
-                 (sort (copy-sequence groups)
-                       (lambda (a b)
-                         (or (< (car a) (car b))
-                             (and (= (car a) (car b))
-                                  (> (cdr a) (cdr b)))))))
-           (push (cons (if groups (cons seq groups) seq)
-                       (trie--node-data node)) store)
-           (throw 'done store)))
-
-        ;; start group (: add current character index to pending groups
-        ((trie--wildcard-group-start-p token)
-         (dotimes (i (trie--wildcard-group-count token))
-           (push idx group-stack))
-         (push
-          (trie--wildcard-stack-el-create
-           seq (trie--wildcard-next-token pattern cmpfun)
-           node idx group-stack groups)
-          store))
-
-        ;; end group ): add current character index to pending groups
-        ((trie--wildcard-group-end-p token)
-         (dotimes (i (trie--wildcard-group-count token))
-           (if (null group-stack)
-               (error "Syntax error in trie wildcard pattern: missing \"(\"")
-             (push (cons (pop group-stack) idx) groups)))
-         (push
-          (trie--wildcard-stack-el-create
-           seq (trie--wildcard-next-token pattern cmpfun)
-           node idx group-stack groups)
-          store))
-
-        ;; literal string: descend to corresponding node and continue
-        ((trie--wildcard-literal-p token)
-         (setq node (trie--node-find node token lookupfun))
-         ;; if we found node corresponding to string, push that node onto
-         ;; the stack (otherwise, current branch of search as failed)
-         (when node
-           (push (trie--wildcard-stack-el-create
-                  (trie--seq-concat seq token)
-                  (trie--wildcard-next-token pattern cmpfun)
-                  node (+ idx (length token)) group-stack groups)
-                 store)))
-
-        ;; terminal *: standard repopulation using everything below node
-        ((and (trie--wildcard-*-p token)
-              (catch 'not-group
-                (dolist (tok pattern)
-                  (unless (eq tok ?\)) (throw 'not-group nil)))
-                t))
-         ;; if starting a new * wildcard, push a node stack onto the stack
-         (if (trie--node-p node)
-             (push (trie--wildcard-stack-el-create
-                    seq pattern
-                    (funcall stack-createfun
-                             (trie--node-subtree node) reverse)
-                    idx group-stack groups)
-                   store)
-           ;; otherwise, push node stack back onto the stack
-           (push (trie--wildcard-stack-el-create
-                  seq pattern node idx group-stack groups)
-                 store))
-         (let ((stack (trie--wildcard-stack-el-node (car store))))
-           ;; get first node from wildcard node stack
-           (setq node (funcall stack-popfun stack))
-           (when (funcall stack-emptyfun stack)
-             (setq store (cdr store)))
-           ;; recursively push node stacks for child node (then its child,
-           ;; grandchild, etc.) onto the stack until we find a data node
-           (while (not (trie--node-data-p node))
-             (push
-              (trie--wildcard-stack-el-create
-               (trie--seq-append seq (trie--node-split node))
-               pattern
-               (funcall stack-createfun (trie--node-subtree node) reverse)
-               (1+ idx) group-stack groups)
-              store)
-             (setq seq (trie--wildcard-stack-el-seq (car store))
-                   pattern (trie--wildcard-stack-el-pattern (car store))
-                   stack (trie--wildcard-stack-el-node (car store))
-                   idx (trie--wildcard-stack-el-idx (car store))
-                   group-stack (trie--wildcard-stack-el-group-stack
-                                (car store))
-                   groups (trie--wildcard-stack-el-groups (car store))
-                   node (funcall stack-popfun stack))
-             (when (funcall stack-emptyfun stack)
-               (setq store (cdr store))))
-           ;; add completed groups to list
-           (when pattern
-             (setq pattern (trie--wildcard-next-token pattern))
-             (dotimes (i (trie--wildcard-group-count token))
-               (if (null group-stack)
-                   (error "Syntax error in trie wildcard pattern:\
- missing \"(\"")
-                 (push (cons (pop group-stack) idx) groups)))
-             (unless (null group-stack)
-               (error "Syntax error in trie wildcard pattern:\
- missing \")\"")))
-           ;; sort group list
-           (setq groups
-                 (sort (copy-sequence groups)
-                       (lambda (a b)
-                         (or (< (car a) (car b))
-                             (and (= (car a) (car b))
-                                  (> (cdr a) (cdr b)))))))
-           ;; push result onto stack and we're done
-           (push (cons (if groups (cons seq groups) seq)
-                       (trie--node-data node)) store)
-           (throw 'done store)))
-
-        ;; non-terminal *: not supported for efficiency reasons
-        ((trie--wildcard-*-p token)
-         (error "Syntax error in trie wildcard pattern:\
-non-terminal * wildcards are not supported"))
-
-        ;; ? wildcard: push wildcard node stack onto stack and repopulate
-        ;;             again
-        ((trie--wildcard-?-p token)
-         ;; if we're starting a new ? wildcard, push a node stack onto the
-         ;; stack
-         (if (trie--node-p node)
-             (push (trie--wildcard-stack-el-create
-                    seq pattern
-                    (funcall stack-createfun
-                             (trie--node-subtree node) reverse)
-                    idx group-stack groups)
-                   store)
-           ;; otherwise, push node stack back onto stack
-           (push (trie--wildcard-stack-el-create
-                  seq pattern node idx group-stack groups)
-                 store))
-         ;; get node stack
-         (let ((stack (trie--wildcard-stack-el-node (car store))))
-           ;; get first non-data node from wildcard node stack
-           (setq node (funcall stack-popfun stack))
-           (when (and node (trie--node-data-p node))
-             (setq node (funcall stack-popfun stack)))
-           ;; if wildcard node stack is exhausted, remove it from the stack
-           (when (funcall stack-emptyfun stack)
-             (setq store (cdr store)))
-           ;; push new non-data node onto the stack
-           (when node
-             (push
-              (trie--wildcard-stack-el-create
-               (trie--seq-append seq (trie--node-split node))
-               (trie--wildcard-next-token pattern cmpfun)
-               node (1+ idx) group-stack groups)
-              store))))
-
-        ;; character alternative: push next matching node onto stack and
-        ;;                        repopulate again
-        ((trie--wildcard-char-alt-p token)
-         ;; push node back onto the stack
-         (push (trie--wildcard-stack-el-create
-                seq pattern node idx group-stack groups)
-               store)
-         (let ((c (pop token)))
-           (while (and c
-                       (not (setq node
-                                  (funcall lookupfun
-                                           (trie--node-subtree node)
-                                           (trie--node-create-dummy c)))))
-             (setq c (pop token)))
-           ;; if we've exhausted all characters in the alternative, remove it
-           ;; from the stack
-           (when (null token) (setq store (cdr store)))
-           ;; if we found a match, push matching node onto stack
-           (when node
-             (push
-              (trie--wildcard-stack-el-create
-               (trie--seq-append seq (trie--node-split node))
-               (trie--wildcard-next-token pattern cmpfun)
-               node (1+ idx) group-stack groups)
-              store))))
-
-        ;; negated character alternative: push next non-excluded node onto
-        ;;                                stack and repopulate again
-        ((trie--wildcard-neg-char-alt-p token)
-         ;; if we're starting a new negated character alternative, push a
-         ;; node stack onto the stack
-         (if (trie--node-p node)
-             (push (trie--wildcard-stack-el-create
-                    seq pattern
-                    (funcall stack-createfun
-                             (trie--node-subtree node) reverse)
-                    idx group-stack groups)
-                   store)
-           ;; otherwise, push wildcard node stack back onto the stack
-           (push (trie--wildcard-stack-el-create
-                  seq pattern node idx group-stack groups)
-                 store))
-         ;; get wildcard node stack
-         (let ((stack (trie--wildcard-stack-el-node (car store))))
-           ;; pop nodes from wildcard node stack until we find one that
-           ;; isn't excluded
-           (setq node (funcall stack-popfun stack))
-           (while (and node
-                       (catch 'excluded
-                         (dolist (c (butlast token)) ; drop final ^
-                           (when (eq (trie--node-split node) c)
-                             (throw 'excluded t)))))
-             (setq node (funcall stack-popfun stack)))
-           ;; if wildcard node stack is exhausted, remove it from the stack
-           (when (funcall stack-emptyfun stack)
-             (setq store (cdr store)))
-           ;; if we found match, push node onto stack
-           (when node
-             (push
-              (trie--wildcard-stack-el-create
-               (trie--seq-append seq (trie--node-split node))
-               (trie--wildcard-next-token pattern cmpfun)
-               node (1+ idx) group-stack groups)
-              store)))))
-
-       )))  ; end of infinite loop and catches
-  store)  ; return repopulated store
-
-
-
-
-;; ================================================================
-;;                        Regexp search
+  ;; STORE, until a data node is reached. REVERSE is the usual query argument,
+  ;; and the remaining arguments are the corresponding trie functions.
+  (let (state seq node pos groups n s)
+    (while
+       (progn
+         (setq pos (pop store)
+               seq (nth 0 pos)
+               node (nth 1 pos)
+               state (nth 2 pos)
+               pos (nth 3 pos))
+         (cond
+          ;; if stack is empty, we're done
+          ((null node) nil)
+
+          ;; if stack element is a trie node...
+          ((trie--node-p node)
+           (cond
+            ;; matching data node: add data to the stack and we're done
+            ((trie--node-data-p node)
+             (when (tNFA-match-p state)
+               (setq groups (tNFA-group-data state))
+               (push (cons (if groups (cons groups seq) seq)
+                           (trie--node-data node))
+                     store))
+             nil)  ; return nil to exit loop
+
+            ;; wildcard transition: add new node stack
+            ((tNFA-wildcard-p state)
+             (push (list seq
+                         (funcall stack-createfun
+                                  (trie--node-subtree node) reverse)
+                         state pos)
+                   store))
+
+            (t ;; non-wildcard transition: add all possible next nodes
+             (dolist (chr (sort (tNFA-transitions state)
+                                (if reverse
+                                    comparison-function
+                                  `(lambda (a b)
+                                     (,comparison-function b a)))))
+               (when (and (setq n (trie--node-find
+                                   node (vector chr) lookupfun))
+                          (setq s (tNFA-next-state state chr pos)))
+                 (push (list (trie--seq-append seq chr) n s (1+ pos))
+                       store)))
+             t)))  ; return t to keep looping
+
+          ;; otherwise, stack element is a node stack...
+          (t
+           ;; if node stack is empty, dump it and keep repopulating
+           (if (funcall stack-emptyfun node)
+               t  ; return t to keep looping
+             ;; otherwise, add node stack back, and add next node from stack
+             (push (list seq node state pos) store)
+             (setq node (funcall stack-popfun node)
+                   state (tNFA-next-state state (trie--node-split node) pos))
+             (when state
+               ;; matching data node: add data to the stack and we're done
+               (if (trie--node-data-p node)
+                   (progn
+                     (push (cons seq (trie--node-data node)) store)
+                     nil)  ; return nil to exit loop
+                 ;; normal node: add it to the stack and keep repopulating
+                 (push (list (trie--seq-append seq (trie--node-split node))
+                             node state (1+ pos))
+                       store)))))
+          ))))
+  store)
 
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]