emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/trie 1eb515f 078/111: Implement trie fuzzy match and co


From: Stefan Monnier
Subject: [elpa] externals/trie 1eb515f 078/111: Implement trie fuzzy match and completion stacks.
Date: Mon, 14 Dec 2020 11:35:24 -0500 (EST)

branch: externals/trie
commit 1eb515f073c87de7e7a020ba69eb635a3147a2a8
Author: Toby S. Cubitt <toby-predictive@dr-qubit.org>
Commit: Toby S. Cubitt <toby-predictive@dr-qubit.org>

    Implement trie fuzzy match and completion stacks.
---
 trie.el | 430 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 361 insertions(+), 69 deletions(-)

diff --git a/trie.el b/trie.el
index 0cf1dd8..1c50557 100644
--- a/trie.el
+++ b/trie.el
@@ -99,12 +99,12 @@
 ;; efficienct insertion operations, and less efficient deletion
 ;; operations. Splay trees give good average-case complexity and are simpler
 ;; to implement than AVL or red-black trees (which can mean they're faster in
-;; practice!), at the expense of poor worst-case complexity.
+;; practice), at the expense of poor worst-case complexity.
 ;;
 ;; If your tries are going to be static (i.e. created once and rarely
 ;; modified), then using perfectly balanced binary search trees might be
 ;; appropriate. Perfectly balancing the binary trees is very inefficient, but
-;; it only has to be when the trie is first created or modified. Lookup
+;; it only has to be done when the trie is first created or modified. Lookup
 ;; operations will then be as efficient as possible for ternary search trees,
 ;; and the implementation will also be simpler (so probably faster) than a
 ;; self-balancing tree, without the space and time overhead required to keep
@@ -1041,9 +1041,9 @@ bind any variables with names commencing \"--\"."
              &aux
              (comparison-function (trie--comparison-function trie))
              (lookupfun (trie--lookupfun trie))
-             (stack-createfun (trie--stack-createfun trie))
-             (stack-popfun (trie--stack-popfun trie))
-             (stack-emptyfun (trie--stack-emptyfun trie))
+             (stackcreatefun (trie--stack-createfun trie))
+             (stackpopfun (trie--stack-popfun trie))
+             (stackemptyfun (trie--stack-emptyfun trie))
              (repopulatefun #'trie--stack-repopulate)
              (store
               (if (trie-empty trie)
@@ -1054,12 +1054,12 @@ bind any variables with names commencing \"--\"."
                               ((eq type 'string) "")
                               (t []))
                         (funcall
-                         stack-createfun
+                         stackcreatefun
                          (trie--node-subtree (trie--root trie))
                          reverse)))
                  reverse
                  comparison-function lookupfun
-                 stack-createfun stack-popfun stack-emptyfun)))
+                 stackcreatefun stackpopfun stackemptyfun)))
              (pushed '())
              ))
            (:constructor
@@ -1070,9 +1070,9 @@ bind any variables with names commencing \"--\"."
              &aux
              (comparison-function (trie--comparison-function trie))
              (lookupfun (trie--lookupfun trie))
-             (stack-createfun (trie--stack-createfun trie))
-             (stack-popfun (trie--stack-popfun trie))
-             (stack-emptyfun (trie--stack-emptyfun trie))
+             (stackcreatefun (trie--stack-createfun trie))
+             (stackpopfun (trie--stack-popfun trie))
+             (stackemptyfun (trie--stack-emptyfun trie))
              (repopulatefun #'trie--stack-repopulate)
              (store (trie--completion-stack-construct-store
                      trie prefix reverse))
@@ -1086,17 +1086,49 @@ bind any variables with names commencing \"--\"."
              &aux
              (comparison-function (trie--comparison-function trie))
              (lookupfun (trie--lookupfun trie))
-             (stack-createfun (trie--stack-createfun trie))
-             (stack-popfun (trie--stack-popfun trie))
-             (stack-emptyfun (trie--stack-emptyfun trie))
+             (stackcreatefun (trie--stack-createfun trie))
+             (stackpopfun (trie--stack-popfun trie))
+             (stackemptyfun (trie--stack-emptyfun trie))
              (repopulatefun #'trie--regexp-stack-repopulate)
              (store (trie--regexp-stack-construct-store
                      trie regexp reverse))
              (pushed '())
              ))
+           (:constructor
+            trie--fuzzy-match-stack-create
+            (trie string distance
+             &optional
+             reverse
+             &aux
+             (comparison-function (trie--comparison-function trie))
+             (lookupfun (trie--lookupfun trie))
+             (stackcreatefun (trie--stack-createfun trie))
+             (stackpopfun (trie--stack-popfun trie))
+             (stackemptyfun (trie--stack-emptyfun trie))
+             (repopulatefun #'trie--fuzzy-match-stack-repopulate)
+             (store (trie--fuzzy-match-stack-construct-store
+                     trie string distance reverse))
+             (pushed '())
+             ))
+           (:constructor
+            trie--fuzzy-completion-stack-create
+            (trie prefix distance
+             &optional
+             reverse
+             &aux
+             (comparison-function (trie--comparison-function trie))
+             (lookupfun (trie--lookupfun trie))
+             (stackcreatefun (trie--stack-createfun trie))
+             (stackpopfun (trie--stack-popfun trie))
+             (stackemptyfun (trie--stack-emptyfun trie))
+             (repopulatefun #'trie--fuzzy-completion-stack-repopulate)
+             (store (trie--fuzzy-completion-stack-construct-store
+                     trie prefix distance reverse))
+             (pushed '())
+             ))
            (:copier nil))
   reverse comparison-function lookupfun
-  stack-createfun stack-popfun stack-emptyfun
+  stackcreatefun stackpopfun stackemptyfun
   repopulatefun store pushed)
 
 
@@ -1154,9 +1186,9 @@ element stored in the trie.)"
                       (trie--stack-reverse trie-stack)
                       (trie--stack-comparison-function trie-stack)
                       (trie--stack-lookupfun trie-stack)
-                      (trie--stack-stack-createfun trie-stack)
-                      (trie--stack-stack-popfun trie-stack)
-                      (trie--stack-stack-emptyfun trie-stack)))))))
+                      (trie--stack-stackcreatefun trie-stack)
+                      (trie--stack-stackpopfun trie-stack)
+                      (trie--stack-stackemptyfun trie-stack)))))))
 
 
 (defun trie-stack-push (element trie-stack)
@@ -1203,19 +1235,17 @@ element stored in the trie.)"
     (let ((node (funcall stack-popfun (cdar store)))
          (seq (caar store)))
       (when (funcall stack-emptyfun (cdar store))
-       ;; (pop store) here produces irritating compiler warnings
+       ;; using (pop store) here produces irritating compiler warnings
        (setq store (cdr store)))
 
       (while (not (trie--node-data-p node))
        (push
         (cons (trie--seq-append seq (trie--node-split node))
-              (funcall stack-createfun
-                       (trie--node-subtree node) reverse))
+              (funcall stack-createfun (trie--node-subtree node) reverse))
         store)
        (setq node (funcall stack-popfun (cdar store))
              seq (caar store))
        (when (funcall stack-emptyfun (cdar store))
-         ;; (pop store) here produces irritating compiler warnings
          (setq store (cdr store))))
 
       (push (cons seq (trie--node-data node)) store))))
@@ -1921,11 +1951,19 @@ order \(i.e. the order defined by the trie's comparison
 function\). If REVERSE is non-nil, the results are sorted in the
 reverse order. Returns nil if no results are found.
 
+Returns a list of matches, with elements of the form:
+
+    (KEY DIST . DATA)
+
+where KEY is a matching key from the trie, DATA its associated
+data, and DIST is its Lewenstein distance \(edit distance\) from
+STRING.
+
 STRING is a sequence (vector, list or string), whose elements are
 of the same type as elements of the trie keys. If STRING is a
 string, it must be possible to apply `string' to individual
-elements of the keys stored in the trie. The matches returned in
-the alist will be sequences of the same type as STRING.
+elements of the keys stored in the trie. The KEYs returned in the
+list will be sequences of the same type as STRING.
 
 DISTANCE must be an integer.
 
@@ -1934,7 +1972,7 @@ first MAXNUM matches. Otherwise, all matches are returned.
 
 RANKFUN overrides the default ordering of the results. If it is
 `t', matches are instead ordered by increasing Lewenstein
-distance of their prefix \(with same-distance matches ordered
+distance \(with same-distance matches ordered
 lexicographically\).
 
 If RANKFUN is a function, it must accept two arguments, both of
@@ -2011,20 +2049,9 @@ of the default key-dist-data list."
                           (trie--node-data node))))
 
     ;; build next row of Lewenstein table
-    (let ((next-row (make-vector (length row) nil)))
-      (let ((i 0) inscost delcost subcost)
-       (aset next-row 0 (1+ (aref row 0)))
-       (while (< (incf i) (length row))
-         (setq inscost (1+ (aref next-row (1- i)))
-               delcost (1+ (aref row i))
-               subcost (if (funcall equalfun
-                                    (trie--node-split node)
-                                    (elt string (1- i)))
-                           (aref row (1- i))
-                         (1+ (aref row (1- i)))))
-         (aset next-row i (min inscost delcost subcost))))
-      (setq row next-row))
-    (setq seq (trie--seq-append seq (trie--node-split node)))
+    (setq row (trie--Lewenstein-next-row
+              row string (trie--node-split node) equalfun)
+         seq (trie--seq-append seq (trie--node-split node)))
 
     ;; as long as some row entry is < DISTANCE, recursively search below NODE
     (when (< (apply #'min (append row nil)) distance)
@@ -2037,21 +2064,159 @@ of the default key-dist-data list."
               reverse))))
 
 
+(defun trie--Lewenstein-next-row (row string chr equalfun)
+  ;; Compute next row of Lewenstein distance matrix.
+  (let ((next-row (make-vector (length row) nil))
+       (i 0) inscost delcost subcost)
+    (aset next-row 0 (1+ (aref row 0)))
+    (while (< (incf i) (length row))
+      (setq inscost (1+ (aref next-row (1- i)))
+           delcost (1+ (aref row i))
+           subcost (if (funcall equalfun chr (elt string (1- i)))
+                       (aref row (1- i))
+                     (1+ (aref row (1- i)))))
+      (aset next-row i (min inscost delcost subcost)))
+    next-row))
+
+
+
+(defun trie-fuzzy-match-stack (trie string distance &optional reverse)
+  "Return an object that allows fuzzy matches to be accessed
+as if they were a stack.
+
+The stack is sorted in \"lexicographic\" order, i.e. the order
+defined by TRIE's comparison function, or in reverse order if
+REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
+from the stack. Each stack element has the form:
+
+    (KEY DIST . DATA)
+
+where KEY is a matching key from the trie, DATA its associated
+data, and DIST is its Lewenstein distance \(edit distance\) from
+STRING.
+
+STRING is a sequence (vector, list or string), whose elements are
+of the same type as elements of the trie keys. If STRING is a
+string, it must be possible to apply `string' to individual
+elements of the keys stored in the trie. The KEYs in the matches
+returned by `trie-stack-pop' will be sequences of the same type
+as STRING.
+
+DISTANCE is an integer. The fuzzy matches in the stack will be
+within Lewenstein distance \(edit distance\) DISTANCE of STRING."
+
+  ;; convert trie from print-form if necessary
+  (trie-transform-from-read-warn trie)
+  ;; if stack functions aren't defined for trie type, throw error
+  (if (not (functionp (trie--stack-createfun trie)))
+      (error "Trie type does not support stack operations")
+    ;; otherwise, create and initialise a fuzzy stack
+    (trie--fuzzy-match-stack-create trie string distance reverse)))
+
+
+(defun trie--fuzzy-match-stack-construct-store
+    (trie string distance &optional reverse)
+  ;; Construct store for fuzzy stack based on TRIE.
+  (let ((seq (cond ((stringp string) "") ((listp string) ()) (t [])))
+       store)
+    (push (list seq
+               (funcall (trie--stack-createfun trie)
+                        (trie--node-subtree (trie--root trie))
+                        reverse)
+               string distance
+               (apply #'vector (number-sequence 0 (length string))))
+         store)
+    (trie--fuzzy-match-stack-repopulate
+     store reverse
+     (trie--comparison-function trie)
+     (trie--lookupfun trie)
+     (trie--stack-createfun trie)
+     (trie--stack-popfun trie)
+     (trie--stack-emptyfun trie))))
+
+
+(defun trie--fuzzy-match-stack-repopulate
+  (store reverse comparison-function _lookupfun
+        stack-createfun stack-popfun stack-emptyfun)
+  ;; Recursively push matching children of the node at the head of STORE
+  ;; onto STORE, until a data node is reached. REVERSE is the usual
+  ;; query argument, and the remaining arguments are the corresponding
+  ;; trie functions.
+
+  (when store
+    (let ((equalfun (trie--construct-equality-function comparison-function))
+         nextrow)
+
+      (destructuring-bind (seq node string distance row) (car store)
+       (setq node (funcall stack-popfun node))
+       (when (funcall stack-emptyfun (nth 1 (car store)))
+         ;; using (pop store) here produces irritating compiler warnings
+         (setq store (cdr store)))
+
+       ;; push children of node at head of store that are within DISTANCE of
+       ;; STRING, until we find a data node where entire SEQ is within
+       ;; DISTANCE of STRING (i.e. last entry of row is <= DISTANCE)
+       (while (and node
+                   (not (and (trie--node-data-p node)
+                             (<= (aref row (1- (length row))) distance))))
+         ;; drop data nodes whose SEQ is greater than DISTANCE
+         (unless (trie--node-data-p node)
+           (setq nextrow (trie--Lewenstein-next-row
+                          row string (trie--node-split node) equalfun))
+           ;; push children of non-data nodes whose SEQ is less than DISTANCE
+           ;; onto stack
+           (when (< (apply #'min (append row nil)) distance)
+             (push
+              (list (trie--seq-append seq (trie--node-split node))
+                    (funcall stack-createfun
+                             (trie--node-subtree node) reverse)
+                    string distance nextrow)
+              store)))
+         ;; get next node from stack
+         (when (setq node (car store))
+           (setq seq (nth 0 node)
+                 string (nth 2 node)
+                 distance (nth 3 node)
+                 row (nth 4 node)
+                 node (funcall stack-popfun (nth 1 node)))
+           ;; drop head of stack if nodes are exhausted
+           (when (funcall stack-emptyfun (nth 1 (car store)))
+             (setq store (cdr store)))))
+
+       ;; push next fuzzy match onto head of stack
+       (when node
+         (push (cons seq (cons (aref row (1- (length row)))
+                               (trie--node-data node)))
+               store))))))
+
+
+
+
+;; ================================================================
+;;                        Fuzzy completing
 
 (defun trie-fuzzy-complete
   (trie prefix distance &optional rankfun maxnum reverse filter resultfun)
-  "Return matches for PREFIX in TRIE within Lewenstein DISTANCE
-\(edit distance\) of PREFIX along with their associated data, in
-the order defined by RANKFUN, defaulting to \"lexicographic\"
-order \(i.e. the order defined by the trie's comparison
-function\). If REVERSE is non-nil, the results are sorted in the
-reverse order. Returns nil if no results are found.
+  "Return completions of prefixes within Lewenstein DISTANCE of PREFIX
+along with their associated data, in the order defined by
+RANKFUN, defaulting to \"lexicographic\" order \(i.e. the order
+defined by the trie's comparison function\). If REVERSE is
+non-nil, the results are sorted in the reverse order. Returns nil
+if no results are found.
+
+Returns a list of completions, with elements of the form:
+
+    (KEY DIST . DATA)
+
+where KEY is a matching completion from the trie, DATA its
+associated data, and DIST is its Lewenstein distance \(edit
+distance\) from STRING.
 
 PREFIX is a sequence (vector, list or string), whose elements are
 of the same type as elements of the trie keys. If PREFIX is a
 string, it must be possible to apply `string' to individual
-elements of the keys stored in the trie. The matches returned in
-the alist will be sequences of the same type as PREFIX.
+elements of the keys stored in the trie. The KEYs returned in the
+list will be sequences of the same type as PREFIX.
 
 DISTANCE must be an integer.
 
@@ -2063,7 +2228,7 @@ RANKFUN overrides the default ordering of the results. If 
it is
 distance of their prefix \(with same-distance matches ordered
 lexicographically\).
 
-If RANKFUN is a function, it must accepts two arguments, both of
+If RANKFUN is a function, it must accept two arguments, both of
 the form:
 
     (KEY DIST . DATA)
@@ -2129,7 +2294,7 @@ of the default key-dist-data list."
   ;; minimum distance of any prefix of seq. Remaining arguments are
   ;; corresponding trie functions.
 
-  ;; if we're at a data node and SEQ is within DISTANCE of STRING (i.e. last
+  ;; if we're at a data node and SEQ is within DISTANCE of PREFIX (i.e. last
   ;; entry of row is <= DISTANCE), accumulate result
   (if (trie--node-data-p node)
       (when (<= (aref row (1- (length row))) distance)
@@ -2138,21 +2303,10 @@ of the default key-dist-data list."
                           (trie--node-data node))))
 
     ;; build next row of Lewenstein table
-    (let ((next-row (make-vector (length row) nil)))
-      (let ((i 0) inscost delcost subcost)
-       (aset next-row 0 (1+ (aref row 0)))
-       (while (< (incf i) (length row))
-         (setq inscost (1+ (aref next-row (1- i)))
-               delcost (1+ (aref row i))
-               subcost (if (funcall equalfun
-                                    (trie--node-split node)
-                                    (elt prefix (1- i)))
-                           (aref row (1- i))
-                         (1+ (aref row (1- i)))))
-         (aset next-row i (min inscost delcost subcost))))
-      (setq row next-row))
-    (setq seq (trie--seq-append seq (trie--node-split node)))
-    (setq pfxcost (min pfxcost (aref row (1- (length row)))))
+    (setq row (trie--Lewenstein-next-row
+              row prefix (trie--node-split node) equalfun)
+         seq (trie--seq-append seq (trie--node-split node))
+         pfxcost (min pfxcost (aref row (1- (length row)))))
 
     ;; as long as some row entry is < DISTANCE, recursively search below NODE
     (if (< (apply #'min (append row nil)) distance)
@@ -2163,19 +2317,157 @@ of the default key-dist-data list."
                    cmpfun equalfun lookupfun mapfun accumulator))
                 (trie--node-subtree node)
                 reverse)
-    ;; otherwise, accumulate all results below node
-    (if (<= (aref row (1- (length row))) distance)
+
+      ;; otherwise, if we've found a prefix within DISTANCE of PREFIX,
+      ;; accumulate all completions below node
+      (when (<= (aref row (1- (length row))) distance)
        (trie--mapc
         (lambda (n s)
           (funcall accumulator
-                   s (cons (aref row (1- (length row)))
-                           (trie--node-data n))))
-        mapfun node seq reverse)
-      ))))
+                   s (cons pfxcost (trie--node-data n))))
+        mapfun node seq reverse))
+      )))
+
+
+
+(defun trie-fuzzy-complete-stack (trie prefix distance &optional reverse)
+  "Return an object that allows fuzzy completions to be accessed
+as if they were a stack.
+
+The stack is sorted in \"lexicographic\" order, i.e. the order
+defined by TRIE's comparison function, or in reverse order if
+REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
+from the stack. Each stack element has the form:
 
+    (KEY DIST . DATA)
+
+where KEY is a matching completion from the trie, DATA its
+associated data, and DIST is the Lewenstein distance \(edit
+distance\) from PREFIX of the prefix whose completion is KEY.
+
+PREFIX is a sequence (vector, list or string), whose elements are
+of the same type as elements of the trie keys. If PREFIX is a
+string, it must be possible to apply `string' to individual
+elements of the keys stored in the trie. The KEYs in the stack
+elements will be sequences of the same type as PREFIX.
 
+DISTANCE is an integer. The fuzzy completions in the stack will
+have prefixes within Lewenstein distance \(edit distance\)
+DISTANCE of PREFIX."
+
+  ;; convert trie from print-form if necessary
+  (trie-transform-from-read-warn trie)
+  ;; if stack functions aren't defined for trie type, throw error
+  (if (not (functionp (trie--stack-createfun trie)))
+      (error "Trie type does not support stack operations")
+    ;; otherwise, create and initialise a fuzzy stack
+    (trie--fuzzy-completion-stack-create trie prefix distance reverse)))
 
 
+(defun trie--fuzzy-completion-stack-construct-store
+    (trie prefix distance &optional reverse)
+  ;; Construct store for fuzzy completion stack based on TRIE.
+  (let ((seq (cond ((stringp prefix) "") ((listp prefix) ()) (t [])))
+       store)
+    (push (list seq
+               (funcall (trie--stack-createfun trie)
+                        (trie--node-subtree (trie--root trie))
+                        reverse)
+               prefix distance
+               (apply #'vector (number-sequence 0 (length prefix)))
+               (length prefix))
+         store)
+    (trie--fuzzy-completion-stack-repopulate
+     store reverse
+     (trie--comparison-function trie)
+     (trie--lookupfun trie)
+     (trie--stack-createfun trie)
+     (trie--stack-popfun trie)
+     (trie--stack-emptyfun trie))))
+
+
+(defun trie--fuzzy-completion-stack-repopulate
+  (store reverse comparison-function _lookupfun
+        stack-createfun stack-popfun stack-emptyfun)
+  ;; Recursively push matching children of the node at the head of STORE
+  ;; onto STORE, until a data node is reached. REVERSE is the usual
+  ;; query argument, and the remaining arguments are the corresponding
+  ;; trie functions.
+
+  (when store
+    (let ((equalfun (trie--construct-equality-function comparison-function)))
+
+      (destructuring-bind (seq node prefix distance row pfxcost) (car store)
+       (setq node (funcall stack-popfun node))
+       (when (funcall stack-emptyfun (nth 1 (car store)))
+         ;; using (pop store) here produces irritating compiler warnings
+         (setq store (cdr store)))
+
+       ;; push children of node at head of store that are within DISTANCE of
+       ;; PREFIX, until we either find a data node whose entire SEQ is within
+       ;; DISTANCE of PREFIX (i.e. last entry of row is <= DISTANCE), or
+       ;; we've found a prefix within DISTANCE of PREFIX and are gathering
+       ;; all its completions
+       (while (and node
+                   (not (and (trie--node-data-p node)
+                             (or (eq distance t)  ; completing a prefix
+                                 (<= (aref row (1- (length row))) distance))
+                             )))
+          ;; drop data nodes whose SEQ is greater than DISTANCE
+         (unless (trie--node-data-p node)
+           ;; build next row of Lewenstein table
+           (setq row (trie--Lewenstein-next-row
+                      row prefix (trie--node-split node) equalfun)
+                 seq (trie--seq-append seq (trie--node-split node))
+                 pfxcost (min pfxcost (aref row (1- (length row)))))
+
+           (cond
+            ;; if we're completing a prefix, always push next node onto stack
+            ((eq distance t)
+             (push
+              (list seq
+                    (funcall stack-createfun
+                             (trie--node-subtree node) reverse)
+                    prefix t row pfxcost)
+              store))
+
+            ;; if we've found a prefix within DISTANCE of PREFIX, then
+            ;; everything below node belongs on stack
+            ((<= (aref row (1- (length row))) distance)
+             (push
+              (list seq
+                    (funcall stack-createfun
+                             (trie--node-subtree node) reverse)
+                    ;; t in distance slot indicates completing
+                    prefix t row pfxcost)
+              store))
+
+            ;; if some row entry for non-data node is < DISTANCE, push node
+            ;; onto stack
+            ((< (apply #'min (append row nil)) distance)
+             (push
+              (list seq
+                    (funcall stack-createfun
+                             (trie--node-subtree node) reverse)
+                    prefix distance row pfxcost)
+              store))))
+
+         ;; get next node from stack
+         (when (setq node (car store))
+           (setq seq (nth 0 node)
+                 prefix (nth 2 node)
+                 distance (nth 3 node)
+                 row (nth 4 node)
+                 node (funcall stack-popfun (nth 1 node)))
+           ;; drop head of stack if nodes are exhausted
+           (when (funcall stack-emptyfun (nth 1 (car store)))
+             (setq store (cdr store)))))
+
+
+       ;; push next fuzzy completion onto head of stack
+       (when node
+         (push (cons seq (cons pfxcost (trie--node-data node)))
+               store))))))
 
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]