[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/trie 5e8e73f 081/111: Fix data wrapping handling in fuz
From: |
Stefan Monnier |
Subject: |
[elpa] externals/trie 5e8e73f 081/111: Fix data wrapping handling in fuzzy query functions. |
Date: |
Mon, 14 Dec 2020 11:35:25 -0500 (EST) |
branch: externals/trie
commit 5e8e73fd06c54246dc1fb8681a6fabcfaff0ac77
Author: Toby S. Cubitt <toby-predictive@dr-qubit.org>
Commit: Toby S. Cubitt <toby-predictive@dr-qubit.org>
Fix data wrapping handling in fuzzy query functions.
Also, cache fuzzy queries ranked by Lewenstein distance.
---
trie.el | 225 +++++++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 145 insertions(+), 80 deletions(-)
diff --git a/trie.el b/trie.el
index 69abe6f..41fa9b7 100644
--- a/trie.el
+++ b/trie.el
@@ -272,6 +272,97 @@
(,comparison-function b a))))))
+;; massage rankfun arguments for `trie-regexp-search' results
+(if (trie-lexical-binding-p)
+ (defun trie--wrap-regexp-search-rankfun (rankfun)
+ (lambda (a b)
+ ;; if car of argument contains a key+group list rather than a straight
+ ;; key, remove group list
+ ;; FIXME: the test for straight key, below, will fail if the key is a
+ ;; list, and the first element of the key is itself a list
+ ;; (there might be no easy way to fully fix this...)
+ (unless (or (atom (car a))
+ (and (listp (car a))
+ (not (sequencep (caar a)))))
+ (setq a (cons (caar a) (cdr a))))
+ (unless (or (atom (car b))
+ (and (listp (car b))
+ (not (sequencep (caar b)))))
+ (setq b (cons (caar b) (cdr b))))
+ (funcall rankfun a b)))
+ (defun trie--wrap-regexp-search-rankfun (rankfun)
+ `(lambda (a b)
+ ;; if car of argument contains a key+group list rather than a straight
+ ;; key, remove group list
+ ;; FIXME: the test for straight key, below, will fail if the key is a
+ ;; list, and the first element of the key is itself a list
+ ;; (there might be no easy way to fully fix this...)
+ (unless (or (atom (car a))
+ (and (listp (car a))
+ (not (sequencep (caar a)))))
+ (setq a (cons (caar a) (cdr a))))
+ (unless (or (atom (car b))
+ (and (listp (car b))
+ (not (sequencep (caar b)))))
+ (setq b (cons (caar b) (cdr b))))
+ (,rankfun a b))))
+
+
+(if (trie-lexical-binding-p)
+ (defun trie--wrap-regexp-search-filter (filter)
+ (lambda (seq data)
+ ;; if car of argument contains a key+group list rather than a straight
+ ;; key, remove group list
+ ;; FIXME: the test for straight key, below, will fail if the key is a
+ ;; list, and the first element of the key is itself a list
+ ;; (there might be no easy way to fully fix this...)
+ (unless (or (atom (car seq))
+ (and (listp (car seq))
+ (not (sequencep (caar seq)))))
+ (setq seq (caar seq))
+ ;; call filter on massaged arguments
+ (funcall filter seq data))))
+ (defun trie--wrap-regexp-search-filter (filter)
+ `(lambda (seq data)
+ ;; if car of argument contains a key+group list rather than a straight
+ ;; key, remove group list
+ ;; FIXME: the test for straight key, below, will fail if the key is a
+ ;; list, and the first element of the key is itself a list
+ ;; (there might be no easy way to fully fix this...)
+ (unless (or (atom (car seq))
+ (and (listp (car seq))
+ (not (sequencep (caar seq)))))
+ (setq seq (caar seq))
+ ;; call filter on massaged arguments
+ (,filter seq data)))))
+
+
+;; create Lewenstein rank function from trie comparison function
+(if (trie-lexical-binding-p)
+ (defun trie--construct-Lewenstein-rankfun (comparison-function)
+ (let ((compfun (trie-construct-sortfun comparison-function)))
+ (lambda (a b)
+ (cond
+ ((< (cdar a) (cdar b)) t)
+ ((> (cdar a) (cdar b)) nil)
+ (t (funcall compfun (caar a) (caar b)))))))
+ (defun trie--construct-Lewenstein-rankfun (comparison-function)
+ `(lambda (a b)
+ (cond
+ ((< (cdar a) (cdar b)) t)
+ ((> (cdar a) (cdar b)) nil)
+ (t ,(trie-construct-sortfun comparison-function)
+ (caar a) (caar b))))))
+
+
+;; create Lewenstein rank function from trie comparison function
+(if (trie-lexical-binding-p)
+ (defun trie--wrap-fuzzy-filter (filter)
+ (lambda (match data) (funcall filter (car match) (cdr match) data)))
+ (defun trie--wrap-fuzzy-filter (filter)
+ `(lambda (match data) (,filter (car match) (cdr match) data))))
+
+
;;; ----------------------------------------------------------------
@@ -1648,48 +1739,29 @@ default key-data cons cell."
;; convert trie from print-form if necessary
(trie-transform-from-read-warn trie)
- ;; massage rankfun to cope with grouping data
;; FIXME: could skip this if REGEXP contains no grouping constructs
- ;; FIXME: crazy variable name is not needed with lexical scoping
- (let ((--trie-regexp-search--rankfun rankfun))
- (when rankfun
- (setq rankfun
- (lambda (a b)
- ;; if car of argument contains a key+group list rather than a
- ;; straight key, remove group list
- ;; FIXME: the test for straight key, below, will fail if the key
- ;; is a list, and the first element of the key is itself
- ;; a list (there might be no easy way to fully fix
- ;; this...)
- (unless (or (atom (car a))
- (and (listp (car a))
- (not (sequencep (caar a)))))
- (setq a (cons (caar a) (cdr a))))
- (unless (or (atom (car b))
- (and (listp (car b))
- (not (sequencep (caar b)))))
- (setq b (cons (caar b) (cdr b))))
- ;; call rankfun on massaged arguments
- (funcall --trie-regexp-search--rankfun a b))))
-
- ;; accumulate results
- (trie--accumulate-results
- rankfun maxnum reverse filter resultfun accumulator nil
- (trie--do-regexp-search
- (trie--root trie)
- (tNFA-from-regexp regexp :test (trie--construct-equality-function
- (trie--comparison-function trie)))
- (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) 0
- (or (and maxnum reverse) (and (not maxnum) (not reverse)))
- ;; FIXME: Is this a case where it would pay to replace these arguments
- ;; with dynamically-scoped variables, to save stack space during
- ;; the recursive calls to `trie--do-regexp-search'?
- ;; Alternatively, with lexical scoping, we could use a closure
- ;; for `trie--do-regexp-search' instead of a function.
- (trie--comparison-function trie)
- (trie--lookupfun trie)
- (trie--mapfun trie)
- accumulator))))
+ ;; massage rankfun and filter to cope with grouping data
+ (when rankfun (setq rankfun (trie--wrap-regexp-search-rankfun rankfun)))
+ (when filter (setq filter (trie--wrap-regexp-search-filter filter)))
+
+ ;; accumulate results
+ (trie--accumulate-results
+ rankfun maxnum reverse filter resultfun accumulator nil
+ (trie--do-regexp-search
+ (trie--root trie)
+ (tNFA-from-regexp regexp :test (trie--construct-equality-function
+ (trie--comparison-function trie)))
+ (cond ((stringp regexp) "") ((listp regexp) ()) (t [])) 0
+ (or (and maxnum reverse) (and (not maxnum) (not reverse)))
+ ;; FIXME: Is this a case where it would pay to replace these arguments
+ ;; with dynamically-scoped variables, to save stack space during
+ ;; the recursive calls to `trie--do-regexp-search'? Alternatively,
+ ;; with lexical scoping, we could use a closure for
+ ;; `trie--do-regexp-search' instead of a function.
+ (trie--comparison-function trie)
+ (trie--lookupfun trie)
+ (trie--mapfun trie)
+ accumulator)))
@@ -1999,7 +2071,7 @@ reverse order. Returns nil if no results are found.
Returns a list of matches, with elements of the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a matching key from the trie, DATA its associated
data, and DIST is its Lewenstein distance \(edit distance\) from
@@ -2024,7 +2096,7 @@ lexicographically\).
If RANKFUN is a function, it must accept two arguments, both of
the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a key from the trie, DIST is its Lewenstein
distances from STRING, and DATA is its associated data. RANKFUN
@@ -2032,14 +2104,14 @@ should return non-nil if first argument is ranked
strictly higher
than the second, nil otherwise.
The FILTER argument sets a filter function for the matches. If
-supplied, it is called for each possible match with two
-arguments: a KEY and a (DIST . DATA) cons cell. If the filter
-function returns nil, the match is not included in the results,
-and does not count towards MAXNUM.
+supplied, it is called for each possible match with three
+arguments: KEY, DIST and DATA. If the filter function returns
+nil, the match is not included in the results, and does not count
+towards MAXNUM.
RESULTFUN defines a function used to process results before
adding them to the final result list. If specified, it should
-accept two arguments: a KEY and a (DIST . DATA) cons cell. Its
+accept two arguments: a (KEY . DIST) cons cell and DATA. Its
return value is what gets added to the final result list, instead
of the default key-dist-data list."
@@ -2048,13 +2120,10 @@ of the default key-dist-data list."
;; construct rankfun to sort by Lewenstein distance if requested
(when (eq rankfun t)
- (setq rankfun `(lambda (a b)
- (cond
- ((< (cadr a) (cadr b)) t)
- ((> (cadr a) (cadr b)) nil)
- (t ,(trie-construct-sortfun
- (trie--comparison-function trie))
- (car a) (car b))))))
+ (setq rankfun (trie--construct-Lewenstein-rankfun
+ (trie--comparison-function trie))))
+ ;; massage filter function arguments
+ (when filter (setq filter (trie--wrap-fuzzy-filter filter)))
;; accumulate results
(trie--accumulate-results
@@ -2091,8 +2160,8 @@ of the default key-dist-data list."
(if (trie--node-data-p node)
(when (<= (aref row (1- (length row))) distance)
(funcall accumulator
- seq (cons (aref row (1- (length row)))
- (trie--node-data node))))
+ (cons seq (aref row (1- (length row))))
+ (trie--node-data node)))
;; build next row of Lewenstein table
(setq row (Lewenstein--next-row
@@ -2120,7 +2189,7 @@ defined by TRIE's comparison function, or in reverse
order if
REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
from the stack. Each stack element has the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a matching key from the trie, DATA its associated
data, and DIST is its Lewenstein distance \(edit distance\) from
@@ -2216,8 +2285,8 @@ within Lewenstein distance \(edit distance\) DISTANCE of
STRING."
;; push next fuzzy match onto head of stack
(when node
- (push (cons seq (cons (aref row (1- (length row)))
- (trie--node-data node)))
+ (push (cons (cons seq (aref row (1- (length row))))
+ (trie--node-data node))
store))))))
@@ -2237,7 +2306,7 @@ if no results are found.
Returns a list of completions, with elements of the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a matching completion from the trie, DATA its
associated data, and DIST is its Lewenstein distance \(edit
@@ -2262,7 +2331,7 @@ lexicographically\).
If RANKFUN is a function, it must accept two arguments, both of
the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a key from the trie, DIST is its Lewenstein
distances from PREFIX, and DATA is its associated data. RANKFUN
@@ -2270,14 +2339,14 @@ should return non-nil if first argument is ranked
strictly higher
than the second, nil otherwise.
The FILTER argument sets a filter function for the matches. If
-supplied, it is called for each possible match with two
-arguments: a KEY and a (DIST . DATA) cons cell. If the filter
-function returns nil, the match is not included in the results,
-and does not count towards MAXNUM.
+supplied, it is called for each possible match with three
+arguments: KEY, DIST and DATA. If the filter function returns
+nil, the match is not included in the results, and does not count
+towards MAXNUM.
RESULTFUN defines a function used to process results before
adding them to the final result list. If specified, it should
-accept two arguments: a KEY and a (DIST . DATA) cons cell. Its
+accept two arguments: a (KEY . DIST) cons cell and DATA. Its
return value is what gets added to the final result list, instead
of the default key-dist-data list."
@@ -2286,13 +2355,10 @@ of the default key-dist-data list."
;; construct rankfun to sort by Lewenstein distance if requested
(when (eq rankfun t)
- (setq rankfun `(lambda (a b)
- (cond
- ((< (cadr a) (cadr b)) t)
- ((> (cadr a) (cadr b)) nil)
- (t ,(trie-construct-sortfun
- (trie--comparison-function trie))
- (car a) (car b))))))
+ (setq rankfun (trie--construct-Lewenstein-rankfun
+ (trie--comparison-function trie))))
+ ;; massage filter function arguments
+ (when filter (setq filter (trie--wrap-fuzzy-filter filter)))
;; accumulate results
(trie--accumulate-results
@@ -2330,8 +2396,8 @@ of the default key-dist-data list."
(if (trie--node-data-p node)
(when (<= (aref row (1- (length row))) distance)
(funcall accumulator
- seq (cons (aref row (1- (length row)))
- (trie--node-data node))))
+ (cons seq (aref row (1- (length row))))
+ (trie--node-data node)))
;; build next row of Lewenstein table
(setq row (Lewenstein--next-row
@@ -2354,8 +2420,7 @@ of the default key-dist-data list."
(when (<= (aref row (1- (length row))) distance)
(trie--mapc
(lambda (n s)
- (funcall accumulator
- s (cons pfxcost (trie--node-data n))))
+ (funcall accumulator (cons s pfxcost) (trie--node-data n)))
mapfun node seq reverse))
)))
@@ -2370,7 +2435,7 @@ defined by TRIE's comparison function, or in reverse
order if
REVERSE is non-nil. Calling `trie-stack-pop' pops the top element
from the stack. Each stack element has the form:
- (KEY DIST . DATA)
+ ((KEY . DIST) . DATA)
where KEY is a matching completion from the trie, DATA its
associated data, and DIST is the Lewenstein distance \(edit
@@ -2497,7 +2562,7 @@ DISTANCE of PREFIX."
;; push next fuzzy completion onto head of stack
(when node
- (push (cons seq (cons pfxcost (trie--node-data node)))
+ (push (cons (cons seq pfxcost) (trie--node-data node))
store))))))
- [elpa] externals/trie 71f8273 098/111: Significantly improve efficiency of trie-fuzzy-complete., (continued)
- [elpa] externals/trie 71f8273 098/111: Significantly improve efficiency of trie-fuzzy-complete., Stefan Monnier, 2020/12/14
- [elpa] externals/trie c2b5e26 105/111: Myriad bug fixes and code refactoring in new fuzzy and ngram completion., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 63da3b1 111/111: * trie.el: Fix header which likely suffered a `M-q` accident, Stefan Monnier, 2020/12/14
- [elpa] externals/trie ff5e05f 040/111: Bumped copyright year, Stefan Monnier, 2020/12/14
- [elpa] externals/trie 2281926 020/111: Minor code reformatting and rearrangement, Stefan Monnier, 2020/12/14
- [elpa] externals/trie d99fb00 055/111: Simplified advice-based edebug pretty-printing of tries and dictionaries., Stefan Monnier, 2020/12/14
- [elpa] externals/trie b4d81bf 064/111: Trivial whitespace tidying., Stefan Monnier, 2020/12/14
- [elpa] externals/trie d45e9d5 062/111: Added autoload cookies., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 1c2790d 038/111: Replaced wildcard searches with more powerful and efficient regexp searches., Stefan Monnier, 2020/12/14
- [elpa] externals/trie bbfecae 085/111: Do lexbind test at compile-time instead of load-time., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 5e8e73f 081/111: Fix data wrapping handling in fuzzy query functions.,
Stefan Monnier <=
- [elpa] externals/trie 2a9d7ec 099/111: Port efficiency improvements to trie-fuzzy-match., Stefan Monnier, 2020/12/14
- [elpa] externals/trie a2554d6 094/111: Fix function symbol quoting., Stefan Monnier, 2020/12/14
- [elpa] externals/trie c6ddbb9 096/111: Bump version numbers., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 94a1a86 087/111: Bump version numbers since we've added iterator generators., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 4001f61 097/111: Fix corresponding bug in trie-fuzzy-complete-stack., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 91d299c 104/111: Pretty-print trie nodes in edebug., Stefan Monnier, 2020/12/14
- [elpa] externals/trie fc9b218 032/111: Removed support for non-terminal * wildcards, Stefan Monnier, 2020/12/14
- [elpa] externals/trie 5a064c0 092/111: Fix bug in trie-delete return value., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 9f49d95 086/111: Implement iterator generators on collection data structures., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 2957aec 103/111: Fix bugs in trie-fuzzy-match/complete., Stefan Monnier, 2020/12/14