[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] master 376f5df: Customizable char-fold with char-fold-symm
From: |
Juri Linkov |
Subject: |
[Emacs-diffs] master 376f5df: Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689) |
Date: |
Tue, 23 Jul 2019 16:27:37 -0400 (EDT) |
branch: master
commit 376f5df3cca0dbf186823e5b329d76b52019473d
Author: Juri Linkov <address@hidden>
Commit: Juri Linkov <address@hidden>
Customizable char-fold with char-fold-symmetric, char-fold-include
(bug#35689)
* doc/emacs/search.texi (Lax Search): Document
char-fold-symmetric, char-fold-include, char-fold-exclude.
* lisp/char-fold.el (char-fold--default-include)
(char-fold--default-exclude, char-fold--default-symmetric)
(char-fold--previous): New defconsts.
(char-fold-include, char-fold-exclude, char-fold-symmetric):
New defcustoms.
(char-fold-make-table): Use them.
(char-fold-update-table): New function called at top-level.
* test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly)
(char-fold--permutation): New functions.
(char-fold--test-without-customization)
(char-fold--test-with-customization): New tests.
---
doc/emacs/search.texi | 19 +++++--
etc/NEWS | 9 +++
lisp/char-fold.el | 131 ++++++++++++++++++++++++++++++++++++++++---
test/lisp/char-fold-tests.el | 75 +++++++++++++++++++++++++
4 files changed, 222 insertions(+), 12 deletions(-)
diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi
index b47d51a..66af5d4 100644
--- a/doc/emacs/search.texi
+++ b/doc/emacs/search.texi
@@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands
have a different
default, controlled by a separate option; see @ref{Replacement and Lax
Matches}.)
- Like with case folding, typing an explicit variant of a character,
-such as @code{@"a}, as part of the search string disables character
-folding for that search. If you delete such a character from the
-search string, this effect ceases.
+@vindex char-fold-symmetric
+ By default, typing an explicit variant of a character, such as
+@code{@"a}, as part of the search string doesn't match its base
+character, such as @code{a}. But if you customize the variable
+@code{char-fold-symmetric} to @code{t}, then search commands treat
+equivalent characters the same and use of any of a set of equivalent
+characters in a search string finds any of them in the text being
+searched, so typing an accented character @code{@"a} matches the
+letter @code{a} as well as all the other variants like @code{@'a}.
+
+@vindex char-fold-include
+@vindex char-fold-exclude
+ You can add new foldings using the customizable variable
+@code{char-fold-include}, or remove the existing ones using the
+customizable variable @code{char-fold-exclude}.
@node Replace
@section Replacement Commands
diff --git a/etc/NEWS b/etc/NEWS
index 6a02c38..5313270 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1175,6 +1175,15 @@ rather than stopping after one level, such that
searching for
e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER
IOTA WITH OXIA.
++++
+*** New char-folding options: 'char-fold-include' lets you add ad hoc
+foldings, 'char-fold-exclude' to remove foldings from default decomposition,
+and 'char-fold-symmetric' to search for any of an equivalence class of
+characters. For example, with a 'nil' value of 'char-fold-symmetric'
+you can search for "e" to find "é", but not vice versa. With a non-nil
+value you can search for either, for example, you can search for "é"
+to find "e".
+
** Debugger
+++
diff --git a/lisp/char-fold.el b/lisp/char-fold.el
index a5c4e5e..f379229 100644
--- a/lisp/char-fold.el
+++ b/lisp/char-fold.el
@@ -22,7 +22,18 @@
;;; Code:
-(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1))
+(eval-and-compile
+ (put 'char-fold-table 'char-table-extra-slots 1)
+ (defconst char-fold--default-include
+ '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷"
"🙶" "🙸" "«" "»")
+ (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
+ (?` "❛" "‘" "‛" "" "❮" "‹")))
+ (defconst char-fold--default-exclude nil)
+ (defconst char-fold--default-symmetric nil)
+ (defconst char-fold--previous (list char-fold--default-include
+ char-fold--default-exclude
+ char-fold--default-symmetric)))
+
(eval-and-compile
(defun char-fold-make-table ()
@@ -116,21 +127,70 @@
(aref equiv (car simpler-decomp)))))))))))
table)
- ;; Add some manual entries.
- (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“"
"„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
- (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
- (?` "❛" "‘" "‛" "" "❮" "‹")))
+ ;; Add some entries to default decomposition
+ (dolist (it (or (bound-and-true-p char-fold-include)
+ char-fold--default-include))
(let ((idx (car it))
(chars (cdr it)))
(aset equiv idx (append chars (aref equiv idx)))))
+ ;; Remove some entries from default decomposition
+ (dolist (it (or (bound-and-true-p char-fold-exclude)
+ char-fold--default-exclude))
+ (let ((idx (car it))
+ (chars (cdr it)))
+ (when (aref equiv idx)
+ (dolist (char chars)
+ (aset equiv idx (remove char (aref equiv idx)))))))
+
+ ;; Add symmetric entries
+ (when (or (bound-and-true-p char-fold-symmetric)
+ char-fold--default-symmetric)
+ (let ((symmetric (make-hash-table :test 'eq)))
+ ;; Initialize hashes
+ (map-char-table
+ (lambda (char decomp-list)
+ (puthash char (make-hash-table :test 'equal) symmetric)
+ (dolist (decomp decomp-list)
+ (puthash (string-to-char decomp) (make-hash-table :test 'equal)
symmetric)))
+ equiv)
+
+ (map-char-table
+ (lambda (char decomp-list)
+ (dolist (decomp decomp-list)
+ (if (< (length decomp) 2)
+ ;; Add single-char symmetric pairs to hash
+ (let ((decomp-list (cons (char-to-string char) decomp-list))
+ (decomp-hash (gethash (string-to-char decomp)
symmetric)))
+ (dolist (decomp2 decomp-list)
+ (unless (equal decomp decomp2)
+ (puthash decomp2 t decomp-hash)
+ (puthash decomp t (gethash (string-to-char decomp2)
symmetric)))))
+ ;; Add multi-char symmetric pairs to equiv-multi char-table
+ (let ((decomp-list (cons (char-to-string char) decomp-list))
+ (prefix (string-to-char decomp))
+ (suffix (substring decomp 1)))
+ (puthash decomp t (gethash char symmetric))
+ (dolist (decomp2 decomp-list)
+ (if (< (length decomp2) 2)
+ (aset equiv-multi prefix
+ (cons (cons suffix (regexp-quote decomp2))
+ (aref equiv-multi prefix)))))))))
+ equiv)
+
+ ;; Update equiv char-table from hash
+ (maphash
+ (lambda (char decomp-hash)
+ (let (schars)
+ (maphash (lambda (schar _) (push schar schars)) decomp-hash)
+ (aset equiv char schars)))
+ symmetric)))
+
;; Convert the lists of characters we compiled into regexps.
(map-char-table
(lambda (char decomp-list)
(let ((re (regexp-opt (cons (char-to-string char) decomp-list))))
- (if (consp char) ; FIXME: char never is consp?
- (set-char-table-range equiv char re)
- (aset equiv char re))))
+ (aset equiv char re)))
equiv)
equiv)))
@@ -159,6 +219,61 @@ For instance, the default alist for ?f includes:
Exceptionally for the space character (32), ALIST is ignored.")
+
+(defun char-fold-update-table ()
+ (let ((new (list (or (bound-and-true-p char-fold-include)
+ char-fold--default-include)
+ (or (bound-and-true-p char-fold-exclude)
+ char-fold--default-exclude)
+ (or (bound-and-true-p char-fold-symmetric)
+ char-fold--default-symmetric))))
+ (unless (equal char-fold--previous new)
+ (setq char-fold-table (char-fold-make-table)
+ char-fold--previous new))))
+
+(defcustom char-fold-include char-fold--default-include
+ "Additional character foldings to include.
+Each entry is a list of a character and the strings that fold into it."
+ :type '(alist :key-type (character :tag "Fold to character")
+ :value-type (repeat (string :tag "Fold from string")))
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(defcustom char-fold-exclude char-fold--default-exclude
+ "Character foldings to remove from default decompisitions.
+Each entry is a list of a character and the strings to remove from folding."
+ :type '(alist :key-type (character :tag "Fold to character")
+ :value-type (repeat (string :tag "Fold from string")))
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(defcustom char-fold-symmetric char-fold--default-symmetric
+ "Non-nil means char-fold searching treats equivalent chars the same.
+That is, use of any of a set of char-fold equivalent chars in a search
+string finds any of them in the text being searched.
+
+If nil then only the \"base\" or \"canonical\" char of the set matches
+any of them. The others match only themselves, even when char-folding
+is turned on."
+ :type 'boolean
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(char-fold-update-table)
+
+
(defun char-fold--make-space-string (n)
"Return a string that matches N spaces."
(format "\\(?:%s\\|%s\\)"
diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el
index e9dfd2b..e519435 100644
--- a/test/lisp/char-fold-tests.el
+++ b/test/lisp/char-fold-tests.el
@@ -44,6 +44,16 @@
(should (string-match (char-fold--ascii-upcase re) (downcase it)))
(should (string-match (char-fold--ascii-downcase re) (upcase it)))))))
+(defun char-fold--test-no-match-exactly (string &rest strings-to-match)
+ (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'")))
+ (dolist (it strings-to-match)
+ (should-not (string-match re it)))
+ ;; Case folding
+ (let ((case-fold-search t))
+ (dolist (it strings-to-match)
+ (should-not (string-match (char-fold--ascii-upcase re) (downcase it)))
+ (should-not (string-match (char-fold--ascii-downcase re) (upcase
it)))))))
+
(defun char-fold--test-search-with-contents (contents string)
(with-temp-buffer
(insert contents)
@@ -53,6 +63,11 @@
(should (char-fold-search-forward string nil 'noerror))
(should (char-fold-search-backward string nil 'noerror))))
+(defun char-fold--permutation (strings)
+ (mapcar (lambda (string)
+ (cons string (remove string strings)))
+ strings))
+
(ert-deftest char-fold--test-consistency ()
(dotimes (n 30)
@@ -132,5 +147,65 @@
;; Ensure it took less than a second.
(should (< (- (time-to-seconds) time) 1))))))
+(ert-deftest char-fold--test-without-customization ()
+ (let* ((matches
+ '(
+ ("e" "ℯ" "ḗ" "ë" "ë")
+ ("ι"
+ "ί" ;; 1 level decomposition
+ "ί" ;; 2 level decomposition
+ ;; FIXME:
+ ;; "ΐ" ;; 3 level decomposition
+ )
+ )))
+ (dolist (strings matches)
+ (apply 'char-fold--test-match-exactly strings))))
+
+(ert-deftest char-fold--test-with-customization ()
+ :tags '(:expensive-test)
+ (let* ((char-fold-include
+ '(
+ (?ß "ss") ;; de
+ (?o "ø") ;; da no nb nn
+ (?l "ł") ;; pl
+ ))
+ ;; FIXME: move language-specific settings to defaults
+ (char-fold-exclude
+ '(
+ (?a "å") ;; sv da no nb nn
+ (?a "ä") ;; sv fi et
+ (?o "ö") ;; sv fi et
+ (?n "ñ") ;; es
+ (?и "й") ;; ru
+ ))
+ (char-fold-symmetric t)
+ (char-fold-table (char-fold-make-table))
+ (matches
+ '(
+ ("e" "ℯ" "ḗ" "ë" "ë")
+ ("е" "ё" "ё")
+ ("ι" "ί" "ί"
+ ;; FIXME: "ΐ"
+ )
+ ("ß" "ss")
+ ("o" "ø")
+ ("l" "ł")
+
+ ))
+ (no-matches
+ '(
+ ("a" "å")
+ ("a" "ä")
+ ("o" "ö")
+ ("n" "ñ")
+ ("и" "й")
+ )))
+ (dolist (strings matches)
+ (dolist (permutation (char-fold--permutation strings))
+ (apply 'char-fold--test-match-exactly permutation)))
+ (dolist (strings no-matches)
+ (dolist (permutation (char-fold--permutation strings))
+ (apply 'char-fold--test-no-match-exactly permutation)))))
+
(provide 'char-fold-tests)
;;; char-fold-tests.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] master 376f5df: Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689),
Juri Linkov <=