emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 82b4e48: Allow characters and single-char strings in rx charsets


From: Mattias Engdegård
Subject: master 82b4e48: Allow characters and single-char strings in rx charsets
Date: Fri, 13 Dec 2019 07:30:29 -0500 (EST)

branch: master
commit 82b4e48c590cf2c0448a751e641b0ee7a6a02438
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>

    Allow characters and single-char strings in rx charsets
    
    The `not' and `intersection' forms, and `or' inside these forms,
    now accept characters and single-character strings as arguments.
    Previously, they had to be wrapped in `any' forms.
    This does not add expressive power but is a convenience and is easily
    understood.
    
    * doc/lispref/searching.texi (Rx Constructs): Amend the documentation.
    * etc/NEWS: Announce the change.
    * lisp/emacs-lisp/rx.el (rx--charset-p, rx--translate-not)
    (rx--charset-intervals, rx): Accept characters and 1-char strings in
    more places.
    * test/lisp/emacs-lisp/rx-tests.el (rx-not, rx-charset-or)
    (rx-def-in-charset-or, rx-intersection): Test the change.
---
 doc/lispref/searching.texi       | 11 ++++++-----
 etc/NEWS                         |  3 +++
 lisp/emacs-lisp/rx.el            | 26 ++++++++++++++++++++------
 test/lisp/emacs-lisp/rx-tests.el | 20 ++++++++++++++------
 4 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 0c6c7cc..700880c 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -1214,8 +1214,9 @@ Corresponding string regexp: @samp{[@dots{}]}
 @item @code{(not @var{charspec})}
 @cindex @code{not} in rx
 Match a character not included in @var{charspec}.  @var{charspec} can
-be an @code{any}, @code{not}, @code{or}, @code{intersection},
-@code{syntax} or @code{category} form, or a character class.
+be a character, a single-character string, an @code{any}, @code{not},
+@code{or}, @code{intersection}, @code{syntax} or @code{category} form,
+or a character class.
 If @var{charspec} is an @code{or} form, its arguments have the same
 restrictions as those of @code{intersection}; see below.@*
 Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
@@ -1224,9 +1225,9 @@ Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
 @item @code{(intersection @var{charset}@dots{})}
 @cindex @code{intersection} in rx
 Match a character included in all of the @var{charset}s.
-Each @var{charset} can be an @code{any} form without character
-classes, or an @code{intersection}, @code{or} or @code{not} form whose
-arguments are also @var{charset}s.
+Each @var{charset} can be a character, a single-character string, an
+@code{any} form without character classes, or an @code{intersection},
+@code{or} or @code{not} form whose arguments are also @var{charset}s.
 
 @item @code{not-newline}, @code{nonl}
 @cindex @code{not-newline} in rx
diff --git a/etc/NEWS b/etc/NEWS
index 1e0422c..a7f3c3d 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -2124,6 +2124,9 @@ Both match any single character; 'anychar' is more descriptive.
 With 'or' and 'not', it can be used to compose character-matching
 expressions from simpler parts.
 
++++
+*** 'not' argument can now be a character or single-char string.
+
 ** Frames
 
 +++
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a5cab1d..43f7a4e 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -309,6 +309,8 @@ and set operations."
                     (rx--every (lambda (x) (not (symbolp x))) (cdr form)))
                (and (memq (car form) '(not or | intersection))
                     (rx--every #'rx--charset-p (cdr form)))))
+      (characterp form)
+      (and (stringp form) (= (length form) 1))
       (and (or (symbolp form) (consp form))
            (let ((expanded (rx--expand-def form)))
              (and expanded
@@ -521,6 +523,11 @@ If NEGATED, negate the sense (thus making it positive)."
      ((eq arg 'word-boundary)
       (rx--translate-symbol
        (if negated 'word-boundary 'not-word-boundary)))
+     ((characterp arg)
+      (rx--generate-alt (not negated) (list (cons arg arg)) nil))
+     ((and (stringp arg) (= (length arg) 1))
+      (let ((char (string-to-char arg)))
+        (rx--generate-alt (not negated) (list (cons char char)) nil)))
      ((let ((expanded (rx--expand-def arg)))
         (and expanded
              (rx--translate-not negated (list expanded)))))
@@ -571,8 +578,8 @@ If NEGATED, negate the sense (thus making it positive)."
 (defun rx--charset-intervals (charset)
   "Return a sorted list of non-adjacent disjoint intervals from CHARSET.
 CHARSET is any expression allowed in a character set expression:
-either `any' (no classes permitted), or `not', `or' or `intersection'
-forms whose arguments are charsets."
+characters, single-char strings, `any' forms (no classes permitted),
+or `not', `or' or `intersection' forms whose arguments are charsets."
   (pcase charset
     (`(,(or 'any 'in 'char) . ,body)
      (let ((parsed (rx--parse-any body)))
@@ -584,6 +591,11 @@ forms whose arguments are charsets."
     (`(not ,x) (rx--complement-intervals (rx--charset-intervals x)))
     (`(,(or 'or '|) . ,body) (rx--charset-union body))
     (`(intersection . ,body) (rx--charset-intersection body))
+    ((pred characterp)
+     (list (cons charset charset)))
+    ((guard (and (stringp charset) (= (length charset) 1)))
+     (let ((char (string-to-char charset)))
+       (list (cons char char))))
     (_ (let ((expanded (rx--expand-def charset)))
          (if expanded
              (rx--charset-intervals expanded)
@@ -1161,10 +1173,12 @@ CHAR           Match a literal character.
                 character, a string, a range as string \"A-Z\" or cons
                 (?A . ?Z), or a character class (see below).  Alias: in, char.
 (not CHARSPEC)  Match one character not matched by CHARSPEC.  CHARSPEC
-                can be (any ...), (or ...), (intersection ...),
-                (syntax ...), (category ...), or a character class.
-(intersection CHARSET...) Intersection of CHARSETs.
-                CHARSET is (any...), (not...), (or...) or (intersection...).
+                can be a character, single-char string, (any ...), (or ...),
+                (intersection ...), (syntax ...), (category ...),
+                or a character class.
+(intersection CHARSET...) Match all CHARSETs.
+                CHARSET is (any...), (not...), (or...) or (intersection...),
+                a character or a single-char string.
 not-newline     Match any character except a newline.  Alias: nonl.
 anychar         Match any character.  Alias: anything.
 unmatchable     Never match anything at all.
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index 344f467..a82f1f8 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -272,7 +272,9 @@
   (should (equal (rx (not (category tone-mark)) (not (category lao)))
                  "\\C4\\Co"))
   (should (equal (rx (not (not ascii)) (not (not (not (any "a-z")))))
-                 "[[:ascii:]][^a-z]")))
+                 "[[:ascii:]][^a-z]"))
+  (should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d)))
+                 "[^a][^b]cd")))
 
 (ert-deftest rx-charset-or ()
   (should (equal (rx (or))
@@ -294,13 +296,17 @@
                  "[a-ru-z]"))
   (should (equal (rx (or (intersection (any "c-z") (any "a-g"))
                          (not (any "a-k"))))
-                 "[^abh-k]")))
+                 "[^abh-k]"))
+  (should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w"))))
+                 "[a-f][^s-y]")))
 
 (ert-deftest rx-def-in-charset-or ()
   (rx-let ((a (any "badc"))
-           (b (| a (any "def"))))
-    (should (equal (rx (or b (any "q")))
-                   "[a-fq]")))
+           (b (| a (any "def")))
+           (c ?a)
+           (d "b"))
+    (should (equal (rx (or b (any "q")) (or c d))
+                   "[a-fq][ab]")))
   (rx-let ((diff-| (a b) (not (or (not a) b))))
     (should (equal (rx (diff-| (any "a-z") (any "gr")))
                    "[a-fh-qs-z]"))))
@@ -326,7 +332,9 @@
                  "[e-m]"))
   (should (equal (rx (intersection (or (any "a-f") (any "f-t"))
                                    (any "e-w")))
-                 "[e-t]")))
+                 "[e-t]"))
+  (should (equal (rx (intersection ?m (any "a-z") "m"))
+                 "m")))
 
 (ert-deftest rx-def-in-intersection ()
   (rx-let ((a (any "a-g"))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]