emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/tNFA f150b88 06/23: Added support for \{...\} postfix r


From: Stefan Monnier
Subject: [elpa] externals/tNFA f150b88 06/23: Added support for \{...\} postfix repetition operator
Date: Mon, 14 Dec 2020 12:08:28 -0500 (EST)

branch: externals/tNFA
commit f150b88cff55b1c8383234c6d08a7a319b4bb4bd
Author: Toby Cubitt <toby-predictive@dr-qubit.org>
Commit: tsc25 <toby-predictive@dr-qubit.org>

    Added support for \{...\} postfix repetition operator
---
 tNFA.el | 282 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 207 insertions(+), 75 deletions(-)

diff --git a/tNFA.el b/tNFA.el
index a5be4f1..87847b5 100644
--- a/tNFA.el
+++ b/tNFA.el
@@ -148,19 +148,20 @@
 
 (defun tNFA--NFA-state-patch (attach state)
   ;; patch STATE onto ATTACH. Return value is meaningless
-  (setf (tNFA--NFA-state-type attach)  (tNFA--NFA-state-type state)
-       (tNFA--NFA-state-label attach) (tNFA--NFA-state-label state)
-       (tNFA--NFA-state-next attach)  (tNFA--NFA-state-next state)
-       (tNFA--NFA-state-count state)  (incf (tNFA--NFA-state-in-degree state))
-       ))
+  (setf
+   (tNFA--NFA-state-type attach)  (tNFA--NFA-state-type state)
+   (tNFA--NFA-state-label attach) (tNFA--NFA-state-label state)
+   (tNFA--NFA-state-next attach)  (tNFA--NFA-state-next state)
+   (tNFA--NFA-state-count state)  (incf (tNFA--NFA-state-in-degree state))))
 
 
 (defun tNFA--NFA-state-make-epsilon (state next)
   ;; create an epsilon transition from STATE to NEXT
-  (setf (tNFA--NFA-state-type state)  'epsilon
-       (tNFA--NFA-state-label state) nil
-       (tNFA--NFA-state-next state)  next
-       (tNFA--NFA-state-count next)  (incf (tNFA--NFA-state-in-degree next))))
+  (setf
+   (tNFA--NFA-state-type state)  'epsilon
+   (tNFA--NFA-state-label state) nil
+   (tNFA--NFA-state-next state)  next
+   (tNFA--NFA-state-count next)  (incf (tNFA--NFA-state-in-degree next))))
 
 
 (defun tNFA--NFA-state-make-branch (state next)
@@ -172,6 +173,16 @@
     (setf (tNFA--NFA-state-count n) (incf (tNFA--NFA-state-in-degree n)))))
 
 
+(defun tNFA--NFA-state-copy (state)
+  ;; Return a copy of STATE. The next link is *not* copied, it is `eq' to the
+  ;; original next link. Use `tNFA--fragment-copy' if you want to recursively
+  ;; copy a chain of states. Note: NFA--state-id must be bound to something
+  ;; appropriate when this function is called.
+  (let ((copy (copy-sequence state)))
+    (setf (tNFA--NFA-state-id copy) (incf NFA--state-id))
+    copy))
+
+
 
 ;;; ----------------------------------------------------------------
 ;;;                        NFA fragments
@@ -192,6 +203,47 @@
   (setf (tNFA--fragment-final frag1) (tNFA--fragment-final frag2)))
 
 
+(defun tNFA--fragment-copy (fragment)
+  ;; return a copy of FRAGMENT.
+  (declare (special copied-states))
+  (let (copied-states)
+    (tNFA--fragment-create
+     (tNFA--do-fragment-copy (tNFA--fragment-initial fragment))
+     (cdr (assq (tNFA--fragment-final fragment) copied-states)))))
+
+
+(defun tNFA--do-fragment-copy (state)
+  ;; return a copy of STATE, recursively following and copying links
+  ;; (note: NFA--state-id must be bound to something appropriate when this is
+  ;; called)
+  (declare (special copied-states))
+  (let ((copy (tNFA--NFA-state-copy state)))
+    (push (cons state copy) copied-states)
+
+    ;; if STATE is a branch, copy all links
+    (cond
+     ((eq (tNFA--NFA-state-type copy) 'branch)
+      (setf (tNFA--NFA-state-next copy)
+           (mapcar (lambda (next)
+                     (or (cdr (assq next copied-states))
+                         (tNFA--do-fragment-copy next)))
+                   (tNFA--NFA-state-next copy))))
+
+     ;; if state doesn't have a next link, return
+     ((or (eq (tNFA--NFA-state-type copy) 'match)
+         (null (tNFA--NFA-state-type copy))))
+
+     ;; otherwise, copy next link
+     ((tNFA--NFA-state-type copy)
+      ;; for a non-branch STATE, copy next link
+      (setf (tNFA--NFA-state-next copy)
+           ;; if we've already copied next state, set next link to that
+           (or (cdr (assq (tNFA--NFA-state-next copy) copied-states))
+               ;; otherwise, recursively copy next state
+               (tNFA--do-fragment-copy (tNFA--NFA-state-next copy))))))
+    copy))
+
+
 
 ;;; ----------------------------------------------------------------
 ;;;                      DFA states
@@ -371,6 +423,15 @@ individual elements of STRING are identical. The default 
is `eq'."
       )))
 
 
+(defmacro tNFA--regexp-postfix-p (regexp)
+  ;; return t if next token in REGEXP is a postfix operator, nil otherwise
+  `(or (eq (car ,regexp) ?*)
+       (eq (car ,regexp) ?+)
+       (eq (car ,regexp) ??)
+       (and (eq (car ,regexp) ?\\)
+           (cdr ,regexp)
+           (eq (cadr ,regexp) ?{))))
+
 
 (defun tNFA--from-regexp (regexp num-tags min-tags max-tags
                                 &optional top-level shy-group)
@@ -387,7 +448,7 @@ individual elements of STRING are identical. The default is 
`eq'."
 
   (let* ((new (tNFA--NFA-state-create))
         (fragment-stack (list (tNFA--fragment-create new new)))
-        fragment attach token type group-end-tag)
+        fragment copy attach token type group-end-tag)
 
     (catch 'constructed
       (while t
@@ -513,67 +574,92 @@ individual elements of STRING are identical. The default 
is `eq'."
 
        ;; ----- attach new fragment -----
        (when fragment
-         (setq attach (tNFA--fragment-final (car fragment-stack)))
-         (if (or (eq (car regexp) ?*)
-                 (eq (car regexp) ?+)
-                 (eq (car regexp) ??))
-             (if (eq type 'alternation)
-                 (error "Syntax error in regexp: unexpected \"%s\""
-                        (char-to-string token))
-
-               ;; if next token is a postfix operator, splice new fragment
-               ;; into NFA as appropriate
-               (setq regexp (tNFA--regexp-next-token regexp)
-                     type   (nth 0 regexp)
-                     token  (nth 1 regexp)
-                     regexp (nth 2 regexp))
-               (setq new (tNFA--NFA-state-create))
-
-               (cond
-
-                ;;    .--fragment--.
-                ;;   /              \
-                ;;   \        ______/
-                ;;    \      /
-                ;;  ---attach-----new---
-                ;;
-                ((eq type 'postfix*)
-                 (tNFA--NFA-state-make-branch
-                  attach (list (tNFA--fragment-initial fragment) new))
-                 (tNFA--NFA-state-make-epsilon
-                  (tNFA--fragment-final fragment) attach)
-                 (setf (tNFA--fragment-final (car fragment-stack)) new))
-
-                ;;      .----.
-                ;;     /      \
-                ;;    /        \
-                ;;    \        /
-                ;;  ---fragment-----new---
-                ;;
-                ((eq type 'postfix+)
-                 (tNFA--NFA-state-patch
-                  attach (tNFA--fragment-initial fragment))
-                 (tNFA--NFA-state-make-branch
-                  (tNFA--fragment-final fragment) (list attach new))
-                 (setf (tNFA--fragment-final (car fragment-stack)) new))
-
-                ;;            .--fragment--.
-                ;;           /              \
-                ;;  ---attach                new---
-                ;;           \______________/
-                ;;
-                ((eq type 'postfix?)
-                 (tNFA--NFA-state-make-branch
-                  attach (list (tNFA--fragment-initial fragment) new))
-                 (tNFA--NFA-state-make-epsilon
-                  (tNFA--fragment-final fragment) new)
-                 (setf (tNFA--fragment-final (car fragment-stack)) new))
-                ))
-
-
-           ;; if next token is not a postfix operator, attach new fragment
-           ;; onto end of current NFA fragment
-           (tNFA--fragment-patch (car fragment-stack) fragment))
+         ;; if next token is not a postfix operator, attach new fragment onto
+         ;; end of current NFA fragment
+         (if (not (tNFA--regexp-postfix-p regexp))
+             (tNFA--fragment-patch (car fragment-stack) fragment)
+
+           ;; if next token is a postfix operator, splice new fragment into
+           ;; NFA as appropriate
+           (when (eq type 'alternation)
+             (error "Syntax error in regexp: unexpected \"%s\""
+                    (char-to-string token)))
+           (setq regexp (tNFA--regexp-next-token regexp)
+                 type   (nth 0 regexp)
+                 token  (nth 1 regexp)
+                 regexp (nth 2 regexp))
+
+           (while fragment
+             (setq attach (tNFA--fragment-final (car fragment-stack)))
+             (setq new (tNFA--NFA-state-create))
+             (cond
+
+              ;; * postfix = \{0,\}:
+              ;;
+              ;;    .--fragment--.
+              ;;   /              \
+              ;;   \        ______/
+              ;;    \      /
+              ;;  ---attach-----new---
+              ;;
+              ((and (eq (car token) 0) (null (cdr token)))
+               (tNFA--NFA-state-make-branch
+                attach (list (tNFA--fragment-initial fragment) new))
+               (tNFA--NFA-state-make-epsilon
+                (tNFA--fragment-final fragment) attach)
+               (setf (tNFA--fragment-final (car fragment-stack)) new)
+               (setq fragment nil))
+
+              ;; + postfix = \{1,\}:
+              ;;
+              ;;      .----.
+              ;;     /      \
+              ;;    /        \
+              ;;    \        /
+              ;;  ---fragment-----new---
+              ;;
+              ((and (eq (car token) 1) (null (cdr token)))
+               (tNFA--NFA-state-patch
+                attach (tNFA--fragment-initial fragment))
+               (tNFA--NFA-state-make-branch
+                (tNFA--fragment-final fragment) (list attach new))
+               (setf (tNFA--fragment-final (car fragment-stack)) new)
+               (setq fragment nil))
+
+              ;; \{0,n\} (note: ? postfix = \{0,1\}):
+              ;;
+              ;;            .--fragment--.
+              ;;           /              \
+              ;;  ---attach                new---
+              ;;           \______________/
+              ;;
+              ((eq (car token) 0)
+               ;; ? postfix = \{0,1\}: after this we're done
+               (if (eq (cdr token) 1)
+                   (setq copy nil)
+                 (setq copy (tNFA--fragment-copy fragment)))
+               ;; attach fragment
+               (tNFA--NFA-state-make-branch
+                attach (list (tNFA--fragment-initial fragment) new))
+               (tNFA--NFA-state-make-epsilon
+                (tNFA--fragment-final fragment) new)
+               (setf (tNFA--fragment-final (car fragment-stack)) new)
+               ;; prepare for next iteration
+               (decf (cdr token))
+               (setq fragment copy))
+
+              ;; \{n,\} or \{n,m\}:
+              ;;
+              ;;  ---attach----fragment----new---
+              ;;
+              (t
+               (setq copy (tNFA--fragment-copy fragment))
+               (tNFA--fragment-patch (car fragment-stack) fragment)
+               ;; prepare for next iteration
+               (decf (car token))
+               (when (cdr token) (decf (cdr token)))
+               (setq fragment copy))
+              )))
 
 
          ;; if ending a group, add a maximize tag to end
@@ -626,9 +712,9 @@ individual elements of STRING are identical. The default is 
`eq'."
        (error "Syntax error in regexp: missing \"[\""))
 
        ;; . * + ?: set appropriate type
-       ((eq token ?*) (setq type 'postfix*))
-       ((eq token ?+) (setq type 'postfix+))
-       ((eq token ??) (setq type 'postfix?))
+       ((eq token ?*) (setq type 'postfix token (cons 0 nil)))
+       ((eq token ?+) (setq type 'postfix token (cons 1 nil)))
+       ((eq token ??) (setq type 'postfix token (cons 0 1)))
        ((eq token ?.) (setq type 'wildcard))
 
        ;; \: look at next character
@@ -636,15 +722,61 @@ individual elements of STRING are identical. The default 
is `eq'."
        (unless (setq token (pop regexp))
          (error "Syntax error in regexp: missing character after \"\\\""))
        (cond
+        ;; |: alternation
         ((eq token ?|) (setq type 'alternation))
+        ;; \(?: shy group start
         ((and (eq token ?\() (eq (car regexp) ??))
          (setq type 'shy-group-start)
          (pop regexp))
+        ;; \)?: shy group end
         ((and (eq token ?\)) (eq (car regexp) ??))
          (setq type 'shy-group-end)
          (pop regexp))
+        ;; \(: group start
         ((eq token ?\() (setq type 'group-start))
-        ((eq token ?\)) (setq type 'group-end))))
+        ;; \): group end
+        ((eq token ?\)) (setq type 'group-end))
+
+        ;; \{: postfix repetition operator
+        ((eq token ?{)
+         (setq type 'postfix token (cons nil nil))
+         ;; extract first number from repetition operator
+         (while (if (null regexp)
+                    (error "Syntax error in regexp: malformed \\{...\\}")
+                  (not (or (eq (car regexp) ?,) (eq (car regexp) ?\\))))
+           (setcar token (concat (car token) (char-to-string (pop regexp)))))
+         (if (null (car token))
+             (setcar token 0)
+           (unless (string-match "[0-9]+" (car token))
+             (error "Syntax error in regexp: malformed \\{...\\}"))
+           (setcar token (string-to-number (car token))))
+         (cond
+          ;; if next character is "\", we expect "}" to follow
+          ((eq (car regexp) ?\\)
+           (pop regexp)
+           (unless (eq (car regexp) ?})
+             (error "Syntax error in regexp: expected \"}\""))
+           (pop regexp)
+           (unless (car token)
+             (error "Syntax error in regexp: malformed \\{...\\}"))
+           (setcdr token (car token)))
+          ;; if next character is ",", we expect a second number to follow
+          ((eq (car regexp) ?,)
+           (pop regexp)
+           (while (if (null regexp)
+                      (error "Syntax error in regexp: malformed \\{...\\}")
+                    (not (eq (car regexp) ?\\)))
+             (setcdr token
+                     (concat (cdr token) (char-to-string (pop regexp)))))
+           (unless (null (cdr token))
+             (unless (string-match "[0-9]+" (cdr token))
+               (error "Syntax error in regexp: malformed \\{...\\}"))
+             (setcdr token (string-to-number (cdr token))))
+           (pop regexp)
+           (unless (eq (car regexp) ?})
+             (error "Syntax error in regexp: expected \"}\""))
+           (pop regexp))))
+        ))
        )
 
       ;; return first token type, token, and remaining regexp



reply via email to

[Prev in Thread] Current Thread [Next in Thread]