emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/tNFA 7b44eeb 02/23: Bug-fix in tNFA--from-regexp: add t


From: Stefan Monnier
Subject: [elpa] externals/tNFA 7b44eeb 02/23: Bug-fix in tNFA--from-regexp: add tag transitions *outside* their group fragment,
Date: Mon, 14 Dec 2020 12:08:27 -0500 (EST)

branch: externals/tNFA
commit 7b44eeb47ee617287e363ab0354c8c7de6e1743a
Author: Toby Cubitt <toby-predictive@dr-qubit.org>
Commit: tsc25 <toby-predictive@dr-qubit.org>

    Bug-fix in tNFA--from-regexp: add tag transitions *outside* their group 
fragment,
    so that any postfix operators won't create a loop that passes back through 
tags.
---
 tNFA.el | 235 ++++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 126 insertions(+), 109 deletions(-)

diff --git a/tNFA.el b/tNFA.el
index cef5342..6cd59f0 100644
--- a/tNFA.el
+++ b/tNFA.el
@@ -124,6 +124,8 @@
   count tNFA-state  ; used internally in NFA evolution algorithms
   next)
 
+
+;; tag number for a tagged epsilon transition is stored in label slot
 (defalias 'tNFA-NFA-state-tag 'tNFA-NFA-state-label)
 
 (defmacro tNFA-NFA-state-tags (state)
@@ -131,7 +133,7 @@
 
 
 (defun tNFA-NFA-state-patch (attach state)
-  "Patch STATE onto ATTACH. Return value is meaningless."
+  ;; patch STATE onto ATTACH. Return value is meaningless
   (setf (tNFA-NFA-state-type attach)  (tNFA-NFA-state-type state)
        (tNFA-NFA-state-label attach) (tNFA-NFA-state-label state)
        (tNFA-NFA-state-next attach)  (tNFA-NFA-state-next state)
@@ -140,7 +142,7 @@
 
 
 (defun tNFA-NFA-state-make-epsilon (state next)
-  "Create an epsilon transition from STATE to NEXT."
+  ;; create an epsilon transition from STATE to NEXT
   (setf (tNFA-NFA-state-type state)  'epsilon
        (tNFA-NFA-state-label state) nil
        (tNFA-NFA-state-next state)  next
@@ -148,7 +150,7 @@
 
 
 (defun tNFA-NFA-state-make-branch (state next)
-  "Create a branch from STATE to all states in NEXT list."
+  ;; create a branch from STATE to all states in NEXT list
   (setf (tNFA-NFA-state-type state)  'branch
        (tNFA-NFA-state-label state) nil
        (tNFA-NFA-state-next state)  next)
@@ -161,18 +163,19 @@
 ;;;                        NFA fragments
 
 (defstruct
-  (NFA-fragment
+  (tNFA-fragment
    (:type vector)
    (:constructor nil)
-   (:constructor NFA-fragment-create (initial final))
+   (:constructor tNFA-fragment-create (initial final))
    (:copier nil))
   initial final)
 
 
-(defun NFA-fragment-patch (frag1 frag2)
-  "Patch FRAG2 onto end of FRAG1. Return value is meaningless."
-  (tNFA-NFA-state-patch (NFA-fragment-final frag1) (NFA-fragment-initial 
frag2))
-  (setf (NFA-fragment-final frag1) (NFA-fragment-final frag2)))
+(defun tNFA-fragment-patch (frag1 frag2)
+  ;; patch FRAG2 onto end of FRAG1; return value is meaningless
+  (tNFA-NFA-state-patch (tNFA-fragment-final frag1)
+                       (tNFA-fragment-initial frag2))
+  (setf (tNFA-fragment-final frag1) (tNFA-fragment-final frag2)))
 
 
 
@@ -180,7 +183,7 @@
 ;;;                      tag tables
 
 (defun tNFA-tags-create (num-tags min-tags max-tags)
-  "Construct a new tags table."
+  ;; construct a new tags table
   (let ((vec (make-vector num-tags nil)))
     (dolist (tag min-tags)
       (aset vec tag (cons -1 'min)))
@@ -190,7 +193,7 @@
 
 
 (defun tNFA-tags-copy (tags)
-  "Return a copy of TAGS table."
+  ;; return a copy of TAGS table
   (let* ((len (length tags))
         (vec (make-vector len nil)))
     (dotimes (i len)
@@ -200,24 +203,23 @@
 
 
 (defmacro tNFA-tags-set (tags tag val)
-  "Set value of TAG in TAGS table to VAL."
+  ;; set value of TAG in TAGS table to VAL
   `(setcar (aref ,tags ,tag) ,val))
 
 
 (defmacro tNFA-tags-get (tags tag)
-  "Get value of TAG in TAGS table."
+  ;; get value of TAG in TAGS table
   `(car (aref ,tags ,tag)))
 
 
 (defmacro tNFA-tags-type (tags tag)
-  "Return the symbol `min' if TAG in TAGS table is a minimize tag,
-`max' if it is a maximize tag."
+  ;; return tag type ('min or 'max)
   `(cdr (aref ,tags ,tag)))
 
 
 (defun tNFA-tags< (val tag tags)
-  "Return non-nil if VAL takes precedence over the value of TAG in TAGS table,
-otherwise return nil."
+  ;; return non-nil if VAL takes precedence over the value of TAG in TAGS
+  ;; table, nil otherwise
   (setq tag (aref tags tag))
   (or (and (eq (cdr tag) 'min)
           (< val (car tag)))
@@ -226,6 +228,26 @@ otherwise return nil."
           ))
 
 
+(defun tNFA-tags-to-groups (tags)
+  "Convert TAGS table to a list of indices of group matches.
+The nth element of the list is a cons cell, whose car is the
+starting index of the nth group and whose cdr is its end
+index. If a group didn't match, the corresponding list element
+will by null."
+  (let ((groups (make-list (/ (length tags) 2) nil))
+       group-stack
+       (grp 0))
+    (dotimes (i (length tags))
+      (if (eq (tNFA-tags-type tags i) 'max)
+         (unless (= (tNFA-tags-get tags i) -1)
+           (setf (nth (caar group-stack) groups)
+                 (cons (cdr (pop group-stack)) (tNFA-tags-get tags i))))
+       (unless (= (tNFA-tags-get tags i) -1)
+         (push (cons grp (tNFA-tags-get tags i)) group-stack))
+       (incf grp)))
+    groups))
+
+
 
 ;;; ----------------------------------------------------------------
 ;;;                      DFA states
@@ -284,10 +306,6 @@ otherwise return nil."
                         :test test))
 
 
-(defun tNFA-DFA-state-failed-p (state)
-  "Return t if STATE is a failed match, otherwise returns nil."
-  (null (tNFA-DFA-state-list state)))
-
 (defalias 'tNFA-DFA-state-match-p 'tNFA-DFA-state-match
   "Return non-nil if STATE is a matching state, otherwise returns nil.")
 
@@ -306,7 +324,7 @@ matches are always anchored, so `$' and `^' lose their 
special meanings.
 The return value is the initial state of the tagged NFA.
 
 The :test keyword argument specifies how to test whether two
-individual elements of a string are identical. The default is `eq'."
+individual elements of STRING are identical. The default is `eq'."
 
   ;; convert regexp to list, build NFA, and return initial state
   (declare (special NFA--state-id))
@@ -315,20 +333,31 @@ individual elements of a string are identical. The 
default is `eq'."
        (tNFA--from-regexp (append regexp nil) 0 '() '() 'top-level))
     (if regexp
        (error "Syntax error in regexp: missing \"(\"")
-      (setf (tNFA-NFA-state-type (NFA-fragment-final fragment)) 'match)
+      (setf (tNFA-NFA-state-type (tNFA-fragment-final fragment)) 'match)
       (tNFA-DFA-state-create-initial
        (tNFA-state-create-initial
-       (NFA-fragment-initial fragment) num-tags min-tags max-tags)
+       (tNFA-fragment-initial fragment) num-tags min-tags max-tags)
        :test test)
       )))
 
 
 
 (defun tNFA--from-regexp (regexp num-tags min-tags max-tags
-                                &optional top-level)
+                                &optional top-level shy-group)
+  ;; Construct a tagged NFA fragment from REGEXP, up to first end-group
+  ;; character or end of REGEXP. The TAGS arguments are used to pass the tags
+  ;; created so far. A non-nil TOP-LEVEL indicates that REGEXP is the complete
+  ;; regexp, so we're constructing the entire tNFA. A non-nil SHY-GROUP
+  ;; indicates that we're constructing a shy subgroup fragment. (Both optional
+  ;; arguments are only used for spotting syntax errors in REGEXP.)
+  ;;
+  ;; Returns a list: (FRAGMENT NUM-TAGS MIN-TAGS MAX-TAGS REGEXP). FRAGMENT is
+  ;; the constructed tNFA fragment, REGEXP is the remaining, unused portion of
+  ;; the regexp, and the TAGS return values give the tags created so far.
+
   (let* ((new (tNFA-NFA-state-create))
-        (fragment-stack (list (NFA-fragment-create new new)))
-        fragment attach token type)
+        (fragment-stack (list (tNFA-fragment-create new new)))
+        fragment attach token type group-end-tag)
 
     (catch 'constructed
       (while t
@@ -348,61 +377,68 @@ individual elements of a string are identical. The 
default is `eq'."
         ((and (eq type 'shy-group-end) top-level)
          (error "Syntax error in regexp: extra \")\" or missing \"(\""))
 
+        ;; syntax error: ) ending a shy group
+        ((and (eq type 'shy-group-end) (not shy-group))
+         (error "Syntax error in regexp: \"(\" matched with \")?\""))
+
+        ;; syntax error: )? ending a group
+        ((and (eq type 'group-end) shy-group)
+         (error "Syntax error in regexp: \"(?\" matched with \")\""))
+
         ;; syntax error: postfix operator not after atom
         ((or (eq type 'postfix*) (eq type 'postfix+) (eq type 'postfix?))
          (error "Syntax error in regexp: unexpected \"%s\""
                 (char-to-string token)))
 
+
         ;; regexp atom: construct new literal fragment
         ((or (eq type 'literal) (eq type 'wildcard)
              (eq type 'char-alt) (eq type 'neg-char-alt))
-         (setq new (tNFA-NFA-state-create type token (tNFA-NFA-state-create))
-               fragment (NFA-fragment-create new (tNFA-NFA-state-next new))))
+         (setq new
+               (tNFA-NFA-state-create type token (tNFA-NFA-state-create))
+               fragment
+               (tNFA-fragment-create new (tNFA-NFA-state-next new))))
 
         ;; shy subgroup start: recursively construct subgroup fragment
         ((eq type 'shy-group-start)
-         (setq new (tNFA--from-regexp regexp num-tags min-tags max-tags)
+         (setq new (tNFA--from-regexp regexp num-tags min-tags max-tags
+                                      nil t)
                num-tags (nth 1 new)
                min-tags (nth 2 new)
                max-tags (nth 3 new)
                regexp   (nth 4 new)
                fragment (nth 0 new)))
 
-        ;; subgroup start: recursively construct subgroup fragment, attaching
-        ;;                 minimize tag to the front
+        ;; subgroup start: add minimize tag to current fragment, and
+        ;;                 recursively construct subgroup fragment
         ((eq type 'group-start)
          (setq new (tNFA-NFA-state-create))
          (setq fragment
-               (NFA-fragment-create
+               (tNFA-fragment-create
                 (tNFA-NFA-state-create-tag
                  (car (push (1- (incf num-tags)) min-tags))
                  new)
                 new))
+         (tNFA-fragment-patch (car fragment-stack) fragment)
+         ;; reserve next tag number for subgroup end tag
+         (setq group-end-tag num-tags)
+         (incf num-tags)
+         ;; recursively construct subgroup fragment
          (setq new (tNFA--from-regexp regexp num-tags min-tags max-tags)
                num-tags (nth 1 new)
                min-tags (nth 2 new)
                max-tags (nth 3 new)
                regexp   (nth 4 new)
-               new      (nth 0 new))
-         (NFA-fragment-patch fragment new))
+               fragment (nth 0 new)))
 
 
         ;; end of regexp or subgroup: ...
         ((or (null type) (eq type 'shy-group-end) (eq type 'group-end))
 
-         ;; if fragment-stack contains only one fragment...
+         ;; if fragment-stack contains only one fragment, throw fragment up
+         ;; to recursion level above
          (cond
           ((null (nth 1 fragment-stack))
-           ;; if ending a group, add a maximize tag to end of fragment
-           (when (eq type 'group-end)
-             (setq new (tNFA-NFA-state-create)
-                   fragment (NFA-fragment-create
-                             (tNFA-NFA-state-create-tag
-                              (car (push (1- (incf num-tags)) max-tags))
-                              new)
-                             new))
-             (NFA-fragment-patch (car fragment-stack) fragment))
-           ;; throw fragment up to recursion level above
            (throw 'constructed
                   (list (car fragment-stack)
                         num-tags min-tags max-tags regexp)))
@@ -419,30 +455,20 @@ individual elements of a string are identical. The 
default is `eq'."
           ;;        \       .        /
           ;;                .
           (t
-           ;; create a new fragment containing start and end of alternation;
-           ;; if ending a group, make end of alternation a maximize tag
+           ;; create a new fragment containing start and end of alternation
            (setq fragment
-                 (NFA-fragment-create
+                 (tNFA-fragment-create
                   (tNFA-NFA-state-create-branch)
-                  (if (eq type 'group-end)
-                      (tNFA-NFA-state-create-tag
-                       (car (push (1- (incf num-tags)) max-tags))
-                       (tNFA-NFA-state-create))
-                    (tNFA-NFA-state-create))))
+                  (tNFA-NFA-state-create)))
            ;; patch alternation fragments into new fragment
            (dolist (frag fragment-stack)
-             (push (NFA-fragment-initial frag)
-                   (tNFA-NFA-state-next (NFA-fragment-initial fragment)))
-             (setf (tNFA-NFA-state-count (NFA-fragment-initial frag))
+             (push (tNFA-fragment-initial frag)
+                   (tNFA-NFA-state-next (tNFA-fragment-initial fragment)))
+             (setf (tNFA-NFA-state-count (tNFA-fragment-initial frag))
                    (incf (tNFA-NFA-state-in-degree
-                          (NFA-fragment-initial frag))))
-             (tNFA-NFA-state-make-epsilon (NFA-fragment-final frag)
-                                     (NFA-fragment-final fragment)))
-           ;; if ending a group, step the end of the fragment along one link,
-           ;; to the blank state linked from the tag
-           (when (eq type 'group-end)
-             (setf (NFA-fragment-final fragment)
-                   (tNFA-NFA-state-next (NFA-fragment-final fragment))))
+                          (tNFA-fragment-initial frag))))
+             (tNFA-NFA-state-make-epsilon (tNFA-fragment-final frag)
+                                     (tNFA-fragment-final fragment)))
            ;; throw constructed fragment up to recursion level above
            (throw 'constructed
                   (list fragment num-tags min-tags max-tags regexp)))
@@ -451,12 +477,12 @@ individual elements of a string are identical. The 
default is `eq'."
         ;; | alternation: start new fragment
         ((eq type 'alternation)
          (setq new (tNFA-NFA-state-create))
-         (push (NFA-fragment-create new new) fragment-stack)))
+         (push (tNFA-fragment-create new new) fragment-stack)))
 
 
        ;; ----- attach new fragment -----
        (when fragment
-         (setq attach (NFA-fragment-final (car fragment-stack)))
+         (setq attach (tNFA-fragment-final (car fragment-stack)))
          (if (or (eq (car regexp) ?*)
                  (eq (car regexp) ?+)
                  (eq (car regexp) ??))
@@ -482,10 +508,10 @@ individual elements of a string are identical. The 
default is `eq'."
                 ;;
                 ((eq type 'postfix*)
                  (tNFA-NFA-state-make-branch
-                  attach (list (NFA-fragment-initial fragment) new))
+                  attach (list (tNFA-fragment-initial fragment) new))
                  (tNFA-NFA-state-make-epsilon
-                  (NFA-fragment-final fragment) attach)
-                 (setf (NFA-fragment-final (car fragment-stack)) new))
+                  (tNFA-fragment-final fragment) attach)
+                 (setf (tNFA-fragment-final (car fragment-stack)) new))
 
                 ;;      .----.
                 ;;     /      \
@@ -495,10 +521,10 @@ individual elements of a string are identical. The 
default is `eq'."
                 ;;
                 ((eq type 'postfix+)
                  (tNFA-NFA-state-patch
-                  attach (NFA-fragment-initial fragment))
+                  attach (tNFA-fragment-initial fragment))
                  (tNFA-NFA-state-make-branch
-                  (NFA-fragment-final fragment) (list attach new))
-                 (setf (NFA-fragment-final (car fragment-stack)) new))
+                  (tNFA-fragment-final fragment) (list attach new))
+                 (setf (tNFA-fragment-final (car fragment-stack)) new))
 
                 ;;            .--fragment--.
                 ;;           /              \
@@ -507,16 +533,26 @@ individual elements of a string are identical. The 
default is `eq'."
                 ;;
                 ((eq type 'postfix?)
                  (tNFA-NFA-state-make-branch
-                  attach (list (NFA-fragment-initial fragment) new))
+                  attach (list (tNFA-fragment-initial fragment) new))
                  (tNFA-NFA-state-make-epsilon
-                  (NFA-fragment-final fragment) new)
-                 (setf (NFA-fragment-final (car fragment-stack)) new))
+                  (tNFA-fragment-final fragment) new)
+                 (setf (tNFA-fragment-final (car fragment-stack)) new))
                 ))
 
 
            ;; if next token is not a postfix operator, attach new fragment
            ;; onto end of current NFA fragment
-           (NFA-fragment-patch (car fragment-stack) fragment)))
+           (tNFA-fragment-patch (car fragment-stack) fragment))
+
+
+         ;; if ending a group, add a maximize tag to end
+         (when group-end-tag
+           (setq new (tNFA-NFA-state-create)
+                 fragment (tNFA-fragment-create
+                           (tNFA-NFA-state-create-tag group-end-tag new)
+                           new))
+           (push group-end-tag max-tags)
+           (tNFA-fragment-patch (car fragment-stack) fragment)))
        ))  ; end of infinite loop and catch
     ))
 
@@ -655,14 +691,14 @@ individual elements of a string are identical. The 
default is `eq'."
   ;; STATE-SET itself.)
   (let ((queue (queue-create))
        (result '())
-       (seen '())
+       (reset '())
        state next tags)
     ;; temporarily link the NFA states to their corresponding tNFA states, and
     ;; add them to the queue
     (dolist (t-state state-set)
       (setf state (tNFA-state-NFA-state t-state)
            (tNFA-NFA-state-tNFA-state state) t-state)
-      (push t-state seen)
+      (push state reset)
       (queue-enqueue queue state))
 
     (while (setq state (queue-dequeue queue))
@@ -677,7 +713,7 @@ individual elements of a string are identical. The default 
is `eq'."
            (setf (tNFA-NFA-state-tNFA-state next)
                  (tNFA-state-create
                   next (tNFA-tags-copy (tNFA-NFA-state-tags state))))
-           (push (tNFA-NFA-state-tNFA-state next) seen)
+           (push next reset)
            ;; if next state hasn't already been seen in-degree times, add it
            ;; to the end of the queue
            (if (/= (decf (tNFA-NFA-state-count next)) 0)
@@ -706,7 +742,7 @@ individual elements of a string are identical. The default 
is `eq'."
            (tNFA-tags-set tags (tNFA-NFA-state-tag state) pos)
            (setf (tNFA-NFA-state-tNFA-state next)
                  (tNFA-state-create next tags))
-           (push (tNFA-NFA-state-tNFA-state next) seen))
+           (push next reset))
          ;; if next state hasn't already been seen in-degree times, add it to
          ;; the end of the queue
          (if (/= (decf (tNFA-NFA-state-count next)) 0)
@@ -721,10 +757,9 @@ individual elements of a string are identical. The default 
is `eq'."
        ))
 
     ;; reset temporary NFA state link and count
-    (dolist (state seen)
-      (setf (tNFA-NFA-state-tNFA-state (tNFA-state-NFA-state state)) nil
-           (tNFA-NFA-state-count (tNFA-state-NFA-state state))
-             (tNFA-NFA-state-in-degree (tNFA-state-NFA-state state))))
+    (dolist (state reset)
+      (setf (tNFA-NFA-state-tNFA-state state) nil
+           (tNFA-NFA-state-count state) (tNFA-NFA-state-in-degree state)))
     ;; sort result states
     (sort result (lambda (a b) (< (tNFA-state-id a) (tNFA-state-id b))))
     ))
@@ -734,12 +769,15 @@ individual elements of a string are identical. The 
default is `eq'."
 ;;; ================================================================
 ;;;                       tNFA matching
 
-(defun tNFA-regexp-match (regexp string)
+(defun* tNFA-regexp-match (regexp string &key (test 'eq))
   "Return non-nil if STRING matches REGEXP, nil otherwise.
 Sets the match data if there was a match; see `match-beginning',
-`match-end' and `match-string'."
+`match-end' and `match-string'.
 
-  (let ((tNFA (tNFA-from-regexp regexp))
+The :test keyword argument specifies how to test whether two
+individual elements of STRING are identical. The default is `eq'."
+
+  (let ((tNFA (tNFA-from-regexp regexp :test test))
        (i -1) tags match-data group-stack (grp 0))
 
     ;; evolve tNFA according to characters of STRING
@@ -769,25 +807,4 @@ Sets the match data if there was a match; see 
`match-beginning',
        tags))))
 
 
-
-(defun tNFA-tags-to-groups (tags)
-  "Convert TAGS table to a list of indices of group matches.
-The nth element of the list is a cons cell, whose car is the
-starting index of the nth group and whose cdr is its end
-index. If a group didn't match, the corresponding list element
-will by null."
-  (let ((groups (make-list (/ (length tags) 2) nil))
-       group-stack
-       (grp 0))
-    (dotimes (i (length tags))
-      (if (eq (tNFA-tags-type tags i) 'max)
-         (unless (= (tNFA-tags-get tags i) -1)
-           (setf (nth (caar group-stack) groups)
-                 (cons (cdr (pop group-stack)) (tNFA-tags-get tags i))))
-       (unless (= (tNFA-tags-get tags i) -1)
-         (push (cons grp (tNFA-tags-get tags i)) group-stack))
-       (incf grp)))
-    groups))
-
-
 ;;; tNFA.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]