[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/tNFA 9a742f6 01/23: Implementation of tagged non-determ
From: |
Stefan Monnier |
Subject: |
[elpa] externals/tNFA 9a742f6 01/23: Implementation of tagged non-deterministic finite state automata, for regular expression matching |
Date: |
Mon, 14 Dec 2020 12:08:27 -0500 (EST) |
branch: externals/tNFA
commit 9a742f636b4e17c4f7cceb0c2b446c5a9f14f7e9
Author: Toby Cubitt <toby-predictive@dr-qubit.org>
Commit: tsc25 <toby-predictive@dr-qubit.org>
Implementation of tagged non-deterministic finite state automata, for
regular expression matching
---
tNFA.el | 793 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 793 insertions(+)
diff --git a/tNFA.el b/tNFA.el
new file mode 100644
index 0000000..cef5342
--- /dev/null
+++ b/tNFA.el
@@ -0,0 +1,793 @@
+
+;;; tNFA.el --- tagged non-deterministic finite-state automata package
+
+
+;; Copyright (C) 2008 Toby Cubitt
+
+;; Author: Toby Cubitt <toby-predictive@dr-qubit.org>
+;; Version: 0.1
+;; Keywords: TNFA, NFA, tagged, non-deterministic, finite state, automata
+;; URL: http://www.dr-qubit.org/emacs.php
+
+
+;; This file is NOT part of Emacs.
+;;
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License
+;; as published by the Free Software Foundation; either version 2
+;; of the License, or (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; if not, write to the Free Software
+;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+;; MA 02110-1301, USA.
+
+
+;;; Commentary:
+;;
+
+
+;;; Change Log:
+;;
+;; Version 0.1
+;; * initial version
+
+
+
+;;; Code:
+
+(eval-when-compile (require 'cl))
+
+(require 'queue)
+(provide 'tNFA)
+
+
+;;; ================================================================
+;;; Data structures
+
+;;; ----------------------------------------------------------------
+;;; tagged NFA states
+
+(defstruct
+ (tNFA-state
+ (:constructor nil)
+ (:constructor tNFA-state-create-initial
+ (NFA-state num-tags min-tags max-tags
+ &aux (tags (tNFA-tags-create num-tags min-tags max-tags))))
+ (:constructor tNFA-state-create (NFA-state tags))
+ (:copier nil))
+ NFA-state tags)
+
+(defmacro tNFA-state-id (state)
+ `(tNFA-NFA-state-id (tNFA-state-NFA-state ,state)))
+
+(defmacro tNFA-state-type (state)
+ `(tNFA-NFA-state-type (tNFA-state-NFA-state ,state)))
+
+(defmacro tNFA-state-label (state)
+ `(tNFA-NFA-state-label (tNFA-state-NFA-state ,state)))
+
+(defmacro tNFA-state-in-degree (state)
+ `(tNFA-NFA-state-in-degree (tNFA-state-NFA-state ,state)))
+
+(defmacro tNFA-state-next (state)
+ `(tNFA-NFA-state-next (tNFA-state-NFA-state ,state)))
+
+(defmacro tNFA-state-count (state)
+ `(tNFA-NFA-state-count (tNFA-state-NFA-state ,state)))
+
+
+
+;;; ----------------------------------------------------------------
+;;; NFA states
+
+(declare (special NFA--state-id))
+
+(defstruct
+ (tNFA-NFA-state
+ (:type vector)
+ (:constructor nil)
+ (:constructor tNFA-NFA-state-create
+ (&optional type label next
+ &aux
+ (in-degree 0)
+ (count 0)
+ (id (incf NFA--state-id))
+ (dummy (when next
+ (setf (tNFA-NFA-state-count next)
+ (incf (tNFA-NFA-state-in-degree next)))))))
+ (:constructor tNFA-NFA-state-create-branch
+ (&rest next
+ &aux
+ (type 'branch)
+ (in-degree 0)
+ (count 0)
+ (id (incf NFA--state-id))))
+ (:constructor tNFA-NFA-state-create-tag
+ (tag &optional next
+ &aux
+ (type 'tag)
+ (label (progn (message "%d" tag) tag))
+ (in-degree 0)
+ (count 0)
+ (id (incf NFA--state-id))
+ (dummy (when next
+ (setf (tNFA-NFA-state-count next)
+ (incf (tNFA-NFA-state-in-degree next)))))))
+ (:copier nil))
+ id type label in-degree
+ count tNFA-state ; used internally in NFA evolution algorithms
+ next)
+
+(defalias 'tNFA-NFA-state-tag 'tNFA-NFA-state-label)
+
+(defmacro tNFA-NFA-state-tags (state)
+ `(tNFA-state-tags (tNFA-NFA-state-tNFA-state ,state)))
+
+
+(defun tNFA-NFA-state-patch (attach state)
+ "Patch STATE onto ATTACH. Return value is meaningless."
+ (setf (tNFA-NFA-state-type attach) (tNFA-NFA-state-type state)
+ (tNFA-NFA-state-label attach) (tNFA-NFA-state-label state)
+ (tNFA-NFA-state-next attach) (tNFA-NFA-state-next state)
+ (tNFA-NFA-state-count state) (incf (tNFA-NFA-state-in-degree state))
+ ))
+
+
+(defun tNFA-NFA-state-make-epsilon (state next)
+ "Create an epsilon transition from STATE to NEXT."
+ (setf (tNFA-NFA-state-type state) 'epsilon
+ (tNFA-NFA-state-label state) nil
+ (tNFA-NFA-state-next state) next
+ (tNFA-NFA-state-count next) (incf (tNFA-NFA-state-in-degree next))))
+
+
+(defun tNFA-NFA-state-make-branch (state next)
+ "Create a branch from STATE to all states in NEXT list."
+ (setf (tNFA-NFA-state-type state) 'branch
+ (tNFA-NFA-state-label state) nil
+ (tNFA-NFA-state-next state) next)
+ (dolist (n next)
+ (setf (tNFA-NFA-state-count n) (incf (tNFA-NFA-state-in-degree n)))))
+
+
+
+;;; ----------------------------------------------------------------
+;;; NFA fragments
+
+(defstruct
+ (NFA-fragment
+ (:type vector)
+ (:constructor nil)
+ (:constructor NFA-fragment-create (initial final))
+ (:copier nil))
+ initial final)
+
+
+(defun NFA-fragment-patch (frag1 frag2)
+ "Patch FRAG2 onto end of FRAG1. Return value is meaningless."
+ (tNFA-NFA-state-patch (NFA-fragment-final frag1) (NFA-fragment-initial
frag2))
+ (setf (NFA-fragment-final frag1) (NFA-fragment-final frag2)))
+
+
+
+;;; ----------------------------------------------------------------
+;;; tag tables
+
+(defun tNFA-tags-create (num-tags min-tags max-tags)
+ "Construct a new tags table."
+ (let ((vec (make-vector num-tags nil)))
+ (dolist (tag min-tags)
+ (aset vec tag (cons -1 'min)))
+ (dolist (tag max-tags)
+ (aset vec tag (cons -1 'max)))
+ vec))
+
+
+(defun tNFA-tags-copy (tags)
+ "Return a copy of TAGS table."
+ (let* ((len (length tags))
+ (vec (make-vector len nil)))
+ (dotimes (i len)
+ (aset vec i (cons (car (aref tags i))
+ (cdr (aref tags i)))))
+ vec))
+
+
+(defmacro tNFA-tags-set (tags tag val)
+ "Set value of TAG in TAGS table to VAL."
+ `(setcar (aref ,tags ,tag) ,val))
+
+
+(defmacro tNFA-tags-get (tags tag)
+ "Get value of TAG in TAGS table."
+ `(car (aref ,tags ,tag)))
+
+
+(defmacro tNFA-tags-type (tags tag)
+ "Return the symbol `min' if TAG in TAGS table is a minimize tag,
+`max' if it is a maximize tag."
+ `(cdr (aref ,tags ,tag)))
+
+
+(defun tNFA-tags< (val tag tags)
+ "Return non-nil if VAL takes precedence over the value of TAG in TAGS table,
+otherwise return nil."
+ (setq tag (aref tags tag))
+ (or (and (eq (cdr tag) 'min)
+ (< val (car tag)))
+ ;;(and (eq (cdr tag) 'max)
+ (> val (car tag));)
+ ))
+
+
+
+;;; ----------------------------------------------------------------
+;;; DFA states
+
+(defstruct
+ (tNFA-DFA-state
+ :named
+ (:constructor nil)
+ (:constructor tNFA--DFA-state-create
+ (list pool
+ &key (test 'eq)
+ &aux
+ (transitions (make-hash-table :test test))))
+ (:constructor tNFA-DFA-state-create-failed ())
+ (:copier nil))
+ list transitions wildcard match pool)
+
+
+(defun* tNFA-DFA-state-create (state-list state-pool &key (test 'eq))
+ ;; create DFA state and add it to the state pool
+ (let ((DFA-state (tNFA--DFA-state-create
+ state-list state-pool :test test)))
+ (puthash state-list DFA-state (tNFA-DFA-state-pool DFA-state))
+
+ (dolist (state state-list)
+ ;; if state in state list is...
+ (cond
+ ;; literal state: add literal transition
+ ((eq (tNFA-state-type state) 'literal)
+ (puthash (tNFA-state-label state) t
+ (tNFA-DFA-state-transitions DFA-state)))
+
+ ;; character alternative: add transitions for all alternatives
+ ((eq (tNFA-state-type state) 'char-alt)
+ (dolist (c (tNFA-state-label state))
+ (puthash c t (tNFA-DFA-state-transitions DFA-state))))
+
+ ;; wildcard or negated character alternative: add wildcard transistion
+ ((or (eq (tNFA-state-type state) 'wildcard)
+ (eq (tNFA-state-type state) 'neg-char-alt))
+ (setf (tNFA-DFA-state-wildcard DFA-state) t))
+
+ ;; match state: set match tags
+ ((eq (tNFA-state-type state) 'match)
+ (setf (tNFA-DFA-state-match DFA-state)
+ (tNFA-state-tags state)))))
+
+ ;; return constructed state
+ DFA-state))
+
+
+(defun* tNFA-DFA-state-create-initial (initial-state &key (test 'eq))
+ ;; create initial DFA state from initial tNFA state INITIAL-STATE
+ (tNFA-DFA-state-create (list initial-state)
+ (make-hash-table :test 'equal)
+ :test test))
+
+
+(defun tNFA-DFA-state-failed-p (state)
+ "Return t if STATE is a failed match, otherwise returns nil."
+ (null (tNFA-DFA-state-list state)))
+
+(defalias 'tNFA-DFA-state-match-p 'tNFA-DFA-state-match
+ "Return non-nil if STATE is a matching state, otherwise returns nil.")
+
+
+
+
+;;; ================================================================
+;;; Regexp -> tNFA
+
+(defun* tNFA-from-regexp (regexp &key (test 'eq))
+ "Create a tagged NFA that recognizes the regular expression REGEXP.
+
+Back-references and non-greedy postfix operators are *not* supported, and the
+matches are always anchored, so `$' and `^' lose their special meanings.
+
+The return value is the initial state of the tagged NFA.
+
+The :test keyword argument specifies how to test whether two
+individual elements of a string are identical. The default is `eq'."
+
+ ;; convert regexp to list, build NFA, and return initial state
+ (declare (special NFA--state-id))
+ (destructuring-bind (fragment num-tags min-tags max-tags regexp)
+ (let ((NFA--state-id -1))
+ (tNFA--from-regexp (append regexp nil) 0 '() '() 'top-level))
+ (if regexp
+ (error "Syntax error in regexp: missing \"(\"")
+ (setf (tNFA-NFA-state-type (NFA-fragment-final fragment)) 'match)
+ (tNFA-DFA-state-create-initial
+ (tNFA-state-create-initial
+ (NFA-fragment-initial fragment) num-tags min-tags max-tags)
+ :test test)
+ )))
+
+
+
+(defun tNFA--from-regexp (regexp num-tags min-tags max-tags
+ &optional top-level)
+ (let* ((new (tNFA-NFA-state-create))
+ (fragment-stack (list (NFA-fragment-create new new)))
+ fragment attach token type)
+
+ (catch 'constructed
+ (while t
+ (setq regexp (NFA-regexp-next-token regexp)
+ type (nth 0 regexp)
+ token (nth 1 regexp)
+ regexp (nth 2 regexp))
+ (setq fragment nil)
+
+ ;; ----- construct new fragment -----
+ (cond
+ ;; syntax error: missing )
+ ((and (null type) (not top-level))
+ (error "Syntax error in regexp: extra \"(\" or missing \")\""))
+
+ ;; syntax error: extra )
+ ((and (eq type 'shy-group-end) top-level)
+ (error "Syntax error in regexp: extra \")\" or missing \"(\""))
+
+ ;; syntax error: postfix operator not after atom
+ ((or (eq type 'postfix*) (eq type 'postfix+) (eq type 'postfix?))
+ (error "Syntax error in regexp: unexpected \"%s\""
+ (char-to-string token)))
+
+ ;; regexp atom: construct new literal fragment
+ ((or (eq type 'literal) (eq type 'wildcard)
+ (eq type 'char-alt) (eq type 'neg-char-alt))
+ (setq new (tNFA-NFA-state-create type token (tNFA-NFA-state-create))
+ fragment (NFA-fragment-create new (tNFA-NFA-state-next new))))
+
+ ;; shy subgroup start: recursively construct subgroup fragment
+ ((eq type 'shy-group-start)
+ (setq new (tNFA--from-regexp regexp num-tags min-tags max-tags)
+ num-tags (nth 1 new)
+ min-tags (nth 2 new)
+ max-tags (nth 3 new)
+ regexp (nth 4 new)
+ fragment (nth 0 new)))
+
+ ;; subgroup start: recursively construct subgroup fragment, attaching
+ ;; minimize tag to the front
+ ((eq type 'group-start)
+ (setq new (tNFA-NFA-state-create))
+ (setq fragment
+ (NFA-fragment-create
+ (tNFA-NFA-state-create-tag
+ (car (push (1- (incf num-tags)) min-tags))
+ new)
+ new))
+ (setq new (tNFA--from-regexp regexp num-tags min-tags max-tags)
+ num-tags (nth 1 new)
+ min-tags (nth 2 new)
+ max-tags (nth 3 new)
+ regexp (nth 4 new)
+ new (nth 0 new))
+ (NFA-fragment-patch fragment new))
+
+
+ ;; end of regexp or subgroup: ...
+ ((or (null type) (eq type 'shy-group-end) (eq type 'group-end))
+
+ ;; if fragment-stack contains only one fragment...
+ (cond
+ ((null (nth 1 fragment-stack))
+ ;; if ending a group, add a maximize tag to end of fragment
+ (when (eq type 'group-end)
+ (setq new (tNFA-NFA-state-create)
+ fragment (NFA-fragment-create
+ (tNFA-NFA-state-create-tag
+ (car (push (1- (incf num-tags)) max-tags))
+ new)
+ new))
+ (NFA-fragment-patch (car fragment-stack) fragment))
+ ;; throw fragment up to recursion level above
+ (throw 'constructed
+ (list (car fragment-stack)
+ num-tags min-tags max-tags regexp)))
+
+ ;; if fragment-stack contains multiple alternation fragments,
+ ;; attach them all together
+ ;;
+ ;; .--fragment--.
+ ;; / \
+ ;; /----fragment----\
+ ;; / \
+ ;; ---o------fragment------o--->
+ ;; \ . /
+ ;; \ . /
+ ;; .
+ (t
+ ;; create a new fragment containing start and end of alternation;
+ ;; if ending a group, make end of alternation a maximize tag
+ (setq fragment
+ (NFA-fragment-create
+ (tNFA-NFA-state-create-branch)
+ (if (eq type 'group-end)
+ (tNFA-NFA-state-create-tag
+ (car (push (1- (incf num-tags)) max-tags))
+ (tNFA-NFA-state-create))
+ (tNFA-NFA-state-create))))
+ ;; patch alternation fragments into new fragment
+ (dolist (frag fragment-stack)
+ (push (NFA-fragment-initial frag)
+ (tNFA-NFA-state-next (NFA-fragment-initial fragment)))
+ (setf (tNFA-NFA-state-count (NFA-fragment-initial frag))
+ (incf (tNFA-NFA-state-in-degree
+ (NFA-fragment-initial frag))))
+ (tNFA-NFA-state-make-epsilon (NFA-fragment-final frag)
+ (NFA-fragment-final fragment)))
+ ;; if ending a group, step the end of the fragment along one link,
+ ;; to the blank state linked from the tag
+ (when (eq type 'group-end)
+ (setf (NFA-fragment-final fragment)
+ (tNFA-NFA-state-next (NFA-fragment-final fragment))))
+ ;; throw constructed fragment up to recursion level above
+ (throw 'constructed
+ (list fragment num-tags min-tags max-tags regexp)))
+ ))
+
+ ;; | alternation: start new fragment
+ ((eq type 'alternation)
+ (setq new (tNFA-NFA-state-create))
+ (push (NFA-fragment-create new new) fragment-stack)))
+
+
+ ;; ----- attach new fragment -----
+ (when fragment
+ (setq attach (NFA-fragment-final (car fragment-stack)))
+ (if (or (eq (car regexp) ?*)
+ (eq (car regexp) ?+)
+ (eq (car regexp) ??))
+ (if (eq type 'alternation)
+ (error "Syntax error in regexp: unexpected \"%s\""
+ (char-to-string token))
+
+ ;; if next token is a postfix operator, splice new fragment
+ ;; into NFA as appropriate
+ (setq regexp (NFA-regexp-next-token regexp)
+ type (nth 0 regexp)
+ token (nth 1 regexp)
+ regexp (nth 2 regexp))
+ (setq new (tNFA-NFA-state-create))
+
+ (cond
+
+ ;; .--fragment--.
+ ;; / \
+ ;; \ ______/
+ ;; \ /
+ ;; ---attach-----new---
+ ;;
+ ((eq type 'postfix*)
+ (tNFA-NFA-state-make-branch
+ attach (list (NFA-fragment-initial fragment) new))
+ (tNFA-NFA-state-make-epsilon
+ (NFA-fragment-final fragment) attach)
+ (setf (NFA-fragment-final (car fragment-stack)) new))
+
+ ;; .----.
+ ;; / \
+ ;; / \
+ ;; \ /
+ ;; ---fragment-----new---
+ ;;
+ ((eq type 'postfix+)
+ (tNFA-NFA-state-patch
+ attach (NFA-fragment-initial fragment))
+ (tNFA-NFA-state-make-branch
+ (NFA-fragment-final fragment) (list attach new))
+ (setf (NFA-fragment-final (car fragment-stack)) new))
+
+ ;; .--fragment--.
+ ;; / \
+ ;; ---attach new---
+ ;; \______________/
+ ;;
+ ((eq type 'postfix?)
+ (tNFA-NFA-state-make-branch
+ attach (list (NFA-fragment-initial fragment) new))
+ (tNFA-NFA-state-make-epsilon
+ (NFA-fragment-final fragment) new)
+ (setf (NFA-fragment-final (car fragment-stack)) new))
+ ))
+
+
+ ;; if next token is not a postfix operator, attach new fragment
+ ;; onto end of current NFA fragment
+ (NFA-fragment-patch (car fragment-stack) fragment)))
+ )) ; end of infinite loop and catch
+ ))
+
+
+
+(defun NFA-regexp-next-token (regexp)
+ ;; if regexp is empty, return null values for next token type, token and
+ ;; remaining regexp
+ (if (null regexp)
+ (list nil nil nil)
+
+ (let ((token (pop regexp))
+ (type 'literal)) ; assume token is literal initially
+ (cond
+
+ ;; [: gobble up to closing ]
+ ((eq token ?\[)
+ ;; character alternatives are stored in lists
+ (setq token '())
+ (cond
+ ;; gobble ] appearing straight after [
+ ((eq (car regexp) ?\]) (push (pop regexp) token))
+ ;; gobble ] appearing straight after [^
+ ((and (eq (car regexp) ?^) (eq (nth 1 regexp) ?\]))
+ (push (pop regexp) token)
+ (push (pop regexp) token)))
+ ;; gobble everything up to closing ]
+ (while (not (eq (car regexp) ?\]))
+ (push (pop regexp) token)
+ (unless regexp
+ (error "Syntax error in regexp: missing \"]\"")))
+ (pop regexp) ; dump closing ]
+ (if (not (eq (car (last token)) ?^))
+ (setq type 'char-alt)
+ (setq type 'neg-char-alt)
+ (setq token (butlast token))))
+
+ ;; ]: syntax error (always gobbled when parsing [)
+ ((eq token ?\])
+ (error "Syntax error in regexp: missing \"[\""))
+
+ ;; . * + ?: set appropriate type
+ ((eq token ?*) (setq type 'postfix*))
+ ((eq token ?+) (setq type 'postfix+))
+ ((eq token ??) (setq type 'postfix?))
+ ((eq token ?.) (setq type 'wildcard))
+
+ ;; \: look at next character
+ ((eq token ?\\)
+ (unless (setq token (pop regexp))
+ (error "Syntax error in regexp: missing character after \"\\\""))
+ (cond
+ ((eq token ?|) (setq type 'alternation))
+ ((and (eq token ?\() (eq (car regexp) ??))
+ (setq type 'shy-group-start)
+ (pop regexp))
+ ((and (eq token ?\)) (eq (car regexp) ??))
+ (setq type 'shy-group-end)
+ (pop regexp))
+ ((eq token ?\() (setq type 'group-start))
+ ((eq token ?\)) (setq type 'group-end))))
+ )
+
+ ;; return first token type, token, and remaining regexp
+ (list type token regexp))))
+
+
+
+;;; ================================================================
+;;; tNFA evolution
+
+(defun tNFA-next-state (DFA-state chr pos)
+ (let (state)
+ ;; if there is a transition for character CHR...
+ (cond
+ ((setq state (gethash chr (tNFA-DFA-state-transitions DFA-state)))
+ ;; if next state has not already been computed, do so
+ (unless (tNFA-DFA-state-p state)
+ (setq state (tNFA--DFA-next-state DFA-state chr pos nil))
+ (puthash chr state (tNFA-DFA-state-transitions DFA-state))))
+
+ ;; if there's a wildcard transition...
+ ((setq state (tNFA-DFA-state-wildcard DFA-state))
+ ;; if next state has not already been computed, do so
+ (unless (tNFA-DFA-state-p state)
+ (setq state (tNFA--DFA-next-state DFA-state chr pos t))
+ (setf (tNFA-DFA-state-wildcard DFA-state) state))))
+ state))
+
+
+
+(defun tNFA--DFA-next-state (DFA-state chr pos wildcard)
+ (let (state-list state)
+ ;; add all states reached by a CHR transition from DFA-STATE to state list
+ (if wildcard
+ (dolist (state (tNFA-DFA-state-list DFA-state))
+ (when (or (eq (tNFA-state-type state) 'wildcard)
+ (and (eq (tNFA-state-type state) 'neg-char-alt)
+ (not (memq chr (tNFA-state-label state)))))
+ (push (tNFA-state-create (tNFA-state-next state)
+ (tNFA-tags-copy (tNFA-state-tags state)))
+ state-list)))
+ (dolist (state (tNFA-DFA-state-list DFA-state))
+ (when (or (and (eq (tNFA-state-type state) 'literal)
+ (eq chr (tNFA-state-label state)))
+ (and (eq (tNFA-state-type state) 'char-alt)
+ (memq chr (tNFA-state-label state)))
+ (and (eq (tNFA-state-type state) 'neg-char-alt)
+ (not (memq chr (tNFA-state-label state))))
+ (eq (tNFA-state-type state) 'wildcard))
+ (push (tNFA-state-create (tNFA-state-next state)
+ (tNFA-tags-copy (tNFA-state-tags state)))
+ state-list))))
+
+ ;; if state list is empty, return empty, failure DFA state
+ (when state-list
+ ;; otherwise, construct new DFA state and add it to the pool if it's not
+ ;; already there
+ (setq state-list (tNFA-epsilon-boundary state-list (1+ pos)))
+ (setq state
+ (or (gethash state-list (tNFA-DFA-state-pool DFA-state))
+ (tNFA-DFA-state-create
+ state-list
+ (tNFA-DFA-state-pool DFA-state)
+ :test
+ (hash-table-test (tNFA-DFA-state-transitions DFA-state)))))
+ ;; return next state
+ state)))
+
+
+
+(defun tNFA-epsilon-boundary (state-set pos)
+ ;; Return the tagged epsilon-closure of the tNFA states listed in STATE-SET,
+ ;; that is the set of all states that can be reached via only epsilon
+ ;; transitions from some state in STATE-SET. (This includes all states in
+ ;; STATE-SET itself.)
+ (let ((queue (queue-create))
+ (result '())
+ (seen '())
+ state next tags)
+ ;; temporarily link the NFA states to their corresponding tNFA states, and
+ ;; add them to the queue
+ (dolist (t-state state-set)
+ (setf state (tNFA-state-NFA-state t-state)
+ (tNFA-NFA-state-tNFA-state state) t-state)
+ (push t-state seen)
+ (queue-enqueue queue state))
+
+ (while (setq state (queue-dequeue queue))
+ (cond
+ ;; branch or epsilon: add next states as necessary, copying tags across
+ ((or (eq (tNFA-NFA-state-type state) 'branch)
+ (eq (tNFA-NFA-state-type state) 'epsilon))
+ (dolist (next (if (eq (tNFA-NFA-state-type state) 'epsilon)
+ (list (tNFA-NFA-state-next state))
+ (tNFA-NFA-state-next state)))
+ (unless (tNFA-NFA-state-tNFA-state next)
+ (setf (tNFA-NFA-state-tNFA-state next)
+ (tNFA-state-create
+ next (tNFA-tags-copy (tNFA-NFA-state-tags state))))
+ (push (tNFA-NFA-state-tNFA-state next) seen)
+ ;; if next state hasn't already been seen in-degree times, add it
+ ;; to the end of the queue
+ (if (/= (decf (tNFA-NFA-state-count next)) 0)
+ (queue-enqueue queue next)
+ ;; if it has now been seen in-degree times, reset count and add
+ ;; it back to the front of the queue
+ (setf (tNFA-NFA-state-count next)
+ (tNFA-NFA-state-in-degree next))
+ (queue-prepend queue next)))))
+
+ ;; tag: add next state if necessary, updating tags if necessary
+ ((eq (tNFA-NFA-state-type state) 'tag)
+ (setq next (tNFA-NFA-state-next state))
+ ;; if next state is not already in results list, or it is already in
+ ;; results but new tag value takes precedence...
+ (when (or (not (tNFA-NFA-state-tNFA-state next))
+ (tNFA-tags< pos (tNFA-NFA-state-tag state)
+ (tNFA-NFA-state-tags next)))
+ ;; if next state is already in results, update tag value
+ (if (tNFA-NFA-state-tNFA-state next)
+ (tNFA-tags-set (tNFA-NFA-state-tags next)
+ (tNFA-NFA-state-tag state) pos)
+ ;; if state is not already in results, copy tags, updating tag
+ ;; value, and add next state to results list
+ (setq tags (tNFA-tags-copy (tNFA-NFA-state-tags state)))
+ (tNFA-tags-set tags (tNFA-NFA-state-tag state) pos)
+ (setf (tNFA-NFA-state-tNFA-state next)
+ (tNFA-state-create next tags))
+ (push (tNFA-NFA-state-tNFA-state next) seen))
+ ;; if next state hasn't already been seen in-degree times, add it to
+ ;; the end of the queue
+ (if (/= (decf (tNFA-NFA-state-count next)) 0)
+ (queue-enqueue queue next)
+ ;; if it has now been seen in-degree times, reset count and add it
+ ;; back to the front of the queue
+ (setf (tNFA-NFA-state-count next) (tNFA-NFA-state-in-degree next))
+ (queue-prepend queue next))))
+
+ ;; anything else is a non-epsilon-transition state, so add it to result
+ (t (push (tNFA-NFA-state-tNFA-state state) result))
+ ))
+
+ ;; reset temporary NFA state link and count
+ (dolist (state seen)
+ (setf (tNFA-NFA-state-tNFA-state (tNFA-state-NFA-state state)) nil
+ (tNFA-NFA-state-count (tNFA-state-NFA-state state))
+ (tNFA-NFA-state-in-degree (tNFA-state-NFA-state state))))
+ ;; sort result states
+ (sort result (lambda (a b) (< (tNFA-state-id a) (tNFA-state-id b))))
+ ))
+
+
+
+;;; ================================================================
+;;; tNFA matching
+
+(defun tNFA-regexp-match (regexp string)
+ "Return non-nil if STRING matches REGEXP, nil otherwise.
+Sets the match data if there was a match; see `match-beginning',
+`match-end' and `match-string'."
+
+ (let ((tNFA (tNFA-from-regexp regexp))
+ (i -1) tags match-data group-stack (grp 0))
+
+ ;; evolve tNFA according to characters of STRING
+ (catch 'fail
+ (dolist (chr (append string nil))
+ (unless (setq tNFA (tNFA-next-state tNFA chr (incf i)))
+ (throw 'fail nil)))
+
+ ;; if REGEXP matched...
+ (when (setq tags (tNFA-DFA-state-match tNFA))
+ (setq match-data (make-list (+ (length tags) 2) nil))
+ ;; set match data
+ (setf (nth 0 match-data) 0
+ (nth 1 match-data) (length string))
+ ;; set group match data if there were any groups
+ (dotimes (i (length tags))
+ (if (eq (tNFA-tags-type tags i) 'max)
+ (unless (= (tNFA-tags-get tags i) -1)
+ (setf (nth (1+ (* 2 (pop group-stack))) match-data)
+ (tNFA-tags-get tags i)))
+ (incf grp)
+ (unless (= (tNFA-tags-get tags i) -1)
+ (push grp group-stack)
+ (setf (nth (* 2 grp) match-data)
+ (tNFA-tags-get tags i)))))
+ (set-match-data match-data)
+ tags))))
+
+
+
+(defun tNFA-tags-to-groups (tags)
+ "Convert TAGS table to a list of indices of group matches.
+The nth element of the list is a cons cell, whose car is the
+starting index of the nth group and whose cdr is its end
+index. If a group didn't match, the corresponding list element
+will by null."
+ (let ((groups (make-list (/ (length tags) 2) nil))
+ group-stack
+ (grp 0))
+ (dotimes (i (length tags))
+ (if (eq (tNFA-tags-type tags i) 'max)
+ (unless (= (tNFA-tags-get tags i) -1)
+ (setf (nth (caar group-stack) groups)
+ (cons (cdr (pop group-stack)) (tNFA-tags-get tags i))))
+ (unless (= (tNFA-tags-get tags i) -1)
+ (push (cons grp (tNFA-tags-get tags i)) group-stack))
+ (incf grp)))
+ groups))
+
+
+;;; tNFA.el ends here
- [elpa] externals/tNFA 9e1ca74 13/23: Added changelog entries, and bumped tNFA version number., (continued)
- [elpa] externals/tNFA 9e1ca74 13/23: Added changelog entries, and bumped tNFA version number., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 83ab8b3 10/23: Re-filled to 72 chars/line, for mailing to gnu-emacs-sources list, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA b457403 14/23: Trivial docstring and comment fixes., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 1af1e58 22/23: Implement trie-fuzzy-match and trie-fuzzy-complete functions., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 7b44eeb 02/23: Bug-fix in tNFA--from-regexp: add tag transitions *outside* their group fragment,, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 5463a53 07/23: Bug-fix to \{...\} postfix operator processing in tNFA--from-regexp, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 4771c2f 12/23: Redefined tNFA--NFA-state-create and tNFA--NFA-state-create-tag using defun, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 5f3bdf7 21/23: Enable lexical binding, and fix issues it picks up., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 7e38f4c 19/23: Add missing autoload cookies., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 454c544 09/23: Added commentary, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 9a742f6 01/23: Implementation of tagged non-deterministic finite state automata, for regular expression matching,
Stefan Monnier <=
- [elpa] externals/tNFA b035e48 11/23: Removed left-over debugging code and other minor tidying., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA ff30781 18/23: More minor whitespace and commentary changes., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA c5004e1 08/23: Updated docstrings for regexp-related functions and others., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 87c6223 15/23: Updated Package-Version, Package-Requires, and Keywords package headers., Stefan Monnier, 2020/12/14