emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/parser-generator 23d56a077d 05/19: More work on lexer w


From: Christian Johansson
Subject: [elpa] externals/parser-generator 23d56a077d 05/19: More work on lexer without global state
Date: Wed, 10 Jan 2024 15:35:22 -0500 (EST)

branch: externals/parser-generator
commit 23d56a077d2007f0d02f830e21ed2feef7876143
Author: Christian Johansson <christian@cvj.se>
Commit: Christian Johansson <christian@cvj.se>

    More work on lexer without global state
---
 docs/Lexical-Analysis.md                   | 27 ++++++++--
 parser-generator-lex-analyzer.el           | 55 ++++++++++++++------
 test/parser-generator-lex-analyzer-test.el | 10 ++--
 test/parser-generator-lr-test.el           | 83 +++++++++++++++++-------------
 4 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/docs/Lexical-Analysis.md b/docs/Lexical-Analysis.md
index 4be8e6dba0..ac17c11e00 100644
--- a/docs/Lexical-Analysis.md
+++ b/docs/Lexical-Analysis.md
@@ -4,13 +4,29 @@ Set lexical analysis function by setting variable 
`parser-generator-lex-analyzer
 
 The lexical analysis is internally indexed on a local variable 
`parser-generator-lex-analyzer--index` and has it optional state in the local 
variable `parser-generation-lex-analyzer--state`. The initial values for the 
index and state can be set in variables 
`parser-generation-lex-analyzer--index-init` and 
`parser-generator-lex-analyzer--state-init`.
 
-All parsers expect a list as response from lexical-analysis, the first item in 
the list should be a list of tokens. The second is "move index"-flag, if it is 
non-nil it is expected to be a integer representing the index to move the 
lex-analyzer to. Third item is the new index after the lex. The fourth item is 
the new state after the lex.
+All parsers expect a list as response from lexical-analysis. The first item in 
the list should be a list of one or more tokens. The second is the "move 
index"-flag; if it is non-nil it is expected to be an integer representing the 
index to move the lex-analyzer to before performing a new lex. The third item is the new 
index after the lex. The fourth item is the new state after the lex.
 
 To enable exporting, the functions need to be specified in a way that the 
entire body is within the same block, do that using `(let)` or `(progn)` for 
example.
 
+```emacs-lisp
+  (setq
+   parser-generator-lex-analyzer--function
+   (lambda (index _state)
+     (let* ((string '(("a" 1 . 2) ("a" 2 . 3) ("b" 3 . 4) ("b" 4 . 5)))
+            (string-length (length string))
+            (max-index index)
+            (tokens))
+       (while (and
+               (< (1- index) string-length)
+               (< (1- index) max-index))
+         (push (nth (1- index) string) tokens)
+         (setq index (1+ index)))
+       (list tokens nil index nil))))
+```
+
 ## Token
 
-A token is defined as a list with 3 elements, first is a string or symbol, 
second is the start index of token in stream and third is the end index of 
token in stream, second and third element have a dot between them, this 
structure is to be compatible with Emacs Semantic system. Example token:
+A token is defined as a list with 3 elements: the first is a string or symbol, 
the second is the start index (inclusive) of the token in the stream and the third is 
the end index (exclusive) of the token in the stream. The second and third elements have a 
dot between them; this structure is compatible with the Emacs Semantic 
system. Example token:
 
 ``` emacs-lisp
 '("a" 1 . 2)
@@ -22,6 +38,8 @@ or
 '(a 1 . 2)
 ```
 
+This is a token that starts at position 1 (inclusive) and ends at position 2 (exclusive).
+
 ## Peek next look-ahead
 
 Returns the look-ahead number of next terminals in stream, if end of stream is 
reached a EOF-identifier is returned. Result is expected to be a list with each 
token in it.
@@ -78,12 +96,13 @@ Returns the next token in stream and moves the lexical 
analyzer index one point
      (let* ((string '(("a" 1 . 2) ("b" 2 . 3)))
             (string-length (length string))
             (max-index index)
-            (tokens))
+            (tokens)
+            (new-index))
        (while (and
                (< (1- index) string-length)
                (< (1- index) max-index))
+         (setq new-index (cdr (cdr (nth (1- index) string))))
          (push (nth (1- index) string) tokens)
-         (list (nreverse tokens) nil new-index nil)
          (setq index (1+ index)))
        (list (nreverse tokens) nil new-index nil))))
 (parser-generator-lex-analyzer--reset)
diff --git a/parser-generator-lex-analyzer.el b/parser-generator-lex-analyzer.el
index 74d9d95840..e011ecb262 100644
--- a/parser-generator-lex-analyzer.el
+++ b/parser-generator-lex-analyzer.el
@@ -97,12 +97,16 @@
   (unless parser-generator--look-ahead-number
     (error "Missing look-ahead-number when peeking!"))
   (let ((look-ahead)
-        (look-ahead-length 0)
-        (index parser-generator-lex-analyzer--index)
-        (state parser-generation-lex-analyzer--state)
-        (k (max
-            1
-            parser-generator--look-ahead-number)))
+        (look-ahead-length
+         0)
+        (index
+         parser-generator-lex-analyzer--index)
+        (state
+         parser-generation-lex-analyzer--state)
+        (k
+         (max
+          1
+          parser-generator--look-ahead-number)))
     (while (<
             look-ahead-length
             k)
@@ -117,6 +121,8 @@
                     (nth 0 result-list))
                    (move-to-index-flag
                     (nth 1 result-list))
+                   (new-index
+                    (nth 2 result-list))
                    (new-state
                     (nth 3 result-list)))
               (if move-to-index-flag
@@ -129,23 +135,42 @@
                      new-state))
                 (if token
                     (progn
+                      (setq index new-index)
                       (unless (listp (car token))
                         (setq token (list token)))
-                      (dolist (next-look-ahead-item token)
-                        (when (<
-                               look-ahead-length
-                               k)
-                          (push next-look-ahead-item look-ahead)
-                          (setq look-ahead-length (1+ look-ahead-length))
-                          (setq index (cdr (cdr next-look-ahead-item))))))
+                      (let ((token-count (length token))
+                            (token-index 0))
+                        (while
+                            (and
+                             (<
+                              look-ahead-length
+                              k)
+                             (<
+                              token-index
+                              token-count))
+                          (let ((next-look-ahead-item
+                                 (nth token-index token)))
+                            (push
+                             next-look-ahead-item
+                             look-ahead)
+                            (setq
+                             look-ahead-length
+                             (1+ look-ahead-length))
+                            (setq
+                             token-index
+                             (1+ token-index))))))
+
+                  ;; Fill up look-ahead with EOF-identifier if we found nothing
                   (push (list parser-generator--eof-identifier) look-ahead)
                   (setq look-ahead-length (1+ look-ahead-length))
                   (setq index (1+ index))))))
+
         (error
          (error
-          "Lex-analyze failed to peek next look-ahead at %s, error: %s"
+          "Lex-analyze failed to peek next look-ahead at %s, error: %s, 
look-ahead: %S"
           index
-          error))))
+          error
+          look-ahead))))
     (nreverse look-ahead)))
 
 (defun parser-generator-lex-analyzer--pop-token ()
diff --git a/test/parser-generator-lex-analyzer-test.el 
b/test/parser-generator-lex-analyzer-test.el
index a1cd79022f..44ee9a7ad3 100644
--- a/test/parser-generator-lex-analyzer-test.el
+++ b/test/parser-generator-lex-analyzer-test.el
@@ -29,14 +29,16 @@
             (string-length (length string))
             (max-index index)
             (tokens)
-            (next-token))
+            (next-token)
+            (new-index))
        (while (and
                (< (1- index) string-length)
                (< (1- index) max-index))
          (setq next-token (nth (1- index) string))
+         (setq new-index (cdr (cdr (nth (1- index) string))))
          (push next-token tokens)
          (setq index (1+ index)))
-       (list (nreverse tokens) nil nil nil))))
+       (list (nreverse tokens) nil new-index nil))))
   (should-error
    (parser-generator-lex-analyzer--peek-next-look-ahead))
   (parser-generator-lex-analyzer--reset)
@@ -71,6 +73,7 @@
             (string-length (length string))
             (max-index index)
             (tokens)
+            (new-index)
             (next-token))
        (while (and
                (< (1- index) string-length)
@@ -78,9 +81,10 @@
          (setq next-token (nth (1- index) string))
          (when (string= (car next-token) "d")
            (error "Invalid token: %s" next-token))
+         (setq new-index (cdr (cdr (nth (1- index) string))))
          (push next-token tokens)
          (setq index (1+ index)))
-       (list (nreverse tokens) nil nil nil))))
+       (list (nreverse tokens) nil new-index nil))))
 
   (should-error
     (parser-generator-lex-analyzer--peek-next-look-ahead))
diff --git a/test/parser-generator-lr-test.el b/test/parser-generator-lr-test.el
index da5be50b33..a5d9d287dd 100644
--- a/test/parser-generator-lr-test.el
+++ b/test/parser-generator-lr-test.el
@@ -945,7 +945,7 @@
   (parser-generator-lr-generate-parser-tables)
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (let* ((string '((a 1 . 2) (a 2 . 3) (b 3 . 4) (b 4 . 5)))
             (string-length (length string))
             (max-index index)
@@ -955,7 +955,7 @@
                (< (1- index) max-index))
          (push (nth (1- index) string) tokens)
          (setq index (1+ index)))
-       (nreverse tokens))))
+       (list (car tokens) nil index nil))))
   (setq
    parser-generator-lex-analyzer--get-function
    (lambda (token)
@@ -968,7 +968,7 @@
 
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (let* ((string '((a 1 . 2) (a 2 . 3) (b 3 . 4) (b 4 . 5) (b 5 . 6)))
             (string-length (length string))
             (max-index index)
@@ -978,7 +978,8 @@
                (< (1- index) max-index))
          (push (nth (1- index) string) tokens)
          (setq index (1+ index)))
-       (nreverse tokens))))
+       (list (car tokens) nil index nil))))
+
   (should-error
    (parser-generator-lr--parse t))
   (message "Passed test with terminals as symbols, invalid syntax")
@@ -993,9 +994,10 @@
   (parser-generator--debug
    (message "goto-tables: %s" (parser-generator-lr--get-expanded-goto-tables))
    (message "action-tables: %s" 
(parser-generator-lr--get-expanded-action-tables)))
+
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (let* ((string '(("a" 1 . 2) ("a" 2 . 3) ("b" 3 . 4) ("b" 4 . 5)))
             (string-length (length string))
             (max-index index)
@@ -1005,7 +1007,8 @@
                (< (1- index) max-index))
          (push (nth (1- index) string) tokens)
          (setq index (1+ index)))
-       (nreverse tokens))))
+       (list (car tokens) nil index nil))))
+
   (should
    (equal
     '(2 2 2 1 1)
@@ -1014,7 +1017,7 @@
 
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (let* ((string '(("a" 1 . 2) ("a" 2 . 3) ("b" 3 . 4) ("b" 4 . 5) ("b" 5 . 
6)))
             (string-length (length string))
             (max-index index)
@@ -1024,14 +1027,15 @@
                (< (1- index) max-index))
          (push (nth (1- index) string) tokens)
          (setq index (1+ index)))
-       (nreverse tokens))))
+       (list (car tokens) nil index nil))))
+
   (should-error
    (parser-generator-lr--parse t))
   (message "Passed test with terminals as string, invalid syntax")
 
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (let* ((string '(("a" 1 . 2) ("a" 2 . 3) ("b" 3 . 4) ("b" 4 . 5)))
             (string-length (length string))
             (max-index index)
@@ -1041,7 +1045,7 @@
                (< (1- index) max-index))
          (push (nth (1- index) string) tokens)
          (setq index (1+ index)))
-       (nreverse tokens))))
+       (list (car tokens) nil index nil))))
 
   (parser-generator-lr-test--parse-incremental-vs-regular)
   (message "Passed incremental-tests")
@@ -1090,16 +1094,18 @@
   (parser-generator-set-look-ahead-number 1)
   (parser-generator-process-grammar)
   (parser-generator-lr-generate-parser-tables)
+
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (with-current-buffer "*PHP8.0*"
-       (let ((token))
+       (let ((token)
+             (move-to-index-flag))
          (goto-char index)
          (cond
           ((looking-at "[ \n\t]+")
            (setq
-            parser-generator-lex-analyzer--move-to-index-flag
+            move-to-index-flag
             (match-end 0)))
           ((or
             (looking-at "{")
@@ -1154,8 +1160,8 @@
             )
            )
           )
-         token
-         ))))
+         (list token move-to-index-flag (match-end 0) nil)))))
+
   (let ((buffer (generate-new-buffer "*PHP8.0*")))
     (with-current-buffer buffer
       (kill-region (point-min) (point-max))
@@ -1211,16 +1217,18 @@
   (parser-generator-set-look-ahead-number 1)
   (parser-generator-process-grammar)
   (parser-generator-lr-generate-parser-tables)
+  
   (setq
    parser-generator-lex-analyzer--function
-   (lambda (index)
+   (lambda (index _state)
      (with-current-buffer "*PHP8.0*"
-       (let ((token))
+       (let ((token)
+             (move-to-index-flag))
          (goto-char index)
          (cond
           ((looking-at "[ \n\t]+")
            (setq
-            parser-generator-lex-analyzer--move-to-index-flag
+            move-to-index-flag
             (match-end 0)))
           ((looking-at "\\(\".+\"\\)")
            (setq
@@ -1269,8 +1277,8 @@
             )
            )
           )
-         token
-         ))))
+         (list token move-to-index-flag (match-end 0) nil)))))
+
   (let ((buffer (generate-new-buffer "*PHP8.0*")))
     (with-current-buffer buffer
       (kill-region (point-min) (point-max))
@@ -1528,13 +1536,14 @@
     ;; Setup lex-analyzer
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (when (<= (+ index 1) (point-max))
            (let ((start index)
                  (end (+ index 1)))
              (let ((token (buffer-substring-no-properties start end)))
-               `(,token ,start . ,end)))))))
+               (list `(,token ,start . ,end) nil end nil)))))))
+
     (setq
      parser-generator-lex-analyzer--get-function
      (lambda (token)
@@ -1589,13 +1598,13 @@
     ;; Setup lex-analyzer
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (when (<= (+ index 1) (point-max))
            (let ((start index)
                  (end (+ index 1)))
              (let ((token (buffer-substring-no-properties start end)))
-               `(,token ,start . ,end)))))))
+               (list `(,token ,start . ,end) nil end nil)))))))
     (setq
      parser-generator-lex-analyzer--get-function
      (lambda (token)
@@ -1802,13 +1811,14 @@
     ;; Setup lex-analyzer
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (when (<= (+ index 1) (point-max))
            (let ((start index)
                  (end (+ index 1)))
              (let ((token (buffer-substring-no-properties start end)))
-               `(,token ,start . ,end)))))))
+               (list `(,token ,start . ,end) nil end nil)))))))
+
     (setq
      parser-generator-lex-analyzer--get-function
      (lambda (token)
@@ -1865,13 +1875,14 @@
     ;; Setup lex-analyzer
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (when (< index (point-max))
            (let ((start index)
                  (end (+ index 1)))
              (let ((token (buffer-substring-no-properties start end)))
-               `(,token ,start . ,end)))))))
+               (list `(,token ,start . ,end) nil end nil)))))))
+
     (setq
      parser-generator-lex-analyzer--get-function
      (lambda (token)
@@ -1918,13 +1929,13 @@
 
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (when (<= (+ index 1) (point-max))
            (let ((start index)
                  (end (+ index 1)))
              (let ((token (buffer-substring-no-properties start end)))
-               `(,token ,start . ,end)))))))
+               (list `(,token ,start . ,end) nil end nil)))))))
 
     (setq
      parser-generator-lex-analyzer--get-function
@@ -1965,7 +1976,7 @@
 
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer buffer
          (unless (>= index (point-max))
            (goto-char index)
@@ -1991,7 +2002,7 @@
               ((looking-at "[a-zA-Z]+")
                (setq token `(VARIABLE ,(match-beginning 0) . ,(match-end 0))))
               (t (error "Invalid syntax! Could not lex-analyze at %s!" 
(point))))
-             token)))))
+             (list token nil (match-end 0) nil))))))
 
     (setq
      parser-generator-lex-analyzer--get-function
@@ -2068,7 +2079,7 @@
 
     (setq
      parser-generator-lex-analyzer--function
-     (lambda (index)
+     (lambda (index _state)
        (with-current-buffer "*a*"
          (unless (>= index (point-max))
            (goto-char index)
@@ -2092,7 +2103,7 @@
               ((looking-at "[a-zA-Z]+")
                (setq token `(VARIABLE ,(match-beginning 0) . ,(match-end 0))))
               (t (error "Invalid syntax! Could not lex-analyze at %s!" 
(point))))
-             token)))))
+             (list token nil (match-end 0) nil))))))
 
     (setq
      parser-generator-lex-analyzer--get-function
@@ -2112,8 +2123,8 @@
 
 (defun parser-generator-lr-test ()
   "Run test."
-  ;; (setq debug-on-error nil)
-  ;; (setq debug-on-signal nil)
+  (setq debug-on-error nil)
+  (setq debug-on-signal nil)
 
   (parser-generator-lr-test--items-for-prefix)
   (parser-generator-lr-test--items-valid-p)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]