gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnunet-scheme] 103/324: config: parser: parse ${variable} expansions.


From: gnunet
Subject: [gnunet-scheme] 103/324: config: parser: parse ${variable} expansions.
Date: Tue, 21 Sep 2021 13:22:23 +0200

This is an automated email from the git hooks/post-receive script.

maxime-devos pushed a commit to branch master
in repository gnunet-scheme.

commit e415fff50b5898ce16837d61b918c07a1e25eb93
Author: Maxime Devos <maximedevos@telenet.be>
AuthorDate: Tue Mar 30 19:04:25 2021 +0200

    config: parser: parse ${variable} expansions.
    
    TODO: there are some edge cases like ${{} that perhaps should
    be parsed differently.
    
    * gnu/gnunet/config/parser.scm
      (make-${}-position): Correct variable name and verify the list
      argument actually is a list.
      (make-empty-variable-violation): Correct record construction.
      (%make-missing-close-violation?): Rename to ...
      (make-missing-close-violation): .. this.
      (cs::-or-close, cs:$-or-close, cs:unbraced-end/nested)
      (cs:unbraced-end): Define two character sets to use in the new
      procedure.
      (parse-expandable*): Define procedure.
      (parse-expandable): Define a variant of 'parse-expandable'.
---
 gnu/gnunet/config/parser.scm | 166 +++++++++++++++++++++--
 tests/config-parser.scm      | 313 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 468 insertions(+), 11 deletions(-)

diff --git a/gnu/gnunet/config/parser.scm b/gnu/gnunet/config/parser.scm
index 5ee088e..d763737 100644
--- a/gnu/gnunet/config/parser.scm
+++ b/gnu/gnunet/config/parser.scm
@@ -56,20 +56,21 @@
          expansion-violation? empty-variable-violation? 
missing-close-violation?
          expansion-violation-position empty-variable-kind missing-close-kind
 
-         ;; TODO: the parser!
-         )
+         parse-expandable* parse-expandable)
   (import (only (guile)
-               eval-when quote
+               eval-when quote char-set
                char-set:whitespace
                string-index
                string-skip string-skip-right string-prefix?)
          (only (rnrs base)
                begin define lambda define-syntax syntax-rules ...
                assert or + - if char=? not and exact? integer?
-               < <= =
+               < <= = cons values reverse pair? null?
                string-length string-ref)
          (only (rnrs control)
                unless)
+         (only (rnrs exceptions)
+               raise)
          (only (rnrs records syntactic)
                define-record-type)
          (only (rnrs conditions)
@@ -304,10 +305,12 @@ expansion position objects, representing the structure of 
@samp{DEFAULT-VALUE}
                        (<= 0 #{${:-}-name-start}#)
                        (< #{${:-}-name-start}# #{${:-}-name-end}#)
                        (= (- #{${:-}-value-start}# #{${:-}-name-end}#) 2)
-                       (<= #{${:-}-value-start}# #{${:-}-value-end}#)))
-          (%make (#{${:-}-name-start}# #{${:-}-name-end}#
-                  #{${:-}-value-start}# #{${:-}-value-end}#
-                  #{${:-}-value-parts}#))))))
+                       (<= #{${:-}-value-start}# #{${:-}-value-end}#)
+                       (or (pair? #{${:-}-value-parts}#)
+                           (null? #{${:-}-value-parts}#))))
+          (%make #{${:-}-name-start}# #{${:-}-name-end}#
+                 #{${:-}-value-start}# #{${:-}-value-end}#
+                 #{${:-}-value-parts}#)))))
 
     ;; Now define the possible syntax errors.
     (define-condition-type &expansion-violation &lexical
@@ -332,10 +335,10 @@ default and @code{$} for unbraced variable expansions."
       (assert (and (exact-integers? position)
                   (<= 0 position)
                   (memq kind '($ #{${}}# #{${:-}}#))))
-      (%make-empty-variable-violation position position kind))
+      (%make-empty-variable-violation position kind))
 
     (define-condition-type &missing-close &expansion-violation
-      %make-missing-close-violation? missing-close-violation?
+      %make-missing-close-violation missing-close-violation?
       ;; ${} or ${:-}
       (kind missing-close-kind))
 
@@ -347,4 +350,145 @@ indicates the type of variable expansion found, as in
       (assert (and (exact-integers? position)
                   (<= 0 position)
                   (memq kind '(#{${}}# #{${:-}}#))))
-      (%make-empty-variable-violation position position kind))))
+      (%make-missing-close-violation position kind))
+
+    (define cs::-or-close (char-set #\: #\}))
+    (define cs:$-or-close (char-set #\$ #\}))
+    ;; TODO: should #\0 be included?  It seems to be
+    ;; ‘merely’ an artifact of the C implementation.
+    (define cs:unbraced-end/nested (char-set #\/ #\\ #\0 #\ #\}))
+    (define cs:unbraced-end (char-set #\/ #\\ #\0 #\ ))
+
+    (define (parse-expandable* text start end nested?)
+      "Search in @var{text} for variable references to expand, returning
+a list of expansible position object and the end position (exclusive,
+does not include closing brace).
+
+Alternatively, raise an @code{&expansion-violation}.  If @var{nested?}
+is trueish, stop at (and expect) an unbalanced close brace.
+If @var{nested?} is Scheme-trueish, it is used as the ‘kind’ argument for
+@code{&expansion-violation}.
+
+(In the current parser, in practice this will be @code{#f} or @code{@{:-@}},
+but perhaps the syntax will be extended in the future.)
+
+TODO: there currently is not a dedicated condition type for ${a:} and ${a:+}
+(in the first, a - after the : is missing, and in the second, + is
+invalid).
+
+If @var{nested?} is Scheme-falsish, then the second return value is simply
+@var{end} itself."
+      (assert (and (exact-integers? start end)
+                  (<= 0 start)
+                  (<= start end)
+                  (<= end (string-length text))))
+      (let^ ((/o/ loop
+                 ;; in reverse chronological order
+                 (accumulated '())
+                 ;; where to start searching for the next expansion object
+                 (start start))
+            ;; Search for a $ to expand (or a closing brace to stop at,
+            ;; when nested/recursing).
+            (! dollar-close (string-index text (if nested?
+                                                   cs:$-or-close
+                                                   #\$) start end))
+            ;; Add the literal region of text to @var{accumulated}
+            ;; (unless it is empty).
+            (! accumulated
+               (if (or (= start (or dollar-close end)))
+                   accumulated
+                   (cons (make-literal-position start (or dollar-close end))
+                         accumulated)))
+            ;; No #\$ and we're not nested/recursing?
+            ;; Then we're done.
+            (? (and (not nested?) (not dollar-close))
+               (values (reverse accumulated) end))
+            ;; No #\$ or #\}, but we're nested/recursing?  Then
+            ;; we're missing a close brace.
+            (? (and nested? (not dollar-close))
+               (raise (make-missing-close-violation end nested?)))
+            ;; (@var{dollar-close} is trueish now)
+            ;; Did we find a closing brace when nested?
+            ;; Then we're done
+            (? (and nested? dollar-close
+                    (char=? #\} (string-ref text dollar-close)))
+               (values (reverse accumulated) dollar-close))
+            ;; The character at @var{dollar-close} is a dollar now.
+            (! dollar dollar-close)
+            ;; Empty variable names are not allowed.
+            (? (= (+ 1 dollar) end)
+               ;; passing @var{kind} here would be incorrect!
+               (raise (make-empty-variable-violation (+ 1 dollar) '$)))
+            (! next-character (string-ref text (+ 1 dollar)))
+            ;; Is this an braced variable expansion?
+            (? (char=? next-character #\{)
+               (let^ ((! name-start (+ 2 dollar))
+                      ;; Then search for a closing }
+                      ;; or the : in ${VAR:-DEFAULT}.
+                      (! name-end (string-index text cs::-or-close
+                                                name-start end))
+                      ;; There should eventually be at least
+                      ;; a closing }.
+                      (? (not name-end)
+                         (raise (make-missing-close-violation end '#{${}}#)))
+                      (! name-end-character
+                         (string-ref text name-end))
+                      ;; Empty variable names are not allowed.
+                      (? (= name-start name-end)
+                         (raise (make-empty-variable-violation
+                                 name-end
+                                 (if (char=? name-end-character #\:)
+                                     '#{${:-}}#
+                                     '#{${}}#))))
+                      ;; Was this ${NAME}?
+                      (? (char=? name-end-character #\})
+                         ;; Then add it to @var{accumulated} and
+                         ;; continue.
+                         (loop (cons (#{make-${}-position}# name-start 
name-end)
+                                     accumulated)
+                               (+ 1 name-end)))
+                      ;; Otherwise, it was ${NAME:-VALUE}.
+                      ;; But verify - exists.
+                      (? (not (and (< (+ 1 name-end) end)
+                                   (char=? (string-ref text (+ 1 name-end))
+                                           #\-)))
+                         ;; TODO a more specific condition would be nice.
+                         (raise (%make-expansion-violation (+ 1 name-end))))
+                      (! value-start (+ 2 name-end))
+                      ;; Now parse VALUE in ${NAME:-VALUE}.
+                      ;;
+                      ;; This procedure call will verify a close
+                      ;; brace at @var{default-end} exist.
+                      (<-- (value-parts value-end)
+                           (parse-expandable* text value-start end '#{${:-}}#))
+                      ;; This was violated at some draft of this procedure.
+                      ;; Verify it is fixed.
+                      (!! (or (pair? value-parts)
+                              (null? value-parts)))
+                      ;; So the following should be true.
+                      ;; (Not related to previous comment.)
+                      (!! (char=? #\} (string-ref text value-end))))
+                     ;; Add the variable expansion to @var{accumulated}
+                     ;; and continue.
+                     (loop (cons (#{make-${:-}-position}# name-start name-end
+                                  value-start value-end value-parts)
+                                 accumulated)
+                           ;; + 1: eat the closing brace.
+                           (+ 1 value-end))))
+            ;; Then it is an unbraced $VARIABLE expansion.
+            (! name-start (+ 1 dollar))
+            (! name-end (string-index text (if nested?
+                                               cs:unbraced-end/nested
+                                               cs:unbraced-end)
+                                      name-start end))
+            (! name-end (or name-end end))
+            ;; Empty variable names are not allowed.
+            (? (= name-start name-end)
+               (raise (make-empty-variable-violation name-end '$))))
+           ;; Add the variable to @var{accumulated} and continue.
+           (loop (cons (make-$-position name-start name-end) accumulated)
+                 name-end)))
+
+    (define (parse-expandable text)
+      (parse-expandable* text 0 (string-length text) #f))))
+
diff --git a/tests/config-parser.scm b/tests/config-parser.scm
index 6279523..0e0b817 100644
--- a/tests/config-parser.scm
+++ b/tests/config-parser.scm
@@ -22,6 +22,8 @@
             (quickcheck arbitrary)
             (quickcheck property)
             ((rnrs conditions) #:select (&assertion))
+            (ice-9 match)
+            (srfi srfi-8)
             (srfi srfi-26))
 
 ;; Test the line parser on some valid inputs.
@@ -253,6 +255,317 @@ in-bounds for the string @var{line}."
        (false-if-assertion
        (in-bounds? line (parse-line line)))))))
 
+
+
+;; Test the position-preserving variable substitution parser.
+;; First verify some properties on random data.
+
+(configure-quickcheck
+ ;; Increase this when testing changes.
+ (stop? (lambda (success-count _)
+         (>= success-count 2048 #;000 success-count)))
+ ;; Large inputs don't produce much additional value.
+ (size (lambda (test-number)
+        (if (zero? test-number)
+            0
+            (min 6 (1+ (inexact->exact (floor/ (log test-number) (log 
4)))))))))
+
+(define (expo:start expo)
+  "Given a position object, return the starting position of
+the region of text it covers."
+  (cond ((#{${:-}-position?}# expo)
+        ;; - 2: remove the ${ in ${VAR:-DEFAULT}
+        (- (#{expo:${:-}-name-start}# expo) 2))
+       ((#{${}-position?}# expo)
+        ;; - 2: remove the ${ in ${VAR}
+        (- (#{expo:${}-name-start}# expo) 2))
+       (($-position? expo)
+        ;; - 1: remove the $ in $VAR
+        (- (expo:$-name-start expo) 1))
+       ((literal-position? expo)
+        (expo:literal-start expo))))
+
+(define (expo:end expo)
+  "Given a position object, return the end position (exclusive) of
+the region of text it covers."
+  (cond ((#{${:-}-position?}# expo)
+        ;; + 1: add the } in ${VAR:-DEFAULT}
+        (+ 1 (#{expo:${:-}-value-end}# expo) 1))
+       ((#{${}-position?}# expo)
+        ;; + 1: add the } in ${VAR}
+        (+ (#{expo:${}-name-end}# expo) 1))
+       (($-position? expo)
+        (expo:$-name-end expo))
+       ((literal-position? expo)
+        (expo:literal-end expo))))
+
+(define (expo:contiguous? expos)
+  "Is the list expansion position objects @var{expos} contiguous?
+If so, return the last object in @var{expos}.  Otherwise, return
+@code{#f}."
+  (define (internally-contiguous? x)
+    (cond ((#{${:-}-position?}# x)
+          (let ((parts (#{expo:${:-}-value-parts}# x)))
+            (if (null? parts)
+                x
+                (expo:contiguous? parts))))
+         ((#{${}-position?}# x) #t)
+         (($-position? x) #t)
+         ((literal-position? x) #t)
+         (#t (error "what is this madness?"))))
+  (match expos
+    (() #t)
+    ((x) (internally-contiguous? x))
+    ((x y . rest)
+     (and (= (expo:end x) (expo:start y))
+         (internally-contiguous? x)
+         (expo:contiguous? (cdr expos))))))
+
+(define $interesting-char/expo
+  (char-set->arbitrary (string->char-set "${:-}ab")))
+(define-syntax-rule ($choose-with-eq? x ...)
+  ($choose ((cute eq? x) ($const x)) ...))
+(define $nested ($choose-with-eq? #f '#{${}}# '#{${:-}}#))
+
+(define-syntax-rule (true-if-parse-error exp exp* ...)
+  (with-exception-handler
+      (lambda (e) #t)
+    (lambda () exp exp* ...)
+    #:unwind? #t
+    #:unwind-for-type &expansion-violation))
+
+(define $text-and-range
+  (arbitrary
+   (gen
+    (sized-generator
+     (lambda (size)
+       (generator-let* ((text-length (choose-integer 0 size))
+                       (text (choose-string
+                              (arbitrary-gen $interesting-char/expo)
+                              text-length))
+                       (start (choose-integer 0 text-length))
+                       (end (choose-integer start text-length)))
+                      (generator-return (list text start end))))))
+   (xform #f)))
+
+;; Unfortunatly, these QuickCheck tests do not reach all lines
+;; of the procedure in practice.  TODO: write a fuzzer for Guile.
+;;
+;; (Should be feasible using the tracing framework.)
+(test-assert "expansion parser does not crash"
+  (quickcheck
+   (property ((text-and-range $text-and-range)
+             (nested? $nested))
+     (match text-and-range
+       ((text start end)
+       (false-if-assertion
+        (true-if-parse-error
+         (parse-expandable* text start end nested?)
+         #t)))))))
+
+(test-assert "expansion position objects are contiguous"
+  (quickcheck
+   (property ((text-and-range $text-and-range)
+             (nested? $nested))
+     (match text-and-range
+       ((text start end)
+       (true-if-parse-error
+        (receive (expos end)
+            (parse-expandable* text start end nested?)
+          (expo:contiguous? expos))))))))
+
+(define (maybe-parse text start end nested?)
+  "Try to parse the range @var{start} to @var{end} of @var{text}.
+Return a structure that can be compares with @code{equal?} and
+is invariant under translations."
+  (with-exception-handler
+      (lambda (e)
+       (cond ((empty-variable-violation? e)
+              `(empty-variable-violation
+                ,(- (expansion-violation-position e) start)
+                ,(empty-variable-kind e)))
+             ((missing-close-violation? e)
+              `(missing-close-violation
+                ,(- (expansion-violation-position e) start)
+                ,(missing-close-kind e)))
+             ;; See the TODO in parse-expandable*.
+             (#t
+              `(todo
+                ,(- (expansion-violation-position e) start)))))
+    (lambda ()
+      (receive (expandibles end)
+         (parse-expandable* text start end nested?)
+       (cons (map (cute expansible->sexp <> start) expandibles)
+             (- end start))))
+    #:unwind? #t
+    #:unwind-for-type &expansion-violation))
+
+(define (expansible->sexp pos start)
+  (cond ((literal-position? pos)
+        `(literal ,(- (expo:literal-start pos) start)
+                  ,(- (expo:literal-end pos) start)))
+       (($-position? pos)
+        `($ ,(- (expo:$-name-start pos) start)
+            ,(- (expo:$-name-end pos) start)))
+       ((#{${}-position?}# pos)
+        `(#{${}}#
+          ,(- (#{expo:${}-name-start}# pos) start)
+          ,(- (#{expo:${}-name-end}# pos) start)))
+       ;; HACK: work-around buggy Emacs parenthesis
+       ;; matching detection.
+       ((#{${:-}-position?}# pos)
+        `(,(string->symbol "${:-}")
+          ,(- (#{expo:${:-}-name-start}# pos) start)
+          ,(- (#{expo:${:-}-name-end}# pos) start)
+          ,(- (#{expo:${:-}-value-start}# pos) start)
+          ,(- (#{expo:${:-}-value-end}# pos) start)
+          ,(map (cute expansible->sexp <> start)
+                (#{expo:${:-}-value-parts}# pos))))))
+
+(test-assert "start and end are respected"
+  (quickcheck
+   (property ((text-and-range $text-and-range)
+             (nested? $nested))
+     (match text-and-range
+       ((text start end)
+       (equal? (maybe-parse text start end nested?)
+               (maybe-parse (substring text start end)
+                            0 (- end start) nested?)))))))
+
+
+;; Now plenty of failure cases.
+
+;; Expand an expansion error @code{c} conforming to
+;; @code{cond}.
+(define-syntax-rule (test-expansion-error (name nested?) (c text) cond?)
+  (test-assert name
+    (with-exception-handler (lambda (c) cond?)
+      (lambda () (parse-expandable* text 0 (string-length text) nested?))
+      #:unwind? #t
+      #:unwind-for-type &expansion-violation)))
+
+;; Test unbraced variable expansion, unnested.
+
+(test-expansion-error ("$ + delimiter" #f)
+  (c "$/")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 1)))
+
+(test-expansion-error ("$ + delimiter + more" #f)
+  (c "$/more")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 1)))
+
+(test-expansion-error ("more + $ + delimiter" #f)
+  (c "more$/")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 5)))
+
+(test-expansion-error ("$ + end of string" #f)
+  (c "$")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 1)))
+
+(test-expansion-error ("more + $ + end of string" #f)
+  (c "more$")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 5)))
+
+;; Test unbraced variable expansion, nested.
+(test-expansion-error ("$ + }, nested" '#{${:-}}#)
+  (c "$}")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 1)))
+(test-expansion-error ("$ + } + delimiter, nested" '#{${:-}}#)
+  ;; don't interpret this as the variable } expanded
+  ;; folowed by a slash!
+  (c "$}/")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '$)
+       (= (expansion-violation-position c) 1)))
+
+;; Test braced variables, unnested & some nesting
+(test-expansion-error ("empty braced variable" #f)
+  (c "${}")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '#{${}}#)
+       (= (expansion-violation-position c) 2)))
+(test-expansion-error ("empty braced variable with empty default" #f)
+  (c "${:-}")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '#{${:-}}#)
+       (= (expansion-violation-position c) 2)))
+(test-expansion-error ("empty braced variable with nonempty default" #f)
+  (c "${:-def}")
+  (and (empty-variable-violation? c)
+       (eq? (empty-variable-kind c) '#{${:-}}#)
+       (= (expansion-violation-position c) 2)))
+(test-expansion-error ("unclosed braced variable" #f)
+  (c "${")
+  (and (missing-close-violation? c)
+       (eq? (missing-close-kind c) '#{${}}#)
+       (= (expansion-violation-position c) 2)))
+(test-expansion-error ("unclosed braced variable with text" #f)
+  (c "${text")
+  (and (missing-close-violation? c)
+       (eq? (missing-close-kind c) '#{${}}#)
+       (= (expansion-violation-position c) 6)))
+(test-expansion-error ("unclosed braced variable with default" #f)
+  (c "${text:-default")
+  (and (missing-close-violation? c)
+       (eq? (missing-close-kind c) '#{${:-}}#)
+       (= (expansion-violation-position c) 15)))
+(test-expansion-error ("unclosed braced variable and weird character after -" 
#f)
+  (c "${text:@") ; <-- allowed in upstream
+  (and (expansion-violation? c)
+       (= (expansion-violation-position c) 7)))
+
+
+
+;; Now some success cases.
+(define-syntax-rule (test-expansion text expected ...)
+  (test-equal text
+    (map (cute expansible->sexp <> 0)
+        (list expected ...))
+    (match (maybe-parse text 0 (string-length text) #f)
+      ((x . y) x)
+      (z (cons 'what-is-this-madness z)))))
+
+(test-expansion "$TMP" (make-$-position 1 4))
+(test-expansion "$TMP/gnunet_arm.sock"
+               (make-$-position 1 4)
+               (make-literal-position 4 20))
+(test-expansion "${TMP}" (#{make-${}-position}# 2 5))
+(test-expansion "${TMP}/gnunet_arm.sock"
+               (#{make-${}-position}# 2 5)
+               (make-literal-position 6 22))
+(test-expansion "${TMP:-/tmp}"
+               (#{make-${:-}-position}# 2 5 7 11
+                (list (make-literal-position 7 11))))
+(test-expansion "${TMP:-/tmp}/gnunet_arm.sock"
+               (#{make-${:-}-position}# 2 5 7 11
+                (list (make-literal-position 7 11)))
+               (make-literal-position 12 28))
+(test-expansion "some ${STUFF:-${TMP:-/tmp}/etc$etera}/other"
+               (make-literal-position 0 5)
+               (#{make-${:-}-position}# 7 12 14 36
+                (list (#{make-${:-}-position}# 16 19 21 25
+                       (list (make-literal-position 21 25)))
+                      (make-literal-position 26 30)
+                      (make-$-position 31 36)))
+               (make-literal-position 37 43))
+
+;; TODO: what should ${{} be parsed as?
+;; As ${} } or as the braced variable expansion with name
+;; {?
+
 ;;; Local Variables:
 ;;; eval: (put 'property 'scheme-indent-function 1)
+;;; eval: (put 'test-expansion-error 'scheme-indent-function 1)
 ;;; End:

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]