gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnunet-scheme] 99/324: config: Parse configuration lines into one of se


From: gnunet
Subject: [gnunet-scheme] 99/324: config: Parse configuration lines into one of several types.
Date: Tue, 21 Sep 2021 13:22:19 +0200

This is an automated email from the git hooks/post-receive script.

maxime-devos pushed a commit to branch master
in repository gnunet-scheme.

commit bc0ded1878bc220efb389e4ad30dec7573b98757
Author: Maxime Devos <maximedevos@telenet.be>
AuthorDate: Thu Mar 25 14:34:21 2021 +0100

    config: Parse configuration lines into one of several types.
    
    * README.org (Modules): Note the existence of the new module.
    * Makefile.am
      (modules): Compile the line position parser.
      (SCM_TESTS): Run the test cases.
    * gnu/gnunet/config/parser.scm
      (parse-line, <position:%>, <position:#>, <position:=>)
      (<position:[]>, <position:@inline@>): Define a parser for
      dissecting individual lines.
    * tests/config-parser.scm: Test the line parser.
---
 Makefile.am                  |   4 +-
 README.org                   |   3 +
 gnu/gnunet/config/parser.scm | 224 +++++++++++++++++++++++++++++++++++++
 tests/config-parser.scm      | 257 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 487 insertions(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index ea62015..79a3187 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -47,6 +47,7 @@ modules = \
   gnu/gnunet/utils/hat-let.scm \
   gnu/gnunet/utils/netstruct.scm \
   gnu/gnunet/utils/platform-enum.scm \
+  gnu/gnunet/config/parser.scm \
   \
   gnu/gnunet/util/cmsg.scm \
   gnu/gnunet/icmp/struct.scm \
@@ -96,7 +97,8 @@ SCM_TESTS = \
   tests/update.scm \
   tests/message-io.scm \
   tests/bv-slice.scm \
-  tests/cmsg.scm
+  tests/cmsg.scm \
+  tests/config-parser.scm
 
 SCM_TESTS_ENVIRONMENT = \
   GUILE_AUTO_COMPILE=0 \
diff --git a/README.org b/README.org
index f67d273..1c504ee 100644
--- a/README.org
+++ b/README.org
@@ -73,6 +73,9 @@
      fibers channels and for messages.
    + TODO actual queues?  Maybe we don't need them?
    + TODO filling the queues
+** Configuration
+   + gnu/gnunet/config/parser.scm: Parse configuration files.
+   TODO: writing, modifying, querying ...
 ** Network structures
    Features:
 
diff --git a/gnu/gnunet/config/parser.scm b/gnu/gnunet/config/parser.scm
new file mode 100644
index 0000000..51fe4d3
--- /dev/null
+++ b/gnu/gnunet/config/parser.scm
@@ -0,0 +1,224 @@
+;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
+;;   Copyright (C) 2021 Maxime Devos <maximedevos@telenet.be>
+;;   Copyright (C) 2006, 2007, 2008, 2009, 2013, 2020 GNUnet e.V.
+;;
+;;   scheme-GNUnet is free software: you can redistribute it and/or modify it
+;;   under the terms of the GNU Affero General Public License as published
+;;   by the Free Software Foundation, either version 3 of the License,
+;;   or (at your option) any later version.
+;;
+;;   scheme-GNUnet is distributed in the hope that it will be useful, but
+;;   WITHOUT ANY WARRANTY; without even the implied warranty of
+;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;   Affero General Public License for more details.
+;;
+;;   You should have received a copy of the GNU Affero General Public License
+;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;
+;;   SPDX-License-Identifier: AGPL-3.0-or-later
+
+;; Author: Christian Grothoff (upstream, C)
+;; Author: Maxime Devos (downstream, Scheme)
+;; Brief: parse GNUnet configuration files.
+
+;; TODO: unquoting, expanding variable references
+(define-library (gnu gnunet config parser)
+  (export parse-line
+         <position:%> make-%-position %-position?
+         position:%
+         <position:#> make-#-position #{#-position?}#
+         position:#
+         <position:=> make-=-position =-position?
+         position:variable-start position:variable-end
+         position:= position:value-start position:value-end
+         #{<position:[]>}# #{make-[]-position}# #{[]-position?}#
+         position:section-name-start position:section-name-end
+         <position:@inline@> make-@inline@-position @inline@-position?
+         position:@inline@-start position:@inline@-end
+         position:@inline@-filename-start position:@inline@-filename-end)
+  (import (only (guile)
+               eval-when quote
+               char-set:whitespace
+               string-index
+               string-skip string-skip-right string-prefix?)
+         (only (rnrs base)
+               begin define lambda define-syntax syntax-rules ...
+               assert or + - if char=? not and exact? integer?
+               < <=
+               string-length string-ref)
+         (only (rnrs control)
+               unless)
+         (only (rnrs records syntactic)
+               define-record-type)
+         (only (gnu gnunet utils hat-let)
+               let^))
+  (begin
+    
+    ;; * The position-preserving line parser
+    ;;
+    ;; This parser operates on a per-line basis without any state.
+    ;; It does not directly return configuration values.  Rather,
+    ;; it returns the start and end positions.
+
+    ;; Divergence from upstream GNUnet:
+    ;; upstream only recognises #\newline, #\return and #\tab,
+    ;; while this includes other Unicode whitespace as well.
+    ;; Maybe we shouldn't.
+    (define whitespace char-set:whitespace)
+
+    ;; The output record types of @code{parse-line}.
+
+    ;; Only defining this at expansion time halves the number
+    ;; of output lines of "guild disassemble".
+    (eval-when (expand)
+      (define-syntax exact-integers?
+       (syntax-rules ()
+         ((_ x ...)
+          (and (and (integer? x) (exact? x))
+               ...))))
+      (define-syntax define-positions-type
+        (syntax-rules ()
+         ((_ (<positions:type> make-type-positions type-positions?)
+             ((ascending-position-field accessor) ...)
+             (additional-restriction ...)
+             docstring)
+          (define-record-type
+              (<positions:type> make-type-positions type-positions?)
+            (fields (immutable ascending-position-field accessor) ...)
+            (opaque #t)
+            (sealed #t)
+            (protocol
+             (lambda (%make)
+               docstring
+               (lambda (ascending-position-field ...)
+                 (assert (and (exact-integers? ascending-position-field ...)
+                              (<= 0 ascending-position-field ...)
+                              additional-restriction ...))
+                 (%make ascending-position-field ...)))))))))
+
+    (define-positions-type (<position:%> make-%-position %-position?)
+      ((% position:%))
+      ()
+      "@var{%} is the position of the @code{#\\%} comment character in
+a comment.")
+
+    (define-positions-type (<position:#> make-#-position #{#-position?}#)
+      ((#{#}# position:#))
+      ()
+      "@var{#} is the position of the @code{#\\#} comment character in
+a comment.")
+
+    (define-positions-type (<position:=> make-=-position =-position?)
+      ((variable-start position:variable-start)
+       (variable-end position:variable-end)
+       (= position:=)
+       (value-start position:value-start)
+       (value-end position:value-end))
+      ;; TODO: should empty variable names be allowed?
+      ((< variable-start variable-end)
+       (<= = value-start))
+      "@var{variable-start} (inclusive) and @var{variable-end} (exclusive) are
+the start and end positions of the variable name in an assignment.  @var{=} is
+the position of the equality sign.  @var{value-start} (inclusive) and
+@var{value-end} (exclusive) are the start and end positions of the value.
+
+If the value is empty, then by convention @var{variable-start} and
+@var{variable-end} are the positions right after the equality sign.")
+
+    (define-positions-type
+      (#{<position:[]>}# #{make-[]-position}# #{[]-position?}#)
+      ((section-name-start position:section-name-start)
+       (section-name-end position:section-name-end))
+      ;; TODO: should empty section names be allowed?
+      ;; Also, maybe impose some restrictions on names?
+      ;; (Likewise for variable names)
+      ()
+      "@var{section-name-start} (inclusive) and @var{section-name-end}
+(exclusive) are the start and end positions of a section name.")
+
+    (define-positions-type (<position:@inline@> make-@inline@-position
+                                               @inline@-position?)
+      ((@inline@-start position:@inline@-start)
+       (@inline@-end position:@inline@-end))
+      ;; TODO: should empty file names be allowed?
+      ;; If so, change < to <=.
+      ((< (string-length "@INLINE@ ") (- @inline@-end @inline@-start)))
+      "@var{@inline@-start} (inclusive) and @var{@inline@-end} (exclusive)
+are the start and end positions of an inclusion directive.")
+
+    (define (position:@inline@-filename-start position)
+      "The start position (inclusive) of the file name of the inclusion
+directive described by @var{filename}."
+      (+ (position:@inline@-start position)
+        (string-length "@INLINE@ ")))
+
+    ;; The end position (exclusive) of the file name.
+    (define position:@inline@-filename-end position:@inline@-end)
+
+    (define (parse-line line)
+      "Parse a single line @var{line} (without the end of line characters)
+from a GNUnet configuration file, into one of its possible types.
+
+@begin itemize
+@item The boolean @code{#false} if @var{line} is not recognised.
+@item The boolean @code{#true} if @var{line} is an empty line.
+@item A @code{<position:%>} or @code{<position:#>} for comment lines
+ started with @code{#\\%} and @code{#\\#} respectively.
+@item A @code{<position:=>} for variable assignements.
+@item A @code{<position:[]>} for section names.
+@item A @code{<position:@inline@>} for inclusion directives.
+@end itemize
+
+Other syntax may be supported in the future, in which case other data
+of other types may be returned."
+      ;; Ignore leading whitespace.
+      (let^ ((! start-inclusive (string-skip line whitespace))
+            ;; Did the line consist of only whitespace?
+            ;; Then stop.
+            (? (not start-inclusive) #true)
+            (! first-important-character
+               (string-ref line start-inclusive))
+            ;; Is this a comment?  Then stop.
+            (? (char=? first-important-character #\#)
+               (make-#-position start-inclusive))
+            (? (char=? first-important-character #\%)
+               (make-%-position start-inclusive))
+            ;; Ignore trailing whitespace.
+            (! end-inclusive
+               (string-skip-right line whitespace start-inclusive))
+            (!! end-inclusive)
+            ;; Is this a section name?  Then stop.
+            (? (and (char=? #\[ first-important-character)
+                    (char=? #\] (string-ref line end-inclusive)))
+               (#{make-[]-position}# (+ 1 start-inclusive) end-inclusive))
+            ;; Is this an inclusion directive?  Then stop.
+            ;; TODO upstream GNUnet compares case-insensitively.
+            ;; Is this a bug or a feature?
+            (? (and (char=? #\@ first-important-character)
+                    (string-prefix? "@INLINE@ " line 1
+                                    (string-length "@INLINE@ ")
+                                    ;; XXX what if the file name is empty?
+                                    (+ 1 start-inclusive) (+ 1 end-inclusive)))
+               (make-@inline@-position start-inclusive
+                                       (+ 1 end-inclusive)))
+            ;; Maybe this is an assignment; search for the equality
+            ;; sign.
+            (! =-position (string-index line #\= start-inclusive
+                                        (+ 1 end-inclusive)))
+            ;; no clue!
+            (? (not =-position) #f)
+            ;; Remove trailing whitespace from the variable name
+            ;; (the ‘tag’).
+            (! variable-end-inclusive (string-skip-right line whitespace
+                                                         start-inclusive
+                                                         =-position))
+            ;; TODO should empty tags by allowed?
+            ;; Bail out if the variable name consists of only whitespace.
+            (? (not variable-end-inclusive) #f)
+            (! variable-end (+ 1 variable-end-inclusive))
+            ;; Remove whitespace from the variable value.
+            (! value-start (string-skip line whitespace (+ 1 =-position)
+                                        (+ 1 end-inclusive)))
+            (! value-start (or value-start (+ 1 end-inclusive))))
+           (make-=-position start-inclusive variable-end
+                            =-position value-start (+ 1 end-inclusive))))))
diff --git a/tests/config-parser.scm b/tests/config-parser.scm
new file mode 100644
index 0000000..7c11420
--- /dev/null
+++ b/tests/config-parser.scm
@@ -0,0 +1,257 @@
+;; This file is part of scheme-GNUnet.
+;; Copyright (C) 2021 Maxime Devos
+;;
+;; scheme-GNUnet is free software: you can redistribute it and/or modify it
+;; under the terms of the GNU Affero General Public License as published
+;; by the Free Software Foundation, either version 3 of the License,
+;; or (at your option) any later version.
+;;
+;; scheme-GNUnet is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; Affero General Public License for more details.
+;;
+;; You should have received a copy of the GNU Affero General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;
+;; SPDX-License-Identifier: AGPL3.0-or-later
+
+(use-modules (gnu gnunet config parser)
+            (quickcheck)
+            (quickcheck generator)
+            (quickcheck arbitrary)
+            (quickcheck property)
+            ((rnrs conditions) #:select (&assertion))
+            (srfi srfi-26))
+
+;; Test the line parser on some valid inputs.
+(define-syntax-rule (cond/pos (x y) (pred? accessor ...) ...)
+  (cond ((and (pred? x) (pred? y))
+        (and (= (accessor x) (accessor y)) ...))
+       ...
+       ((and (or (pred? x) ...)
+             (or (pred? y) ...)) #f)
+       (#t (error "what madness is this?"))))
+
+(define (lipo=? x y)
+  "Are two line position objects equal?"
+  (cond/pos (x y)
+           (#{%-position?}# position:%)
+           (#{#-position?}# position:#)
+           (=-position?
+            position:variable-start
+            position:variable-end
+            position:=
+            position:value-start
+            position:value-end)
+           (#{[]-position?}#
+            position:section-name-start
+            position:section-name-end)
+           (@inline@-position?
+            position:@inline@-start
+            position:@inline@-end)
+           ((cut eq? <> #f))
+           ((cut eq? <> #t))))
+
+(define-syntax-rule (test-lipo name text expected)
+  (test-assert name
+    (lipo=? (parse-line text) expected)))
+
+(test-lipo "trivial empty line" "" #t)
+(test-lipo "empty line: lf" "\n" #t)
+(test-lipo "empty line: cr" "\r" #t)
+(test-lipo "empty line: space" " " #t)
+(test-lipo "empty line: space + lf" " \n" #t)
+(test-lipo "empty line: tab" "\t" #t)
+
+(test-lipo "section name" "[hello]"
+          (#{make-[]-position}# 1 6))
+(test-lipo "section name with spaces" "[  hello ]"
+          (#{make-[]-position}# 1 9))
+;; Used for some services.
+(test-lipo "section name with dots" "[hell.o.gnu]"
+          (#{make-[]-position}# 1 11))
+;; Allowed in upstream.
+(test-lipo "section name with leading space" "\t[hello]"
+          (#{make-[]-position}# 2 7))
+(test-lipo "section name with more leading space" "\t [hello]"
+          (#{make-[]-position}# 3 8))
+(test-lipo "section name with trailing space" "[hello]\t"
+          (#{make-[]-position}# 1 6))
+(test-lipo "section name with more trailing space" "[hello]\t\t"
+          (#{make-[]-position}# 1 6))
+
+(test-lipo "section name with missing ]" "[hell" #f)
+(test-lipo "section name with missing [" "hell]" #f)
+
+(test-lipo "empty % comment" "%" (#{make-%-position}# 0))
+(test-lipo "empty # comment" "#" (#{make-#-position}# 0))
+(test-lipo "% comment with text" "%text" (#{make-%-position}# 0))
+(test-lipo "# comment with text" "#text" (#{make-#-position}# 0))
+(test-lipo "% comment with leading whitespace" " %text"
+          (#{make-%-position}# 1))
+(test-lipo "# comment with leading whitespace" " #text"
+          (#{make-#-position}# 1))
+(test-lipo "% comment with more leading whitespace" " \t%text"
+          (#{make-%-position}# 2))
+(test-lipo "# comment with more leading whitespace" " \t#text"
+          (#{make-#-position}# 2))
+(test-lipo "# comment with %" "#%stuff" (#{make-#-position}# 0))
+(test-lipo "% comment with #" "%#stuff" (#{make-%-position}# 0))
+
+(test-lipo "= not allowed with empty variable name" "=value" #f)
+(test-lipo "even with spaces" "   =value" #f)
+(test-lipo "= with variable and value" "var=value"
+          (make-=-position 0 3 3 4 9))
+(test-lipo "= with spacy variable and spacy value" "\t\tvar =\tvalue   "
+          (make-=-position 2 5 6 8 13))
+;; parse-line does not impose what the end-of-line characters are.
+(test-lipo "= with spacier variable and spacy value" "\t\tvar \n=\tvalue   "
+          (make-=-position 2 5 7 9 14))
+(test-lipo "= with spaces in value" "var=val ue"
+          (make-=-position 0 3 3 4 10))
+(test-lipo "line parser does not perform unquoting" "var = 'val ue'"
+          (make-=-position 0 3 4 6 14))
+(test-lipo "quotes still make nice delimiters" "var = ' value '"
+          (make-=-position 0 3 4 6 15))
+;; "VAR = VALUE # comment" seems acceptable to me actually,
+;; but upstream interprets it as "VAR" = "VALUE # comment"
+;; IIUC.
+(test-lipo "= cannot be followed by a % comment" "var = value %comment "
+          (make-=-position 0 3 4 6 20))
+(test-lipo "= cannot be followed by a # comment" "var = value #comment "
+          (make-=-position 0 3 4 6 20))
+
+;; Bug discovered with the QuickCheck tests below!
+(test-lipo "= with empty value" "x="
+          (make-=-position 0 1 1 2 2))
+(test-lipo "= with spacy empty value" "x= "
+          ;; (0 1 1 3 3) would also be correct.
+          (make-=-position 0 1 1 2 2))
+(test-lipo "= with spacier empty value" "x=  "
+          ;; (0 1 1 3 3) and (0 1 1 4 4) would also be correct.
+          (make-=-position 0 1 1 2 2))
+
+(define-syntax-rule (test-inline-po name line expected-fipo)
+  (test-equal name expected-fipo
+             (let ((l (parse-line line)))
+               (if (@inline@-position? l)
+                   (cons (position:@inline@-filename-start l)
+                         (position:@inline@-filename-end l))
+                   'What?))))
+
+(test-lipo "@INLINE@ with file name" "@INLINE@ /x/${stuff}.config"
+          (make-@inline@-position 0 27))
+(test-inline-po "@INLINE@ file name positions" "@INLINE@ stuff" (cons 9 14))
+(test-lipo "@INLINE@ with file name + space" "@INLINE@ X\t"
+          (make-@inline@-position 0 10))
+(test-inline-po "@INLINE@ + space file name positions" "@INLINE@ stuff "
+  (cons 9 14))
+(test-lipo "@INLINE@ with file name + more space" "@INLINE@ X\t\t"
+          (make-@inline@-position 0 10))
+(test-inline-po "@INLINE@ more space file name positions" "@INLINE@ X \t"
+               (cons 9 10))
+(test-lipo "space + @INLINE@ with file name" " @INLINE@ X"
+          (make-@inline@-position 1 11))
+(test-inline-po "space + @INLINE@ file name positions" " @INLINE@ X"
+               (cons 10 11))
+
+;; TODO: are empty file names acceptable?
+;; If so, change the tests (see #; commented out code).
+(test-lipo "@INLINE@ without space" "@INLINE@" #false)
+(test-lipo "@INLINE@ with empty file name" "@INLINE@ "
+          #f
+          #;(make-@inline@-position 0 9))
+#;
+(test-inline-po "@INLINE@ with empty file name (position)" "@INLINE@ "
+               (cons 9 9))
+(test-lipo "@INLINE@ with empty file name + space" "@INLINE@ \t"
+          #f
+          #;(make-@inline@-position 0 9))
+#;
+(test-inline-po "@INLINE@ with empty file name + space (position)" "@INLINE@  "
+               (cons 9 9))
+
+
+
+;; This fairly trivial procedure is copied from tests/kinds/octal.scm
+;; (disarchive by Timothy Sample)
+;; 
https://git.ngyro.com/disarchive/tree/tests/kinds/octal.scm?id=27a0fc79aacaaab0388e974b07cda885079f0f05).
+(define (char-set->arbitrary cs)
+  (arbitrary
+   (gen (choose-char cs))
+   (xform (lambda (chr gen)
+            (generator-variant (char->integer chr) gen)))))
+
+;; Test the line parser on random inputs
+(define $interesting-char
+  (char-set->arbitrary (string->char-set "[]=#% \tab")))
+(define $interesting-random-string
+  ($string $interesting-char))
+(define $interesting-infix
+  ($choose ((cute string=? "") ($const ""))
+          ((cute string=? "@INCLUDE@") ($const "@INCLUDE@"))))
+
+(define-syntax-rule (false-if-assertion exp exp* ...)
+  (with-exception-handler
+      (lambda (e) #f)
+    (lambda () exp exp* ...)
+    #:unwind? #t
+    #:unwind-for-type &assertion))
+
+(define (in-bounds? line pos)
+  "Verify the position information @var{pos} is at least
+in-bounds for the string @var{line}."
+  (cond ((%-position? pos)
+        (and (<= 0 (position:% pos))
+             (< (position:% pos) (string-length line))))
+       ((#{#-position?}# pos)
+        (and (<= 0 (#{position:#}# pos))
+             (< (#{position:#}# pos) (string-length line))))
+       ((=-position? pos)
+        (and (<= 0 (position:= pos))
+             (< (position:= pos) (string-length line))))
+       ((#{[]-position?}# pos)
+        (and (<= 0 (position:section-name-start pos)
+                 (position:section-name-end pos))
+             (< (position:section-name-end pos)
+                (string-length line))))
+       ((@inline@-position? pos)
+        (and (<= 0 (position:@inline@-start pos)
+                 (position:@inline@-end pos))
+             (< (position:@inline@-end pos)
+                (string-length line))))
+       ((eq? pos #f) #t)
+       ((eq? pos #t) #t)
+       (#f (error "what madness is this?"))))
+
+(configure-quickcheck
+ (stop? (lambda (success-count _)
+         (>= success-count 2048)))
+ ;; Large inputs don't produce much additional value.
+ (size (lambda (test-number)
+        (if (zero? test-number)
+            0
+            (1+ (floor/ (log test-number) (log 8)))))))
+
+(test-assert "line position parser does not crash"
+  (quickcheck
+   (property ((pre $interesting-random-string)
+             (in $interesting-infix)
+             (post $interesting-random-string))
+     (false-if-assertion
+      (begin (parse-line (string-append pre in post))
+            #t)))))
+
+(test-assert "line position parser produces in-bounds results"
+  (quickcheck
+   (property ((pre $interesting-random-string)
+             (in $interesting-infix)
+             (post $interesting-random-string))
+     (let ((line (string-append pre in post)))
+       (false-if-assertion
+       (in-bounds? line (parse-line line)))))))
+
+;;; Local Variables:
+;;; eval: (put 'property 'scheme-indent-function 1)
+;;; End:

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]