From 7f8efdafad827f4c38cae4f25be53b7462824e95 Mon Sep 17 00:00:00 2001 From: Peter Bex Date: Tue, 1 Nov 2016 15:18:26 +0100 Subject: [PATCH] Improve read/write invariance of keywords (#1332). Keywords are now treated more like symbols are: when they are written, they are written using the "portable" #: representation, regardless of the current "keyword style", so they can be read back with a CHICKEN running under a different keyword style. The reader now also uses the same "extended token" reader for keywords using the "portable" representation as the style-specific reader. When writing keywords, we also check for readability, like we do with symbols. We now also support empty keywords, which can be entered through the "portable" syntax using quotation, i.e., as #:|| --- NEWS | 3 ++ library.scm | 75 +++++++++++++++++++++++-------------------------- tests/library-tests.scm | 37 +++++++++++++++++++++--- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/NEWS b/NEWS index 631f1bf..7f0975c 100644 --- a/NEWS +++ b/NEWS @@ -65,6 +65,9 @@ - Runtime system: - "time" macro now shows peak memory usage (#1318, thanks to Kooda). +- Core libraries: + - Keywords are more consistently read/written, like symbols (#1332). + 4.11.1 - Security fixes diff --git a/library.scm b/library.scm index b150540..9594d51 100644 --- a/library.scm +++ b/library.scm @@ -3386,8 +3386,8 @@ EOF (##sys#read-char-0 port) ) ((eq? c #\.) (##sys#read-char-0 port) - (let ([c2 (##sys#peek-char-0 port)]) - (cond [(or (char-whitespace? c2) + (let ((c2 (##sys#peek-char-0 port))) + (cond ((or (char-whitespace? c2) (eq? c2 #\() (eq? c2 #\)) (eq? c2 #\") @@ -3401,22 +3401,26 @@ EOF (##sys#read-error port (starting-line "missing list terminator") - end) ) ] - [else + end) ) ) + (else (r-xtoken (lambda (tok kw) (let* ((tok (##sys#string-append "." tok)) (val - (if kw - (build-keyword tok) - (or (and (char-numeric? c2) - (##sys#string->number tok)) - (build-symbol tok)))) - (node (cons val '())) ) + (cond ((and (string=? tok ".:") + (eq? ksp #:suffix)) + ;; Edge case: r-xtoken sees + ;; a bare ":" and sets kw to #f + (build-keyword ".")) + (kw (build-keyword tok)) + ((and (char-numeric? c2) + (##sys#string->number tok))) + (else (build-symbol tok))) ) + (node (cons val '()))) (if first (##sys#setslot last 1 node) (set! first node) ) - (loop node) ))) ] ) ) ) + (loop node) ))) ) ) ) ) (else (let ([node (cons (readrec) '())]) (if first @@ -3496,10 +3500,6 @@ EOF (##sys#read-char-0 port) (loop (##sys#peek-char-0 port) (cons c lst)) ) ) ) ) - (define (r-next-token) - (r-spaces) - (r-token) ) - (define (r-symbol) (r-xtoken (lambda (str kw) @@ -3513,9 +3513,13 @@ EOF (cond ((or (eof-object? c) (char-whitespace? c) (memq c terminating-characters)) - (if (and skw (eq? ksp #:suffix)) + ;; The not null? checks here ensure we read a + ;; plain ":" as a symbol, not as a keyword. + (if (and skw (eq? ksp #:suffix) + (not (null? (cdr lst)))) (k (##sys#reverse-list->string (cdr lst)) #t) - (k (##sys#reverse-list->string lst) pkw))) + (k (##sys#reverse-list->string lst) + (and pkw (not (null? lst)))) ) ) ((memq c reserved-characters) (reserved-character c)) (else @@ -3623,9 +3627,7 @@ EOF (define (build-keyword tok) (##sys#intern-symbol - (if (eq? 0 (##sys#size tok)) - ":" - (##sys#string-append kwprefix tok)) )) + (##sys#string-append kwprefix tok)) ) ;; now have the state to make a decision. (set! reserved-characters @@ -3733,10 +3735,14 @@ EOF (else (list 'location (readrec)) )))) ((#\:) (##sys#read-char-0 port) - (let ((tok (r-token))) - (if (eq? 0 (##sys#size tok)) - (##sys#read-error port "empty keyword") - (build-keyword tok)))) + (let ((c (##sys#peek-char-0 port))) + (fluid-let ((ksp #f)) + (r-xtoken + (lambda (str kw) + (if (and (eq? 0 (##sys#size str)) + (not (char=? c #\|))) + (##sys#read-error port "empty keyword") + (build-keyword str))) ) ) ) ) ((#\%) (build-symbol (##sys#string-append "#" (r-token))) ) ((#\+) @@ -4027,24 +4033,13 @@ EOF ((not (##core#inline "C_blockp" x)) (outstr port "#")) ((##core#inline "C_forwardedp" x) (outstr port "#")) ((##core#inline "C_symbolp" x) - (cond [(fx= 0 (##sys#byte (##sys#slot x 1) 0)) - (let ([str (##sys#symbol->string x)]) - (case ksp - [(#:prefix) - (outchr port #\:) - (outstr port str) ] - [(#:suffix) - (outstr port str) - (outchr port #\:) ] - [else - (outstr port "#:") - (outstr port str) ] ) ) ] - [(memq x '(#!optional #!key #!rest)) - (outstr port (##sys#slot x 1))] - [(##sys#qualified-symbol? x) - (outstr port (##sys#symbol->qualified-string x))] + (cond ((and (##sys#qualified-symbol? x) + (not (fx= 0 (##sys#byte (##sys#slot x 1) 0)))) + (outstr port (##sys#symbol->qualified-string x))) (else (let ((str (##sys#symbol->string x))) + (when (fx= 0 (##sys#byte (##sys#slot x 1) 0)) ; kw? + (outstr port "#:")) (if (or (not readable) (sym-is-readable? str)) (outstr port str) (outreadablesym port str) ) ) ) ) ) diff --git a/tests/library-tests.scm b/tests/library-tests.scm index aaa9097..6ef1a8f 100644 --- a/tests/library-tests.scm +++ b/tests/library-tests.scm @@ -335,11 +335,21 @@ (parameterize ((keyword-style #:suffix)) (assert (string=? "abc:" (symbol->string (with-input-from-string "|abc:|" read)))) - (assert (string=? "abc" (symbol->string (with-input-from-string "|abc|:" read))))) ; keyword + (assert (string=? "abc" (symbol->string (with-input-from-string "|abc|:" read)))) ; keyword + (let ((kw (with-input-from-string "|foo bar|:" read))) + (assert (eq? kw (with-input-from-string "#:|foo bar|" read))) + (assert (string=? "foo bar" (symbol->string kw))) + (assert (string=? "#:|foo bar|" + (with-output-to-string (lambda () (write kw))))))) (parameterize ((keyword-style #:prefix)) (assert (string=? "abc" (symbol->string (with-input-from-string ":|abc|" read)))) - (assert (string=? ":abc" (symbol->string (with-input-from-string "|:abc|" read))))) + (assert (string=? ":abc" (symbol->string (with-input-from-string "|:abc|" read)))) + (let ((kw (with-input-from-string ":|foo bar|" read))) + (assert (eq? kw (with-input-from-string "#:|foo bar|" read))) + (assert (string=? "foo bar" (symbol->string kw))) + (assert (string=? "#:|foo bar|" + (with-output-to-string (lambda () (write kw))))))) (assert (eq? '|#:| (string->symbol "#:"))) (assert-fail (with-input-from-string "#:" read)) ; empty keyword @@ -366,10 +376,29 @@ (assert (not (keyword? (with-input-from-string ":abc:" read)))) (assert (not (keyword? (with-input-from-string "abc:" read))))) -(assert (string=? ":" (symbol->string (with-input-from-string ":" read)))) -(assert (string=? ":" (symbol->string (with-input-from-string ":||" read)))) +(let ((colon-sym (with-input-from-string ":" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + +;; The next two cases are a bit dubious. These could also be read as +;; keywords due to the literal quotation. +(let ((colon-sym (with-input-from-string ":||" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + +(let ((colon-sym (with-input-from-string "||:" read))) + (assert (symbol? colon-sym)) + (assert (not (keyword? colon-sym))) + (assert (string=? ":" (symbol->string colon-sym)))) + (assert-fail (with-input-from-string "#:" read)) +(let ((empty-kw (with-input-from-string "#:||" read))) + (assert (keyword? empty-kw)) + (assert (string=? "" (keyword->string empty-kw)))) + (assert (keyword? (with-input-from-string "42:" read))) (assert (keyword? (with-input-from-string ".:" read))) -- 2.1.4