guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Add resolve-relative-reference in (web uri), as in RFC 3986 5.2.


From: Vivien Kraus
Subject: [PATCH] Add resolve-relative-reference in (web uri), as in RFC 3986 5.2.
Date: Mon, 25 Sep 2023 18:48:28 +0200
User-agent: Evolution 3.46.4

* module/web/uri.scm (remove-dot-segments): Implement algorithm 5.2.4.
(merge-paths): Implement algorithm 5.2.3.
(resolve-relative-reference): Implement algorithm 5.2.2.
(module): Export resolve-relative-reference.
* NEWS: Reference it here.
---
Dear Guile developers,

When you request https://example.com/resource an URI and get redirected
to "here", you end up with 2 URI references:

  - https://example.com/resource
  - here

What should you request next? The answer is,
"https://example.com/here";. It seems evident how we go from one to the
other.

However, there are more subtle cases. What if you get redirected to
"../here", for instance?

RFC 3986 has you covered, in section 5.2. It explains how we go from a
base URI and a URI reference to the new URI.

What do you think?

Best regards,

Vivien

 NEWS                          |   7 ++
 module/web/uri.scm            | 152 +++++++++++++++++++++++++++++++++-
 test-suite/tests/web-uri.test |  68 +++++++++++++++
 3 files changed, 226 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index b319404d7..bdf75cb3c 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,13 @@ Changes in 3.0.10 (since 3.0.9)
 
 * New interfaces and functionality
 
+** New function in (web uri): resolve-relative-reference
+
+Implement the /5.2. Relative Resolution/ algorithm in RFC 3986. It may
+be used to request a moved resource in case of a 301 or 302 HTTP
+response, by resolving the Location value of the response on top of the
+requested URI.
+
 ** New warning: unused-module
 
 This analysis, enabled at `-W2', issues warnings for modules that appear
diff --git a/module/web/uri.scm b/module/web/uri.scm
index 8e0b9bee7..2b80c3847 100644
--- a/module/web/uri.scm
+++ b/module/web/uri.scm
@@ -47,7 +47,9 @@
 
             uri-reference? relative-ref?
             build-uri-reference build-relative-ref
-            string->uri-reference string->relative-ref))
+            string->uri-reference string->relative-ref
+
+            resolve-relative-reference))
 
 (define-record-type <uri>
   (make-uri scheme userinfo host port path query fragment)
@@ -501,3 +503,151 @@ strings, and join the parts together with ‘/’ as a 
delimiter.
 For example, the list ‘(\"scrambled eggs\" \"biscuits&gravy\")’
 encodes as ‘\"scrambled%20eggs/biscuits%26gravy\"’."
   (string-join (map uri-encode parts) "/"))
+
+(define (remove-dot-segments path)
+  "Remove the @samp{./} and @samp{../} segments in @var{path}, as
+ RFC3986, section 5.2.4."
+  (let scan ((input
+              (let ((components (split-and-decode-uri-path path)))
+                (if (string-suffix? "/" path)
+                    `(,@components "")
+                    components)))
+             (input-path-absolute? (string-prefix? "/" path))
+             (output '())
+             (output-absolute? #f)
+             (output-ends-in-/? (string-suffix? "/" path)))
+    (cond
+     ((and input-path-absolute?
+           (null? input))
+      ;; Transfer the initial "/" from the input to the end of the
+      ;; output.
+      (scan '() #f output output-absolute? #t))
+     ((null? input)
+      (string-append
+       (if output-absolute? "/" "")
+       (encode-and-join-uri-path
+        (reverse output))
+       (if output-ends-in-/? "/" "")))
+     ((and (not input-path-absolute?)
+           (or (equal? (car input) "..")
+               (equal? (car input) ".")))
+      (scan (cdr input) #f output output-absolute? output-ends-in-/?))
+     ((and input-path-absolute?
+           (equal? (car input) "."))
+      (scan (cdr input) #t output output-absolute? output-ends-in-/?))
+     ((and input-path-absolute?
+           (equal? (car input) ".."))
+      (scan (cdr input) #t
+            (if (null? output)
+                output
+                (cdr output))
+            ;; Remove the last segment, including the preceding /. So,
+            ;; if there is 0 or 1 segment, remove the root / too.
+            (if (or (null? output) (null? (cdr output)))
+                #f  ;; remove the /
+                #t) ;; keep it
+            #f))
+     (else
+      (scan (cdr input)
+            ;; If there is only 1 item in input, then it does not end in
+            ;; /, so the recursive call does not start with
+            ;; /. Otherwise, the recursive call starts with /.
+            (not (null? (cdr input)))
+            (cons (car input) output)
+            ;; If the output is empty and the input path is absolute,
+            ;; the / of the transferred path is transferred as well.
+            (or output-absolute?
+                (and (null? output)
+                     input-path-absolute?))
+            #f)))))
+
+(define (merge-paths base-has-authority? base relative)
+  "Return @samp{@var{base}/@var{relative}}, with the subtelties of absolute
+ paths explained in RFC3986, section 5.2.3. If the base URI has an
+authority (userinfo, host, port), then the processing is a bit
+different."
+  (if (and base-has-authority?
+           (equal? base ""))
+      (string-append "/" relative)
+      (let ((last-/ (string-rindex base #\/)))
+        (if last-/
+            (string-append (substring base 0 last-/) "/" relative)
+            relative))))
+
+(define (resolve-relative-reference base relative)
+  "Resolve @var{relative} on top of @var{base}, as RFC3986, section 5.2."
+  (let ((b-scheme (uri-scheme base))
+        (b-userinfo (uri-userinfo base))
+        (b-host (uri-host base))
+        (b-port (uri-port base))
+        (b-path (uri-path base))
+        (b-query (uri-query base))
+        (b-fragment (uri-fragment base))
+        (r-scheme (uri-scheme relative))
+        (r-userinfo (uri-userinfo relative))
+        (r-host (uri-host relative))
+        (r-port (uri-port relative))
+        (r-path (uri-path relative))
+        (r-query (uri-query relative))
+        (r-fragment (uri-fragment relative))
+        (t-scheme #f)
+        (t-userinfo #f)
+        (t-host #f)
+        (t-port #f)
+        (t-path "")
+        (t-query #f)
+        (t-fragment #f))
+    ;; https://www.rfc-editor.org/rfc/rfc3986#section-5.2
+    (if r-scheme
+        (begin
+          (set! t-scheme r-scheme)
+          (set! t-userinfo r-userinfo)
+          (set! t-host r-host)
+          (set! t-port r-port)
+          (set! t-path (remove-dot-segments r-path))
+          (set! t-query r-query))
+        ;; r-scheme is not defined:
+        (begin
+          (if r-host
+              (begin
+                (set! t-userinfo r-userinfo)
+                (set! t-host r-host)
+                (set! t-port r-port)
+                (set! t-path (remove-dot-segments r-path))
+                (set! t-query r-query))
+              ;; r-scheme is not defined, r-authority is not defined:
+              (begin
+                (if (equal? r-path "")
+                    (begin
+                      (set! t-path b-path)
+                      (if r-query
+                          ;; r-scheme, r-authority, r-path are not
+                          ;; defined:
+                          (set! t-query r-query)
+                          ;; r-scheme, r-authority, r-path, r-query are
+                          ;; not defined:
+                          (set! t-query b-query)))
+                    ;; r-scheme, r-authority not defined, r-path defined:
+                    (begin
+                      (if (string-prefix? "/" r-path)
+                          ;; r-scheme, r-authority not defined, r-path
+                          ;; absolute:
+                          (set! t-path (remove-dot-segments r-path))
+                          ;; r-scheme, r-authority not defined, r-path
+                          ;; relative:
+                          (set! t-path
+                                (remove-dot-segments
+                                 (merge-paths b-host b-path r-path))))
+                      (set! t-query r-query)))
+                (set! t-userinfo b-userinfo)
+                (set! t-host b-host)
+                (set! t-port b-port)))
+          (set! t-scheme b-scheme)))
+    (set! t-fragment r-fragment)
+    (build-uri-reference #:scheme t-scheme
+                         #:userinfo t-userinfo
+                         #:host t-host
+                         #:port t-port
+                         #:path t-path
+                         #:query t-query
+                         #:fragment t-fragment)))
diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test
index 95fd82f16..c453bf60f 100644
--- a/test-suite/tests/web-uri.test
+++ b/test-suite/tests/web-uri.test
@@ -20,6 +20,7 @@
 (define-module (test-web-uri)
   #:use-module (web uri)
   #:use-module (ice-9 regex)
+  #:use-module (ice-9 string-fun)
   #:use-module (test-suite lib))
 
 
@@ -693,3 +694,70 @@
   (pass-if (equal? "foo%20bar" (uri-encode "foo bar")))
   (pass-if (equal? "foo%0A%00bar" (uri-encode "foo\n\x00bar")))
   (pass-if (equal? "%3C%3E%5C%5E" (uri-encode "<>\\^"))))
+
+(with-test-prefix "resolve relative reference"
+  ;; Test suite in RFC3986, section 5.4.
+  (let ((base (string->uri "http://a/b/c/d;p?q";))
+        (equal/encoded?
+         ;; The test suite checks for ';' characters, but Guile escapes
+         ;; them in URIs. Same for '='.
+         (let ((escape-colon
+                (lambda (x)
+                  (string-replace-substring x ";" "%3B")))
+               (escape-equal
+                (lambda (x)
+                  (string-replace-substring x "=" "%3D"))))
+         (lambda (x y)
+           (equal? (escape-colon (escape-equal x))
+                   (escape-colon (escape-equal y)))))))
+    (let ((resolve
+           (lambda (relative)
+             (let* ((relative-uri
+                     (string->uri-reference relative))
+                    (resolved-uri
+                     (resolve-relative-reference base relative-uri))
+                    (resolved (uri->string resolved-uri)))
+               resolved))))
+      (with-test-prefix "normal"
+        (pass-if (equal/encoded? (resolve "g:h") "g:h"))
+        (pass-if (equal/encoded? (resolve "g") "http://a/b/c/g";))
+        (pass-if (equal/encoded? (resolve "./g") "http://a/b/c/g";))
+        (pass-if (equal/encoded? (resolve "g/") "http://a/b/c/g/";))
+        (pass-if (equal/encoded? (resolve "/g") "http://a/g";))
+        (pass-if (equal/encoded? (resolve "//g") "http://g";))
+        (pass-if (equal/encoded? (resolve "?y") "http://a/b/c/d;p?y";))
+        (pass-if (equal/encoded? (resolve "g?y") "http://a/b/c/g?y";))
+        (pass-if (equal/encoded? (resolve "#s") "http://a/b/c/d;p?q#s";))
+        (pass-if (equal/encoded? (resolve "g?y#s") "http://a/b/c/g?y#s";))
+        (pass-if (equal/encoded? (resolve ";x") "http://a/b/c/;x";))
+        (pass-if (equal/encoded? (resolve "g;x?y#s") "http://a/b/c/g;x?y#s";))
+        (pass-if (equal/encoded? (resolve "") "http://a/b/c/d;p?q";))
+        (pass-if (equal/encoded? (resolve ".") "http://a/b/c/";))
+        (pass-if (equal/encoded? (resolve "./") "http://a/b/c/";))
+        (pass-if (equal/encoded? (resolve "..") "http://a/b/";))
+        (pass-if (equal/encoded? (resolve "../") "http://a/b/";))
+        (pass-if (equal/encoded? (resolve "../g") "http://a/b/g";))
+        (pass-if (equal/encoded? (resolve "../..") "http://a/";))
+        (pass-if (equal/encoded? (resolve "../../") "http://a/";))
+        (pass-if (equal/encoded? (resolve "../../g") "http://a/g";)))
+      (with-test-prefix "abnormal"
+        (pass-if (equal/encoded? (resolve "../../../g") "http://a/g";))
+        (pass-if (equal/encoded? (resolve "../../../../g") "http://a/g";))
+        (pass-if (equal/encoded? (resolve "/./g") "http://a/g";))
+        (pass-if (equal/encoded? (resolve "/../g") "http://a/g";))
+        (pass-if (equal/encoded? (resolve "g.") "http://a/b/c/g.";))
+        (pass-if (equal/encoded? (resolve ".g") "http://a/b/c/.g";))
+        (pass-if (equal/encoded? (resolve "g..") "http://a/b/c/g..";))
+        (pass-if (equal/encoded? (resolve "..g") "http://a/b/c/..g";))
+        (pass-if (equal/encoded? (resolve "./../g") "http://a/b/g";))
+        (pass-if (equal/encoded? (resolve "./g/.") "http://a/b/c/g/";))
+        (pass-if (equal/encoded? (resolve "g/./h") "http://a/b/c/g/h";))
+        (pass-if (equal/encoded? (resolve "g/../h") "http://a/b/c/h";))
+        (pass-if (equal/encoded? (resolve "g;x=1/./y") "http://a/b/c/g;x=1/y";))
+        (pass-if (equal/encoded? (resolve "g;x=1/../y") "http://a/b/c/y";))
+        (pass-if (equal/encoded? (resolve "g?y/./x") "http://a/b/c/g?y/./x";))
+        (pass-if (equal/encoded? (resolve "g?y/../x") "http://a/b/c/g?y/../x";))
+        (pass-if (equal/encoded? (resolve "g#s/./x") "http://a/b/c/g#s/./x";))
+        (pass-if (equal/encoded? (resolve "g#s/../x") "http://a/b/c/g#s/../x";))
+        (pass-if (equal/encoded? (resolve "http:g") "http:g"))))))
+        

base-commit: 8441d8ff5671db690eb239cfea4dcfdee6d6dcdb
-- 
2.41.0



reply via email to

[Prev in Thread] Current Thread [Next in Thread]