bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH 17/27] Bugfix: Remove surrounding quotes from Metalink/HTTP key's value


From: Matthew White
Subject: [Bug-wget] [PATCH 17/27] Bugfix: Remove surrounding quotes from Metalink/HTTP key's value
Date: Thu, 29 Sep 2016 06:02:57 +0200

* src/metalink.h: Add declaration of function dequote_metalink_string()
* src/metalink.c: Add function dequote_metalink_string() to remove
  surrounding quotes, \' or \", from a string
* src/metalink.c (find_key_value, find_key_values): Call
  dequote_metalink_string() to remove the surrounding quotes from the
  parsed value
* src/metalink.c (test_find_key_value, test_find_key_values): Add
  quoted key's values for unit-tests
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-quoted.py: New file. Metalink/HTTP quoted
  values tests

Some Metalink/HTTP keys, like "type" [2], may have a quoted value [1]:
Link: <http://example.com/example.ext.meta4>; rel=describedby;
type="application/metalink4+xml"

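Wget expects the Metalink module to return such values without their
quotes, but the parser returned them with the quotes still attached.
This patch strips the surrounding quotes from the parsed value.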

References:
 [1] Metalink/HTTP: Mirrors and Hashes
     1.1. Example Metalink Server Response
     https://tools.ietf.org/html/rfc6249#section-1.1

 [2] Additional Link Relations
     6. "type"
     https://tools.ietf.org/html/rfc6903#section-6
---
 src/metalink.c                       |  42 ++++++++++--
 src/metalink.h                       |   1 +
 testenv/Makefile.am                  |   1 +
 testenv/Test-metalink-http-quoted.py | 126 +++++++++++++++++++++++++++++++++++
 4 files changed, 163 insertions(+), 7 deletions(-)
 create mode 100755 testenv/Test-metalink-http-quoted.py

diff --git a/src/metalink.c b/src/metalink.c
index 5212742..b7f3a72 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -823,6 +823,32 @@ clean_metalink_string (char **str)
   *str = new;
 }
 
+/*
+  Strip one pair of matching surrounding quotes, ' or ", from *STR.
+  A string that is not enclosed in matching quotes is left untouched.
+  Otherwise *STR is freed and replaced with the dequoted string.
+ */
+void
+dequote_metalink_string (char **str)
+{
+  char *new;
+  size_t str_len;
+
+  if (!str || !*str || ((*str)[0] != '\"' && (*str)[0] != '\''))
+    return;
+
+  str_len = strlen (*str); /* length including the opening quote */
+
+  /* Need both quotes, and the closing one must match the opening one.  */
+  if (str_len < 2 || (*str)[0] != (*str)[str_len - 1])
+    return;
+
+  /* Replace *str with the text found between the two quotes.  */
+  new = xmemdup0 (*str + 1, str_len - 2);
+  xfree (*str);
+  *str = new;
+}
+
 /* Append the suffix ".badhash" to the file NAME, except without
    overwriting an existing file with that name and suffix.  */
 void
@@ -970,6 +996,7 @@ find_key_value (const char *start, const char *end, const 
char *key, char **valu
           while (val_end < end && *val_end != ';' && !c_isspace (*val_end))
             val_end++;
           *value = xstrndup (val_beg, val_end - val_beg);
+          dequote_metalink_string (value);
           return true;
         }
     }
@@ -1070,6 +1097,7 @@ find_key_values (const char *start, const char *end, char 
**key, char **value)
 
   *key = xstrndup (key_start, key_end - key_start);
   *value = xstrndup (val_start, val_end - val_start);
+  dequote_metalink_string (value);
 
   /* Skip trailing whitespaces.  */
   while (val_end < end && c_isspace (*val_end))
@@ -1082,10 +1110,10 @@ find_key_values (const char *start, const char *end, 
char **key, char **value)
 const char *
 test_find_key_values (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-                                   " ; key5=val5;key6 =val6;key7= val7; "\
-                                   "key8 = val8 ;    key9    =   val9       "\
-                                   "    ,key10= val10,key11,key12=val12";
+  static const char *header_data = "key1=val1;key2=\"val2\" ;key3=val3; 
key4=val4"\
+                                   " ; key5=val5;key6 ='val6';key7= val7; "\
+                                   "key8 = val8 ;    key9    =   \"val9\"      
 "\
+                                   "    ,key10= 'val10',key11,key12=val12";
   static const struct
   {
     const char *key;
@@ -1126,9 +1154,9 @@ test_find_key_values (void)
 const char *
 test_find_key_value (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-                                   " ; key5=val5;key6 =val6;key7= val7; "\
-                                   "key8 = val8 ;    key9    =   val9       ";
+  static const char *header_data = "key1=val1;key2=val2 ;key3='val3'; 
key4=val4"\
+                                   " ; key5='val5';key6 =val6;key7= \"val7\"; 
"\
+                                   "key8 = \"val8\" ;    key9    =   val9      
 ";
   static const struct
   {
     const char *key;
diff --git a/src/metalink.h b/src/metalink.h
index 4095262..6bd61f5 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -53,6 +53,7 @@ char *last_component (char const *name);
 char *get_metalink_basename (char *name);
 void append_suffix_number (char **str, const char *sep, wgint num);
 void clean_metalink_string (char **str);
+void dequote_metalink_string (char **str);
 void badhash_suffix (char *name);
 void badhash_or_remove (char *name);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 4ad7d0a..32b3db9 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -28,6 +28,7 @@
 
 if METALINK_IS_ENABLED
   METALINK_TESTS = Test-metalink-http.py            \
+    Test-metalink-http-quoted.py                    \
     Test-metalink-xml.py                            \
     Test-metalink-xml-continue.py                   \
     Test-metalink-xml-relpath.py                    \
diff --git a/testenv/Test-metalink-http-quoted.py 
b/testenv/Test-metalink-http-quoted.py
new file mode 100755
index 0000000..af2c445
--- /dev/null
+++ b/testenv/Test-metalink-http-quoted.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+from base64 import b64encode
+
+"""
+    This is to test Metalink/HTTP quoted values support in Wget.
+"""
+
+# Helper function for hostname, port and digest substitution
+def SubstituteServerInfo (text, host, port, digest):
+    text = text.replace('{{FILE1_HASH}}', digest)
+    text = text.replace('{{SRV_HOST}}', host)
+    text = text.replace('{{SRV_PORT}}', str (port))
+    return text
+
+############# File Definitions ###############################################
+File1 = "Would you like some Tea?"
+File1_corrupted = "Would you like some Coffee?"
+File1_lowPref = "Do not take this"
+File1_sha256 = b64encode (hashlib.sha256 (File1.encode ('UTF-8')).digest 
()).decode ('ascii')
+Signature = '''-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.0.7 (GNU/Linux)
+
+This is no valid signature. But it should be downloaded.
+The attempt to verify should fail but should not prevent
+a successful metalink resource retrieval (the sig failure
+should not be fatal).
+-----END PGP SIGNATURE-----
+'''
+File2 = "No meta data for this file."
+
+LinkHeaders = [
+    # This file has low priority and should not be picked.
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref>; rel=\"duplicate\"; 
pri=\"9\"; geo=\"pl\"",
+    # This file should be picked second, after hash failure.
+    "<http://0.0.0.0/File1_try2_badconnection>; rel =\"duplicate\";pref; 
pri=\"7\"",
+    # This signature download will fail.
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig2.asc>; rel=\"describedby\"; 
type=\"application/pgp-signature\"",
+    # Two good signatures
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig.asc>; rel=\"describedby\"; 
type=\"application/pgp-signature\"",
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/Sig.asc>; rel=\"describedby\"; 
type=\"application/pgp-signature\"",
+    # Bad URL scheme
+    "<invalid_url>; rel=\"duplicate\"; pri=\"4\"",
+    # rel missing
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1>; pri=\"1\"; pref",
+    # invalid rel
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1>; rel=\"strange\"; pri=\"4\"",
+    # This file should be picked first, because it has the lowest pri among 
preferred.
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_try1_corrupted>; 
rel=\"duplicate\"; geo=\"su\"; pri=\"4\"; pref",
+    # This file should NOT be picked third due to preferred location set to 
'uk'
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_badgeo>; rel 
=\"duplicate\";pri=\"5\"",
+    # This file should be picked as third try, and it should succeed
+    "<http://{{SRV_HOST}}:{{SRV_PORT}}/File1_try3_ok>; rel=\'duplicate\'; 
pri=\"5\";geo=\"uk\""
+    ]
+DigestHeader = "SHA-256=\'{{FILE1_HASH}}\'"
+
+# This will be filled as soon as we know server hostname and port
+MetaFileRules = {'SendHeader' : {}}
+
+FileOkServer = WgetFile ("File1_try3_ok", File1)
+FileBadPref = WgetFile ("File1_lowPref", File1_lowPref)
+FileBadHash = WgetFile ("File1_try1_corrupted", File1_corrupted)
+MetaFile = WgetFile ("test.meta", rules=MetaFileRules)
+# In case of Metalink over HTTP, the local file name is
+# derived from the URL suffix.
+FileOkLocal = WgetFile ("test.meta", File1)
+SigFile = WgetFile ("Sig.asc", Signature)
+FileNoMeta = WgetFile ("File2", File2)
+
+WGET_OPTIONS = "--metalink-over-http --preferred-location=uk"
+WGET_URLS = [["test.meta", "File2"]]
+
+Files = [[FileOkServer, FileBadPref, FileBadHash, MetaFile, SigFile, 
FileNoMeta]]
+Existing_Files = []
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [FileNoMeta, FileOkLocal]
+
+RequestList = [
+    [
+        "HEAD /test.meta",
+        "GET /Sig2.asc",
+        "GET /Sig.asc",
+        "GET /File1_try1_corrupted",
+        "GET /File1_try3_ok",
+        "HEAD /File2",
+        "GET /File2",
+    ]
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+    "ServerFiles"       : Files,
+    "LocalFiles"        : Existing_Files
+}
+test_options = {
+    "WgetCommands"      : WGET_OPTIONS,
+    "Urls"              : WGET_URLS
+}
+post_test = {
+    "ExpectedFiles"     : ExpectedDownloadedFiles,
+    "ExpectedRetcode"   : ExpectedReturnCode,
+    "FilesCrawled"      : RequestList,
+}
+
+http_test = HTTPTest (
+                pre_hook=pre_test,
+                test_params=test_options,
+                post_hook=post_test,
+)
+
+http_test.server_setup()
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaFileRules["SendHeader"] = {
+        'Link': [ SubstituteServerInfo (LinkHeader, srv_host, srv_port, 
File1_sha256)
+                    for LinkHeader in LinkHeaders ],
+        'Digest': SubstituteServerInfo (DigestHeader, srv_host, srv_port, 
File1_sha256),
+}
+
+err = http_test.begin ()
+
+exit (err)
-- 
2.7.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]