[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-wget] Wget 1.16.3 v. VMS
From: Steven M. Schweda
Subject: [Bug-wget] Wget 1.16.3 v. VMS
Date: Tue, 31 Mar 2015 23:22:03 -0500 (CDT)
I don't know how long this has been true, but I recently noticed that
some recursive HTTP fetch operations were failing (on VMS) because the
URLs contained a "?", and the code in src/url.c (et al.) thought that
this was a problem in file names only on Windows. For example (1.16.3):
ALP $ wgo --user-agent=mozilla
"http://www.google.com/search?source=hp&q=fred"
--2015-03-31 23:52:13-- http://www.google.com/search?source=hp&q=fred
Resolving www.google.com... 74.125.198.99, 74.125.198.103,
74.125.198.104, ...
Connecting to www.google.com|74.125.198.99|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
search!source=hp&q=fred: i/o error
Cannot write to 'search!source=hp&q=fred' (error 0).
(Interestingly, in 1.16.1, that last error message was more informative:
Cannot write to 'search!source=hp&q=fred' (i/o error).
but I haven't investigated.)
Adding a VMS option to the restrict_files_os stuff, and treating VMS
like Windows for FN_QUERY_SEP and FN_QUERY_SEP_STR seems to solve the
problem (at least on an ODS5 volume):
ALP $ wgx --user-agent=mozilla "http://www.google.com/search?source=hp&q=fred"
--2015-03-31 23:39:35-- http://www.google.com/search?source=hp&q=fred
Resolving www.google.com... 74.125.198.147, 74.125.198.99, 74.125.198.103, ...
Connecting to www.google.com|74.125.198.147|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: 'search@source=hp&q=fred'
search@source=hp&q= [ <=> ] 37.78K 174KB/s in 0.2s
2015-03-31 23:39:36 (174 KB/s) - 'search@source=hp&q=fred' saved [38691]
ALP $ dire search*
[...]
search^@source^=hp^&q^=fred.;1
I haven't looked at the documentation, but the following code
changes seem plausible to me:
diff -ru wget-1_16_3a_vms/src/init.c wget-1_16_3/src/init.c
--- wget-1_16_3a_vms/src/init.c 2015-01-30 17:25:57 -0600
+++ wget-1_16_3/src/init.c 2015-03-31 22:46:59 -0500
@@ -397,6 +397,8 @@
/* The default for file name restriction defaults to the OS type. */
#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
opt.restrict_files_os = restrict_windows;
+#elif defined(__VMS)
+ opt.restrict_files_os = restrict_vms;
#else
opt.restrict_files_os = restrict_unix;
#endif
@@ -1481,6 +1483,8 @@
if (VAL_IS ("unix"))
restrict_os = restrict_unix;
+ else if (VAL_IS ("vms"))
+ restrict_os = restrict_vms;
else if (VAL_IS ("windows"))
restrict_os = restrict_windows;
else if (VAL_IS ("lowercase"))
@@ -1495,7 +1499,7 @@
{
fprintf (stderr, _("\
%s: %s: Invalid restriction %s,\n\
- use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
+ use [unix|vms|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
exec_name, com, quote (val));
return false;
}
diff -ru wget-1_16_3a_vms/src/options.h wget-1_16_3/src/options.h
--- wget-1_16_3a_vms/src/options.h 2015-01-30 17:25:57 -0600
+++ wget-1_16_3/src/options.h 2015-03-31 22:37:59 -0500
@@ -239,6 +239,7 @@
enum {
restrict_unix,
+ restrict_vms,
restrict_windows
} restrict_files_os; /* file name restriction ruleset. */
bool restrict_files_ctrl; /* non-zero if control chars in URLs
diff -ru wget-1_16_3a_vms/src/url.c wget-1_16_3/src/url.c
--- wget-1_16_3a_vms/src/url.c 2015-02-23 09:10:22 -0600
+++ wget-1_16_3/src/url.c 2015-03-31 23:09:48 -0500
@@ -1328,8 +1328,9 @@
enum {
filechr_not_unix = 1, /* unusable on Unix, / and \0 */
- filechr_not_windows = 2, /* unusable on Windows, one of \|/<>?:*" */
- filechr_control = 4 /* a control character, e.g. 0-31 */
+ filechr_not_vms = 2, /* unusable on VMS (ODS5), 0x00-0x1F * ? */
+ filechr_not_windows = 4, /* unusable on Windows, one of \|/<>?:*" */
+ filechr_control = 8 /* a control character, e.g. 0-31 */
};
#define FILE_CHAR_TEST(c, mask) \
@@ -1338,11 +1339,14 @@
/* Shorthands for the table: */
#define U filechr_not_unix
+#define V filechr_not_vms
#define W filechr_not_windows
#define C filechr_control
+#define UVWC U|V|W|C
#define UW U|W
-#define UWC U|W|C
+#define VC V|C
+#define VW V|W
/* Table of characters unsafe under various conditions (see above).
@@ -1353,22 +1357,22 @@
static const unsigned char filechr_table[256] =
{
-UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
- C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */
- C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
- C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */
- 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */
- 0, 0, W, 0, 0, 0, 0, UW, /* ( ) * + , - . / */
- 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
- 0, 0, W, 0, W, 0, W, W, /* 8 9 : ; < = > ? */
- 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
- 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
- 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
- 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */
- 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
- 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
- 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
- 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */
+UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */
+ VC, VC, VC, VC, VC, VC, VC, VC, /* BS HT LF VT FF CR SO SI */
+ VC, VC, VC, VC, VC, VC, VC, VC, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ VC, VC, VC, VC, VC, VC, VC, VC, /* CAN EM SUB ESC FS GS RS US */
+ 0, 0, W, 0, 0, 0, 0, 0, /* SP ! " # $ % & ' */
+ 0, 0, VW, 0, 0, 0, 0, UW, /* ( ) * + , - . / */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
+ 0, 0, W, 0, W, 0, W, VW, /* 8 9 : ; < = > ? */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
+ 0, 0, 0, 0, W, 0, 0, 0, /* X Y Z [ \ ] ^ _ */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
+ 0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */
C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */
C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */
@@ -1381,10 +1385,13 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
#undef U
+#undef V
#undef W
#undef C
#undef UW
-#undef UWC
+#undef UVWC
+#undef VC
+#undef VW
/* FN_PORT_SEP is the separator between host and port in file names
for non-standard port numbers. On Unix this is normally ':', as in
@@ -1393,10 +1400,14 @@
#define FN_PORT_SEP (opt.restrict_files_os != restrict_windows ? ':' : '+')
/* FN_QUERY_SEP is the separator between the file name and the URL
- query, normally '?'. Since Windows cannot handle '?' as part of
+ query, normally '?'. Because VMS and Windows cannot handle '?' in a
file name, we use '@' instead there. */
-#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')
-#define FN_QUERY_SEP_STR (opt.restrict_files_os != restrict_windows ? "?" : "@")
+#define FN_QUERY_SEP \
+ (((opt.restrict_files_os != restrict_vms) && \
+ (opt.restrict_files_os != restrict_windows)) ? '?' : '@')
+#define FN_QUERY_SEP_STR \
+ (((opt.restrict_files_os != restrict_vms) && \
+ (opt.restrict_files_os != restrict_windows)) ? "?" : "@")
/* Quote path element, characters in [b, e), as file name, and append
the quoted string to DEST. Each character is quoted as per
@@ -1415,6 +1426,8 @@
int mask;
if (opt.restrict_files_os == restrict_unix)
mask = filechr_not_unix;
+ else if (opt.restrict_files_os == restrict_vms)
+ mask = filechr_not_vms;
else
mask = filechr_not_windows;
if (opt.restrict_files_ctrl)
I assume that I've violated some style standard(s), but I'm old and
hard to educate. ("git"? What's a "git"?) Please let me know if I
need to do more to persuade you to adopt/adapt a change set like this,
or if there's a better way. Thanks, as always.
------------------------------------------------------------------------
Steven M. Schweda address@hidden
382 South Warwick Street (+1) 651-699-9818
Saint Paul MN 55105-2547
- [Bug-wget] Wget 1.16.3 v. VMS,
Steven M. Schweda <=