bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] Wget 1.16.3 v. VMS


From: Steven M. Schweda
Subject: [Bug-wget] Wget 1.16.3 v. VMS
Date: Tue, 31 Mar 2015 23:22:03 -0500 (CDT)

   I don't know how long this has been true, but I recently noticed that
some recursive HTTP fetch operations were failing (on VMS) because the
URLs contained a "?", and the code in src/url.c (et al.) thought that
this was a problem in file names only on Windows.  For example (1.16.3):

ALP $ wgo --user-agent=mozilla "http://www.google.com/search?source=hp&q=fred"
--2015-03-31 23:52:13--  http://www.google.com/search?source=hp&q=fred
Resolving www.google.com... 74.125.198.99, 74.125.198.103, 74.125.198.104, ...
Connecting to www.google.com|74.125.198.99|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
search!source=hp&q=fred: i/o error

Cannot write to 'search!source=hp&q=fred' (error 0).

(Interestingly, in 1.16.1, that last error message was more informative:
Cannot write to 'search!source=hp&q=fred' (i/o error).
but I haven't investigated.)

   Adding a VMS option to the restrict_files_os stuff, and treating VMS
like Windows for FN_QUERY_SEP and FN_QUERY_SEP_STR seems to solve the
problem (at least on an ODS5 volume):

ALP $ wgx --user-agent=mozilla "http://www.google.com/search?source=hp&q=fred"
--2015-03-31 23:39:35--  http://www.google.com/search?source=hp&q=fred
Resolving www.google.com... 74.125.198.147, 74.125.198.99, 74.125.198.103, ...
Connecting to www.google.com|74.125.198.147|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: 'search@source=hp&q=fred'

search@source=hp&q=     [  <=>                 ]  37.78K   174KB/s   in 0.2s

2015-03-31 23:39:36 (174 KB/s) - 'search@source=hp&q=fred' saved [38691]

ALP $ dire search*
[...]
search@source=hp&q^=fred.;1


   I haven't looked at the documentation, but the following code
changes seem plausible to me:

diff -ru wget-1_16_3a_vms/src/init.c wget-1_16_3/src/init.c
--- wget-1_16_3a_vms/src/init.c 2015-01-30 17:25:57 -0600
+++ wget-1_16_3/src/init.c      2015-03-31 22:46:59 -0500
@@ -397,6 +397,8 @@
   /* The default for file name restriction defaults to the OS type. */
 #if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
   opt.restrict_files_os = restrict_windows;
+#elif defined(__VMS)
+  opt.restrict_files_os = restrict_vms;
 #else
   opt.restrict_files_os = restrict_unix;
 #endif
@@ -1481,6 +1483,8 @@
 
       if (VAL_IS ("unix"))
         restrict_os = restrict_unix;
+      else if (VAL_IS ("vms"))
+        restrict_os = restrict_vms;
       else if (VAL_IS ("windows"))
         restrict_os = restrict_windows;
       else if (VAL_IS ("lowercase"))
@@ -1495,7 +1499,7 @@
         {
           fprintf (stderr, _("\
 %s: %s: Invalid restriction %s,\n\
-    use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
+    use [unix|vms|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
                    exec_name, com, quote (val));
           return false;
         }
diff -ru wget-1_16_3a_vms/src/options.h wget-1_16_3/src/options.h
--- wget-1_16_3a_vms/src/options.h      2015-01-30 17:25:57 -0600
+++ wget-1_16_3/src/options.h   2015-03-31 22:37:59 -0500
@@ -239,6 +239,7 @@
 
   enum {
     restrict_unix,
+    restrict_vms,
     restrict_windows
   } restrict_files_os;          /* file name restriction ruleset. */
   bool restrict_files_ctrl;     /* non-zero if control chars in URLs
diff -ru wget-1_16_3a_vms/src/url.c wget-1_16_3/src/url.c
--- wget-1_16_3a_vms/src/url.c  2015-02-23 09:10:22 -0600
+++ wget-1_16_3/src/url.c       2015-03-31 23:09:48 -0500
@@ -1328,8 +1328,9 @@
 
 enum {
   filechr_not_unix    = 1,      /* unusable on Unix, / and \0 */
-  filechr_not_windows = 2,      /* unusable on Windows, one of \|/<>?:*" */
-  filechr_control     = 4       /* a control character, e.g. 0-31 */
+  filechr_not_vms     = 2,      /* unusable on VMS (ODS5), 0x00-0x1F * ? */
+  filechr_not_windows = 4,      /* unusable on Windows, one of \|/<>?:*" */
+  filechr_control     = 8       /* a control character, e.g. 0-31 */
 };
 
 #define FILE_CHAR_TEST(c, mask) \
@@ -1338,11 +1339,14 @@
 
 /* Shorthands for the table: */
 #define U filechr_not_unix
+#define V filechr_not_vms
 #define W filechr_not_windows
 #define C filechr_control
 
+#define UVWC U|V|W|C
 #define UW U|W
-#define UWC U|W|C
+#define VC V|C
+#define VW V|W
 
 /* Table of characters unsafe under various conditions (see above).
 
@@ -1353,22 +1357,22 @@
 
 static const unsigned char filechr_table[256] =
 {
-UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
-  C,  C,  C,  C,   C,  C,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
-  C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
-  C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
-  0,  0,  W,  0,   0,  0,  0,  0,   /* SP  !   "   #    $   %   &   '   */
-  0,  0,  W,  0,   0,  0,  0, UW,   /* (   )   *   +    ,   -   .   /   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
-  0,  0,  W,  0,   W,  0,  W,  W,   /* 8   9   :   ;    <   =   >   ?   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
-  0,  0,  0,  0,   W,  0,  0,  0,   /* X   Y   Z   [    \   ]   ^   _   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
-  0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
-  0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */
+UVWC, VC, VC, VC,  VC, VC, VC, VC,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
+  VC, VC, VC, VC,  VC, VC, VC, VC,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
+  VC, VC, VC, VC,  VC, VC, VC, VC,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
+  VC, VC, VC, VC,  VC, VC, VC, VC,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
+   0,  0,  W,  0,   0,  0,  0,  0,   /* SP  !   "   #    $   %   &   '   */
+   0,  0, VW,  0,   0,  0,  0, UW,   /* (   )   *   +    ,   -   .   /   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
+   0,  0,  W,  0,   W,  0,  W, VW,   /* 8   9   :   ;    <   =   >   ?   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
+   0,  0,  0,  0,   W,  0,  0,  0,   /* X   Y   Z   [    \   ]   ^   _   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
+   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
+   0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */
 
   C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
   C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
@@ -1381,10 +1385,13 @@
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
 };
 #undef U
+#undef V
 #undef W
 #undef C
 #undef UW
-#undef UWC
+#undef UVWC
+#undef VC
+#undef VW
 
 /* FN_PORT_SEP is the separator between host and port in file names
    for non-standard port numbers.  On Unix this is normally ':', as in
@@ -1393,10 +1400,14 @@
 #define FN_PORT_SEP  (opt.restrict_files_os != restrict_windows ? ':' : '+')
 
 /* FN_QUERY_SEP is the separator between the file name and the URL
-   query, normally '?'.  Since Windows cannot handle '?' as part of
+   query, normally '?'.  Because VMS and Windows cannot handle '?' in a
    file name, we use '@' instead there.  */
-#define FN_QUERY_SEP (opt.restrict_files_os != restrict_windows ? '?' : '@')
-#define FN_QUERY_SEP_STR (opt.restrict_files_os != restrict_windows ? "?" : "@")
+#define FN_QUERY_SEP \
+ (((opt.restrict_files_os != restrict_vms) && \
+   (opt.restrict_files_os != restrict_windows)) ? '?' : '@')
+#define FN_QUERY_SEP_STR \
+ (((opt.restrict_files_os != restrict_vms) && \
+   (opt.restrict_files_os != restrict_windows)) ? "?" : "@")
 
 /* Quote path element, characters in [b, e), as file name, and append
    the quoted string to DEST.  Each character is quoted as per
@@ -1415,6 +1426,8 @@
   int mask;
   if (opt.restrict_files_os == restrict_unix)
     mask = filechr_not_unix;
+  else if (opt.restrict_files_os == restrict_vms)
+    mask = filechr_not_vms;
   else
     mask = filechr_not_windows;
   if (opt.restrict_files_ctrl)


   I assume that I've violated some style standard(s), but I'm old and
hard to educate.  ("git"?  What's a "git"?)  Please let me know if I
need to do more to persuade you to adopt/adapt a change set like this,
or if there's a better way.  Thanks, as always.

------------------------------------------------------------------------

   Steven M. Schweda               address@hidden
   382 South Warwick Street        (+1) 651-699-9818
   Saint Paul  MN  55105-2547



reply via email to

[Prev in Thread] Current Thread [Next in Thread]