>From 5980a3665d8924c7d2374f0740bb82ff0cdc9043 Mon Sep 17 00:00:00 2001
From: "Andries E. Brouwer"
Date: Thu, 13 Aug 2015 19:06:03 +0200
Subject: [PATCH] Do not escape high control bytes on a UTF-8 system.

Bytes 128-159 get their own flag, restrict_files_highctrl, instead of
sharing restrict_files_ctrl with bytes 0-31 and DEL.  The flag defaults
to false when LC_ALL, LC_CTYPE or LANG names a UTF-8 locale, and the
"nocontrol" keyword clears both flags.
---
 Note: the init.c post-image blob id on the "index" line below was not
 regenerated after the NULL-pointer fix in have_utf8_locale().

 src/init.c    | 40 +++++++++++++++++++++++++++++++++++++++-
 src/options.h |  1 +
 src/url.c     | 12 +++++++++---
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/src/init.c b/src/init.c
index ea074cc..6f71de1 100644
--- a/src/init.c
+++ b/src/init.c
@@ -348,6 +348,41 @@ command_by_name (const char *cmdname)
   return -1;
 }
 
+
+/* Return true if the environment selects a UTF-8 locale, else false.
+   Used to determine whether bytes 128-159 are OK in a filename.
+
+   The first of LC_ALL, LC_CTYPE and LANG that is set to a non-empty
+   value is searched for "UTF-8", "UTF8", "utf-8" or "utf8".  When
+   none of them is set there is nothing to search and the result is
+   false.  On WINDOWS, MSDOS and Cygwin builds no test is implemented
+   yet, so the result is always false there.  */
+static int
+have_utf8_locale (void)
+{
+#if defined(WINDOWS) || defined(MSDOS) || defined(__CYGWIN__)
+  /* insert some test for Windows */
+#else
+  const char *p;
+
+  /* An empty value counts as unset, so fall through to the next one.  */
+  p = getenv ("LC_ALL");
+  if (p == NULL || *p == '\0')
+    p = getenv ("LC_CTYPE");
+  if (p == NULL || *p == '\0')
+    p = getenv ("LANG");
+
+  /* getenv returns NULL for an unset variable; never hand that to
+     strstr.  */
+  if (p == NULL)
+    return false;
+  if (strstr (p, "UTF-8") != NULL || strstr (p, "UTF8") != NULL ||
+      strstr (p, "utf-8") != NULL || strstr (p, "utf8") != NULL)
+    return true;
+#endif
+  return false;
+}
+
 /* Reset the variables to default values.  */
 void
 defaults (void)
@@ -419,6 +454,7 @@ defaults (void)
   opt.restrict_files_os = restrict_unix;
 #endif
   opt.restrict_files_ctrl = true;
+  opt.restrict_files_highctrl = !have_utf8_locale ();
   opt.restrict_files_nonascii = false;
   opt.restrict_files_case = restrict_no_case_restriction;
 
@@ -1487,6 +1523,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
 {
   int restrict_os = opt.restrict_files_os;
   int restrict_ctrl = opt.restrict_files_ctrl;
+  int restrict_highctrl = opt.restrict_files_highctrl;
   int restrict_case = opt.restrict_files_case;
   int restrict_nonascii = opt.restrict_files_nonascii;
 
@@ -1511,7 +1548,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
       else if (VAL_IS ("uppercase"))
         restrict_case = restrict_uppercase;
       else if (VAL_IS ("nocontrol"))
-        restrict_ctrl = false;
+        restrict_ctrl = restrict_highctrl = false;
       else if (VAL_IS ("ascii"))
         restrict_nonascii = true;
       else
@@ -1532,6 +1569,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
 
   opt.restrict_files_os = restrict_os;
   opt.restrict_files_ctrl = restrict_ctrl;
+  opt.restrict_files_highctrl = restrict_highctrl;
   opt.restrict_files_case = restrict_case;
   opt.restrict_files_nonascii = restrict_nonascii;
 
diff --git a/src/options.h b/src/options.h
index 24ddbb5..083d16b 100644
--- a/src/options.h
+++ b/src/options.h
@@ -251,6 +251,7 @@ struct options
   bool restrict_files_ctrl;     /* non-zero if control chars in URLs
                                    are restricted from appearing in
                                    generated file names. */
+  bool restrict_files_highctrl; /* idem for bytes 128-159 */
   bool restrict_files_nonascii; /* non-zero if bytes with values greater
                                    than 127 are restricted. */
   enum {
diff --git a/src/url.c b/src/url.c
index 73c8dd0..e98bfaa 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1348,7 +1348,8 @@ enum {
   filechr_not_unix    = 1,      /* unusable on Unix, / and \0 */
   filechr_not_vms     = 2,      /* unusable on VMS (ODS5), 0x00-0x1F * ? */
   filechr_not_windows = 4,      /* unusable on Windows, one of \|/<>?:*" */
-  filechr_control     = 8       /* a control character, e.g. 0-31 */
+  filechr_control     = 8,      /* a control character, e.g. 0-31 */
+  filechr_highcontrol = 16      /* a high control character, in 128-159 */
 };
 
 #define FILE_CHAR_TEST(c, mask) \
@@ -1360,6 +1361,7 @@ enum {
 #define V filechr_not_vms
 #define W filechr_not_windows
 #define C filechr_control
+#define Z filechr_highcontrol
 
 #define UVWC U|V|W|C
 #define UW U|W
@@ -1392,8 +1394,8 @@ UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */
   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
   0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */
 
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
+  Z, Z, Z, Z,  Z, Z, Z, Z,  Z, Z, Z, Z,  Z, Z, Z, Z, /* 128-143 */
+  Z, Z, Z, Z,  Z, Z, Z, Z,  Z, Z, Z, Z,  Z, Z, Z, Z, /* 144-159 */
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
 
@@ -1406,6 +1408,7 @@ UVWC, VC, VC, VC, VC, VC, VC, VC, /* NUL SOH STX ETX EOT ENQ ACK BEL */
 #undef V
 #undef W
 #undef C
+#undef Z
 #undef UW
 #undef UVWC
 #undef VC
@@ -1448,8 +1451,11 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
     mask = filechr_not_vms;
   else
     mask = filechr_not_windows;
+
   if (opt.restrict_files_ctrl)
     mask |= filechr_control;
+  if (opt.restrict_files_highctrl)
+    mask |= filechr_highcontrol;
 
   /* Copy [b, e) to PATHEL and URL-unescape it. */
   if (escaped)
-- 
1.9.1