# # # patch "cmd_netsync.cc" # from [f0a73af0909b03372b8c96bad5647e3bddff17ac] # to [755726132bff7c073d6c81971a2eecfba6f69543] # # patch "netsync.cc" # from [a4c703b9007b142478b1d8ecddf9d6c389e6d00c] # to [a06359b1e0b592660a8700e819d5750c087258d2] # # patch "uri.cc" # from [72edc51b298f51cac2c9b431ac72e5987f6d53f9] # to [f4e8894b72f63fae8d84dc30925b39761bc4034e] # # patch "uri.hh" # from [1fbd694d8b1dbb0dc6f0bfae3e6a41a4cf88a9c5] # to [dc12c2c0f9c78663d8a780fa7f9e3b18054df8ed] # ============================================================ --- cmd_netsync.cc f0a73af0909b03372b8c96bad5647e3bddff17ac +++ cmd_netsync.cc 755726132bff7c073d6c81971a2eecfba6f69543 @@ -50,13 +50,10 @@ find_key_if_needed(utf8 & addr, app_stat find_key_if_needed(utf8 & addr, app_state & app) { uri u; - bool transport_requires_auth(true); - if (parse_uri(addr(), u)) + parse_uri(addr(), u); + + if (app.lua.hook_use_transport_auth(u)) { - transport_requires_auth = app.lua.hook_use_transport_auth(u); - } - if (transport_requires_auth) - { rsa_keypair_id key; get_user_key(key, app); app.opts.signing_key = key; ============================================================ --- netsync.cc a4c703b9007b142478b1d8ecddf9d6c389e6d00c +++ netsync.cc a06359b1e0b592660a8700e819d5750c087258d2 @@ -2338,12 +2338,13 @@ build_stream_to_server(app_state & app, shared_ptr server; uri u; vector argv; - if (parse_uri(address(), u) - && app.lua.hook_get_netsync_connect_command(u, - include_pattern, - exclude_pattern, - global_sanity.debug, - argv)) + + parse_uri(address(), u); + if (app.lua.hook_get_netsync_connect_command(u, + include_pattern, + exclude_pattern, + global_sanity.debug, + argv)) { I(argv.size() > 0); string cmd = argv[0]; ============================================================ --- uri.cc 72edc51b298f51cac2c9b431ac72e5987f6d53f9 +++ uri.cc f4e8894b72f63fae8d84dc30925b39761bc4034e @@ -7,94 +7,144 @@ // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. 
-#include - -#include - #include "sanity.hh" #include "uri.hh" using std::string; +typedef string::size_type stringpos; -bool -parse_uri(string const & in, uri & out) +static void +parse_authority(string const & in, uri & u) { - uri u; + L(FL("matched URI authority: '%s'") % in); - // This is a simplified URI grammar. It does the basics. + stringpos p = 0; - string scheme_part = "(?:([^:/?#]+):)?"; - string authority_part = "(?://([^/?#]*))?"; - string path_part = "([^?#]*)"; - string query_part = "(?:\\?([^#]*))?"; - string fragment_part = "(?:#(.*))?"; + // First, there might be a user: one or more non-@ characters followed + // by an @. + stringpos user_end = in.find('@', p); + if (user_end != 0 && user_end < in.size()) + { + u.user.assign(in, 0, user_end); + p = user_end + 1; + L(FL("matched URI user: '%s'") % u.user); + } - string uri_rx = (string("^") - + scheme_part - + authority_part - + path_part - + query_part - + fragment_part - + "$"); + // The next thing must either be an ipv6 address, which has the form + // \[ [0-9A-Za-z:]+ \] and we discard the square brackets, or some other + // sort of hostname, [^:]+. (A host-part can be terminated by /, ?, or # + // as well as :, but our caller has taken care of that.) + if (p < in.size() && in.at(p) == '[') + { + p++; + stringpos ipv6_end = in.find(']', p); + N(ipv6_end != string::npos, + F("IPv6 address in URI has no closing ']'")); - boost::match_results uri_matches; - if (boost::regex_match(in, uri_matches, boost::regex(uri_rx))) + u.host.assign(in, p, ipv6_end - p); + p = ipv6_end + 1; + L(FL("matched URI host (IPv6 address): '%s'") % u.host); + } + else { + stringpos host_end = in.find(':', p); + u.host.assign(in, p, host_end - p); + p = host_end; + L(FL("matched URI host: '%s'") % u.host); + } - u.scheme = uri_matches.str(1); + // Finally, if the host-part was ended by a colon, there is a port number + // following, which must consist entirely of digits. 
+ if (p < in.size() && in.at(p) == ':') + { + p++; + N(p < in.size(), + F("explicit port-number specification in URI has no digits")); - // The "authority" fragment gets a bit more post-processing. + N(in.find_first_not_of("0123456789", p) == string::npos, + F("explicit port-number specification in URI contains nondigits")); + + u.port.assign(in, p, string::npos); + L(FL("matched URI port: '%s'") % u.port); + } +} + +void +parse_uri(string const & in, uri & u) +{ + u.scheme.clear(); + u.user.clear(); + u.host.clear(); + u.port.clear(); + u.path.clear(); + u.query.clear(); + u.fragment.clear(); + + stringpos p = 0; + + // This is a simplified URI grammar. It does the basics. + + // First there may be a scheme: one or more characters which are not + // ":/?#", followed by a colon. + stringpos scheme_end = in.find_first_of(":/?#", p); + + if (scheme_end != 0 && scheme_end < in.size() && in.at(scheme_end) == ':') + { + u.scheme.assign(in, p, scheme_end - p); + p = scheme_end + 1; L(FL("matched URI scheme: '%s'") % u.scheme); + } - if (uri_matches[2].matched) - { - string authority = uri_matches.str(2); - L(FL("matched URI authority: '%s'") % authority); + // Next, there may be an authority: "//" followed by zero or more + // characters which are not "/?#". - string user_part = "(?:([^@]+)@)?"; - string ipv6_host_part = "\\[([^\\]]+)]\\]"; - string normal_host_part = "([^:/]+)"; - string host_part = "(?:" + ipv6_host_part + "|" + normal_host_part + ")"; - string port_part = "(?::([[:digit:]]+))?"; - string auth_rx = user_part + host_part + port_part; - boost::match_results auth_matches; - - N(boost::regex_match(authority, auth_matches, boost::regex(auth_rx)), - F("The URI syntax is invalid. 
Maybe you used an URI in scp-style?")); - - u.user = auth_matches.str(1); - u.port = auth_matches.str(4); - if (auth_matches[2].matched) - u.host = auth_matches.str(2); - else - { - I(auth_matches[3].matched); - u.host = auth_matches.str(3); - } - L(FL("matched URI user: '%s'") % u.user); - L(FL("matched URI host: '%s'") % u.host); - L(FL("matched URI port: '%s'") % u.port); + if (p + 1 < in.size() && in.at(p) == '/' && in.at(p+1) == '/') + { + p += 2; + stringpos authority_end = in.find_first_of("/?#", p); + if (authority_end != p) + { + parse_authority(string(in, p, authority_end - p), u); + p = authority_end; + } + if (p >= in.size()) + return; + } - } + // Next, a path: zero or more characters which are not "?#". + { + stringpos path_end = in.find_first_of("?#", p); + u.path.assign(in, p, path_end - p); + p = path_end; + L(FL("matched URI path: '%s'") % u.path); + if (p >= in.size()) + return; + } - u.path = uri_matches.str(3); - u.query = uri_matches.str(4); - u.fragment = uri_matches.str(5); - L(FL("matched URI path: '%s'") % u.path); + // Next, perhaps a query: "?" followed by zero or more characters + // which are not "#". + if (in.at(p) == '?') + { + p++; + stringpos query_end = in.find('#', p); + u.query.assign(in, p, query_end - p); + p = query_end; L(FL("matched URI query: '%s'") % u.query); + if (p >= in.size()) + return; + } + + // Finally, if there is a '#', then whatever comes after it in the string + // is a fragment identifier. 
+ if (in.at(p) == '#') + { + u.fragment.assign(in, p + 1, string::npos); L(FL("matched URI fragment: '%s'") % u.fragment); - out = u; - return true; } - else - return false; } - - #ifdef BUILD_UNIT_TESTS #include "unit_tests.hh" -#include "transforms.hh" static void test_one_uri(string scheme, @@ -161,27 +211,50 @@ test_one_uri(string scheme, L(FL("testing parse of URI '%s'") % built); uri u; - BOOST_CHECK(parse_uri(built, u)); + BOOST_CHECK_NOT_THROW(parse_uri(built, u), informative_failure); BOOST_CHECK(u.scheme == scheme); BOOST_CHECK(u.user == user); - BOOST_CHECK(u.host == host); + if (!normal_host.empty()) + BOOST_CHECK(u.host == normal_host); + else + BOOST_CHECK(u.host == ipv6_host); BOOST_CHECK(u.port == port); BOOST_CHECK(u.path == path); BOOST_CHECK(u.query == query); BOOST_CHECK(u.fragment == fragment); } -UNIT_TEST(uri, uri) +UNIT_TEST(uri, basic) { test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", ""); test_one_uri("ssh", "graydon", "", "venge.net", "", "/tmp/foo.mtn", "", ""); test_one_uri("ssh", "", "", "venge.net", "22", "/tmp/foo.mtn", "", ""); test_one_uri("ssh", "", "", "venge.net", "", "/tmp/foo.mtn", "", ""); + test_one_uri("ssh", "", "fe:00:01::04:21", "", "", "/tmp/foo.mtn", "", ""); test_one_uri("file", "", "", "", "", "/tmp/foo.mtn", "", ""); test_one_uri("", "", "", "", "", "/tmp/foo.mtn", "", ""); test_one_uri("http", "graydon", "", "venge.net", "8080", "/foo.cgi", "branch=foo", "tip"); + test_one_uri("http", "graydon", "", "192.168.0.104", "8080", "/foo.cgi", "branch=foo", "tip"); + test_one_uri("http", "graydon", "fe:00:01::04:21", "", "8080", "/foo.cgi", "branch=foo", "tip"); } +UNIT_TEST(uri, bizarre) +{ + test_one_uri("", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", ""); + test_one_uri("", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", "", ""); + test_one_uri("ssh", "graydon", "", "venge.net", "22", "/tmp/foo.mtn", "", ""); + test_one_uri("ssh", "", "", "", "", "/graydon@venge.net:22/tmp/foo.mtn", 
"", ""); +} + +UNIT_TEST(uri, invalid) +{ + uri u; + + BOOST_CHECK_THROW(parse_uri("http://[f3:03:21/foo/bar", u), informative_failure); + BOOST_CHECK_THROW(parse_uri("http://example.com:/foo/bar", u), informative_failure); + BOOST_CHECK_THROW(parse_uri("http://example.com:1a4/foo/bar", u), informative_failure); +} + #endif // BUILD_UNIT_TESTS // Local Variables: ============================================================ --- uri.hh 1fbd694d8b1dbb0dc6f0bfae3e6a41a4cf88a9c5 +++ uri.hh dc12c2c0f9c78663d8a780fa7f9e3b18054df8ed @@ -23,7 +23,7 @@ struct uri std::string fragment; }; -bool +void parse_uri(std::string const & in, uri & out); // Local Variables: