# # # patch "automate.cc" # from [372cc205ad6a5376ca94596985b6dcf7c657f33b] # to [0ec01e9736a7988e5397d851bf6009c59b0574ec] # # patch "cert.cc" # from [4e4b08193ea8d1cafeebe2c1e227c0a70424062a] # to [2e88ff620a6c09e14461f3d0e9d67ef0e1b8411d] # # patch "cmd_list.cc" # from [783991aedffea80666c33fe6e3fff2ff251a87d9] # to [622ac9ea93b781409604baa769cecad53a425e10] # # patch "cmd_netsync.cc" # from [2b8548c58d2d9e9154ead567ce499731bc081703] # to [5f1360b0446a86c6e9fc646141a54e6e5a20dbe8] # # patch "database.cc" # from [43b7580169bd3c7fc805f0ce8bd85052f8e74fca] # to [4ae4c79a2a7fba4f39b072de6edc52625e1f69c0] # # patch "database.hh" # from [0cf5bf163523720b681afd1739903947c8d8af65] # to [7eb9a59905d0c790a5f76cb6fd1c35b538b7af60] # # patch "globish.cc" # from [1ffa504207d3631c43dd18ecb0e2cd57f5fe359a] # to [db87786cc191ad8c0f6b48264a6dc2eb158a982a] # # patch "globish.hh" # from [7b0d071f31a91efa626ee0da5c07263d5ea1abf1] # to [67b73a81afa6057967ac9e0a0e10b7dbedf3f5f0] # # patch "key_store.cc" # from [c15c1c54b12ba2e79540d132da465c88d48ada34] # to [0644bde00d3fd85c7a34eea4cb99f12a4779f40d] # # patch "key_store.hh" # from [44eedad6a981d25603d458c675187c0c740ce3d2] # to [273ff9423e95ea0f81d07bd73d273c89a56b0970] # # patch "lua.cc" # from [cdff2c5d72b8deecf8d8f95fd9a1d82cdc73f077] # to [84e60272337bdf7b653fd56de92de911892fc1f5] # # patch "lua_hooks.cc" # from [ff4053ed166f572c179628ec23c9ef263484e234] # to [8db4abc61015882eff57df28d1fa14cdb46ade88] # # patch "lua_hooks.hh" # from [0f79afa4f23d64a221b23ef4b39766528fec8a14] # to [a03b6a9de65a23204776355184e50a093f65f649] # # patch "luaext_globish.cc" # from [8dd1a45c7545658b03a8c59ef2bd1ab28fcabd75] # to [e5d6925746cb90183f6a58e3a40ae0f5d68376e3] # # patch "netcmd.cc" # from [584773446209355dabaaba6c8a88db4b06fb778c] # to [50e187a13cc5e04970639893759e1460640a3757] # # patch "netcmd.hh" # from [cb1dfe175af2c32f617594c37f3f35779521d460] # to [ec56a3a0bb06c217dc9f47f43678e296829990bf] # # patch "project.cc" # from [653a802e8f9bafbe2b131ef0dc53b0c40a5fdbc1] # to [6ba153708126f7a2c3e5f55c631d4ec1e43a9a3f] # # patch "vocab_terms.hh" # from [234b8fe803456b22719405ad5ffefd62f6e9ba01] # to [35079019863841cd9a214ab7d3f37b049f7a7f8b] # ============================================================ --- automate.cc 372cc205ad6a5376ca94596985b6dcf7c657f33b +++ automate.cc 0ec01e9736a7988e5397d851bf6009c59b0574ec @@ -1459,7 +1459,6 @@ CMD_AUTOMATE(tags, N_("[BRANCH_PATTERN]" filtering = true; } - globish_matcher match(incl, globish()); basic_io::printer prt; basic_io::stanza stz; stz.push_str_pair(symbol("format_version"), "1"); @@ -1483,7 +1482,7 @@ CMD_AUTOMATE(tags, N_("[BRANCH_PATTERN]" if (app.lua.hook_ignore_branch(*branch)) continue; - if (!show && match((*branch)())) + if (!show && incl.matches((*branch)())) show = true; branch_names.push_back((*branch)()); } ============================================================ --- cert.cc 4e4b08193ea8d1cafeebe2c1e227c0a70424062a +++ cert.cc 2e88ff620a6c09e14461f3d0e9d67ef0e1b8411d @@ -455,7 +455,7 @@ get_user_key(rsa_keypair_id & key, app_s return; vector all_privkeys; - app.keys.get_keys(all_privkeys); + app.keys.get_key_ids(all_privkeys); N(!all_privkeys.empty(), F("you have no private key to make signatures with\n" "perhaps you need to 'genkey '")); ============================================================ --- cmd_list.cc 783991aedffea80666c33fe6e3fff2ff251a87d9 +++ cmd_list.cc 622ac9ea93b781409604baa769cecad53a425e10 @@ -158,9 +158,9 @@ CMD(keys, "keys", "", CMD_REF(list), "[P { vector pubs; vector privkeys; - string pattern; + globish pattern("*"); if (args.size() == 1) - pattern = idx(args, 0)(); + pattern = globish(idx(args, 0)()); else if (args.size() > 1) throw usage(execid); @@ -251,26 +251,19 @@ CMD(branches, "branches", "", CMD_REF(li options::opts::depth | options::opts::exclude) { globish inc("*"); - globish exc; if (args.size() == 1) inc = globish(idx(args,0)()); else if (args.size() > 1) throw usage(execid); - vector excludes; - typecast_vocab_container(app.opts.exclude_patterns, excludes); - combine_and_check_globish(excludes, exc); - globish_matcher match(inc, exc); + + globish exc(app.opts.exclude_patterns); set names; app.get_project().get_branch_list(inc, names); for (set::const_iterator i = names.begin(); i != names.end(); ++i) - { - if (match((*i)()) && !app.lua.hook_ignore_branch(*i)) - { - cout << *i << '\n'; - } - } + if (!exc.matches((*i)()) && !app.lua.hook_ignore_branch(*i)) + cout << *i << '\n'; } CMD(epochs, "epochs", "", CMD_REF(list), "[BRANCH [...]]", @@ -564,10 +557,10 @@ CMD_AUTOMATE(keys, "", if (app.db.database_specified()) { transaction_guard guard(app.db, false); - app.db.get_key_ids("", dbkeys); + app.db.get_key_ids(dbkeys); guard.commit(); } - app.keys.get_key_ids("", kskeys); + app.keys.get_key_ids(kskeys); for (vector::iterator i = dbkeys.begin(); i != dbkeys.end(); i++) ============================================================ --- cmd_netsync.cc 2b8548c58d2d9e9154ead567ce499731bc081703 +++ cmd_netsync.cc 5f1360b0446a86c6e9fc646141a54e6e5a20dbe8 @@ -108,15 +108,10 @@ extract_patterns(args_vector const & arg if (args.size() >= 2 || app.opts.exclude_given) { E(args.size() >= 2, F("no branch pattern given")); - int pattern_offset = 1; - vector patterns; - std::transform(args.begin() + pattern_offset, args.end(), - std::inserter(patterns, patterns.end()), - &typecast_vocab); - combine_and_check_globish(patterns, include_pattern); - vector excludes; - typecast_vocab_container(app.opts.exclude_patterns, excludes); - combine_and_check_globish(excludes, exclude_pattern); + + include_pattern = globish(args.begin() + 1, args.end()); + exclude_pattern = globish(app.opts.exclude_patterns); + if (!app.db.var_exists(default_include_pattern_key) || app.opts.set_default) { @@ -298,14 +293,8 @@ CMD(clone, "clone", "", CMD_REF(network) } globish include_pattern(app.opts.branchname()); + globish exclude_pattern(app.opts.exclude_patterns); - globish exclude_pattern; - { - vector excludes; - typecast_vocab_container(app.opts.exclude_patterns, excludes); - combine_and_check_globish(excludes, exclude_pattern); - } - find_key_if_needed(addr, include_pattern, exclude_pattern, app, false); ============================================================ --- database.cc 43b7580169bd3c7fc805f0ce8bd85052f8e74fca +++ database.cc 4ae4c79a2a7fba4f39b072de6edc52625e1f69c0 @@ -46,6 +46,7 @@ #include "roster_delta.hh" #include "rev_height.hh" #include "vocab_hash.hh" +#include "globish.hh" // defined in schema.c, generated from schema.sql: extern char const schema_constant[]; @@ -2267,25 +2268,32 @@ void // crypto key management void -database::get_key_ids(string const & pattern, - vector & pubkeys) +database::get_key_ids(vector & pubkeys) { pubkeys.clear(); results res; - if (pattern != "") - fetch(res, one_col, any_rows, - query("SELECT id FROM public_keys WHERE id GLOB ?") - % text(pattern)); - else - fetch(res, one_col, any_rows, - query("SELECT id FROM public_keys")); + fetch(res, one_col, any_rows, query("SELECT id FROM public_keys")); for (size_t i = 0; i < res.size(); ++i) pubkeys.push_back(rsa_keypair_id(res[i][0])); } void +database::get_key_ids(globish const & pattern, + vector & pubkeys) +{ + pubkeys.clear(); + results res; + + fetch(res, one_col, any_rows, query("SELECT id FROM public_keys")); + + for (size_t i = 0; i < res.size(); ++i) + if (pattern.matches(res[i][0])) + pubkeys.push_back(rsa_keypair_id(res[i][0])); +} + +void database::get_keys(string const & table, vector & keys) { keys.clear(); @@ -3210,16 +3218,17 @@ outdated_indicator } outdated_indicator -database::get_branches(string const & glob, +database::get_branches(globish const & glob, vector & names) { results res; - query q("SELECT DISTINCT value FROM revision_certs WHERE name = ? AND CAST(value AS TEXT) glob ?"); + query q("SELECT DISTINCT value FROM revision_certs WHERE name = ?"); string cert_name = "branch"; - fetch(res, one_col, any_rows, q % text(cert_name) % text(glob)); + fetch(res, one_col, any_rows, q % text(cert_name)); for (size_t i = 0; i < res.size(); ++i) { - names.push_back(res[i][0]); + if (glob.matches(res[i][0])) + names.push_back(res[i][0]); } return cert_stamper.get_indicator(); } ============================================================ --- database.hh 0cf5bf163523720b681afd1739903947c8d8af65 +++ database.hh 7eb9a59905d0c790a5f76cb6fd1c35b538b7af60 @@ -75,6 +75,7 @@ class rev_height; struct revision_t; struct query; class rev_height; +struct globish; class database { @@ -371,7 +372,8 @@ public: private: void get_keys(std::string const & table, std::vector & keys); public: - void get_key_ids(std::string const & pattern, + void get_key_ids(std::vector & pubkeys); + void get_key_ids(globish const & pattern, std::vector & pubkeys); void get_public_keys(std::vector & pubkeys); @@ -560,7 +562,7 @@ public: public: // branches outdated_indicator get_branches(std::vector & names); - outdated_indicator get_branches(std::string const & glob, + outdated_indicator get_branches(globish const & glob, std::vector & names); bool check_integrity(); ============================================================ --- globish.cc 1ffa504207d3631c43dd18ecb0e2cd57f5fe359a +++ globish.cc db87786cc191ad8c0f6b48264a6dc2eb158a982a @@ -1,4 +1,5 @@ // Copyright (C) 2005 Nathaniel Smith +// Copyright (C) 2007 Zack Weinberg // // This program is made available under the GNU GPL version 2.0 or // greater. See the accompanying file COPYING for details. @@ -10,201 +11,666 @@ #include "base.hh" #include "sanity.hh" #include "globish.hh" +#include "option.hh" // for arg_type +#include "numeric_vocab.hh" +#include +#include + using std::string; using std::vector; +using std::back_inserter; +using std::back_insert_iterator; -using boost::regex_match; +// The algorithm here is originally from pdksh 5. That implementation uses +// the high bit of unsigned chars as a quotation flag. We can't do that, +// because we need to be utf8 clean. Instead, we copy the string and +// replace "live" metacharacters with single bytes from the +// control-character range. This is why bytes <= 0x1f are not allowed in the +// pattern. -// this converts a globish pattern to a regex. The regex should be usable by -// the Boost regex library operating in default mode, i.e., it should be a -// valid ECMAscript regex. -// -// Pattern tranformation: -// -// - As a special case, the empty pattern is translated to "$.^", which cannot -// match any string. -// -// - Any character except those described below are copied as they are. -// - The backslash (\) escapes the following character. The escaping -// backslash is copied to the regex along with the following character. -// - * is transformed to .* in the regex. -// - ? is transformed to . in the regex. -// - { is transformed to ( in the regex -// - } is transformed to ) in the regex -// - , is transformed to | in the regex, if within { and } -// - ^ is escaped unless it comes directly after an unescaped [. -// - ! is transformed to ^ in the regex if it comes directly after an -// unescaped [. -// - ] directly following an unescaped [ is escaped. -static void -maybe_quote(char c, string & re) +enum metachar { + META_STAR = 1, // * + META_QUES, // ? + META_CC_BRA, // [ + META_CC_INV_BRA, // [^ or [! + META_CC_KET, // ] (matches either of the above two) + META_ALT_BRA, // { + META_ALT_OR, // , (when found inside unquoted { ... }) + META_ALT_KET, // } +}; + +// Compile a character class. + +static string::const_iterator +compile_charclass(string const & pat, string::const_iterator p, + back_insert_iterator & to) { - if (!(isalnum(c) || c == '_')) + string in_class; + char bra = (char)META_CC_BRA; + + p++; + N(p != pat.end(), + F("invalid pattern '%s': unmatched '['") % pat); + + if (*p == '!' || *p == '^') { - re += '\\'; + bra = (char)META_CC_INV_BRA; + p++; + N(p != pat.end(), + F("invalid pattern '%s': unmatched '['") % pat); } - re += c; + + while (p != pat.end() && *p != ']') + { + if (*p == '\\') + { + p++; + if (p == pat.end()) + break; + } + // A dash at the beginning or end of the pattern is literal. + else if (*p == '-' + && in_class.size() != 0 + && p+1 != pat.end() + && p[1] != ']') + { + p++; + if (*p == '\\') + p++; + if (p == pat.end()) + break; + + // the cast is needed because boost::format will not obey the %x + // if given a 'char'. + N((widen(*p)) >= ' ', + F("invalid pattern '%s': control character 0x%02x is not allowed") + % pat % (widen(*p))); + + unsigned int start = widen(in_class.end()[-1]); + unsigned int stop = widen(*p); + + N(start != stop, + F("invalid pattern '%s': " + "one-element character ranges are not allowed") % pat); + N(start < stop, + F("invalid pattern '%s': " + "endpoints of a character range must be in " + "ascending numeric order") % pat); + N(start < 0x80 && stop < 0x80, + F("invalid pattern '%s': cannot use non-ASCII characters " + "in classes") % pat); + + L(FL("expanding range from %X (%c) to %X (%c)") + % (start+1) % (char)(start+1) % stop % (char)stop); + + for (unsigned int r = start + 1; r < stop; r++) + in_class.push_back((char)r); + } + else + N(*p != '[', F("syntax error in '%s': " + "character classes may not be nested") % pat); + + N((widen(*p)) >= ' ', + F("invalid pattern '%s': control character 0x%02x is not allowed") + % pat % (widen(*p))); + + N((widen(*p)) < 0x80, + F("invalid pattern '%s': cannot use non-ASCII characters in classes") + % pat); + + in_class.push_back(*p); + p++; + } + + N(p != pat.end(), + F("invalid pattern '%s': unmatched '['") % pat); + + N(in_class.size() != 0, + F("invalid pattern '%s': empty character class") % pat); + + // minor optimization: one-element non-inverted character class becomes + // the character. + if (bra == (char)META_CC_BRA && in_class.size() == 1) + *to++ = in_class[0]; + else + { + *to++ = bra; + std::sort(in_class.begin(), in_class.end()); + std::copy(in_class.begin(), in_class.end(), to); + *to++ = (char)META_CC_KET; + } + return p; } +// Compile one fragment of a glob pattern. + static void -checked_globish_to_regex(string const & glob, string & regex) +compile_frag(string const & pat, back_insert_iterator & to) { - int in_braces = 0; // counter for levels if {} + unsigned int brace_depth = 0; - regex.clear(); - regex.reserve(glob.size() * 2); + for (string::const_iterator p = pat.begin(); p != pat.end(); p++) + switch (*p) + { + default: + N((widen(*p)) >= ' ', + F("invalid pattern '%s': control character 0x%02x is not allowed") + % pat % (widen(*p))); + + *to++ = *p; + break; - L(FL("checked_globish_to_regex: input = '%s'") % glob); + case '*': + // optimization: * followed by any sequence of ?s and *s is + // equivalent to the number of ?s that appeared in the sequence, + // followed by a single star. the latter can be matched without + // nearly as much backtracking. - if (glob == "") + for (p++; p != pat.end(); p++) + { + if (*p == '?') + *to++ = META_QUES; + else if (*p != '*') + break; + } + + p--; + *to++ = META_STAR; + break; + + case '?': + *to++ = META_QUES; + break; + + case '\\': + p++; + N(p != pat.end(), + F("invalid pattern '%s': un-escaped \\ at end") % pat); + + N((widen(*p)) >= ' ', + F("invalid pattern '%s': control character 0x%02x is not allowed") + % pat % (widen(*p))); + + *to++ = *p; + break; + + case '[': + p = compile_charclass(pat, p, to); + break; + + case ']': + N(false, F("invalid pattern '%s': unmatched ']'") % pat); + + case '{': + // There's quite a bit of optimization we could be doing on + // alternatives, but it's hairy, especially if you get into + // nested alternatives; so we're not doing any of it now. + // (Look at emacs's regexp-opt.el for inspiration.) + brace_depth++; + N(brace_depth < 6, + F("invalid pattern '%s': braces nested too deeply") % pat); + *to++ = META_ALT_BRA; + break; + + case ',': + if (brace_depth > 0) + *to++ = META_ALT_OR; + else + *to++ = ','; + break; + + case '}': + N(brace_depth > 0, + F("invalid pattern '%s': unmatched '}'") % pat); + brace_depth--; + *to++ = META_ALT_KET; + break; + } + + N(brace_depth == 0, + F("invalid pattern '%s': unmatched '{'") % pat); +} + +// common code used by the constructors. + +static inline string +compile(string const & pat) +{ + string s; + back_insert_iterator to = back_inserter(s); + compile_frag(pat, to); + return s; +} + +static inline string +compile(vector::const_iterator const & beg, + vector::const_iterator const & end) +{ + if (end - beg == 0) + return ""; + if (end - beg == 1) + return compile((*beg)()); + + string s; + back_insert_iterator to = back_inserter(s); + + *to++ = META_ALT_BRA; + vector::const_iterator i = beg; + for (;;) { - regex = "$.^"; - // and the below loop will do nothing + compile_frag((*i)(), to); + i++; + if (i == end) + break; + *to++ = META_ALT_OR; } - for (string::const_iterator i = glob.begin(); i != glob.end(); ++i) - { - char c = *i; + *to++ = META_ALT_KET; + return s; +} - N(in_braces < 5, F("braces nested too deep in pattern '%s'") % glob); +globish::globish(string const & p) : compiled_pattern(compile(p)) {} +globish::globish(char const * p) : compiled_pattern(compile(p)) {} - switch(c) +globish::globish(vector const & p) + : compiled_pattern(compile(p.begin(), p.end())) {} +globish::globish(vector::const_iterator const & beg, + vector::const_iterator const & end) + : compiled_pattern(compile(beg, end)) {} + +// Debugging. + +static string +decode(string::const_iterator p, string::const_iterator end) +{ + string s; + for (; p != end; p++) + switch (*p) + { + case META_STAR: s.push_back('*'); break; + case META_QUES: s.push_back('?'); break; + case META_CC_BRA: s.push_back('['); break; + case META_CC_KET: s.push_back(']'); break; + case META_CC_INV_BRA: s.push_back('['); + s.push_back('!'); break; + + case META_ALT_BRA: s.push_back('{'); break; + case META_ALT_KET: s.push_back('}'); break; + case META_ALT_OR: s.push_back(','); break; + + // Some of these are only special in certain contexts, + // but it does no harm to escape them always. + case '[': case ']': case '-': case '!': case '^': + case '{': case '}': case ',': + case '*': case '?': case '\\': + s.push_back('\\'); + // fall through + default: + s.push_back(*p); + } + return s; +} + +string +globish::operator()() const +{ + return decode(compiled_pattern.begin(), compiled_pattern.end()); +} + +template <> void dump(globish const & g, string & s) +{ + s = g(); +} + +std::ostream & operator<<(std::ostream & o, globish const & g) +{ + return o << g(); +} + +// Matching. + +static string::const_iterator +find_next_subpattern(string::const_iterator p, + string::const_iterator pe, + bool want_alternatives) +{ + unsigned int depth = 1; + for (; p != pe; p++) + switch (*p) + { + default: break; + + case META_ALT_BRA: + depth++; break; + + case META_ALT_KET: + depth--; + if (depth == 0) + return p+1; + + case META_ALT_OR: + if (depth == 1 && want_alternatives) + return p+1; + } + + I(false); +} + + +static bool +do_match(string::const_iterator s, string::const_iterator se, + string::const_iterator p, string::const_iterator pe) +{ + unsigned int sc, pc; + + if (global_sanity.debug_p()) // decode() is expensive + L(FL("subpattern: '%s' against '%s'") % string(s,se) % decode(p,pe)); + + while (p < pe) + { + pc = widen(*p++); + sc = s < se ? widen(*s) : 0; + s++; + switch (pc) { - case '*': - regex += ".*"; + default: // literal + if (sc != pc) + return false; break; - case '?': - regex += '.'; + + case META_QUES: // any single character + if (sc == 0) + return false; break; - case '{': - in_braces++; - regex += '('; + + case META_CC_BRA: // any of these characters + { + bool matched = false; + I(p < pe); + I(*p != META_CC_KET); + do + { + if (widen(*p) == sc) + matched = true; + p++; + I(p < pe); + } + while (*p != META_CC_KET); + if (!matched) + return false; + } + p++; break; - case '}': - N(in_braces != 0, - F("trying to end a brace expression in a glob when none is started")); - regex += ')'; - in_braces--; + + case META_CC_INV_BRA: // any but these characters + I(p < pe); + I(*p != META_CC_KET); + do + { + if (widen(*p) == sc) + return false; + p++; + I(p < pe); + } + while (*p != META_CC_KET); + p++; break; - case ',': - if (in_braces > 0) - regex += '|'; - else - maybe_quote(c, regex); - break; - case '\\': - N(++i != glob.end(), F("pattern '%s' ends with backslash") % glob); - maybe_quote(*i, regex); - break; - default: - maybe_quote(c, regex); - break; - } - } - N(in_braces == 0, - F("run-away brace expression in pattern '%s'") % glob); + case META_STAR: // zero or more arbitrary characters + if (p == pe) + return true; // star at end always matches, if we get that far - L(FL("checked_globish_to_regex: output = '%s'") % regex); -} + pc = widen(*p); + // If the next character in p is not magic, we can only match + // starting from places in s where that character appears. + if (pc >= ' ') + { + if (global_sanity.debug_p()) + L(FL("after *: looking for '%c' in '%c%s'") + % (char)pc % (char)sc % string(s, se)); + p++; + for (;;) + { + if (sc == pc && do_match(s, se, p, pe)) + return true; + if (s >= se) + break; + sc = widen(*s++); + } + } + else + { + if (global_sanity.debug_p()) + L(FL("metacharacter after *: doing it the slow way")); + s--; + do + { + if (do_match(s, se, p, pe)) + return true; + s++; + } + while (s < se); + } + return false; -void -combine_and_check_globish(vector const & patterns, globish & pattern) -{ - string p; - if (patterns.size() > 1) - p += '{'; - bool first = true; - for (vector::const_iterator i = patterns.begin(); - i != patterns.end(); ++i) - { - string tmp; - // run for the checking it does - checked_globish_to_regex((*i)(), tmp); - if (!first) - p += ','; - first = false; - p += (*i)(); + case META_ALT_BRA: + { + string::const_iterator prest, psub, pnext; + string::const_iterator srest; + + prest = find_next_subpattern(p, pe, false); + psub = p; + s--; + do + { + pnext = find_next_subpattern(psub, pe, true); + srest = (prest == pe ? se : s); + for (; srest < se; srest++) + { + if (do_match(s, srest, psub, pnext - 1) + && do_match(srest, se, prest, pe)) + return true; + } + // try the empty target too + if (do_match(s, srest, psub, pnext - 1) + && do_match(srest, se, prest, pe)) + return true; + + psub = pnext; + } + while (pnext < prest); + return false; + } + } } - if (patterns.size() > 1) - p += '}'; - pattern = globish(p); + return s == se; } -globish_matcher::globish_matcher(globish const & include_pat, - globish const & exclude_pat) +bool globish::matches(string const & target) const { - string re; - checked_globish_to_regex(include_pat(), re); - r_inc = re; - checked_globish_to_regex(exclude_pat(), re); - r_exc = re; -} + bool result; + + // The empty pattern matches nothing. + if (compiled_pattern.empty()) + result = false; + else + result = do_match (target.begin(), target.end(), + compiled_pattern.begin(), compiled_pattern.end()); -bool -globish_matcher::operator()(string const & s) -{ - // regex_match may throw a runtime_error, if the regex turns out to be - // really pathological - bool inc_match = regex_match(s, r_inc); - bool exc_match = regex_match(s, r_exc); - bool result = inc_match && !exc_match; - L(FL("matching '%s' against '%s' excluding '%s': %s, %s: %s") - % s % r_inc % r_exc - % (inc_match ? "included" : "not included") - % (exc_match ? "excluded" : "not excluded") - % (result ? "matches" : "does not match")); + L(FL("matching '%s' against '%s': %s") + % target % (*this)() % (result ? "matches" : "does not match")); return result; } #ifdef BUILD_UNIT_TESTS #include "unit_tests.hh" -UNIT_TEST(globish, checked_globish_to_regex) +UNIT_TEST(globish, syntax) { - string pat; + struct tcase + { + char const * in; + char const * out; + }; + tcase const good[] = { + { "a", "a" }, + { "\\a", "a" }, + { "[a]", "a" }, + { "[!a]", "[!a]" }, + { "[^a]", "[!a]" }, + { "[\\!a]", "[\\!a]" }, + { "[\\^a]", "[\\^a]" }, + { "[ab]", "[ab]" }, + { "[a-b]", "[ab]" }, + { "[a-c]", "[abc]" }, + { "[ac-]", "[\\-ac]" }, + { "[-ac]", "[\\-ac]" }, + { "[+-/]", "[+\\,\\-./]" }, - checked_globish_to_regex("*", pat); - UNIT_TEST_CHECK(pat == ".*"); - checked_globish_to_regex("?", pat); - UNIT_TEST_CHECK(pat == "."); - checked_globish_to_regex("{a,b,c}d", pat); - UNIT_TEST_CHECK(pat == "(a|b|c)d"); - checked_globish_to_regex("foo{a,{b,c},?*}d", pat); - UNIT_TEST_CHECK(pat == "foo(a|(b|c)|..*)d"); - checked_globish_to_regex("\\a\\b\\|\\{\\*", pat); - UNIT_TEST_CHECK(pat == "ab\\|\\{\\*"); - checked_globish_to_regex(".+$^{}", pat); - UNIT_TEST_CHECK(pat == "\\.\\+\\$\\^()"); - checked_globish_to_regex(",", pat); - // we're very conservative about metacharacters, and quote all - // non-alphanumerics, hence the backslash - UNIT_TEST_CHECK(pat == "\\,"); - checked_globish_to_regex("\\.\\+\\$\\^\\(\\)", pat); - UNIT_TEST_CHECK(pat == "\\.\\+\\$\\^\\(\\)"); + { "\xC2\xA1", "\xC2\xA1" }, // U+00A1 in UTF8 + + { "*", "*" }, + { "\\*", "\\*" }, + { "[*]", "\\*" }, + { "?", "?" }, + { "\\?", "\\?" }, + { "[?]", "\\?" }, + { ",", "\\," }, + { "\\,", "\\," }, + { "[,]", "\\," }, + { "\\{", "\\{" }, + { "[{]", "\\{" }, + { "[}]", "\\}" }, + { "\\[", "\\[" }, + { "\\]", "\\]" }, + { "\\\\", "\\\\" }, - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo\\", pat), informative_failure); - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{foo", pat), informative_failure); - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{foo,bar{baz,quux}", pat), informative_failure); - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo}", pat), informative_failure); - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("foo,bar{baz,quux}}", pat), informative_failure); - UNIT_TEST_CHECK_THROW(checked_globish_to_regex("{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", pat), informative_failure); + { "**", "*" }, + { "*?", "?*" }, + { "*???*?*", "????*" }, + { "*a?*?b*", "*a??*b*" }, + + { "{a,b,c}d", "{a,b,c}d" }, + { "foo{a,{b,c},?*}d", "foo{a,{b,c},?*}d" }, + { "\\a\\b\\|\\{\\*", "ab|\\{\\*" }, + { ".+$^{}", ".+$\\^{}" }, + { "\\.\\+\\$\\^\\(\\)", ".+$\\^()" }, + { 0, 0 } + }; + + char const * const bad[] = { + "[", + "[!", + "[\\", + "[\\]", + "[foo", + "[!foo", + "foo]", + "[\003]", + "[a-a]", + "[f-a]", + "[]", + "[\xC2\xA1]", + "[\xC2\xA1\xC2\xA2]", + "[\xC2\xA1-\xC2\xA2]", + "[-\xC2\xA1]", + "[[]", + "[]", + + "\003", + "foo\\", + "{foo", + "{foo,bar{baz,quux}", + "foo}", + "foo,bar{baz,quux}}", + "{{{{{{{{{{a,b},c},d},e},f},g},h},i},j},k}", + 0 + }; + char const dummy[] = ""; + + for (tcase const * p = good; p->in; p++) + { + globish g(p->in); + string s; + dump(g, s); + L(FL("globish syntax: %s -> %s [expect %s]") % p->in % s % p->out); + UNIT_TEST_CHECK(s == p->out); + } + + for (char const * const * p = bad; *p; p++) + { + L(FL("globish syntax: invalid %s") % *p); + UNIT_TEST_CHECK_THROW(I(globish(*p).matches(dummy)), informative_failure); + } } -UNIT_TEST(globish, combine_and_check_globish) +UNIT_TEST(globish, from_vector) { - vector s; - s.push_back(globish("a")); - s.push_back(globish("b")); - s.push_back(globish("c")); - globish combined; - combine_and_check_globish(s, combined); - UNIT_TEST_CHECK(combined() == "{a,b,c}"); + vector v; + v.push_back(arg_type("a")); + v.push_back(arg_type("b")); + v.push_back(arg_type("c")); + globish combined(v); + string s; + dump(combined, s); + UNIT_TEST_CHECK(s == "{a,b,c}"); } -UNIT_TEST(globish, globish_matcher) +UNIT_TEST(globish, simple_matches) { - { + UNIT_TEST_CHECK(globish("abc").matches("abc")); + UNIT_TEST_CHECK(!globish("abc").matches("aac")); + + UNIT_TEST_CHECK(globish("a[bc]d").matches("abd")); + UNIT_TEST_CHECK(globish("a[bc]d").matches("acd")); + UNIT_TEST_CHECK(!globish("a[bc]d").matches("and")); + UNIT_TEST_CHECK(!globish("a[bc]d").matches("ad")); + UNIT_TEST_CHECK(!globish("a[bc]d").matches("abbd")); + + UNIT_TEST_CHECK(globish("a[!bc]d").matches("and")); + UNIT_TEST_CHECK(globish("a[!bc]d").matches("a#d")); + UNIT_TEST_CHECK(!globish("a[!bc]d").matches("abd")); + UNIT_TEST_CHECK(!globish("a[!bc]d").matches("acd")); + UNIT_TEST_CHECK(!globish("a[!bc]d").matches("ad")); + UNIT_TEST_CHECK(!globish("a[!bc]d").matches("abbd")); + + UNIT_TEST_CHECK(globish("a?c").matches("abc")); + UNIT_TEST_CHECK(globish("a?c").matches("aac")); + UNIT_TEST_CHECK(globish("a?c").matches("a%c")); + UNIT_TEST_CHECK(!globish("a?c").matches("a%d")); + UNIT_TEST_CHECK(!globish("a?c").matches("d%d")); + UNIT_TEST_CHECK(!globish("a?c").matches("d%c")); + UNIT_TEST_CHECK(!globish("a?c").matches("a%%d")); + + UNIT_TEST_CHECK(globish("a*c").matches("ac")); + UNIT_TEST_CHECK(globish("a*c").matches("abc")); + UNIT_TEST_CHECK(globish("a*c").matches("abac")); + UNIT_TEST_CHECK(globish("a*c").matches("abbcc")); + UNIT_TEST_CHECK(globish("a*c").matches("abcbbc")); + UNIT_TEST_CHECK(!globish("a*c").matches("abcbb")); + UNIT_TEST_CHECK(!globish("a*c").matches("abcb")); + UNIT_TEST_CHECK(!globish("a*c").matches("aba")); + UNIT_TEST_CHECK(!globish("a*c").matches("ab")); + + UNIT_TEST_CHECK(globish("*.bak").matches(".bak")); + UNIT_TEST_CHECK(globish("*.bak").matches("a.bak")); + UNIT_TEST_CHECK(globish("*.bak").matches("foo.bak")); + UNIT_TEST_CHECK(globish("*.bak").matches(".bak.bak")); + UNIT_TEST_CHECK(globish("*.bak").matches("fwibble.bak.bak")); + + UNIT_TEST_CHECK(globish("a*b*[cd]").matches("abc")); + UNIT_TEST_CHECK(globish("a*b*[cd]").matches("abcd")); + UNIT_TEST_CHECK(globish("a*b*[cd]").matches("aabrd")); + UNIT_TEST_CHECK(globish("a*b*[cd]").matches("abbbbbbbccd")); + UNIT_TEST_CHECK(!globish("a*b*[cd]").matches("ab")); + UNIT_TEST_CHECK(!globish("a*b*[cd]").matches("abde")); + UNIT_TEST_CHECK(!globish("a*b*[cd]").matches("aaaaaaab")); + UNIT_TEST_CHECK(!globish("a*b*[cd]").matches("axxxxd")); + UNIT_TEST_CHECK(!globish("a*b*[cd]").matches("adb")); +} + +UNIT_TEST(globish, complex_matches) +{ { globish_matcher m(globish("{a,b}?*\\*|"), globish("*c*")); UNIT_TEST_CHECK(m("aq*|")); UNIT_TEST_CHECK(m("bq*|")); ============================================================ --- globish.hh 7b0d071f31a91efa626ee0da5c07263d5ea1abf1 +++ globish.hh 67b73a81afa6057967ac9e0a0e10b7dbedf3f5f0 @@ -16,38 +16,73 @@ // pathological patterns), because we must match branches against untrusted // patterns when doing netsync. -// the syntax is: -// most things - match themselves -// * - match 0 or more characters -// ? - match 0 or 1 characters -// \ - match -// {,,...} - match any of the given items -// so like standard globs, except without [] character sets, and with {} -// alternation. -// the one strange thing is there is a special-case -- the empty pattern -// matches nothing, not even the empty string. this hardly ever matters, but -// it's nice to have some way to say "don't exclude anything", for instance. +// all characters stand for themselves except +// +// \x matches x, even if x is one of the metacharacters +// * matches zero or more characters of any kind (greedily) +// ? matches any single character +// [...] matches any single character that appears within the brackets +// [^..] matches any single character that does _not_ appear +// [!..] same as [^..] +// {a,b,c} matches a or b or c (may be of arbitrary length, have arbitrary +// number of alternations; nesting is allowed but only five deep) +// +// [\]] is how you put a ] in a character class +// [\[] similarly (otherwise a syntax error) +// [\\] similarly +// [{}?*] within [] these stand for themselves +// +// \n matches n, not newline +// \007 same as '007' +// +// to match, the _entire_ target must match the pattern; there is no scan +// for a substring match, nor is a prefix match a match. the pattern is +// expected to be utf8, and characters in the 0x00 - 0x1f range are not +// permitted. +// +// as an extra special case, the empty string matches nothing, not even an +// empty string. this hardly ever matters, but it's nice to have some way +// to say "don't exclude anything", for instance. -#include -#include +#include "vector.hh" -#include "vocab.hh" +struct arg_type; -void combine_and_check_globish(std::vector const &patterns, - globish & pattern); +struct globish +{ + globish() : compiled_pattern() {} + globish(char const * pat); + globish(std::string const & pat); + globish(std::vector const & pat); + globish(std::vector::const_iterator const & beg, + std::vector::const_iterator const & end); -class globish_matcher + std::string operator()(void) const; + bool matches(std::string const & target) const; + +private: + std::string compiled_pattern; +}; + +std::ostream & operator<<(std::ostream &, globish const &); +template <> void dump(globish const &, std::string &); + +// convenience functor for when you want to match all things +// that _do_ match one glob but do _not_ match another +struct globish_matcher { -public: - // this may throw an informative_failure if a pattern is invalid - globish_matcher(globish const & include_pat, globish const & exclude_pat); - // this method may throw a std::runtime_error if the pattern is really - // pathological - bool operator()(std::string const & s); + globish_matcher(globish const & incl, globish const & excl) + : included(incl), excluded(excl) {} + + bool operator()(std::string const & s) + { return included.matches(s) && !excluded.matches(s); } + private: - boost::regex r_inc, r_exc; + globish included; + globish excluded; }; + // Local Variables: // mode: C++ // fill-column: 76 ============================================================ --- key_store.cc c15c1c54b12ba2e79540d132da465c88d48ada34 +++ key_store.cc 0644bde00d3fd85c7a34eea4cb99f12a4779f40d @@ -131,33 +131,25 @@ void } void -key_store::get_key_ids(string const & pattern, - vector & priv) +key_store::get_key_ids(globish const & pattern, + vector & priv) { maybe_read_key_dir(); priv.clear(); - globish inc(pattern); - if (pattern.empty()) - inc = globish("*"); - globish_matcher gm(inc, globish("")); for (map::const_iterator i = keys.begin(); i != keys.end(); ++i) - { - if (gm((i->first)())) - priv.push_back(i->first); - } + if (pattern.matches((i->first)())) + priv.push_back(i->first); } void -key_store::get_keys(vector & priv) +key_store::get_key_ids(vector & priv) { maybe_read_key_dir(); priv.clear(); for (map::const_iterator i = keys.begin(); i != keys.end(); ++i) - { - priv.push_back(i->first); - } + priv.push_back(i->first); } bool ============================================================ --- key_store.hh 44eedad6a981d25603d458c675187c0c740ce3d2 +++ key_store.hh 273ff9423e95ea0f81d07bd73d273c89a56b0970 @@ -7,6 +7,7 @@ class app_state; #include "paths.hh" class app_state; +class globish; class key_store { @@ -29,11 +30,10 @@ public: void ensure_in_database(rsa_keypair_id const & ident); bool try_ensure_in_db(hexenc const & hash); - void get_key_ids(std::string const & pattern, + void get_key_ids(std::vector & priv); + void get_key_ids(globish const & pattern, std::vector & priv); - void get_keys(std::vector & priv); - bool key_pair_exists(rsa_keypair_id const & ident); void get_key_pair(rsa_keypair_id const & ident, ============================================================ --- lua.cc cdff2c5d72b8deecf8d8f95fd9a1d82cdc73f077 +++ lua.cc 84e60272337bdf7b653fd56de92de911892fc1f5 @@ -548,17 +548,17 @@ namespace { record_if_matches(string const & b, char const * p, vector & t) - : base(b + "/"), glob(globish(p), globish()), target(t) + : base(b + "/"), glob(p), target(t) { target.clear(); } virtual void consume(const char * component) { - if (glob(component)) + if (glob.matches(component)) target.push_back(base + component); } private: string base; - globish_matcher glob; + globish glob; vector & target; }; } ============================================================ --- lua_hooks.cc ff4053ed166f572c179628ec23c9ef263484e234 +++ lua_hooks.cc 8db4abc61015882eff57df28d1fa14cdb46ade88 @@ -30,6 +30,7 @@ #include "uri.hh" #include "cmd.hh" #include "commands.hh" +#include "globish.hh" // defined in {std,test}_hooks.lua, converted to {std,test}_hooks.c respectively extern char const std_hooks_constant[]; ============================================================ --- lua_hooks.hh 0f79afa4f23d64a221b23ef4b39766528fec8a14 +++ lua_hooks.hh a03b6a9de65a23204776355184e50a093f65f649 @@ -23,6 +23,7 @@ struct lua_State; struct uri; class app_state; struct lua_State; +struct globish; extern app_state* get_app_state(lua_State *L); ============================================================ --- luaext_globish.cc 8dd1a45c7545658b03a8c59ef2bd1ab28fcabd75 +++ luaext_globish.cc e5d6925746cb90183f6a58e3a40ae0f5d68376e3 @@ -1,6 +1,5 @@ #include "base.hh" #include "lua.hh" - #include "globish.hh" #include "sanity.hh" @@ -13,14 +12,10 @@ LUAEXT(match, globish) bool result = false; try { - string r(re); - string n; - string s(str); - result = globish_matcher(globish(r), globish(n))(s); + globish g(re); + result = g.matches(str); } catch (informative_failure & e) { return luaL_error(L, e.what()); - } catch (boost::bad_pattern & e) { - return luaL_error(L, e.what()); } catch (...) { return luaL_error(L, "Unknown error."); } @@ -28,8 +23,6 @@ LUAEXT(match, globish) return 1; } - - // Local Variables: // mode: C++ // fill-column: 76 ============================================================ --- netcmd.cc 584773446209355dabaaba6c8a88db4b06fb778c +++ netcmd.cc 50e187a13cc5e04970639893759e1460640a3757 @@ -18,6 +18,7 @@ #include "sanity.hh" #include "transforms.hh" #include "hmac.hh" +#include "globish.hh" using std::string; @@ -680,8 +681,8 @@ UNIT_TEST(netcmd, functions) do_netcmd_roundtrip(out_cmd, in_cmd, buf); in_cmd.read_anonymous_cmd(in_role, in_include_pattern, in_exclude_pattern, in_key); UNIT_TEST_CHECK(in_key == out_key); - UNIT_TEST_CHECK(in_include_pattern == out_include_pattern); - UNIT_TEST_CHECK(in_exclude_pattern == out_exclude_pattern); + UNIT_TEST_CHECK(in_include_pattern() == out_include_pattern()); + UNIT_TEST_CHECK(in_exclude_pattern() == out_exclude_pattern()); UNIT_TEST_CHECK(in_role == out_role); L(FL("anonymous_cmd test done, buffer was %d bytes") % buf.size()); } @@ -711,8 +712,8 @@ UNIT_TEST(netcmd, functions) UNIT_TEST_CHECK(in_key == out_key); UNIT_TEST_CHECK(in_signature == out_signature); UNIT_TEST_CHECK(in_role == out_role); - UNIT_TEST_CHECK(in_include_pattern == out_include_pattern); - UNIT_TEST_CHECK(in_exclude_pattern == out_exclude_pattern); + UNIT_TEST_CHECK(in_include_pattern() == out_include_pattern()); + UNIT_TEST_CHECK(in_exclude_pattern() == out_exclude_pattern()); L(FL("auth_cmd test done, buffer was %d bytes") % buf.size()); } ============================================================ --- netcmd.hh cb1dfe175af2c32f617594c37f3f35779521d460 +++ netcmd.hh ec56a3a0bb06c217dc9f47f43678e296829990bf @@ -20,6 +20,8 @@ #include "hmac.hh" #include "string_queue.hh" +struct globish; + typedef enum { server_voice, ============================================================ --- project.cc 653a802e8f9bafbe2b131ef0dc53b0c40a5fdbc1 +++ project.cc 6ba153708126f7a2c3e5f55c631d4ec1e43a9a3f @@ -54,7 +54,7 @@ project_t::get_branch_list(globish const bool allow_suspend_certs) { std::vector got; - app.db.get_branches(glob(), got); + app.db.get_branches(glob, got); names.clear(); multimap inverse_graph_cache; ============================================================ --- vocab_terms.hh 234b8fe803456b22719405ad5ffefd62f6e9ba01 +++ vocab_terms.hh 35079019863841cd9a214ab7d3f37b049f7a7f8b @@ -22,7 +22,6 @@ ATOMIC_NOVERIFY(branch_name); // utf-8 ATOMIC_NOVERIFY(inodeprint); // fingerprint of an inode ATOMIC_NOVERIFY(branch_name); // utf-8 -ATOMIC_NOVERIFY(globish); // kinda like a glob, see globish.hh (also, utf-8) ATOMIC(cert_name); // symbol-of-your-choosing ATOMIC_NOVERIFY(cert_value); // symbol-of-your-choosing