# # patch "ChangeLog" # from [f873d1f0202e03df2753ba54508c28d303f3f23d] # to [1e1c391bdd1f85ec83bc7bf3cacc4680ba6ddbdd] # # patch "transforms.cc" # from [92b2495dcc4067794d6b3cd58c0f30f865a08a28] # to [61593e946306f8bd5926c02fca2ea683e510d271] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,11 @@ +2005-04-17 Richard Levitte + + * transforms.cc (glob_to_regexp): New function that takes a glob + expression and transforms it into a regexp. This will be useful + for globbing branch expressions when collections are exchanged to + branch globs and regexps. + (glob_to_regexp_test): A unit test for glob_to_regexp(). + 2005-04-16 Emile Snyder * tests/t_add_stomp_file.at: New test for failing case. @@ -2913,7 +2921,7 @@ * AUTHORS: Mention Wojciech and Neil. * revision.cc (calculate_ancestors_from_graph): Make non-recursive. -2005-01-17 Wojciech Miłkowski +2005-01-17 Wojciech MiÅ‚kowski * std_hooks.lua: Teach about meld. --- transforms.cc +++ transforms.cc @@ -718,7 +718,145 @@ dst += linesep_str; } +// glob_to_regexp converts a sh file glob to a regexp. The regexp should +// be usable by the Boost regexp library. +// +// Pattern tranformation: +// +// - Any character except those described below are copied as they are. +// - The backslash (\) escapes the following character. The escaping +// backslash is copied to the regexp along with the following character. +// - * is transformed to .* in the regexp. +// - ? is transformed to . in the regexp. +// - { is transformed to ( in the regexp, unless within [ and ]. +// - } is transformed to ) in the regexp, unless within [ and ]. +// - , is transformed to | in the regexp, if within { and } and not +// within [ and ]. +// - ^ is escaped unless it comes directly after an unescaped [. +// - ! is transformed to ^ in the regexp if it comes directly after an +// unescaped [. +// - ] directly following an unescaped [ is escaped. +string glob_to_regexp(const string & glob) +{ + struct bad_glob { + bad_glob() : what("Bad glob syntax") {} + string what; + }; + + int in_braces = 0; // counter for levels if {} + bool in_brackets = false; // flags if we're inside a [], which + // has higher precedence than {}. + // Also, [ is accepted inside [] unescaped. + bool this_was_opening_bracket = false; + string tmp; + + tmp.reserve(glob.size() * 2); + #ifdef BUILD_UNIT_TESTS + cerr << "DEBUG[glob_to_regexp]: input = \"" << glob << "\"" << endl; +#endif + + for (string::const_iterator i = glob.begin(); i != glob.end(); ++i) + { + char c = *i; + bool last_was_opening_bracket = this_was_opening_bracket; + this_was_opening_bracket = false; + + // Special case ^ and ! at the beginning of a [] expression. + if (in_brackets && last_was_opening_bracket + && (c == '!' || c == '^')) + { + tmp += '^'; + if (++i == glob.end()) + break; + c = *i; + } + + if (c == '\\') + { + tmp += c; + if (++i == glob.end()) + break; + tmp += *i; + } + else if (in_brackets) + { + switch(c) + { + case ']': + if (!last_was_opening_bracket) + { + in_brackets = false; + tmp += c; + break; + } + // Trickling through to the standard character conversion, + // because ] as the first character of a set is regarded as + // a normal character. + default: + if (!(isalnum(c) || c == '_')) + { + tmp += '\\'; + } + tmp += c; + break; + } + } + else + { + switch(c) + { + case '*': + tmp += ".*"; + break; + case '?': + tmp += '.'; + break; + case '{': + in_braces++; + tmp += '('; + break; + case '}': + if (in_braces == 0) + throw bad_glob(); + tmp += ')'; + in_braces--; + break; + case '[': + in_brackets = true; + this_was_opening_bracket = true; + tmp += c; + break; + case ',': + if (in_braces > 0) + { + tmp += '|'; + break; + } + // Trickling through to default: here, since a comma outside of + // brace notation is just a normal character. + default: + if (!(isalnum(c) || c == '_')) + { + tmp += '\\'; + } + tmp += c; + break; + } + } + } + + if (in_braces != 0 || in_brackets) + throw bad_glob(); + +#ifdef BUILD_UNIT_TESTS + cerr << "DEBUG[glob_to_regexp]: output = \"" << tmp << "\"" << endl; +#endif + + return tmp; +} + +#ifdef BUILD_UNIT_TESTS #include "unit_tests.hh" static void @@ -1004,6 +1142,15 @@ check_idna_encoding(); } +static void glob_to_regexp_test() +{ + BOOST_CHECK(glob_to_regexp("abc,v") == "abc\\,v"); + BOOST_CHECK(glob_to_regexp("foo[12m,]") == "foo[12m\\,]"); + // A full fledged, use all damn features test... + BOOST_CHECK(glob_to_regexp("foo.{bar*,cookie?{haha,hehe[^\\123!,]}}[!]a^b]") + == "foo\\.(bar.*|cookie.(haha|hehe[^\\123\\!\\,]))[^\\]a\\^b]"); +} + void add_transform_tests(test_suite * suite) { @@ -1015,6 +1162,7 @@ suite->add(BOOST_TEST_CASE(&join_lines_test)); suite->add(BOOST_TEST_CASE(&strip_ws_test)); suite->add(BOOST_TEST_CASE(&encode_test)); + suite->add(BOOST_TEST_CASE(&glob_to_regexp_test)); } #endif // BUILD_UNIT_TESTS