# from [cd917e32e1047fb44b15595b16506452592d8865]
# to [09579b285b88ef6c3c5d248d7bb73a0a103850f9]
#
# patch "globish.cc"
# from [1a7b2b42320d0c8d8fde56b5af871bc125d17a5a]
# to [23f857decc878f3aa3b87fb9b45eacb69180d6f8]
---
+++
@@ -0,0 +1,659 @@
+// Copyright (C) 2002 Graydon Hoare
+// 2007 Julio M. Merino Vidal
+//
+// This program is made available under the GNU GPL version 2.0 or
+// greater. See the accompanying file COPYING for details.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the
+// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+// PURPOSE.
+
+#include "base.hh"
+#include "cmd.hh"
+
+#include "lua.hh"
+#include "app_state.hh"
+#include "globish.hh"
+#include "options_applicator.hh"
+#include "work.hh"
+#include "ui.hh"
+#include "mt_version.hh"
+#include "charset.hh"
+#include "simplestring_xform.hh"
+#include "vocab_cast.hh"
+
+#ifndef _WIN32
+#include
+#endif
+
+using std::string;
+using std::vector;
+using std::ostream;
+using std::make_pair;
+using std::set;
+
+//
+// Definition of top-level commands, used to classify the real commands
+// in logical groups.
+//
+// These top level commands, while part of the final identifiers and defined
+// as regular command groups, are handled separately. The user should not
+// see them except through the help command.
+//
+// XXX This is to easily maintain compatibility with older versions. But
+// maybe this should be revised, because exposing the top level category
+// (being optional, of course), may not be a bad idea.
+//
+
+CMD_GROUP(__root__, "__root__", "", NULL, "", ""); // root of the command tree: passes NULL where every group below names CMD_REF(__root__)
+
+CMD_GROUP_NO_COMPLETE(automation, "automation", "", CMD_REF(__root__), // NOTE(review): presumably the _NO_COMPLETE variant keeps this group out of name completion -- confirm in cmd.hh
+ N_("Commands that aid in scripted execution"),
+ "");
+CMD_GROUP(database, "database", "", CMD_REF(__root__),
+ N_("Commands that manipulate the database"),
+ "");
+CMD_GROUP(debug, "debug", "", CMD_REF(__root__),
+ N_("Commands that aid in program debugging"),
+ "");
+CMD_GROUP(informative, "informative", "", CMD_REF(__root__),
+ N_("Commands for information retrieval"),
+ "");
+CMD_GROUP(key_and_cert, "key_and_cert", "", CMD_REF(__root__),
+ N_("Commands to manage keys and certificates"),
+ "");
+CMD_GROUP(network, "network", "", CMD_REF(__root__),
+ N_("Commands that access the network"),
+ "");
+CMD_GROUP(packet_io, "packet_io", "", CMD_REF(__root__),
+ N_("Commands for packet reading and writing"),
+ "");
+CMD_GROUP(vcs, "vcs", "", CMD_REF(__root__),
+ N_("Commands for interaction with other version control systems"),
+ "");
+CMD_GROUP(review, "review", "", CMD_REF(__root__),
+ N_("Commands to review revisions"),
+ "");
+CMD_GROUP(tree, "tree", "", CMD_REF(__root__),
+ N_("Commands to manipulate the tree"),
+ "");
+CMD_GROUP(variables, "variables", "", CMD_REF(__root__),
+ N_("Commands to manage persistent variables"),
+ "");
+CMD_GROUP(workspace, "workspace", "", CMD_REF(__root__),
+ N_("Commands that deal with the workspace"),
+ "");
+CMD_GROUP(user, "user", "", CMD_REF(__root__), // the group that Lua-registered commands (class cmd_lua) insert themselves into
+ N_("Commands defined by the user"),
+ "");
+
+namespace commands {
+
+ void remove_command_name_from_args(command_id const & ident, // ident: the command's full identifier
+ args_vector & args, // args: argument words; the words naming the command are erased from its front
+ size_t invisible_length) // invisible_length: number of leading ident components that have no word in args
+ {
+ MM(ident);
+ MM(args);
+ MM(invisible_length);
+ I(ident.empty() || args.size() >= ident.size() - invisible_length); // args must hold one word per visible component
+ for (args_vector::size_type i = invisible_length; i < ident.size(); i++)
+ {
+ I(ident[i]().find(args[0]()) == 0); // the word at the front must be a prefix of this component
+ args.erase(args.begin()); // drop it, so args[0] is the next word on the following iteration
+ }
+ }
+
+  void reapply_options(app_state & app,
+                       command const * cmd,
+                       command_id const & cmd_ident,
+                       command const * subcmd,
+                       command_id const & subcmd_full_ident,
+                       size_t subcmd_invisible_length,
+                       args_vector const & subcmd_cmdline,
+                       vector<std::pair<string, string> > const * const separate_params)
+  { // Rebuilds app.opts for cmd and, when subcmd is non-null, for subcmd too.
+    I(cmd);
+    options::opts::all_options().instantiate(&app.opts).reset();
+    // Start from a clean slate, then apply each option source in turn:
+    cmd->preset_options(app.opts);            // 1. the command's own presets
+    // 2. the default arguments remembered in app.reset_info
+    option::concrete_option_set optset
+      = (options::opts::globals() | cmd->opts())
+      .instantiate(&app.opts);
+
+    optset.from_command_line(app.reset_info.default_args);
+    // 3. defaults for the subcommand, as supplied by the Lua hook
+    if (subcmd)
+      {
+        args_vector subcmd_defaults;
+        app.lua.hook_get_default_command_options(subcmd_full_ident,
+                                                 subcmd_defaults);
+        (options::opts::globals() | subcmd->opts())
+          .instantiate(&app.opts)
+          .from_command_line(subcmd_defaults);
+      }
+
+    // at this point we process the data from _MTN/options if
+    // the command needs it.
+    if ((subcmd ? subcmd : cmd)->use_workspace_options())
+      {
+        workspace::check_format();
+        workspace::get_options(app.opts);
+      }
+    // 5. the arguments of the original command line
+    optset.from_command_line(app.reset_info.cmdline_args);
+    // 6. the subcommand's own options, then strip the command-name words
+    if (subcmd)
+      {
+        app.opts.args.clear();
+        option::concrete_option_set subcmd_optset
+          = (options::opts::globals() | subcmd->opts())
+          .instantiate(&app.opts);
+        if (!separate_params)
+          {
+            /* the first argument here is only ever modified if the second is 'true' */
+            subcmd_optset.from_command_line(const_cast<args_vector &>(subcmd_cmdline));
+          }
+        else
+          {
+            subcmd_optset.from_key_value_pairs(*separate_params);
+            app.opts.args = subcmd_cmdline;
+          }
+        remove_command_name_from_args(subcmd_full_ident, app.opts.args,
+                                      subcmd_invisible_length);
+      }
+    else
+      {
+        remove_command_name_from_args(cmd_ident, app.opts.args);
+      }
+  }
+
+ // monotone.cc calls this function after option processing. It looks up the command named by ident and executes it with args.
+ void process(app_state & app, command_id const & ident,
+ args_vector const & args)
+ {
+ static bool process_called(false); // function-local flag: a second call trips the invariant on the next line
+ I(!process_called);
+ process_called = true;
+
+ command const * cmd = CMD_REF(__root__)->find_command(ident); // NOTE(review): dereferenced below without a NULL check, unlike the static find_command() helper in this file
+ app.reset_info.cmd = cmd;
+
+ string visibleid = join_words(vector< utf8 >(ident.begin() + 1, // skips the first component -- presumably the top-level group users do not type
+ ident.end()))();
+
+ I(cmd->is_leaf() || cmd->is_group());
+ E(!(cmd->is_group() && cmd->parent() == CMD_REF(__root__)), // a group sitting directly under the root cannot be run
+ origin::user,
+ F("command '%s' is invalid; it is a group") % join_words(ident));
+
+ if (!cmd->is_leaf()) // not executable: one of the two E() checks in this branch always fails
+ {
+ // args used in the command name have not been stripped yet
+ remove_command_name_from_args(ident, app.opts.args);
+
+ E(!args.empty(), origin::user,
+ F("no subcommand specified for '%s'") % visibleid);
+
+ E(false, origin::user,
+ F("could not match '%s' to a subcommand of '%s'") %
+ join_words(args) % visibleid);
+ }
+
+ L(FL("executing command '%s'") % visibleid);
+
+ reapply_options(app, cmd, ident); // only three arguments: the subcommand parameters take their declared defaults
+
+ // intentional leak
+ // we don't want the options to be reset, so don't destruct this
+ new options_applicator(app.opts, options_applicator::for_primary_cmd);
+
+ cmd->exec(app, ident, args);
+ }
+
+ // Prints the abstract description of the given command or command group
+ // properly indented. The tag starts at column two. The description has
+ // to start, at the very least, two spaces after the tag's end position;
+ // this is given by the colabstract parameter.
+ static void describe(const string & tag, const string & abstract, // tag: name(s) shown on the left; abstract: one-line summary
+ const string & subcommands, size_t colabstract, // subcommands: appended in parentheses when non-empty
+ ostream & out) // out: stream that receives the formatted line
+ {
+ I(colabstract > 0);
+
+ size_t col = 0;
+ out << " " << tag << " ";
+ col += display_width(utf8(tag + " ", origin::internal)); // columns taken up so far, measured by display_width()
+
+ out << string(colabstract - col, ' '); // pad to the abstract column; NOTE(review): size_t subtraction, so this relies on col <= colabstract
+ col = colabstract;
+ string desc(abstract);
+ if (!subcommands.empty())
+ {
+ desc += " (" + subcommands + ')';
+ }
+ out << format_text(desc, colabstract, col) << '\n';
+ }
+
+  // Compares two command pointers by primary name; used to sort help listings.
+  struct cmd_ptr_compare
+  {
+    bool operator()(command const * const lhs, command const * const rhs) const
+    {
+      return lhs->primary_name()() < rhs->primary_name()();
+    }
+  };
+
+ static void explain_children(command::children_set const & children, // children: commands to list; must not be empty
+ bool show_hidden_commands, // show_hidden_commands: when false, children whose hidden() is true are skipped
+ ostream & out) // out: stream that receives one described line per listed child
+ {
+ I(!children.empty());
+
+ vector< command const * > sorted;
+
+ size_t colabstract = 0; // becomes the widest tag width; every abstract starts at this column
+ for (command::children_set::const_iterator i = children.begin();
+ i != children.end(); i++)
+ {
+ command const * child = *i;
+
+ if (child->hidden() && !show_hidden_commands)
+ continue;
+
+ size_t len = display_width(join_words(child->names(), ", ")) +
+ display_width(utf8(" "));
+ if (colabstract < len)
+ colabstract = len;
+
+ sorted.push_back(child);
+ }
+
+ sort(sorted.begin(), sorted.end(), cmd_ptr_compare()); // order by primary name
+
+ for (vector< command const * >::const_iterator i = sorted.begin();
+ i != sorted.end(); i++)
+ {
+ command const * child = *i;
+ describe(join_words(child->names(), ", ")(), child->abstract(), // tag is all of the child's names joined with ", "
+ join_words(child->subcommands(show_hidden_commands), ", ")(),
+ colabstract, out);
+ }
+ }
+
+  static command const *
+  find_command(command_id const & ident)
+  {
+    // Looks ident up starting from the root of the command tree.
+    command const * const found = CMD_REF(__root__)->find_command(ident);
+    // This helper is only used internally, with identifiers that
+    // complete_command returned, so the command must exist; a miss is
+    // reported through an invariant rather than a user-facing error.
+    I(found != NULL);
+    return found;
+  }
+
+  static void explain_cmd_usage(command_id const & ident,
+                                bool show_hidden_commands,
+                                ostream & out)
+  {
+    I(ident.size() >= 1);
+    // Writes the help text for one command or group: syntax, children,
+    vector< string > lines;           // description and aliases, in that order.
+    command const * cmd = find_command(ident);
+
+    string visibleid = join_words(vector< utf8 >(ident.begin() + 1,
+                                                 ident.end()))();
+
+    // Print command parameters.
+    string params = cmd->params();
+    split_into_lines(params, lines);
+
+    if (visibleid.empty())
+      out << format_text(F("Commands in group '%s':") %
+                         join_words(ident)())
+          << "\n\n";
+    else
+      {
+        if (!cmd->children().empty())
+          out << format_text(F("Subcommands of '%s %s':") %
+                             prog_name % visibleid)
+              << "\n\n";
+        else if (!lines.empty())
+          out << format_text(F("Syntax specific to '%s %s':") %
+                             prog_name % visibleid)
+              << "\n\n";
+      }
+
+    // lines might be empty, but only when specific syntax is to be
+    // displayed, not in the other cases.
+    if (!lines.empty())
+      {
+        for (vector<string>::const_iterator j = lines.begin();
+             j != lines.end(); ++j)
+          out << " " << visibleid << ' ' << *j << '\n';
+        out << '\n';
+      }
+
+    // Explain children, if any.
+    if (!cmd->is_leaf())
+      {
+        explain_children(cmd->children(), show_hidden_commands, out);
+        out << '\n';
+      }
+
+    // Print command description.
+    if (visibleid.empty())
+      out << format_text(F("Purpose of group '%s':") %
+                         join_words(ident)())
+          << "\n\n";
+    else
+      out << format_text(F("Description for '%s %s':") %
+                         prog_name % visibleid)
+          << "\n\n";
+    out << format_text(cmd->desc(), 2) << "\n\n";
+
+    // Print all available aliases.
+    if (cmd->names().size() > 1)
+      {
+        command::names_set othernames = cmd->names();
+        othernames.erase(ident[ident.size() - 1]);  // all names except the one in ident
+        out << format_text(F("Aliases: %s.") %
+                           join_words(othernames, ", ")(), 2)
+            << '\n';
+      }
+  }
+
+  void explain_usage(command_id const & ident,
+                     bool show_hidden_commands,
+                     ostream & out)
+  {
+    find_command(ident);  // kept only for its invariant: ident must name a command
+    // An empty ident prints the overview of groups; otherwise one command.
+    if (ident.empty())
+      {
+        out << format_text(F("Command groups:")) << "\n\n";
+        explain_children(CMD_REF(__root__)->children(),
+                         show_hidden_commands,
+                         out);
+        out << '\n'
+            << format_text(F("For information on a specific command, type "
+                           "'mtn help <command_name> [subcommand_name ...]'."))
+            << "\n\n"
+            << format_text(F("To see more details about the commands of a "
+                           "particular group, type 'mtn help <group_name>'."))
+            << "\n\n"
+            << format_text(F("Note that you can always abbreviate a command "
+                           "name as long as it does not conflict with other "
+                           "names."))
+            << "\n";
+      }
+    else
+      explain_cmd_usage(ident, show_hidden_commands, out);
+  }
+
+  options::options_type command_options(command_id const & ident)
+  {
+    // find_command() already asserts that ident names an existing command.
+    return find_command(ident)->opts();
+  }
+
+ // Lua-defined user commands.
+ class cmd_lua : public command
+ {
+ lua_State *st; // Lua state the command was registered from; exec() checks it against app.lua
+ std::string const f_name; // name of the Lua function that exec() calls
+ public:
+ cmd_lua(std::string const & primary_name,
+ std::string const & params,
+ std::string const & abstract,
+ std::string const & desc,
+ lua_State *L_st,
+ std::string const & func_name) :
+ command(primary_name, "", CMD_REF(user), false, false, params, // registered under the "user" group
+ abstract, desc, true,
+ options::options_type() | options::opts::none, true),
+ st(L_st), f_name(func_name)
+ {
+ // because user commands are inserted after the normal
+ // initialisation process
+ CMD_REF(user)->children().insert(this);
+ }
+
+ void exec(app_state & app, command_id const & execid, // runs f_name with each argument pushed as a string
+ args_vector const & args) const
+ {
+ I(st);
+ I(app.lua.check_lua_state(st));
+
+ app_state* app_p = get_app_state(st);
+ I(app_p == & app); // the Lua state must belong to this very app_state
+
+ Lua ll(st);
+ ll.func(f_name);
+
+ for (args_vector::const_iterator it = args.begin();
+ it != args.end(); ++it)
+ ll.push_str((*it)());
+
+ app.mtn_automate_allowed = true; // set only for the duration of the Lua call
+
+ ll.call(args.size(),0); // args.size() arguments, 0 results
+
+ app.mtn_automate_allowed = false; // NOTE(review): cleared only on normal return; if call() can throw, the flag stays set -- confirm
+
+ E(ll.ok(), origin::user,
+ F("Call to user command %s (lua command: %s) failed.")
+ % primary_name() % f_name);
+ }
+ };
+}
+
+LUAEXT(alias_command, ) // Lua extension: adds a new alias to an existing command
+{
+ const char *old_cmd = luaL_checkstring(LS, -2); // second value from the top of the Lua stack: existing command name
+ const char *new_cmd = luaL_checkstring(LS, -1); // top of the Lua stack: the alias to add
+ E(old_cmd && new_cmd, origin::user,
+ F("%s called with an invalid parameter") % "alias_command");
+
+ args_vector args;
+ args.push_back(arg_type(old_cmd, origin::user));
+ commands::command_id id = commands::complete_command(args); // resolve the given name to a full command identifier
+ commands::command *old_cmd_p = CMD_REF(__root__)->find_command(id);
+
+ old_cmd_p->add_alias(utf8(new_cmd)); // NOTE(review): old_cmd_p is not checked for NULL; presumably complete_command rejects unknown names -- confirm
+
+ lua_pushboolean(LS, true);
+ return 1; // one value (the boolean) is left on the stack for Lua
+}
+
+
+LUAEXT(register_command, ) // Lua extension: registers a user command backed by a Lua function
+{
+ const char *cmd_name = luaL_checkstring(LS, -5); // the five arguments are read relative to the top of the Lua stack
+ const char *cmd_params = luaL_checkstring(LS, -4);
+ const char *cmd_abstract = luaL_checkstring(LS, -3);
+ const char *cmd_desc = luaL_checkstring(LS, -2);
+ const char *cmd_func = luaL_checkstring(LS, -1); // name of the Lua function to call when the command runs
+
+ E(cmd_name && cmd_params && cmd_abstract && cmd_desc && cmd_func,
+ origin::user,
+ F("%s called with an invalid parameter") % "register_command");
+
+ // leak this - commands can't be removed anyway
+ new commands::cmd_lua(cmd_name, cmd_params, cmd_abstract, cmd_desc, // the constructor inserts the object into the "user" group
+ LS, cmd_func);
+
+ lua_pushboolean(LS, true);
+ return 1; // one value (the boolean) is left on the stack for Lua
+}
+
+// Miscellaneous commands and related functions for which there is no
+// better file.
+
+CMD_NO_WORKSPACE(help, "help", "", CMD_REF(informative),
+ N_("command [ARGS...]"),
+ N_("Displays help about commands and options"),
+ "",
+ options::opts::show_hidden_commands)
+{
+ if (args.size() < 1) // no command named: request the top-level overview
+ {
+ app.opts.help = true;
+ throw usage(command_id()); // help is delivered by throwing usage with an empty identifier
+ }
+
+ command_id id = commands::complete_command(args); // resolve the typed words first, before the help flag is set
+ app.opts.help = true;
+ throw usage(id);
+}
+
+CMD_NO_WORKSPACE(version, "version", "", CMD_REF(informative), "",
+ N_("Shows the program version"),
+ "",
+ options::opts::full)
+{
+ E(args.empty(), origin::user, // the command takes options only, no positional arguments
+ F("no arguments allowed"));
+
+ if (app.opts.full) // the "full" option selects print_full_version() over print_version()
+ print_full_version();
+ else
+ print_version();
+}
+
+CMD_HIDDEN(check_globish, "check_globish", "", CMD_REF(debug),
+           "globish string",
+           N_("Check that a particular globish matches a particular string"),
+           "",
+           options::opts::none)
+{ // Succeeds when the pattern (first operand) matches the string (second).
+  if (args.size() != 2) throw usage(execid);  // wrong operand count: show usage, as "crash" does
+  globish g = typecast_vocab<globish>(idx(args,0));
+  string s(idx(args,1)());
+  // A mismatch is reported as a user error that quotes both operands.
+  E(g.matches(s), origin::user,
+    F("Globish <%s> does not match string <%s>") % g % s);
+}
+
+CMD_HIDDEN(crash, "crash", "", CMD_REF(debug),
+           "{ N | E | I | double-throw | exception | signal }",
+           N_("Triggers the specified kind of crash"),
+           "",
+           options::opts::none)
+{ // Fails deliberately, in the way selected by the single argument.
+  if (args.size() != 1)
+    throw usage(execid);
+  bool spoon_exists(false);  // always false, so every check on it fails
+  if (idx(args,0)() == "N")                      // user-origin error
+    E(spoon_exists, origin::user, i18n_format("There is no spoon."));
+  else if (idx(args,0)() == "E")                 // system-origin error
+    E(spoon_exists, origin::system, i18n_format("There is no spoon."));
+  else if (idx(args,0)() == "I")                 // invariant failure
+    {
+      I(spoon_exists);
+    }
+  else if (idx(args,0)() == "double-throw")
+    {
+      // This code is rather picky, for example I(false) in the destructor
+      // won't always work like it should; see http://bugs.debian.org/516862
+      class throwing_dtor
+      {
+      public:
+        throwing_dtor() {}
+        ~throwing_dtor()
+        {
+          throw std::exception();
+        }
+      };
+      throwing_dtor td;
+      throw std::exception();  // td's destructor then throws during unwinding
+    }
+#define maybe_throw(ex) if(idx(args,0)()==#ex) throw ex("There is no spoon.")
+#define maybe_throw_bare(ex) if(idx(args,0)()==#ex) throw ex()
+  else maybe_throw_bare(std::bad_alloc);   // argument is the exception's type name
+  else maybe_throw_bare(std::bad_cast);
+  else maybe_throw_bare(std::bad_typeid);
+  else maybe_throw_bare(std::bad_exception);
+  else maybe_throw_bare(std::exception);
+  else maybe_throw(std::domain_error);
+  else maybe_throw(std::invalid_argument);
+  else maybe_throw(std::length_error);
+  else maybe_throw(std::out_of_range);
+  else maybe_throw(std::range_error);
+  else maybe_throw(std::overflow_error);
+  else maybe_throw(std::underflow_error);
+  else maybe_throw(std::logic_error);
+  else maybe_throw(std::runtime_error);
+  else
+    {
+#ifndef _WIN32
+      try
+        {
+          int signo = boost::lexical_cast<int>(idx(args,0)());
+          if (0 < signo && signo <= 15)  // only signal numbers 1..15 are raised
+            {
+              raise(signo);
+              // control should not get here...
+              I(!"crash: raise returned");
+            }
+        }
+      catch (boost::bad_lexical_cast&)
+        { // fall through and throw usage
+        }
+#endif
+      throw usage(execid);
+    }
+#undef maybe_throw
+#undef maybe_throw_bare
+}
+
+// There isn't really a better place for this function.
+
+void
+process_commit_message_args(options const & opts,
+                            bool & given,              // out: whether any message was produced
+                            utf8 & log_message,        // out: the resulting message
+                            utf8 const & message_prefix)
+{
+  // can't have both a --message and a --message-file ...
+  E(!opts.message_given || !opts.msgfile_given, origin::user,
+    F("--message and --message-file are mutually exclusive"));
+  // A non-empty prefix is put before the text unless opts.no_prefix is set.
+  if (opts.message_given)
+    {
+      string msg;
+      join_lines(opts.message, msg);
+      log_message = utf8(msg, origin::user);
+      if (!opts.no_prefix && message_prefix().length() != 0)
+        log_message = utf8(message_prefix() + "\n\n" + log_message(),
+                           origin::user);
+      given = true;
+    }
+  else if (opts.msgfile_given)
+    {
+      data dat;
+      read_data_for_command_line(opts.msgfile, dat);
+      external dat2 = typecast_vocab<external>(dat);
+      system_to_utf8(dat2, log_message);  // file contents are converted to utf8
+      if (!opts.no_prefix && message_prefix().length() != 0)
+        log_message = utf8(message_prefix() + "\n\n" + log_message(),
+                           origin::user);
+      given = true;
+    }
+  else if (message_prefix().length() != 0)  // no message option: the prefix alone
+    {
+      log_message = message_prefix;
+      given = true;
+    }
+  else
+    given = false;
+}
+
+// Local Variables:
+// mode: C++
+// fill-column: 76
+// c-file-style: "gnu"
+// indent-tabs-mode: nil
+// End:
+// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
--- globish.cc
+++ globish.cc
@@ -0,0 +1,558 @@
+// Copyright (C) 2005 Nathaniel Smith
+// 2007 Zack Weinberg
+//
+// This program is made available under the GNU GPL version 2.0 or
+// greater. See the accompanying file COPYING for details.
+//
+// This program is distributed WITHOUT ANY WARRANTY; without even the
+// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+// PURPOSE.
+
+#include "base.hh"
+#include "sanity.hh"
+#include "globish.hh"
+#include "option.hh" // for arg_type
+#include "numeric_vocab.hh"
+
+#include
+#include
+
+using std::string;
+using std::vector;
+using std::back_inserter;
+using std::back_insert_iterator;
+
+// The algorithm here is originally from pdksh 5. That implementation uses
+// the high bit of unsigned chars as a quotation flag. We can't do that,
+// because we need to be utf8 clean. Instead, we copy the string and
+// replace "live" metacharacters with single bytes from the
+// control-character range. This is why bytes <= 0x1f are not allowed in the
+// pattern.
+
+enum metachar { // codes stored in compiled patterns; all are below ' ', which do_match uses to tell them from literals
+ META_STAR = 1, // *
+ META_QUES, // ?
+ META_CC_BRA, // [
+ META_CC_INV_BRA, // [^ or [!
+ META_CC_KET, // ] (matches either of the above two)
+ META_ALT_BRA, // {
+ META_ALT_OR, // , (when found inside unquoted { ... })
+ META_ALT_KET, // }
+};
+
+// Compile a character class.
+
+static string::const_iterator
+compile_charclass(string const & pat, string::const_iterator p, // pat: whole pattern (for messages); p: points at the opening '['
+ back_insert_iterator & to, // to: receives the compiled class. NOTE(review): the template argument list appears to have been lost here and on the widen() calls
+ origin::type made_from) // made_from: origin passed to every E() check
+{
+ string in_class; // the literal members of the class, ranges already expanded
+ char bra = (char)META_CC_BRA;
+
+ p++;
+ E(p != pat.end(), made_from,
+ F("invalid pattern '%s': unmatched '['") % pat);
+
+ if (*p == '!' || *p == '^') // a leading '!' or '^' makes the class inverted
+ {
+ bra = (char)META_CC_INV_BRA;
+ p++;
+ E(p != pat.end(), made_from,
+ F("invalid pattern '%s': unmatched '['") % pat);
+ }
+
+ while (p != pat.end() && *p != ']')
+ {
+ if (*p == '\\') // backslash: take the following character literally
+ {
+ p++;
+ if (p == pat.end())
+ break;
+ }
+ // A dash at the beginning or end of the pattern is literal.
+ else if (*p == '-'
+ && !in_class.empty()
+ && p+1 != pat.end()
+ && p[1] != ']')
+ {
+ p++;
+ if (*p == '\\')
+ p++;
+ if (p == pat.end())
+ break;
+
+ // the cast is needed because boost::format will not obey the %x
+ // if given a 'char'.
+ E((widen(*p)) >= ' ', made_from,
+ F("invalid pattern '%s': control character 0x%02x is not allowed")
+ % pat % (widen(*p)));
+
+ unsigned int start = widen(in_class.end()[-1]); // range starts at the member added last
+ unsigned int stop = widen(*p);
+
+ E(start != stop, made_from,
+ F("invalid pattern '%s': "
+ "one-element character ranges are not allowed") % pat);
+ E(start < stop, made_from,
+ F("invalid pattern '%s': "
+ "endpoints of a character range must be in "
+ "ascending numeric order") % pat);
+ E(start < 0x80 && stop < 0x80, made_from,
+ F("invalid pattern '%s': cannot use non-ASCII characters "
+ "in classes") % pat);
+
+ L(FL("expanding range from %X (%c) to %X (%c)")
+ % (start+1) % (char)(start+1) % stop % (char)stop);
+
+ for (unsigned int r = start + 1; r < stop; r++) // interior only: start is already a member, stop is added below
+ in_class.push_back((char)r);
+ }
+ else
+ E(*p != '[', made_from,
+ F("syntax error in '%s': "
+ "character classes may not be nested") % pat);
+
+ E((widen(*p)) >= ' ', made_from,
+ F("invalid pattern '%s': control character 0x%02x is not allowed")
+ % pat % (widen(*p)));
+
+ E((widen(*p)) < 0x80, made_from,
+ F("invalid pattern '%s': cannot use non-ASCII characters in classes")
+ % pat);
+
+ in_class.push_back(*p);
+ p++;
+ }
+
+ E(p != pat.end(), made_from, // the loop ended without reaching a closing ']'
+ F("invalid pattern '%s': unmatched '['") % pat);
+
+ E(!in_class.empty(), made_from,
+ F("invalid pattern '%s': empty character class") % pat);
+
+ // minor optimization: one-element non-inverted character class becomes
+ // the character.
+ if (bra == (char)META_CC_BRA && in_class.size() == 1)
+ *to++ = in_class[0];
+ else
+ {
+ *to++ = bra;
+ std::sort(in_class.begin(), in_class.end()); // members are emitted in sorted order
+ std::copy(in_class.begin(), in_class.end(), to);
+ *to++ = (char)META_CC_KET;
+ }
+ return p; // points at the closing ']'; the caller's loop steps past it
+}
+
+// Compile one fragment of a glob pattern.
+
+static void
+compile_frag(string const & pat, back_insert_iterator & to, // pat: one glob pattern; to: receives its compiled form. NOTE(review): the template argument list appears to have been lost here and on the widen() calls
+ origin::type made_from) // made_from: origin passed to every E() check
+{
+ unsigned int brace_depth = 0; // number of currently open '{'
+
+ for (string::const_iterator p = pat.begin(); p != pat.end(); p++)
+ switch (*p)
+ {
+ default: // ordinary character: copied through after the control-character check
+ E((widen(*p)) >= ' ', made_from,
+ F("invalid pattern '%s': control character 0x%02x is not allowed")
+ % pat % (widen(*p)));
+
+ *to++ = *p;
+ break;
+
+ case '*':
+ // optimization: * followed by any sequence of ?s and *s is
+ // equivalent to the number of ?s that appeared in the sequence,
+ // followed by a single star. the latter can be matched without
+ // nearly as much backtracking.
+
+ for (p++; p != pat.end(); p++)
+ {
+ if (*p == '?')
+ *to++ = META_QUES;
+ else if (*p != '*')
+ break;
+ }
+
+ p--; // step back so the outer loop's p++ lands on the first character after the run
+ *to++ = META_STAR;
+ break;
+
+ case '?':
+ *to++ = META_QUES;
+ break;
+
+ case '\\': // backslash: the next character is emitted as a literal
+ p++;
+ E(p != pat.end(), made_from,
+ F("invalid pattern '%s': un-escaped \\ at end") % pat);
+
+ E((widen(*p)) >= ' ', made_from,
+ F("invalid pattern '%s': control character 0x%02x is not allowed")
+ % pat % (widen(*p)));
+
+ *to++ = *p;
+ break;
+
+ case '[':
+ p = compile_charclass(pat, p, to, made_from); // returns with p on the closing ']'
+ break;
+
+ case ']': // a ']' seen here has no opening '['; no break follows because E(false, ...) always fails
+ E(false, made_from, F("invalid pattern '%s': unmatched ']'") % pat);
+
+ case '{':
+ // There's quite a bit of optimization we could be doing on
+ // alternatives, but it's hairy, especially if you get into
+ // nested alternatives; so we're not doing any of it now.
+ // (Look at emacs's regexp-opt.el for inspiration.)
+ brace_depth++;
+ E(brace_depth < 6, made_from, // at most five nested levels are accepted
+ F("invalid pattern '%s': braces nested too deeply") % pat);
+ *to++ = META_ALT_BRA;
+ break;
+
+ case ',': // a separator only inside braces; a plain comma otherwise
+ if (brace_depth > 0)
+ *to++ = META_ALT_OR;
+ else
+ *to++ = ',';
+ break;
+
+ case '}':
+ E(brace_depth > 0, made_from,
+ F("invalid pattern '%s': unmatched '}'") % pat);
+ brace_depth--;
+ *to++ = META_ALT_KET;
+ break;
+ }
+
+ E(brace_depth == 0, made_from,
+ F("invalid pattern '%s': unmatched '{'") % pat);
+}
+
+// common code used by the constructors: compiles the single pattern pat
+// and returns its compiled form; made_from is the origin used for errors.
+static inline string
+compile(string const & pat, origin::type made_from)
+{
+  string s;
+  back_insert_iterator<string> to = back_inserter(s);
+  compile_frag(pat, to, made_from);
+  return s;
+}
+
+static inline string // compiles a sequence of patterns into one pattern that matches any of them
+compile(vector::const_iterator const & beg, // NOTE(review): the vector's element type and the back_insert_iterator argument appear to have been lost in this block
+ vector::const_iterator const & end)
+{
+ if (end - beg == 0) // no patterns: empty compiled pattern
+ return "";
+ if (end - beg == 1) // exactly one: no alternation wrapper needed
+ return compile((*beg)(), origin::user);
+
+ string s;
+ back_insert_iterator to = back_inserter(s);
+
+ *to++ = META_ALT_BRA; // wrap everything as one alternation: { p1 , p2 , ... }
+ vector::const_iterator i = beg;
+ for (;;)
+ {
+ compile_frag((*i)(), to, origin::user);
+ i++;
+ if (i == end)
+ break;
+ *to++ = META_ALT_OR; // separator goes between fragments only, not after the last
+ }
+ *to++ = META_ALT_KET;
+ return s;
+}
+
+globish::globish(string const & p, origin::type made_from) // from one pattern string; it is compiled immediately
+ : origin_aware(made_from),
+ compiled_pattern(compile(p, made_from)) {}
+globish::globish(char const * p, origin::type made_from) // same, from a C string
+ : origin_aware(made_from),
+ compiled_pattern(compile(p, made_from)) {}
+
+globish::globish(vector const & p) // from several patterns, any of which may match; origin is always origin::user. NOTE(review): the vector element type appears to have been lost
+ : origin_aware(origin::user),
+ compiled_pattern(compile(p.begin(), p.end())) {}
+globish::globish(vector::const_iterator const & beg, // same, from an iterator range
+ vector::const_iterator const & end)
+ : origin_aware(origin::user),
+ compiled_pattern(compile(beg, end)) {}
+
+// Debugging.
+
+static string
+decode(string::const_iterator p, string::const_iterator end)
+{
+  // Renders a compiled pattern [p, end) back into readable glob syntax.
+  string text;
+  while (p != end)
+    {
+      char const c = *p++;
+      switch (c)
+        {
+        case META_STAR:       text += '*';  break;
+        case META_QUES:       text += '?';  break;
+        case META_CC_BRA:     text += '[';  break;
+        case META_CC_KET:     text += ']';  break;
+        case META_CC_INV_BRA: text += "[!"; break;
+        case META_ALT_BRA:    text += '{';  break;
+        case META_ALT_KET:    text += '}';  break;
+        case META_ALT_OR:     text += ',';  break;
+        // Only special in some contexts, but always escaping is harmless.
+        case '[': case ']': case '-': case '!': case '^':
+        case '{': case '}': case ',':
+        case '*': case '?': case '\\':
+          text += '\\';
+          // fall through
+        default:
+          text += c;
+        }
+    }
+  return text;
+}
+
+string
+globish::operator()() const // returns the pattern in glob syntax, rebuilt from the compiled form by decode()
+{
+ return decode(compiled_pattern.begin(), compiled_pattern.end());
+}
+
+bool
+globish::contains_meta_chars() const
+{
+  // True as soon as any byte of the compiled pattern is a metachar code.
+  for (string::const_iterator cur = compiled_pattern.begin(),
+         last = compiled_pattern.end(); cur != last; ++cur)
+    {
+      switch (*cur)
+        {
+        case META_STAR: case META_QUES:
+        case META_CC_BRA: case META_CC_INV_BRA: case META_CC_KET:
+        case META_ALT_BRA: case META_ALT_OR: case META_ALT_KET:
+          return true;
+        default:
+          break; // an ordinary literal byte; keep scanning
+        }
+    }
+  return false;
+}
+
+template <> void dump(globish const & g, string & s) // dump() specialization: stores the pattern's glob-syntax text in s
+{
+ s = g();
+}
+
+std::ostream & operator<<(std::ostream & o, globish const & g) // writes the pattern's glob-syntax text, as produced by operator()
+{
+ return o << g();
+}
+
+// Matching.
+
+static string::const_iterator
+find_next_subpattern(string::const_iterator p, // p: first position inside an alternation that is already open
+ string::const_iterator pe, // pe: end of the compiled pattern
+ bool want_alternatives) // true: also stop at a separator of the current alternation
+{
+ L(FL("Finding subpattern in '%s'") % decode(p, pe));
+ unsigned int depth = 1; // starts at 1 because the caller has already consumed the opening brace
+ for (; p != pe; p++)
+ switch (*p)
+ {
+ default:
+ break;
+
+ case META_ALT_BRA:
+ depth++;
+ break;
+
+ case META_ALT_KET:
+ depth--;
+ if (depth == 0) // the brace that closes the alternation we started in
+ return p+1; // position just after it
+ break;
+
+ case META_ALT_OR:
+ if (depth == 1 && want_alternatives) // separators of nested alternations are skipped
+ return p+1; // position just after the separator
+ break;
+ }
+
+ I(false); // reaching the end means the braces were unbalanced
+}
+
+
+static bool
+do_match(string::const_iterator sb, string::const_iterator se,
+         string::const_iterator p, string::const_iterator pe)
+{
+  unsigned int sc, pc;
+  string::const_iterator s(sb);
+  // Returns true iff the compiled pattern [p, pe) matches all of [sb, se).
+  L(FL("subpattern: '%s' against '%s'") % string(s,se) % decode(p,pe));
+
+  while (p < pe)
+    {
+      // pc will be the current pattern character
+      // p will point after pc
+      pc = widen<unsigned int, char>(*p++);
+      // sc will be the current string character, or 0 at end of string
+      // s will point to sc
+      if(s < se) {
+        sc = widen<unsigned int, char>(*s);
+      } else {
+        sc = 0;
+      }
+      switch (pc)
+        {
+        default:           // literal
+          if (sc != pc)
+            return false;
+          break;
+
+        case META_QUES:        // any single character
+          if (sc == 0)
+            return false;
+          break;
+
+        case META_CC_BRA:      // any of these characters
+          {
+            bool matched = false;
+            I(p < pe);
+            I(*p != META_CC_KET);
+            do
+              {
+                if (widen<unsigned int, char>(*p) == sc)
+                  matched = true;
+                p++;
+                I(p < pe);
+              }
+            while (*p != META_CC_KET);
+            if (!matched)
+              return false;
+          }
+          p++;
+          break;
+
+        case META_CC_INV_BRA:  // any but these characters
+          I(p < pe);
+          I(*p != META_CC_KET);
+          do
+            {
+              if (s >= se || widen<unsigned int, char>(*p) == sc)  // a class needs a character to consume
+                return false;
+              p++;
+              I(p < pe);
+            }
+          while (*p != META_CC_KET);
+          p++;
+          break;
+
+        case META_STAR:        // zero or more arbitrary characters
+          if (p == pe)
+            return true; // star at end always matches, if we get that far
+
+          pc = widen<unsigned int, char>(*p);
+          // If the next character in p is not magic, we can only match
+          // starting from places in s where that character appears.
+          if (pc >= ' ')
+            {
+              L(FL("after *: looking for '%c' in '%s'")
+                % (char)pc % string(s, se));
+              p++;
+              // Try each occurrence of pc as the character that ends the
+              // star's run; the rest of the pattern must then match what
+              // follows it.  s is never advanced beyond se.
+              for (; s < se; ++s)
+                {
+                  if (widen<unsigned int, char>(*s) == pc
+                      && do_match(s + 1, se, p, pe))
+                    return true;
+                }
+            }
+          else
+            {
+              L(FL("metacharacter after *: doing it the slow way"));
+              for (;; ++s)  // every split point, the empty remainder included
+                {
+                  if (do_match(s, se, p, pe))
+                    return true;
+                  if (s >= se)
+                    break;
+                }
+            }
+          return false;
+
+        case META_ALT_BRA:
+          {
+            string::const_iterator prest, psub, pnext;
+            string::const_iterator srest;
+
+            prest = find_next_subpattern(p, pe, false);
+            psub = p;
+            // [ psub ... prest ) is the current bracket pair
+            // (including the *closing* bracket, but not the opening bracket)
+            do
+              {
+                pnext = find_next_subpattern(psub, pe, true);
+                // pnext points just after a comma or the closing bracket
+                // [ psub ... pnext ) is one branch with trailing delimiter
+                srest = (prest == pe ? se : s);
+                for (; srest < se; srest++)
+                  {
+                    if (do_match(s, srest, psub, pnext - 1)
+                        && do_match(srest, se, prest, pe))
+                      return true;
+                  }
+                // try the empty target too
+                if (do_match(s, srest, psub, pnext - 1)
+                    && do_match(srest, se, prest, pe))
+                  return true;
+
+                psub = pnext;
+              }
+            while (pnext < prest);
+            return false;
+          }
+        }
+      if (s < se)
+        {
+          ++s;
+        }
+    }
+  return s == se;
+}
+
+bool globish::matches(string const & target) const
+{
+  // An empty compiled pattern is defined to match nothing at all, so the
+  // matcher is only consulted for a non-empty one; && short-circuits
+  // exactly as an explicit if/else would.
+  bool const verdict = !compiled_pattern.empty()
+    && do_match (target.begin(), target.end(),
+                 compiled_pattern.begin(), compiled_pattern.end());
+
+  // Log the outcome together with the pattern in glob syntax.
+  char const * const outcome = verdict ? "matches" : "does not match";
+  L(FL("matching '%s' against '%s': %s")
+    % target % (*this)() % outcome);
+  return verdict;
+}
+
+
+// Local Variables:
+// mode: C++
+// fill-column: 76
+// c-file-style: "gnu"
+// indent-tabs-mode: nil
+// End:
+// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: