#
#
# patch "ChangeLog"
# from [49c9aa7b2d19c2e0d8a55800e9f9082df054ec9d]
# to [59ff63b9f2b1f9cfbe8c39483127487fbef17381]
#
# patch "pcrewrap.cc"
# from [cf2674222370e2902e15901902961e4e603c12c0]
# to [ed186faf44253b9bf2d169bb900a31bd75c479f2]
#
# patch "pcrewrap.hh"
# from [206922378c80ba9fd283ccd510af1e7857328ef6]
# to [7cb55a52af7a1085561a5ace6451893fce56d804]
#
============================================================
--- ChangeLog 49c9aa7b2d19c2e0d8a55800e9f9082df054ec9d
+++ ChangeLog 59ff63b9f2b1f9cfbe8c39483127487fbef17381
@@ -1,3 +1,12 @@
+2006-12-27 Zack Weinberg
+
+ * pcrewrap.cc, pcrewrap.hh: Remove redundant includes.
+ Add a couple of "using"s. Improve commentary. Fix formatting.
+ Add editor control comments to bottoms of files, and de-tabify.
+ Refactor construction of regex objects to facilitate precompilation.
+ Always call pcre_study at construction time, and remove separate
+ study() method.
+
2006-12-18 Zack Weinberg
* pcre: New directory, contains trimmed-down version of PCRE library.
============================================================
--- pcrewrap.cc cf2674222370e2902e15901902961e4e603c12c0
+++ pcrewrap.cc ed186faf44253b9bf2d169bb900a31bd75c479f2
@@ -1,15 +1,13 @@
-#include
-#include
-#include
-#include
-
#include "pcrewrap.hh"
#define pcre pcre_t
#include "pcre.h"
#undef pcre
-static unsigned int
+using std::string;
+using std::runtime_error;
+
+inline unsigned int
flags_to_internal(pcre::flags f)
{
using namespace pcre;
@@ -17,7 +15,7 @@ flags_to_internal(pcre::flags f)
unsigned int i = 0;
i |= C(f, NEWLINE_CR);
i |= C(f, NEWLINE_LF);
- // NEWLINE_CRLF is handled above
+ // NEWLINE_CRLF == NEWLINE_CR|NEWLINE_LF and so is handled above
i |= C(f, ANCHORED);
i |= C(f, NOTBOL);
i |= C(f, NOTEOL);
@@ -30,110 +28,117 @@ flags_to_internal(pcre::flags f)
i |= C(f, FIRSTLINE);
i |= C(f, MULTILINE);
i |= C(f, UNGREEDY);
+#undef C
return i;
}
-namespace pcre
+inline std::pair
+compile(const char * pattern, pcre::flags options)
{
- void regex::init(const char *pattern, pcre::flags options)
- {
- int erroff;
- const char *err;
- basedat = static_cast
- (pcre_compile(pattern, flags_to_internal(options), &err, &erroff, 0));
- if (!basedat)
- throw compile_error(err, erroff, pattern);
+ int erroff;
+ const char * err;
+ const pcre_t * basedat = pcre_compile(pattern, flags_to_internal(options),
+ &err, &erroff, 0);
+ if (!basedat)
+ throw pcre::compile_error(err, erroff, pattern);
- int errcode = pcre_fullinfo(static_cast(basedat), 0,
- PCRE_INFO_CAPTURECOUNT,
- static_cast(&capturecount));
- if (errcode < 0)
- throw compile_error((F("pcre_fullinfo error %d") % errcode).str().c_str(),
- 0, pattern);
- }
+ const pcre_extra * extradat = pcre_study(basedat, 0, &err);
+ if (err)
+ throw pcre::study_error(err);
- regex::regex(const char *pattern, pcre::flags options)
- : basedat(0), extradat(0), capturecount(0)
- {
- this->init(pattern, options);
- }
+ return std::make_pair(static_cast(basedat),
+ static_cast(extradat));
+}
- regex::regex(const std::string &pattern, pcre::flags options)
- : basedat(0), extradat(0), capturecount(0)
- {
- this->init(pattern.c_str(), options);
- }
+inline unsigned int
+get_capturecount(const void * bd)
+{
+ unsigned int cc;
+ int err = pcre_fullinfo(static_cast(bd), 0,
+ PCRE_INFO_CAPTURECOUNT,
+ static_cast(&cc));
+ if (err < 0)
+ throw pcre::fullinfo_error(err);
+ return cc;
+}
+namespace pcre
+{
+ regex::regex(const char * pattern, flags options)
+ : basic_regex(compile(pattern, options))
+ {}
+
+ regex::regex(const string & pattern, flags options)
+ : basic_regex(compile(pattern.c_str(), options))
+ {}
+
regex::~regex()
{
if (basedat)
- pcre_free(const_cast(basedat));
+ pcre_free(const_cast(basedat));
if (extradat)
- pcre_free(const_cast(extradat));
+ pcre_free(const_cast(extradat));
}
- void regex::study()
+ bool
+ basic_regex::match(const string & subject, matches & result,
+ string::const_iterator startptr,
+ flags options) const
{
- const char *err;
- extradat = static_cast
- (pcre_study(static_cast(basedat), 0, &err));
- if (err)
- throw study_error(err);
- }
+ // pcre_exec wants its caller to provide three integer slots per
+ // capturing paren, plus three more for the whole-pattern match.
+ // On exit from pcre_exec, the first two-thirds of the vector will be
+ // pairs of integers representing [start, end) offsets within the
+ // string. pcre_exec uses the remaining third of the vector for a
+ // scratchpad. (Why can't it allocate its own damn scratchpad?)
+ unsigned int capturecount = get_capturecount(basedat);
+ std::vector ovec((capturecount + 1) * 3);
- bool
- regex::match(const std::string &subject, matches &result,
- std::string::const_iterator startptr,
- pcre::flags options) const
- {
+ // convert the start pointer to an offset within the string (the &*
+ // converts each iterator to a bare pointer, which can be subtracted --
+ // you should be able to subtract random-access iterators directly,
+ // grumble)
int startoffset = 0;
- if (startptr != std::string::const_iterator(0))
+ if (startptr != string::const_iterator(0))
startoffset = &*startptr - &*subject.data();
-
- // pcre_exec has a bizarro calling convention. It wants ovec to
- // provide three integer slots per capturing paren, plus three
- // more (for the whole-pattern match). The first two-thirds of
- // the vector will contain useful pairs of integers on exit from
- // pcre_exec; the last third will be used as scribble space by
- // pcre_exec. (Why can't it allocate its own damn scribble space?)
- std::vector ovec((capturecount + 1) * 3);
+
int rc = pcre_exec(static_cast(basedat),
- static_cast(extradat),
- subject.data(), subject.size(),
- startoffset,
- flags_to_internal(options),
- &ovec.front(), ovec.size()); // ??? ovec.data()
+ static_cast(extradat),
+ subject.data(), subject.size(),
+ startoffset,
+ flags_to_internal(options),
+ &ovec.front(), ovec.size()); // ??? ovec.data()
if (rc >= 0)
{
- // If the return value is nonnegative, the pattern matched,
- // and rc is one more than the number of pairs of integers in
- // ovec that are meaningful.
- result.clear();
- result.reserve(capturecount + 1);
- for (int i = 0; i < rc * 2; i += 2)
- {
- if (ovec[i] == -1 && ovec[i+1] == -1)
- result.push_back(capture(std::string::const_iterator(0),
- std::string::const_iterator(0)));
- else if (ovec[i] == -1 || ovec[i+1] == -1)
- throw match_error(PCRE_ERROR_INTERNAL); // should never happen
- else
- result.push_back(capture(subject.begin() + ovec[i],
- subject.begin() + ovec[i+1]));
- }
- for (int i = rc; i < capturecount + 1; i++)
- result.push_back(capture(std::string::const_iterator(0),
- std::string::const_iterator(0)));
- I(result.size() == capturecount + 1);
- return true;
+ // If the return value is nonnegative, the pattern matched,
+ // and rc is one more than the number of pairs of integers in
+ // ovec that are meaningful.
+ result.clear();
+ result.reserve(capturecount + 1);
+ for (int i = 0; i < rc * 2; i += 2)
+ {
+ if (ovec[i] == -1 && ovec[i+1] == -1)
+ result.push_back(capture(string::const_iterator(0),
+ string::const_iterator(0)));
+ else if (ovec[i] == -1 || ovec[i+1] == -1)
+ throw match_error(PCRE_ERROR_INTERNAL); // should never happen
+ else
+ result.push_back(capture(subject.begin() + ovec[i],
+ subject.begin() + ovec[i+1]));
+ }
+ for (unsigned int i = rc; i < capturecount + 1; i++)
+ result.push_back(capture(string::const_iterator(0),
+ string::const_iterator(0)));
+ I(result.size() == capturecount + 1);
+ return true;
}
else if (rc == PCRE_ERROR_NOMATCH)
{
- result = matches(capturecount + 1,
- capture(std::string::const_iterator(0),
- std::string::const_iterator(0)));
- I(result.size() == capturecount + 1);
- return false;
+ result = matches(capturecount + 1,
+ capture(string::const_iterator(0),
+ string::const_iterator(0)));
+ I(result.size() == capturecount + 1);
+ return false;
}
else
throw match_error(rc);
@@ -142,18 +147,18 @@ namespace pcre
// This overload is for when you don't care about captures, only
// whether or not it matched.
bool
- regex::match(const std::string &subject,
- std::string::const_iterator startptr,
- pcre::flags options) const
+ basic_regex::match(const string & subject,
+ string::const_iterator startptr,
+ flags options) const
{
int startoffset = 0;
- if (startptr != std::string::const_iterator(0))
+ if (startptr != string::const_iterator(0))
startoffset = &*startptr - &*subject.data();
int rc = pcre_exec(static_cast(basedat),
- static_cast(extradat),
- subject.data(), subject.size(),
- startoffset, flags_to_internal(options), 0, 0);
+ static_cast(extradat),
+ subject.data(), subject.size(),
+ startoffset, flags_to_internal(options), 0, 0);
if (rc == 0)
return true;
else if (rc == PCRE_ERROR_NOMATCH)
@@ -164,14 +169,15 @@ namespace pcre
// error handling.
- static std::string
- compile_error_message(const char *err, int offset, const char *pattern)
+ static string
+ compile_error_message(const char * err, int offset, const char * pattern)
{
return (F("parse error at char %d in pattern '%s': %s")
- % offset % pattern % err).str();
+ % offset % pattern % err).str();
}
- compile_error::compile_error(const char *err, int offset, const char *pattern)
+ compile_error::compile_error(const char * err, int offset,
+ const char * pattern)
: std::runtime_error(compile_error_message(err, offset, pattern))
{}
@@ -179,4 +185,17 @@ namespace pcre
: std::runtime_error((F("Error during matching, code %d") % code).str())
{}
+ fullinfo_error::fullinfo_error(int code)
+ : std::runtime_error((F("Error getting capture count, code %d") % code)
+ .str())
+ {}
+
} // namespace pcre
+
+// Local Variables:
+// mode: C++
+// fill-column: 76
+// c-file-style: "gnu"
+// indent-tabs-mode: nil
+// End:
+// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
============================================================
--- pcrewrap.hh 206922378c80ba9fd283ccd510af1e7857328ef6
+++ pcrewrap.hh 7cb55a52af7a1085561a5ace6451893fce56d804
@@ -19,12 +19,12 @@ namespace pcre
NEWLINE_CR = 0x0001, // newline is \r
NEWLINE_LF = 0x0002, // newline is \n
NEWLINE_CRLF = (NEWLINE_CR|NEWLINE_LF), // newline is \r\n
- ANCHORED = 0x0004, // match only at beginning
+ ANCHORED = 0x0004, // match only at beginning
// of string (\A in pat)
// flags usable only with pcre_exec
- NOTBOL = 0x0008, // beginning of string isn't beginning of line
- NOTEOL = 0x0010, // end of string isn't end of line
- NOTEMPTY = 0x0020, // an empty match is a match failure
+ NOTBOL = 0x0008, // beginning of string isn't beginning of line
+ NOTEOL = 0x0010, // end of string isn't end of line
+ NOTEMPTY = 0x0020, // an empty match is a match failure
// flags usable only with pcre_compile
CASELESS = 0x0040, // case insensitive match (?i)
@@ -47,16 +47,16 @@ namespace pcre
// object provides a couple of helper operations, matched() and str(),
// for common use cases.
struct capture : public std::pair
+ std::string::const_iterator>
{
capture(std::string::const_iterator a,
- std::string::const_iterator b)
+ std::string::const_iterator b)
: std::pair
- (a, b)
+ (a, b)
{ I((a == std::string::const_iterator(0)
- && b == std::string::const_iterator(0))
- || (a != std::string::const_iterator(0)
- && b != std::string::const_iterator(0))); }
+ && b == std::string::const_iterator(0))
+ || (a != std::string::const_iterator(0)
+ && b != std::string::const_iterator(0))); }
bool matched() { return (this->first != std::string::const_iterator(0)); }
std::string str() { return std::string(this->first, this->second); }
@@ -69,73 +69,92 @@ namespace pcre
// expression.
typedef std::vector matches;
- // A regex object is the compiled form of a PCRE regular expression.
- class regex
+ // A basic_regex object is the compiled form of a PCRE regular expression.
+ // You never construct this directly.
+ struct basic_regex
{
- const void *basedat;
- const void *extradat;
- int capturecount;
+ private:
+ // disable the default and copy constructors
+ basic_regex();
+ basic_regex(const basic_regex &);
+ basic_regex & operator=(const basic_regex &);
- // default and copy constructors are restricted
- regex();
- regex(const regex &);
- regex &operator=(const regex &);
+ protected:
+ const void * const basedat;
+ const void * const extradat;
- // thanks to silly C++ we have to have an internal "initialize" method
- void init(const char *, pcre::flags);
+ // for use only by subclass constructors
+ basic_regex(const void * b, const void * e) : basedat(b), extradat(e) {}
+ basic_regex(std::pair p)
+ : basedat(p.first), extradat(p.second) {}
public:
- regex(const char *pattern, pcre::flags options = DEFAULT);
- regex(const std::string &pattern, pcre::flags options = DEFAULT);
- ~regex();
+ ~basic_regex() {}
- void study(); // do extra upfront work to speed up subsequent matches
+ bool match(const std::string & subject, matches & result,
+ std::string::const_iterator startoffset
+ = std::string::const_iterator(),
+ pcre::flags options = DEFAULT) const;
- bool match(const std::string &subject, matches &result,
- std::string::const_iterator startoffset
- = std::string::const_iterator(),
- pcre::flags options = DEFAULT) const;
+ bool match(const std::string & subject,
+ std::string::const_iterator startoffset
+ = std::string::const_iterator(),
+ pcre::flags options = DEFAULT) const;
- bool match(const std::string &subject,
- std::string::const_iterator startoffset
- = std::string::const_iterator(),
- pcre::flags options = DEFAULT) const;
-
// helper function which starts successive matches at the position
// where the last match left off.
- bool nextmatch(const std::string &subject, matches &result,
- pcre::flags options = DEFAULT) const
+ bool nextmatch(const std::string & subject, matches & result,
+ pcre::flags options = DEFAULT) const
{
std::string::const_iterator startoffset(0);
if (result.size() > 0 && result[0].matched())
- startoffset = result[0].second;
+ startoffset = result[0].second;
return match(subject, result, startoffset, options);
}
};
- // For later: regex variant that takes monotone's "utf8" pseudostrings and
- // sets PCRE_UTF8; named capture support.
+ // A regex is the class you are intended to use directly, in normal usage.
+ struct regex : public basic_regex
+ {
+ regex(const char * pattern, pcre::flags options = DEFAULT);
+ regex(const std::string & pattern, pcre::flags options = DEFAULT);
+ ~regex();
+ };
// exceptions thrown for errors from PCRE APIs
struct compile_error : public std::runtime_error
{
explicit compile_error(char const * error, int offset,
- char const * pattern);
- virtual ~compile_error() throw() {};
+ char const * pattern);
+ virtual ~compile_error() throw() {}
};
struct study_error : public std::runtime_error
{
explicit study_error(char const * error) : runtime_error(error) {};
- virtual ~study_error() throw() {};
+ virtual ~study_error() throw() {}
};
+ struct fullinfo_error : public std::runtime_error
+ {
+ explicit fullinfo_error(int code);
+ virtual ~fullinfo_error() throw() {}
+ };
+
struct match_error : public std::runtime_error
{
explicit match_error(int code);
- virtual ~match_error() throw() {};
+ virtual ~match_error() throw() {}
};
} // namespace pcre
#endif
+
+// Local Variables:
+// mode: C++
+// fill-column: 76
+// c-file-style: "gnu"
+// indent-tabs-mode: nil
+// End:
+// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s: