# # # patch "basic_io.hh" # from [1bdef9843eae1bdf3bf71991e25e64a7aa92e956] # to [d5795fa3518a57ec4401d063e0270ff9d7f5d6a6] # # patch "cset.cc" # from [d4b4ae843138b3b3ed84bcc6527ae3b0fec7dced] # to [7201a4eb30f678d4c9b78f540f85bcbbf3923b5c] # # patch "lua.cc" # from [a8b53b2c09dab49e7b0e932e2b7d0d4f8bca1c8b] # to [0781fe1aade5099b668056264a9424f0fbeb76c7] # # patch "revision.cc" # from [99d7804b784e245f65fea75cd1ea7d2a025ce913] # to [c70161ded1af39115360f97be69884f847e43940] # # patch "roster.cc" # from [c7931214acea75e6ad38b0b10863194796a35a1b] # to [2c97935dff20a8256d7e482fc1cd5ef95d9b4d6c] # # patch "vocab.cc" # from [d465ac7c42bafcb39d03956fc9869b928cae0184] # to [d8da5a3ac67bb8c2b29a5cb5f3e3704e1375d954] # # patch "work.cc" # from [f8ee64c721cb624b0e58ad9139b93e0abd930129] # to [b9dfdced17ced6f9849d0a7353dd9af4556c8d74] # ============================================================ --- basic_io.hh 1bdef9843eae1bdf3bf71991e25e64a7aa92e956 +++ basic_io.hh d5795fa3518a57ec4401d063e0270ff9d7f5d6a6 @@ -21,6 +21,38 @@ namespace basic_io { + inline bool is_xdigit(char x) + { + return ((x >= '0' && x <= '9') + || (x >= 'a' && x <= 'f') + || (x >= 'A' && x <= 'F')); + } + + inline bool is_alpha(char x) + { + return ((x >= 'a' && x <= 'z') + || (x >= 'A' && x <= 'Z')); + } + + inline bool is_alnum(char x) + { + return ((x >= '0' && x <= '9') + || (x >= 'a' && x <= 'z') + || (x >= 'A' && x <= 'Z')); + } + + inline bool is_space(char x) + { + return (x == ' ') + || (x == '\n') + || (x == '\t') + || (x == '\r') + || (x == '\v') + || (x == '\f'); + } + + + typedef enum { TOK_SYMBOL, @@ -33,17 +65,27 @@ input_source { size_t line, col; - std::istream & in; + std::string const & in; + std::string::const_iterator curr; std::string name; int lookahead; char c; - input_source(std::istream & i, std::string const & nm) - : line(1), col(1), in(i), name(nm), lookahead(0), c('\0') + input_source(std::string const & in, std::string const & nm) + : line(1), col(1), in(in), curr(in.begin()), name(nm), lookahead(0), c('\0') {} - inline void peek() { lookahead = in.peek(); } + inline void peek() + { + if (curr == in.end()) + lookahead = EOF; + else + lookahead = *curr; + } inline void eat() - { - in.get(c); + { + if (curr == in.end()) + return; + c = *curr; + ++curr; ++col; if (c == '\n') { @@ -59,86 +101,103 @@ tokenizer { input_source & in; - tokenizer(input_source & i) : in(i) {} + std::string::const_iterator begin; + std::string::const_iterator end; + tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr) + {} + + inline void mark() + { + begin = in.curr; + end = begin; + } + + inline void advance() + { + in.advance(); + end = in.curr; + } + + inline void store(std::string & val) + { + val.assign(begin, end); + } + inline token_type get_token(std::string & val) { - val.clear(); - val.reserve(80); in.peek(); while (true) { if (in.lookahead == EOF) return TOK_NONE; - if (!std::isspace(in.lookahead)) + if (!is_space(in.lookahead)) break; in.advance(); } - - switch (in.lookahead) - { - case '"': - { - in.advance(); - while (static_cast(in.lookahead) != '"') - { - if (in.lookahead == EOF) - in.err("input stream ended in string"); - if (static_cast(in.lookahead) == '\\') - { - // possible escape: we understand escaped quotes - // and escaped backslashes. nothing else. - in.advance(); - if (!(static_cast(in.lookahead) == '"' - || static_cast(in.lookahead) == '\\')) - { - in.err("unrecognized character escape"); - } - } - in.advance(); - val += in.c; - } - - if (static_cast(in.lookahead) != '"') - in.err("string did not end with '\"'"); - in.eat(); - - return basic_io::TOK_STRING; - } - - case '[': - { - in.advance(); - while (static_cast(in.lookahead) != ']') - { - if (in.lookahead == EOF) - in.err("input stream ended in hex string"); - if (!std::isxdigit(in.lookahead)) + if (is_alpha(in.lookahead)) + { + mark(); + while (is_alnum(in.lookahead) || in.lookahead == '_') + advance(); + store(val); + return basic_io::TOK_SYMBOL; + } + else if (in.lookahead == '[') + { + in.advance(); + mark(); + while (static_cast(in.lookahead) != ']') + { + if (in.lookahead == EOF) + in.err("input stream ended in hex string"); + if (!is_xdigit(in.lookahead)) in.err("non-hex character in hex string"); - in.advance(); - val += in.c; - } + advance(); + } + + if (static_cast(in.lookahead) != ']') + in.err("hex string did not end with ']'"); + in.eat(); - if (static_cast(in.lookahead) != ']') - in.err("hex string did not end with ']'"); - in.eat(); - - return basic_io::TOK_HEX; - } - default: - if (std::isalpha(in.lookahead)) - { - while (std::isalnum(in.lookahead) || in.lookahead == '_') - { - in.advance(); - val += in.c; - } - return basic_io::TOK_SYMBOL; - } - } - return basic_io::TOK_NONE; + store(val); + return basic_io::TOK_HEX; + } + else if (in.lookahead == '"') + { + // We can't use mark/store here, because there might + // be escaping in the string which we have to convert. + val.clear(); + in.advance(); + while (static_cast(in.lookahead) != '"') + { + if (in.lookahead == EOF) + in.err("input stream ended in string"); + if (static_cast(in.lookahead) == '\\') + { + // possible escape: we understand escaped quotes + // and escaped backslashes. nothing else. + in.advance(); + if (!(static_cast(in.lookahead) == '"' + || static_cast(in.lookahead) == '\\')) + { + in.err("unrecognized character escape"); + } + } + in.advance(); + val += in.c; + } + + if (static_cast(in.lookahead) != '"') + in.err("string did not end with '\"'"); + in.eat(); + + return basic_io::TOK_STRING; + } + else + return basic_io::TOK_NONE; } void err(std::string const & s); }; @@ -173,8 +232,9 @@ parser { tokenizer & tok; - parser(tokenizer & t) : tok(t) + parser(tokenizer & t) : tok(t) { + token.reserve(128); advance(); } ============================================================ --- cset.cc d4b4ae843138b3b3ed84bcc6527ae3b0fec7dced +++ cset.cc 7201a4eb30f678d4c9b78f540f85bcbbf3923b5c @@ -477,8 +477,7 @@ { MM(dat); MM(cs); - std::istringstream iss(dat()); - basic_io::input_source src(iss, "cset"); + basic_io::input_source src(dat(), "cset"); basic_io::tokenizer tok(src); basic_io::parser pars(tok); parse_cset(pars, cs); ============================================================ --- lua.cc a8b53b2c09dab49e7b0e932e2b7d0d4f8bca1c8b +++ lua.cc 0781fe1aade5099b668056264a9424f0fbeb76c7 @@ -685,8 +685,7 @@ { vector > > res; const char *str = lua_tostring(L, -1); - std::istringstream iss(str); - basic_io::input_source in(iss, "monotone_parse_basic_io_for_lua"); + basic_io::input_source in(string(str), string("monotone_parse_basic_io_for_lua")); basic_io::tokenizer tok(in); try { ============================================================ --- revision.cc 99d7804b784e245f65fea75cd1ea7d2a025ce913 +++ revision.cc c70161ded1af39115360f97be69884f847e43940 @@ -1072,8 +1072,7 @@ static void read_oldstyle_dot_mt_attrs(data const & dat, oldstyle_attr_map & attr) { - std::istringstream iss(dat()); - basic_io::input_source src(iss, ".mt-attrs"); + basic_io::input_source src(dat(), ".mt-attrs"); basic_io::tokenizer tok(src); basic_io::parser parser(tok); @@ -1415,8 +1414,7 @@ { revision_data dat; app.db.get_revision(ident,dat); - std::istringstream iss(dat.inner()()); - basic_io::input_source src(iss, "revision"); + basic_io::input_source src(dat.inner()(), "revision"); basic_io::tokenizer tok(src); basic_io::parser pars(tok); while (pars.symp()) @@ -1501,8 +1499,7 @@ revision_set & rev) { MM(rev); - std::istringstream iss(dat()); - basic_io::input_source src(iss, "revision"); + basic_io::input_source src(dat(), "revision"); basic_io::tokenizer tok(src); basic_io::parser pars(tok); parse_revision(pars, rev); ============================================================ --- roster.cc c7931214acea75e6ad38b0b10863194796a35a1b +++ roster.cc 2c97935dff20a8256d7e482fc1cd5ef95d9b4d6c @@ -2320,8 +2320,7 @@ roster_t & ros, marking_map & mm) { - std::istringstream iss(dat()); - basic_io::input_source src(iss, "roster"); + basic_io::input_source src(dat(), "roster"); basic_io::tokenizer tok(src); basic_io::parser pars(tok); ros.parse_from(pars, mm); ============================================================ --- vocab.cc d465ac7c42bafcb39d03956fc9869b928cae0184 +++ vocab.cc d8da5a3ac67bb8c2b29a5cb5f3e3704e1375d954 @@ -48,6 +48,13 @@ val.ok = true; } +inline bool is_xdigit(char x) +{ + return ((x >= '0' && x <= '9') + || (x >= 'a' && x <= 'f') + || (x >= 'A' && x <= 'F')); +} + inline void verify(hexenc & val) { @@ -59,10 +66,11 @@ N(val().size() == constants::idlen, F("hex encoded ID '%s' size != %d") % val % constants::idlen); - string::size_type pos = val().find_first_not_of(constants::legal_id_bytes); - N(pos == string::npos, - F("bad character '%c' in id name '%s'") % val().at(pos) % val); - + for (string::const_iterator i = val().begin(); i != val().end(); ++i) + { + N(is_xdigit(*i), + F("bad character '%c' in id name '%s'") % *i % val); + } val.ok = true; } ============================================================ --- work.cc f8ee64c721cb624b0e58ad9139b93e0abd930129 +++ work.cc b9dfdced17ced6f9849d0a7353dd9af4556c8d74 @@ -513,8 +513,7 @@ void read_options_map(data const & dat, options_map & options) { - std::istringstream iss(dat()); - basic_io::input_source src(iss, "MT/options"); + basic_io::input_source src(dat(), "MT/options"); basic_io::tokenizer tok(src); basic_io::parser parser(tok);