# # # add_file "split.cc" # content [2857722407feaf88eabc78fb024c1b5e77a21497] # # patch "grep.cc" # from [5cd119b4d61791c2aa43f955e8ce4be8c334582c] # to [46744d7d2fe94bd1dc87ee124b463fda60dd57ab] # ============================================================ --- split.cc 2857722407feaf88eabc78fb024c1b5e77a21497 +++ split.cc 2857722407feaf88eabc78fb024c1b5e77a21497 @@ -0,0 +1,80 @@ +// 2006-04-13 Timothy Brownawell +// GPL v2 or greater +// +// link with boost_program_options +// +// split [-n num] -d delim [-d delim...] -f basename +// split input into chunks of num stanzas +// delim is the stanza delimiter + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "io.hh" + +using std::string; +using std::vector; +using std::cin; +using std::cout; +using std::cerr; +using std::ofstream; +using std::pair; +using std::make_pair; +using basic_io::stanza; +using basic_io::stanza_reader; +using boost::lexical_cast; + +namespace po = boost::program_options; + +int main(int argc, char **argv) +{ + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "print this message") + ("num,n", po::value(), "number of stanzas to put in each output file (default 1)") + ("basename,f", po::value(), "basename of output files") + ("delim,d", po::value >(), "key that begins a stanza") + ; + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + if (vm.count("help") || !vm.count("basename") || !vm.count("delim")) + { + cerr << desc << "\n"; + cerr << "--basename is required, " + << "and --delim must be given at least once.\n"; + return 1; + } + + int num(1); + if (vm.count("num")) + num = vm["num"].as(); + string basename(vm["basename"].as()); + + stanza st; + stanza_reader sr(cin, vm["delim"].as >()); + int n = 0; + ofstream out; + while (sr.get(st)) + { + if (!(n % num)) + { + if (out.is_open()) + out.close(); + string fn = basename + lexical_cast(n); + cerr<<"Opening file '"<