# 
# 
# add_file "mtn_benchmark/random_content.py"
#  content [654582744bd8a67d13964429f5330cab51e306ae]
# 
# patch "mtn_benchmark/benchmarks.py"
#  from [27e5079920b8abad19ad7bad2a2153a385a57014]
#    to [3a942d0be3b8d79dd0e2c0460e878be547321b1d]
# 
# patch "mtn_benchmark/instrumenters.py"
#  from [5ce55a21058a7ebc43fe0df2163b4e6f7a514d9e]
#    to [7c5ebdf22732696a15f3e92207d8216879965edc]
# 
# patch "mtn_benchmark/mtn.py"
#  from [773ecd0cb44dd07077bf972e0bd38ed2e33e8e5f]
#    to [6fbfc58a51891b3f96c1022432c34804566d57cd]
# 
# patch "mtn_benchmark/repo.py"
#  from [6f250d61245dc45bb1815a3864c454532ed2b69c]
#    to [4242d0244903f51354a513e9d36dc400c1391f73]
# 
============================================================
--- mtn_benchmark/random_content.py	654582744bd8a67d13964429f5330cab51e306ae
+++ mtn_benchmark/random_content.py	654582744bd8a67d13964429f5330cab51e306ae
@@ -0,0 +1,96 @@
+try:
+    import scipy.stats
+except ImportError:
+    # Allow importing, in case the user isn't actually going to use us... but
+    # we probably won't work if they do.
+    pass
+
+# Requires scipy, and contortions.
+def binomial_variate(r, n, p):
+    scipy.stats.seed(r.randrange(1 << 31), 1)
+    return scipy.stats.binom.rvs(n, p)[0]
+
+class RandomContentSource(object):
+    # We generate lines as printable ascii nonsense, with uniformly distributed
+    # lengths.  This isn't that great a model, but it's unlikely that what we do
+    # here matters that much...
+    def genline(self, r):
+        length = r.randrange(20, 70)
+        return "".join([chr(r.randint(ord(" "), ord("}"))) for i in xrange(length)]) + "\n"
+
+    def genbytes(self, r, size):
+        total = 0
+        lines = []
+        while total < size:
+            line = self.genline(r)
+            total += len(line)
+            lines.append(line)
+        return ("".join(lines))[:size]
+
+class FileSource(RandomContentSource):
+    def __init__(self, meanlog, sdlog):
+        self.meanlog = meanlog
+        self.sdlog = sdlog
+
+    # This model is much more important, and much better -- we generate file
+    # sizes as log normal.  This provides a rather tight fit to many source
+    # trees -- I checked monotone, the linux kernel, the uclinux distribution
+    # (which includes 100,000 files, mostly other projects' source), pygame,
+    # xfree86, diffutils, and they all eyeballed "close enough".  (These were
+    # just some tarballs I had lying around.)  There are definitely things in
+    # this data that this model does not capture --
+    # http://www.cs.hmc.edu/~geoff/filesizes.html discusses the kinds of
+    # phenomena that occur reasonably well -- but it's a good start.
+    #
+    # For the above-mentioned source trees, I found:
+    # 
+    #    tree      mean(log(data))   sd(log(data))
+    # -----------  ---------------   -------------
+    # monotone          7.59             1.80
+    # linux kernel      8.21             1.62
+    # uclinux           7.87             1.69
+    # pygame            8.57             1.53
+    # diffutils         8.21             1.77
+    # xfree86           8.40             1.83
+    #
+    # Which is surprisingly consistent -- picking a meanlog of ~8 and a sdlog
+    # of ~1.7 should give quite reasonable sizes.
+    def generate(self, r):
+        size = int(r.lognormvariate(self.meanlog, self.sdlog))
+        return self.genbytes(r, size)
+
+class EditSource(RandomContentSource):
+    # Arguments are parameters to define two binomial distributions -- one
+    # describes how many hunks we edit, and the other how many bytes we edit
+    # in each hunk.  This is totally ad-hoc.
+    # NB: each binomial requires 0 < sd**2 < mean (see mean_sd_to_n_p).
+    def __init__(self, mean_num, sd_num, mean_size, sd_size):
+        self.n_num, self.p_num = self.mean_sd_to_n_p(mean_num, sd_num)
+        self.n_size, self.p_size = self.mean_sd_to_n_p(mean_size, sd_size)
+
+    def mean_sd_to_n_p(self, mean, sd):
+        #  n*p = mean
+        #  sqrt(n*p*(1-p)) = sd
+        # -->
+        #  1 - (sd^2 / mean) = p
+        #  mean / p = n
+        p = 1 - (sd ** 2 / float(mean))
+        assert 1 > p > 0
+        n = int(mean / p)
+        return (n, p)
+
+    def edit(self, r, start_text):
+        text = start_text
+        num = binomial_variate(r, self.n_num, self.p_num)
+        for i in xrange(num):
+            del_size = binomial_variate(r, self.n_size, self.p_size)
+            if not del_size:
+                continue
+            # Clamp and pivot against the current text, which earlier hunks resized.
+            del_size = min(del_size, len(text))
+            pivot1 = r.randint(0, len(text) - del_size)
+            pivot2 = pivot1 + del_size
+            insert_size = binomial_variate(r, self.n_size, self.p_size)
+            replacement = self.genbytes(r, insert_size)
+            text = text[:pivot1] + replacement + text[pivot2:]
+        return text
============================================================
--- mtn_benchmark/benchmarks.py	27e5079920b8abad19ad7bad2a2153a385a57014
+++ mtn_benchmark/benchmarks.py	3a942d0be3b8d79dd0e2c0460e878be547321b1d
@@ -17,7 +17,7 @@
 
     def setup(self, vcs):
         vcs.init_repo("target")
-        self.repo_source.setup()
+        self.repo_source.setup(vcs)
 
     def run(self, vcs):
         vcs.pull(self.repo_source.repo_path(), "target")
============================================================
--- mtn_benchmark/instrumenters.py	5ce55a21058a7ebc43fe0df2163b4e6f7a514d9e
+++ mtn_benchmark/instrumenters.py	7c5ebdf22732696a15f3e92207d8216879965edc
@@ -5,6 +5,7 @@
 class NullInstrumenter(instrumenter.Instrumenter):
     objclass = instrumenter.InstrumenterObj
 
+
 class TimingInstrumenterObj(instrumenter.RecordingInstrumenterObj):
     def parse_time_str(self, s):
         # 1.7 -> 1.7
@@ -34,11 +35,11 @@
         process.hook(timing_hook)
         return process
 
-
 class TimingInstrumenter(instrumenter.Instrumenter):
     repeats = 3
     objclass = TimingInstrumenterObj
 
+
 class MemTimingInstrumenterObj(instrumenter.RecordingInstrumenterObj):
     def __init__(self, record_dir, path=None):
         super(MemTimingInstrumenterObj, self).__init__(record_dir)
@@ -69,7 +70,6 @@
         process.hook(timing_hook)
         return process
 
-
 class MemTimingInstrumenter(instrumenter.Instrumenter):
     repeats = 3
     objclass = MemTimingInstrumenterObj
============================================================
--- mtn_benchmark/mtn.py	773ecd0cb44dd07077bf972e0bd38ed2e33e8e5f
+++ mtn_benchmark/mtn.py	6fbfc58a51891b3f96c1022432c34804566d57cd
@@ -1,7 +1,8 @@
 import os
 import os.path
 import shutil
 import time
+import mtn_benchmark.util
 
 # VCSes all have a factory, with method: .new(instrumenter)
 
@@ -78,4 +79,21 @@
         self.instrumenter.run("init_repo",
                               ["./mtn", "db", "init", "-d", repo])
 
+    def init_ws(self, ws, repo):
+        self.instrumenter.run("init_ws",
+                              ["./mtn", "setup", ws, "-d", repo, "-b", "testbranch"])
 
+    # Must be run in a workspace
+    # Creates a file and schedules it for addition at the next commit
+    def add_file(self, name, contents):
+        d, basename = os.path.split(name)
+        if d:
+            mtn_benchmark.util.ensure_dir(d)
+        f = open(name, "w")
+        f.write(contents)
+        f.close()
+        self.instrumenter.run("add_file", ["./mtn", "add", name])
+
+    def add_dir(self, name):
+        mtn_benchmark.util.ensure_dir(name)
+        self.instrumenter.run("add_dir", ["./mtn", "add", name])
============================================================
--- mtn_benchmark/repo.py	6f250d61245dc45bb1815a3864c454532ed2b69c
+++ mtn_benchmark/repo.py	4242d0244903f51354a513e9d36dc400c1391f73
@@ -1,4 +1,6 @@
+import os
 import os.path
+import random
 
 # repos have setup() and repo_path() methods.
 
@@ -6,11 +8,79 @@
     def __init__(self, path):
         self.path = os.path.abspath(path)
 
-    def setup(self):
+    def setup(self, vcs):
         pass
 
     def repo_path(self):
         return self.path
 
 
+# Fancy random-history generation
+
+DIR = "dir"
+FILE = "file"
+
+class Node(object):
+    def __init__(self, parent, name, type):
+        self.parent = parent
+        self.name = name
+        self.type = type
+
+    def fullname(self):
+        if self.parent:
+            return self.parent.fullname() + "/" + self.name
+        else:
+            return self.name
+
+class Tree(object):
+    def __init__(self, vcs):
+        self.vcs = vcs
+        self.nodes = {}
+        self.files = {}
+        self.dirs = {}
+
+    def clone(self):
+        # Shallow copy: the clone shares our vcs and Node objects.
+        t = Tree(self.vcs)
+        t.nodes, t.files, t.dirs = dict(self.nodes), dict(self.files), dict(self.dirs)
+        return t
+
+    def add_node(self, parent, name, type):
+        n = Node(parent, name, type)
+        assert name not in self.nodes
+        self.nodes[name] = n
+        return n
+
+    def add_file(self, parent, name, contents):
+        n = self.add_node(parent, name, FILE)
+        self.files[name] = n
+        self.vcs.add_file(n.fullname(), contents)
+
+    def add_dir(self, parent, name):
+        n = self.add_node(parent, name, DIR)
+        self.dirs[name] = n
+        self.vcs.add_dir(n.fullname())
+
+class SimpleRandomRepo(object):
+    magic_numbers = {
+        "seed": 0,
+        "start_dirs": 20,
+        "start_files": 400,
+        }
+
+    def __init__(self, **kwargs):
+        self.magic_numbers = dict(self.magic_numbers, **kwargs)  # per-instance copy
+        self.random = random.Random(self.magic_numbers["seed"])
+        self.path = "randrepo"
+
+    def setup(self, vcs):
+        vcs.init_repo(self.path)
+        scratchdir = "randrepo-scratch"
+        vcs.init_ws(scratchdir, self.path)
+        startdir = os.getcwd()
+        os.chdir(scratchdir)
+        # TODO(review): presumably the random tree is generated here -- not written yet.
+        os.chdir(startdir)
+
+    def repo_path(self):
+        return self.path
-# TODO: add synthetic repo generation classes