# # # patch "mtn_benchmark/random_content.py" # from [654582744bd8a67d13964429f5330cab51e306ae] # to [9ae8b38da49a0de5c5737a6d3259057a286309e2] # ============================================================ --- mtn_benchmark/random_content.py 654582744bd8a67d13964429f5330cab51e306ae +++ mtn_benchmark/random_content.py 9ae8b38da49a0de5c5737a6d3259057a286309e2 @@ -94,3 +94,23 @@ replacement = self.genbytes(r, insert_size) text = text[:pivot1] + replacement + text[pivot2:] return text + + +# Directory topology generation: +# While there are directories to add: +# Pick a directory +# Sample a Bernoulli distribution (p)^depth, where depth=0 for the root +# dir, 1 for a child of the root, etc. +# If this comes up true, attach a new dir here +# This has a reasonable-looking empirical fit to real trees. The linux kernel +# has p = 0.4, uclinux has p = 0.5, xfree86 (which is rather pathological) has +# something like p = 0.8, but even this has a lower density of deep +# directories than the actual tree (while getting the maximum deepness right). +# ACE is ~0.6, my other test trees are too sparse. These are all eyeball +# fits, looking only at distribution of depths, I didn't try any other forms +# of generative models, and there are some systematic irregularities visible +# even in this one (mostly that there needs to be some sort of "bottleneck" +# control, some trees like X have about the same number of depth 5 dirs as +# depth 3 dirs, for others this drops off much quicker). +# +# But this should be good enough for now.