# # # delete "mimetypes" # # add_file "mk2.py" # content [fd9cea464ab12217dc6f0004ecad13b5a9c51baf] # # patch "ChangeLog" # from [6bcd5af9c26c0b608797df77504f6ca232f558c1] # to [356aa12709506db7f10ce9cbe43b7a69da776e2f] # # patch "README" # from [51ea9bcf58e2383de5b13e36130946447ed07462] # to [86f9112184651535771db7b381225f3053a9d8f0] # # patch "config.py.example" # from [5caa9b078dfa5d31e27b8f21464014d619dc9b66] # to [782031278d2d977661b7e566495c264626557f3a] # # patch "fdo/sharedmimeinfo.py" # from [4ee53b521b7103c8434435a904cd6692b569548d] # to [4b489fb1a64f036d67adc35089de586cac893ae7] # # patch "templates/index.html" # from [5d46c8360e2af3a6dfa935e68ca9421502877d92] # to [a0f30606651e27222da815d33c892fc3685303b4] # # patch "templates/revisionbrowse.html" # from [66d38381f187bc229bccd5489ad81693460b68e9] # to [1e37ebc587a7706215d068e0e27b9b3de31b0656] # # patch "viewmtn.py" # from [076d0c375da6061f4076d409d9fefe2d4621aa29] # to [045fb4d288dfb3824fbfbad1c5815abecdec6e96] # # set "mk2.py" # attr "mtn:execute" # value "true" # ============================================================ --- mk2.py fd9cea464ab12217dc6f0004ecad13b5a9c51baf +++ mk2.py fd9cea464ab12217dc6f0004ecad13b5a9c51baf @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +import cPickle +import random +random.seed() +import heapq +import math +import sys + +class MarkovState(object): + def __init__(self, state): + self.state = state + self.h = None + self.total = 0 + self.scores = {} + + def increment(self, token): + self.total += 1 + self.scores[token] = self.scores.get(token, 0) + 1 + self.h = None + + def __entropy(self): + return -1 * sum(map(lambda p: p * math.log(p, 2), + map(lambda x: (self.scores[x] / float(self.total)), self.scores))) + + def entropy(self): + if self.h == None: + self.h = self.__entropy() + return self.h + + def __repr__(self): + return "state" + repr(self.scores) + + def __cmp__(self, other): + if other == None: + return -1 + return cmp(other.entropy(), self.entropy()) + +class MarkovChain(object): + def __init__(self, length, join_token='', cutoff_func=None): + self.length = length + self.join_token = join_token + self.upchunked = set() + self.cutoff_func = cutoff_func or MarkovChain.log_chunkable + self.clear() + + @classmethod + def log_chunkable (cls, self, entropies): + # fast, but not necessarily as correct + return math.log (len(self.states.keys ()), 2) / 8 + + @classmethod + def standard_deviation_chunkable (cls, self, entropies): + l_h = len(entropies) + mean_h = sum(entropies) / l_h + sd_h = math.sqrt(sum([ pow(t - mean_h, 2) for t in entropies ]) / l_h) + print >> sys.stderr, l_h, mean_h, sd_h + cutoff = mean_h + 2.5 * sd_h # should really justify in some way other than 'it works' + return cutoff + + def update(self, gen): + buffer = [] + this_stash = [] + for token in gen: + this_stash.append(token) + if len(buffer) == self.length: + tbuffer = tuple(buffer) + if self.states.has_key(tbuffer): + state = self.states[tbuffer] + else: + state = self.states[tbuffer] = MarkovState(tbuffer) + state.increment(token) + buffer = buffer[1:] + buffer.append(token) + self.stash.append(this_stash) + + def clear(self): + self.states = {} + self.stash = [] + + def random_next(self, from_state): + def next_state(token): + return from_state.state[:-1] + (token,) + # eliminate dead-ends + def not_dead_end(token): + return self.states.has_key (next_state (token)) + possible = filter (not_dead_end, from_state.scores.keys()) +# print >>sys.stderr, (from_state, possible) + if not possible: + return None + total = sum (map (lambda s: from_state.scores[s], possible)) + choice = random.randrange(0, total) + for k in possible: + total -= from_state.scores[k] + if total <= 0: + return self.states[next_state(k)] + raise Exception("Unreachable") + + def upchunk(self): + while True: + to_upchunk, to_upchunk_value = self.__select_upchunk() + if to_upchunk == None: + break + stash_copy = self.stash + self.clear() + self.update_upchunked (to_upchunk, to_upchunk_value) + for stash in stash_copy: + self.update(self.__upchunk_gen (stash, to_upchunk, to_upchunk_value)) + del stash_copy + + def update_upchunked (self, to_upchunk, replace_with): + self.upchunked.add (replace_with) + for token in to_upchunk: + if token in self.upchunked: + self.upchunked.remove (token) + + def __select_upchunk(self): + q = [] + keys = self.states.keys() + keylen = len(keys) + if keylen == 0: + return None + max_h = -1 + candidate = None + entropies = [] + for idx, tokens in enumerate(keys): + state = self.states[tokens] + h = state.entropy () + entropies.append (h) + if h > max_h: + max_h = h + candidate = state + cutoff = self.cutoff_func (self, entropies) + print >>sys.stderr, "best entropy vs. cutoff is: %s :: %.2f vs. cutoff %.2f" % (candidate.state, candidate.entropy(), cutoff) + if candidate.entropy() < cutoff: + return None, None + else: + return candidate.state, self.join_token.join(candidate.state) + + def __upchunk_gen(self, gen, to_upchunk, replace_with): + buffer = [] + for i in gen: + buffer.append(i) + if len(buffer) == len(to_upchunk): + if tuple(buffer) == to_upchunk: + buffer = [ replace_with ] + else: + to_yield, buffer = buffer[0], buffer[1:] + yield to_yield + for i in buffer: + yield i + + def pprint(self): + from pprint import pprint + pprint(chain.states) + +def simple_gen(fname): + for line in open(fname, 'rb'): + for char in line: + yield char +# for word in line.split(): +# yield word.lower() + +if __name__ == '__main__': + chain = MarkovChain(2) + for infile in sys.argv[1:]: + print >> sys.stderr, "Reading input file:", infile + chain.update(simple_gen (infile)) + chain.upchunk() + print >>sys.stderr, "processing produced", len(chain.states.keys()), "states." + cPickle.dump(chain, sys.stdout, protocol=2) ============================================================ --- ChangeLog 6bcd5af9c26c0b608797df77504f6ca232f558c1 +++ ChangeLog 356aa12709506db7f10ce9cbe43b7a69da776e2f @@ -1,3 +1,10 @@ +2007-07-05 Grahame Bowland + + * support remapping MIME types, to allow + work arounds for unhelpful shared-mime-info. + * show the MIME type via "title" attribute + on icon links + 2007-07-04 Grahame Bowland * apply selection_func in __get_last_changes ============================================================ --- README 51ea9bcf58e2383de5b13e36130946447ed07462 +++ README 86f9112184651535771db7b381225f3053a9d8f0 @@ -28,7 +28,7 @@ I generally sync all my changes to the following public monotone repositories: - venge.net + venge.net / off.net monotone.ucc.gu.uwa.edu.au You should be able to grab the latest viewmtn from any of them. ============================================================ --- config.py.example 5caa9b078dfa5d31e27b8f21464014d619dc9b66 +++ config.py.example 782031278d2d977661b7e566495c264626557f3a @@ -80,4 +80,9 @@ icon_size = '16' icon_theme = 'gnome' icon_size = '16' - +# Some installations may have shared MIME info that is +# unhelpful. Forced remappings can be placed in the +# following hash table (uncomment it to enable it) +# +# mime_map = { 'application/x-python' : 'text/plain' } +# ============================================================ --- fdo/sharedmimeinfo.py 4ee53b521b7103c8434435a904cd6692b569548d +++ fdo/sharedmimeinfo.py 4b489fb1a64f036d67adc35089de586cac893ae7 @@ -252,7 +252,7 @@ class LookupHelper: return rv class LookupHelper: - def __init__(self): + def __init__(self, remap_lookup=None): self.glob_lookup = GlobLookup() self.magic_lookup = MagicLookup() nontext_chars = "\x01\x02\x03\x04\x05\x06\x0e\x0f"\ @@ -261,6 +261,7 @@ class LookupHelper: self.nontext = {} for char in nontext_chars: self.nontext[char] = True + self.remap_lookup = remap_lookup def is_binary(self, str): for char in str: @@ -268,7 +269,7 @@ class LookupHelper: return True return False - def lookup(self, filename, data): + def __lookup(self, filename, data): # spec says we try >= 80 priority magic matchers, then filename, then the other matchers threshold = 80 priorities = self.magic_lookup.priorities() @@ -289,6 +290,13 @@ class LookupHelper: return 'application/octet-stream' else: return 'text/plain' + + def lookup(self, *args, **kwargs): + rv = self.__lookup(*args, **kwargs) + if self.remap_lookup and self.remap_lookup.has_key(rv): + return self.remap_lookup[rv] + else: + return rv if __name__ == '__main__': c = LookupHelper() ============================================================ --- templates/index.html 5d46c8360e2af3a6dfa935e68ca9421502877d92 +++ templates/index.html a0f30606651e27222da815d33c892fc3685303b4 @@ -1,6 +1,18 @@ #extends base #def body + +

Welcome to this ViewMTN installation. The list below shows all branches served within this Monotone database. @@ -12,16 +24,19 @@ might be useful. might be useful.

- - -#for branch in $branches - - - + +#end if #end for
Branch
+#for t, did, branch, offset in $branches +#if $t == "d" +
[+] $branch.name
+ +#else +
    #filter Filter $link($branch).html() #end filter -
============================================================ --- templates/revisionbrowse.html 66d38381f187bc229bccd5489ad81693460b68e9 +++ templates/revisionbrowse.html 1e37ebc587a7706215d068e0e27b9b3de31b0656 @@ -28,7 +28,7 @@ $branch_links #for $stanza_type, $this_path, $author, $ago, $content_mark, $shortlog, $mime_type in $entries - $mime_type + $mime_type #filter Filter ============================================================ --- viewmtn.py 076d0c375da6061f4076d409d9fefe2d4621aa29 +++ viewmtn.py 045fb4d288dfb3824fbfbad1c5815abecdec6e96 @@ -355,12 +355,67 @@ ops = mtn.Operations([config.monotone, c renderer = Renderer() ops = mtn.Operations([config.monotone, config.dbfile]) -mimehelp = sharedmimeinfo.LookupHelper() +mimehelp = sharedmimeinfo.LookupHelper(getattr(config, "mime_map", None)) mimeicon = icontheme.MimeIcon(icontheme.IconTheme(config.icon_theme), config.icon_size) +from mk2 import MarkovChain + +class BranchDivisions: + def __init__ (self): + self.divisions = None + + def calculate_divisions (self, branches): + if self.divisions != None: + return + chain = MarkovChain (2, join_token='.', cutoff_func=MarkovChain.standard_deviation_chunkable) + for branch in branches: + chain.update (branch.name.split ('.')) + chain.upchunk () + divisions = set () + for branch in branches: + for chunk in chain.upchunked: + idx = branch.name.find (chunk) + if idx != -1: + divisions.add (branch.name[idx:idx+len(chunk)]) + self.divisions = list(divisions) + self.divisions.sort () + +divisions = BranchDivisions () + class Index: def GET(self): - renderer.render('index.html', page_title="Branches", branches=ops.branches()) + branches = map(None, ops.branches ()) + divisions.calculate_divisions (branches) + def division_iter(): + bitter = iter(branches) + divs = divisions.divisions + n_divs = len(divs) + in_divs = {} + look_for = 0 + def new_div (n): + did = look_for + in_divs[n] = did + return "d", did, mtn.Branch(n), len(in_divs.keys ()) * 10 + def end_div (n): + did = in_divs.pop (n) + return "e", did, mtn.Branch(n), len(in_divs.keys ()) * 10 + def branch_line (b): + return "b", 0, branch, 0 + for branch in bitter: + for div in in_divs.keys(): # we alter it in the loop, copy.. + if branch.name.find (div) != 0: + yield end_div (div) + if look_for < n_divs: + if cmp(branch, divs[look_for]) > 0: + look_for += 1 + if branch.name.find (divs[look_for]) == 0: + yield new_div (divs[look_for]) + look_for += 1 + yield branch_line (branch) + # any stragglers need to be closed + for div in in_divs.keys(): + yield end_div (div) + renderer.render('index.html', page_title="Branches", branches=division_iter()) class About: def GET(self):