#
#
# delete "mimetypes"
# 
# add_file "mk2.py"
#  content [fd9cea464ab12217dc6f0004ecad13b5a9c51baf]
# 
# patch "ChangeLog"
#  from [6bcd5af9c26c0b608797df77504f6ca232f558c1]
#    to [356aa12709506db7f10ce9cbe43b7a69da776e2f]
# 
# patch "README"
#  from [51ea9bcf58e2383de5b13e36130946447ed07462]
#    to [86f9112184651535771db7b381225f3053a9d8f0]
# 
# patch "config.py.example"
#  from [5caa9b078dfa5d31e27b8f21464014d619dc9b66]
#    to [782031278d2d977661b7e566495c264626557f3a]
# 
# patch "fdo/sharedmimeinfo.py"
#  from [4ee53b521b7103c8434435a904cd6692b569548d]
#    to [4b489fb1a64f036d67adc35089de586cac893ae7]
# 
# patch "templates/index.html"
#  from [5d46c8360e2af3a6dfa935e68ca9421502877d92]
#    to [a0f30606651e27222da815d33c892fc3685303b4]
# 
# patch "templates/revisionbrowse.html"
#  from [66d38381f187bc229bccd5489ad81693460b68e9]
#    to [1e37ebc587a7706215d068e0e27b9b3de31b0656]
# 
# patch "viewmtn.py"
#  from [076d0c375da6061f4076d409d9fefe2d4621aa29]
#    to [045fb4d288dfb3824fbfbad1c5815abecdec6e96]
# 
#   set "mk2.py"
#  attr "mtn:execute"
# value "true"
#
============================================================
--- mk2.py	fd9cea464ab12217dc6f0004ecad13b5a9c51baf
+++ mk2.py	fd9cea464ab12217dc6f0004ecad13b5a9c51baf
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+
+import cPickle
+import random
+random.seed()
+import heapq
+import math
+import sys
+
+class MarkovState(object):  # follower-token counts observed after one context
+    def __init__(self, state):
+        self.state = state  # the context this state stands for (MarkovChain passes a tuple of tokens)
+        self.h = None       # cached entropy; None means it must be recomputed
+        self.total = 0      # number of observations recorded so far
+        self.scores = {}    # follower token -> number of times it was seen
+    
+    def increment(self, token):
+        self.total += 1
+        self.scores[token] = self.scores.get(token, 0) + 1
+        self.h = None  # counts changed, so the cached entropy is stale
+    
+    def __entropy(self):  # Shannon entropy, in bits, of the follower distribution
+        return -1 * sum(map(lambda p: p * math.log(p, 2), 
+                        map(lambda x: (self.scores[x] / float(self.total)), self.scores)))
+
+    def entropy(self):
+        if self.h is None:  # identity test; never dispatches to a comparison method
+            self.h = self.__entropy()
+        return self.h
+    
+    def __repr__(self):
+        return "state" + repr(self.scores)
+
+    def __cmp__(self, other):  # higher entropy sorts first
+        if other is None:  # an equality test here would re-enter __cmp__ on `other`
+            return -1
+        return cmp(other.entropy(), self.entropy())
+
+class MarkovChain(object):  # fixed-order Markov model that can merge ("upchunk") high-entropy contexts
+    def __init__(self, length, join_token='', cutoff_func=None):
+        self.length = length  # number of tokens that make up one context
+        self.join_token = join_token  # separator used when a context is merged into one token
+        self.upchunked = set()  # merged tokens created so far
+        self.cutoff_func = cutoff_func or MarkovChain.log_chunkable
+        self.clear()
+    
+    @classmethod
+    def log_chunkable (cls, self, entropies):  # invoked as cutoff_func(chain, entropies); `self` is the chain
+        # fast, but not necessarily as correct
+        return math.log (len(self.states.keys ()), 2) / 8
+    
+    @classmethod
+    def standard_deviation_chunkable (cls, self, entropies):  # same calling convention as log_chunkable
+        l_h = len(entropies)
+        mean_h = sum(entropies) / l_h
+        sd_h = math.sqrt(sum([ pow(t - mean_h, 2) for t in entropies ]) / l_h)
+        print >> sys.stderr, l_h, mean_h, sd_h
+        cutoff = mean_h + 2.5 * sd_h # should really justify in some way other than 'it works'
+        return cutoff
+        
+    def update(self, gen):
+        buffer = []  # sliding window holding the last `length` tokens
+        this_stash = []
+        for token in gen:
+            this_stash.append(token)
+            if len(buffer) == self.length:
+                tbuffer = tuple(buffer)
+                if tbuffer in self.states:
+                    state = self.states[tbuffer]
+                else:
+                    state = self.states[tbuffer] = MarkovState(tbuffer)
+                state.increment(token)
+                buffer = buffer[1:]
+            buffer.append(token)
+        self.stash.append(this_stash)  # raw tokens are kept so upchunk() can replay them
+    
+    def clear(self):
+        self.states = {}  # context tuple -> MarkovState
+        self.stash = []   # one token list per update() call
+    
+    def random_next(self, from_state):
+        # Pick a successor state at random, weighted by follower counts.
+        # Returns None when every follower leads to an unknown state.
+        def next_state(token):
+            # slide the window as update() does: drop the oldest, append the new
+            return from_state.state[1:] + (token,)
+        # eliminate dead-ends
+        possible = [t for t in from_state.scores if next_state(t) in self.states]
+        if not possible:
+            return None
+        total = sum([from_state.scores[t] for t in possible])
+        choice = random.randrange(0, total)  # uniform in [0, total)
+        for k in possible:
+            choice -= from_state.scores[k]  # walk the cumulative weights
+            if choice < 0:
+                return self.states[next_state(k)]
+        raise Exception("Unreachable")
+        
+    def upchunk(self):
+        while True:  # merge one context per pass until nothing reaches the cutoff
+            to_upchunk, to_upchunk_value = self.__select_upchunk()
+            if to_upchunk is None:
+                break
+            stash_copy = self.stash
+            self.clear()
+            self.update_upchunked (to_upchunk, to_upchunk_value)
+            for stash in stash_copy:  # rebuild every state from the rewritten token streams
+                self.update(self.__upchunk_gen (stash, to_upchunk, to_upchunk_value))
+            del stash_copy
+        
+    def update_upchunked (self, to_upchunk, replace_with):
+        self.upchunked.add (replace_with)
+        for token in to_upchunk:
+            if token in self.upchunked:  # an earlier merged token absorbed into the new one is dropped
+                self.upchunked.remove (token)
+
+    def __select_upchunk(self):
+        # Returns (context tuple, joined replacement token) for the highest-entropy
+        # state, or (None, None) when there is nothing to merge.
+        keys = self.states.keys()
+        if len(keys) == 0:
+            return None, None  # upchunk() unpacks two values, even for an empty chain
+        max_h = -1
+        candidate = None
+        entropies = []
+        for tokens in keys:
+            state = self.states[tokens]
+            h = state.entropy ()
+            entropies.append (h)
+            if h > max_h:
+                max_h = h
+                candidate = state
+        cutoff = self.cutoff_func (self, entropies)
+        print >>sys.stderr, "best entropy vs. cutoff is: %s :: %.2f vs. cutoff %.2f" % (candidate.state, candidate.entropy(), cutoff)
+        if candidate.entropy() < cutoff:
+            return None, None
+        else:
+            return candidate.state, self.join_token.join(candidate.state)
+    
+    def __upchunk_gen(self, gen, to_upchunk, replace_with):
+        buffer = []  # look-ahead window as long as the sequence being replaced
+        for i in gen:
+            buffer.append(i)
+            if len(buffer) == len(to_upchunk):
+                if tuple(buffer) == to_upchunk:
+                    buffer = [ replace_with ]  # collapse the match into the merged token
+                else:
+                    to_yield, buffer = buffer[0], buffer[1:]
+                    yield to_yield
+        for i in buffer:  # flush whatever is still buffered at end of input
+            yield i
+    
+    def pprint(self):
+        from pprint import pprint
+        pprint(self.states)  # print this chain's own states, not a module-level global
+        
+def simple_gen(fname):  # tokenizer: yields the contents of file `fname` one character at a time
+    for line in open(fname, 'rb'):  # NOTE(review): the file object is never closed explicitly
+        for char in line:
+            yield char
+#        for word in line.split():
+#            yield word.lower()
+
+if __name__ == '__main__':
+    chain = MarkovChain(2)  # two-token contexts, default join_token and cutoff function
+    for infile in sys.argv[1:]:
+        print >> sys.stderr, "Reading input file:", infile
+        chain.update(simple_gen (infile))
+    chain.upchunk()
+    print >>sys.stderr, "processing produced", len(chain.states.keys()), "states."
+    cPickle.dump(chain, sys.stdout, protocol=2)  # NOTE(review): chain.cutoff_func is a bound classmethod; confirm cPickle can serialize it
============================================================
--- ChangeLog	6bcd5af9c26c0b608797df77504f6ca232f558c1
+++ ChangeLog	356aa12709506db7f10ce9cbe43b7a69da776e2f
@@ -1,3 +1,10 @@
+2007-07-05  Grahame Bowland  <address@hidden>
+
+	* support remapping MIME types, to allow 
+	workarounds for unhelpful shared-mime-info.
+	* show the MIME type via "title" attribute 
+	on icon links
+
 2007-07-04  Grahame Bowland  <address@hidden>
 
 	* apply selection_func in __get_last_changes 
============================================================
--- README	51ea9bcf58e2383de5b13e36130946447ed07462
+++ README	86f9112184651535771db7b381225f3053a9d8f0
@@ -28,7 +28,7 @@
 
 I generally sync all my changes to the following public monotone 
 repositories:
-    venge.net
+    venge.net / off.net
     monotone.ucc.gu.uwa.edu.au
 You should be able to grab the latest viewmtn from any of them.
 
============================================================
--- config.py.example	5caa9b078dfa5d31e27b8f21464014d619dc9b66
+++ config.py.example	782031278d2d977661b7e566495c264626557f3a
@@ -80,4 +80,9 @@ icon_size = '16'
 icon_theme = 'gnome'
 icon_size = '16'
 
-
+# Some installations may have shared MIME info that is 
+# unhelpful. Forced remappings can be placed in the 
+# following dictionary (uncomment it to enable it)
+#
+# mime_map = { 'application/x-python' : 'text/plain' }
+#
============================================================
--- fdo/sharedmimeinfo.py	4ee53b521b7103c8434435a904cd6692b569548d
+++ fdo/sharedmimeinfo.py	4b489fb1a64f036d67adc35089de586cac893ae7
@@ -252,7 +252,7 @@ class LookupHelper:
         return rv
 
 class LookupHelper:
-    def __init__(self):
+    def __init__(self, remap_lookup=None):
         self.glob_lookup = GlobLookup()
         self.magic_lookup = MagicLookup()
         nontext_chars = "\x01\x02\x03\x04\x05\x06\x0e\x0f"\
@@ -261,6 +261,7 @@ class LookupHelper:
         self.nontext = {}
         for char in nontext_chars:
             self.nontext[char] = True
+        self.remap_lookup = remap_lookup
             
     def is_binary(self, str):
         for char in str:
@@ -268,7 +269,7 @@ class LookupHelper:
                 return True
         return False
         
-    def lookup(self, filename, data):
+    def __lookup(self, filename, data):
         # spec says we try >= 80 priority magic matchers, then filename, then the other matchers
         threshold = 80
         priorities = self.magic_lookup.priorities()
@@ -289,6 +290,13 @@ class LookupHelper:
             return 'application/octet-stream'
         else:
             return 'text/plain'
+    
+    def lookup(self, *args, **kwargs):
+        # Detect the MIME type, then apply the forced remapping if one is configured.
+        detected = self.__lookup(*args, **kwargs)
+        if not self.remap_lookup:
+            return detected
+        return self.remap_lookup.get(detected, detected)
 
 if __name__ == '__main__':
     c = LookupHelper()
============================================================
--- templates/index.html	5d46c8360e2af3a6dfa935e68ca9421502877d92
+++ templates/index.html	a0f30606651e27222da815d33c892fc3685303b4
@@ -1,6 +1,18 @@
 #extends base
 
 #def body
+<script type="text/javascript">
+function toggle_hidden (did, update_did) {
+    var s = getElement (did);
+    var hidden = s.style['display'] && s.style['display'].length > 0;
+    // show the block when hidden, hide it when shown; mirror the state in the marker
+    s.style['display'] = hidden ? '' : 'none';
+    var marker = getElement (update_did);
+    if (marker) { marker.innerHTML = hidden ? '-' : '+'; }
+}
+
+</script>
+
 <p>
 Welcome to this <a href="http://grahame.angrygoats.net/viewmtn.shtml">ViewMTN</a> installation.
 The list below shows all branches served within this <a href="http://www.monotone.ca/">Monotone</a> database.
@@ -12,16 +24,19 @@ might be useful.
 might be useful.
 </p>
 
-<table class="pretty">
-<tr><th>Branch</th></tr>
-#for branch in $branches
-<tr>
-  <td>
+#for t, did, branch, offset in $branches
+#if $t == "d"
+    <div onClick="javascript:toggle_hidden('t_$did', 'e_$did');"><span style="font-family:monospace;">[<span id="e_$did">+</span>]</span> $branch.name</div>
+    <div id="t_$did" style="display: none; position: relative; left: 1em;">
+#else if $t == "e"
+    </div> <!-- end of $did -->
+#else
+    <div><span style="font-family:monospace;">&nbsp;&nbsp;&nbsp;</span>
     #filter Filter
     $link($branch).html()
     #end filter
-  </td>
-</tr>
+    </div>
+#end if
 #end for
 </table>
 
============================================================
--- templates/revisionbrowse.html	66d38381f187bc229bccd5489ad81693460b68e9
+++ templates/revisionbrowse.html	1e37ebc587a7706215d068e0e27b9b3de31b0656
@@ -28,7 +28,7 @@ $branch_links
 #for $stanza_type, $this_path, $author, $ago, $content_mark, $shortlog, $mime_type in $entries
 <tr class="$row_class.next()">
     <td>
-        <img src="$mime_icon($mime_type)" alt="$mime_type" />
+        <img src="$mime_icon($mime_type)" alt="$mime_type" title="$mime_type" />
     </td>
     <td>
         #filter Filter
============================================================
--- viewmtn.py	076d0c375da6061f4076d409d9fefe2d4621aa29
+++ viewmtn.py	045fb4d288dfb3824fbfbad1c5815abecdec6e96
@@ -355,12 +355,67 @@ ops = mtn.Operations([config.monotone, c
 
 renderer = Renderer()
 ops = mtn.Operations([config.monotone, config.dbfile])
-mimehelp = sharedmimeinfo.LookupHelper()
+mimehelp = sharedmimeinfo.LookupHelper(getattr(config, "mime_map", None))
 mimeicon = icontheme.MimeIcon(icontheme.IconTheme(config.icon_theme), config.icon_size)
 
+from mk2 import MarkovChain
+
+class BranchDivisions:  # finds, once, the dotted name chunks that many branches have in common
+    def __init__ (self):
+        self.divisions = None  # sorted list of division strings; None until calculated
+        
+    def calculate_divisions (self, branches):
+        if self.divisions is not None:  # already computed; later calls are no-ops
+            return
+        chain = MarkovChain (2, join_token='.', cutoff_func=MarkovChain.standard_deviation_chunkable)
+        for branch in branches:
+            chain.update (branch.name.split ('.'))
+        chain.upchunk ()
+        divisions = set ()
+        for branch in branches:
+            for chunk in chain.upchunked:
+                # a merged chunk that occurs anywhere in the name counts as a division
+                if chunk in branch.name:
+                    divisions.add (chunk)
+        self.divisions = list(divisions)
+        self.divisions.sort ()
+
+divisions = BranchDivisions ()
+
 class Index:
     def GET(self):
-        renderer.render('index.html', page_title="Branches", branches=ops.branches())
+        branches = list(ops.branches ())
+        divisions.calculate_divisions (branches)
+        def division_iter():
+            # Yields (kind, div id, branch, offset) rows for the template: "d" opens
+            # a division, "e" closes one and "b" is an ordinary branch line.
+            divs = divisions.divisions
+            n_divs = len(divs)
+            in_divs = {}
+            look_for = 0
+            def new_div (n):
+                in_divs[n] = look_for
+                return "d", look_for, mtn.Branch(n), len(in_divs.keys ()) * 10
+            def end_div (n):
+                did = in_divs.pop (n)
+                return "e", did, mtn.Branch(n), len(in_divs.keys ()) * 10
+            def branch_line (b):
+                return "b", 0, b, 0
+            for branch in branches:
+                for div in in_divs.keys(): # we alter it in the loop, copy..
+                    if branch.name.find (div) != 0:
+                        yield end_div (div)
+                # NOTE(review): this compares a branch object with a string; confirm the intended ordering
+                if look_for < n_divs and cmp(branch, divs[look_for]) > 0:
+                    look_for += 1
+                if look_for < n_divs and branch.name.find (divs[look_for]) == 0:
+                    yield new_div (divs[look_for])
+                    look_for += 1
+                yield branch_line (branch)
+            # any stragglers need to be closed
+            for div in in_divs.keys():
+                yield end_div (div)
+        renderer.render('index.html', page_title="Branches", branches=division_iter())
 
 class About:
     def GET(self):