# # # patch "fdo/sharedmimeinfo.py" # from [d177ae1d1b6cd3626696b870fc3973ce0f2535c7] # to [8b641fe2ffdb7f33397cf474b13d216bad3263aa] # ============================================================ --- fdo/sharedmimeinfo.py d177ae1d1b6cd3626696b870fc3973ce0f2535c7 +++ fdo/sharedmimeinfo.py 8b641fe2ffdb7f33397cf474b13d216bad3263aa @@ -80,6 +80,9 @@ class MagicLookup(object): class MagicLookup(object): def __init__(self): + # hashed by priority, then by mime type, then a list of headers + # (multiple files could easily have conflicts otherwise) + self.headers = {} mime_dirs = mime() for dir in mime_dirs: magic_file = os.path.join(dir, 'magic') @@ -90,17 +93,21 @@ class MagicLookup(object): fd = open(fname, 'rb') if fd.readline() != 'MIME-Magic\0\n': raise Exception("Not a Mime Magic file: %s" % fname) + header_re = re.compile(r'^\[([0-9]+):([^\]]+)\]$') value_re = re.compile(r'^([0-9]*)>([0-9]+)=') options_re = re.compile(r'^(\~[0-9]+)?(\+[0-9]+)?$') + # read a header, followed by a number of lines self.__buf = '' + def skip_line(): nl = self.__buf.find('\n') if nl == -1: self.__buf = '' else: self.__buf = self.__buf[nl+1:] + def read_header(): m = header_re.match(self.__buf) if not m: @@ -108,7 +115,9 @@ class MagicLookup(object): skip_line() else: self.__buf = self.__buf[m.end()+1:] - return m.groups() + priority, mime_type = m.groups() + return int(priority), mime_type + def read_line(): # the next line will have indent, start_offset and the # start of the value @@ -117,10 +126,10 @@ class MagicLookup(object): skip_line() return indent, start_offset = m.groups() - try: - indent = int(indent) - except: - indent = 0 + try: indent = int(indent) + except: indent = 0 + try: start_offset = int(start_offset) + except: start_offset = 0 self.__buf = self.__buf[m.end():] # the next two bytes are the length, in big-endian format length = (ord(self.__buf[0]) << 8) + ord(self.__buf[1]) @@ -145,7 +154,7 @@ class MagicLookup(object): # does what we want. see whether or not we need to call it.. if len(self.__buf) == 0 or self.__buf[-1] != '\n': self.__buf += fd.readline() - word_size, range_length = 1, 0 + word_size, range_length = 1, 1 m = options_re.match(self.__buf) if m: for group in m.groups(): @@ -156,27 +165,96 @@ class MagicLookup(object): elif group[0] == '+': range_length = int(group[1:]) self.__buf = self.__buf[m.end()+1:] - return indent, start_offset, repr(value), repr(mask), word_size, range_length + # fix the byte order, on little-endian systems + if sys.byteorder == 'little' and word_size > 1: + if len(value) % word_size != 0: + raise Exception("value is not an integer multiple of word size!") + # make your sanity save now! + fix = lambda x: ''.join([''.join(reversed(value[t:t+word_size])) for t in xrange(0,len(x)/word_size,word_size)]) + value = fix(value) + mask = fix(mask) + return { 'indent' : indent, + 'start_offset' : start_offset, + 'value' : value, + 'mask' : mask, + 'range_length' : range_length, + 'word_size' : word_size } + + current_header = None while True: if self.__buf == '': nd = fd.readline() if not nd: break self.__buf += nd - print "loop:", repr(self.__buf) +# print "loop:", repr(self.__buf) if self.__buf[0] == '[': - header = read_header() - print "header:", header + priority, mime_type = read_header() + current_header = [] + self.headers.setdefault(priority, {}).setdefault(mime_type, []).append(current_header) + line_stack = [] else: + if current_header == None: + raise Exception("non-header before header!") line = read_line() - print "line:", line -# print indent, start_offset, repr(value), repr(mask), word_size, range_length - def lookup(self, data): + current_header.append(line) + + def lookup(self, data, min_priority=None): + def match_line(line): + data_size = len(data) + value_size = len(line['value']) + + def match_with(data_chunk): + if line['mask'] != None: + masked = ''.join([chr(ord(t) & ord(line['mask'][i])) for i, t in enumerate(data_chunk)]) + else: + masked = data_chunk + if masked == line['value']: + return True + + for i in range(line['range_length']): + from_offset = line['start_offset'] + i + to_offset = from_offset + value_size + if to_offset > value_size: + continue + if match_with(data[from_offset:to_offset]): + return True + + def match_lines(lines): + # we need to maintain a current indent depth; we don't need to + # actually care what our parent was, as if we made it to checking + # we have necessarily succeeded + depth = -1 + length = len(lines) + for idx, line in enumerate(lines): + indent = line['indent'] + if indent > depth+1: + continue + if match_line(line): + if (idx+1 == length) or lines[idx+1]['indent'] <= indent: + # this is a match by itself + return True + depth = indent + elif indent <= depth: + depth = indent - 1 + + # do a lookup, until we reach min_priority + priorities = self.headers.keys() + priorities.sort() + priorities.reverse() + for priority in priorities: + if priority < min_priority: + break + for mime_type in self.headers[priority]: + for lines in self.headers[priority][mime_type]: + if match_lines(lines): + return mime_type return None if __name__ == '__main__': a = GlobLookup() b = MagicLookup() + print b.lookup(open('/Users/grahame/mtn/viewmtn/mimetypes/x-office-calendar.png').read()) sys.exit(0) for line in sys.stdin: line = line.strip()