Back to index

plone3  3.1.7
magic.py
Go to the documentation of this file.
00001 """
00002 magic.py
00003 
00004  Initial Author: Jason Petrone <jp@demonseed.net>
00005 
00006  Updated by Gabriel Wicke <groups@gabrielwicke.de>
00007     Thu Oct 16 23:00:03 CEST 2003
00008     with magic data from gnome-vfs-mime-magic
00009 
00010 """
00011 
00012 import re
00013 import struct
00014 import string
00015 from StringIO import StringIO
00016 from zipfile import ZipFile
00017 from zipfile import BadZipfile
00018 from xml.dom import minidom
00019 
00020 __version__ = '$Revision: 1.2 $'[11:-2]
00021 
# Signature table.  Each row is
#     [offset, type, operator, value, mime type]
# and is turned into a magicTest at import time.  guessMime() tries the rows
# in the order given here and returns the first match, so more specific
# signatures must come before more general ones.
#
# NOTE(review): rows whose type is 'belong' carry their value as a hex
# *string* (sometimes with an '&mask' suffix).  magicTest compares the stored
# value with '==' against the unpacked integer, so these rows look like they
# can never match.  They are left untouched here because activating them
# would change which rule wins for some inputs - confirm before converting.
magic = [

    [0, 'string', '=', '%PDF-', 'application/pdf'],
    [0, 'string', '=', '\177ELF', 'application/x-executable-binary'],
    [0, 'string', '=', '\004%!', 'application/postscript'],
    [0, 'string', '=', '\000\000\001\272', 'video/mpeg'],
    [0, 'string', '=', '\000\000\001\263', 'video/mpeg'],
    [0, 'string', '=', '\x47\x3f\xff\x10', 'video/mpeg'],
    [0, 'string', '=', '\377\330\377', 'image/jpeg'],
    [0, 'string', '=', '\xed\xab\xee\xdb', 'application/x-rpm'],
    [0, 'string', '=', 'Rar!', 'application/x-rar'],
    [257, 'string', '=', 'ustar\0', 'application/x-tar'],
    [257, 'string', '=', 'ustar\040\040\0', 'application/x-gtar'],
    # The following detection of OOo is according to
    # http://books.evc-cit.info/oobook/ch01.html
    # and some heuristics found with a hex editor.  If there is a better way
    # to detect these files, we should replace the signatures below.
    # Best would be to just read and evaluate the manifest file of the zip,
    # but the magic tests run on the first 8kB only, so we cannot unzip the
    # manifest in files >8kB.
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.writer',
     'application/vnd.sun.xml.writer'],
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.calc',
     'application/vnd.sun.xml.calc'],
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.draw',
     'application/vnd.sun.xml.draw'],
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.impress',
     'application/vnd.sun.xml.impress'],
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.chart',
     'application/vnd.sun.xml.chart'],
    [30, 'string', '=', 'mimetypeapplication/vnd.sun.xml.global',
     'application/vnd.sun.xml.global'],
    # zip works now that it has lower priority than the OOo signatures
    [0, 'string', '=', 'PK\003\004', 'application/zip'],
    [0, 'string', '=', 'GIF8', 'image/gif'],
    [4, 'string', '=', 'moov', 'video/quicktime'],
    [4, 'string', '=', 'mdat', 'video/quicktime'],
    [8, 'string', '=', 'mp42', 'video/quicktime'],
    [12, 'string', '=', 'mdat', 'video/quicktime'],
    [36, 'string', '=', 'mdat', 'video/quicktime'],
    [0, 'belong', '=', '0x3026b275', 'video/x-ms-asf'],
    [0, 'string', '=', 'ASF ', 'audio/x-ms-asx'],
    [0, 'string', '=', '<ASX', 'audio/x-ms-asx'],
    [0, 'string', '=', '<asx', 'audio/x-ms-asx'],
    [0, 'string', '=', 'MThd', 'audio/x-midi'],
    [0, 'string', '=', 'IMPM', 'audio/x-it'],
    [2, 'string', '=', '-lh0-', 'application/x-lha'],
    [2, 'string', '=', '-lh1-', 'application/x-lha'],
    [2, 'string', '=', '-lz4-', 'application/x-lha'],
    [2, 'string', '=', '-lz5-', 'application/x-lha'],
    [2, 'string', '=', '-lzs-', 'application/x-lha'],
    [2, 'string', '=', '-lh\40-', 'application/x-lha'],
    [2, 'string', '=', '-lhd-', 'application/x-lha'],
    [2, 'string', '=', '-lh2-', 'application/x-lha'],
    [2, 'string', '=', '-lh3-', 'application/x-lha'],
    [2, 'string', '=', '-lh4-', 'application/x-lha'],
    [2, 'string', '=', '-lh5-', 'application/x-lha'],
    [20, 'string', '=', '\375\304\247\334', 'application/x-zoo'],
    [0, 'string', '=', 'StuffIt ', 'application/x-stuffit'],
    [11, 'string', '=', 'must be converted with BinHex', 'application/mac-binhex40'],
    [102, 'string', '=', 'mBIN', 'application/x-macbinary'],
    [4, 'string', '=', 'gtktalog ', 'application/x-gtktalog'],
    [0, 'string', '=', 'diff ', 'text/x-patch'],
    [0, 'string', '=', 'Index:', 'text/x-patch'],
    [0, 'string', '=', '*** ', 'text/x-patch'],
    [0, 'string', '=', 'Only in ', 'text/x-patch'],
    [0, 'string', '=', 'Common subdirectories: ', 'text/x-patch'],
    [0, 'string', '=', 'FONT', 'application/x-font-vfont'],
    [0, 'string', '=', 'IIN1', 'image/tiff'],
    [0, 'string', '=', 'MM\x00\x2a', 'image/tiff'],
    [0, 'string', '=', 'II\x2a\x00', 'image/tiff'],
    [0, 'string', '=', '\x89PNG', 'image/png'],
    # NOTE(review): the value below is kept exactly as before (backslashes
    # and the '&mask' suffix are literal characters of the pattern), so it
    # presumably never matches a real PSD file - verify and simplify.
    [0, 'string', '=', '8BPS\\ \\ \000\000\000\000 &0xffffffff0000ffffffff', 'image/x-psd'],
    [0, 'string', '=', '#LyX', 'text/x-lyx'],
    [0, 'string', '=', 'DCMw', 'image/x-dcm'],
    [0, 'string', '=', 'gimp xcf', 'application/x-gimp-image'],
    [0, 'belong', '=', '0x59a66a95', 'image/x-sun-raster'],
    [0, 'belong', '=', '0x01da0000 &0xfcfeffff', 'image/x-sgi'],
    [0, 'belong', '=', '0xb168de3a', 'image/x-pcx'],
    [0, 'string', '=', '\x28\x00\x00\x00', 'image/x-dib'],
    [0, 'string', '=', 'SIMPLE  =', 'image/x-fits'],
    [0, 'belong', '=', '0x46506978', 'image/x-fpx'],
    [0, 'belong', '=', '0x00000200', 'image/x-icb'],
    [0, 'belong', '=', '0x53445058', 'image/x-dpx'],
    [0, 'string', '=', '[Desktop Entry]', 'application/x-gnome-app-info'],
    [0, 'string', '=', '[X-GNOME-Metatheme]', 'application/x-gnome-theme'],
    [0, 'string', '=', '<nautilus_object nautilus_link', 'application/x-nautilus-link'],
    [0, 'string', '=', 'URL:', 'application/x-gmc-link'],
    [0, 'string', '=', '/* XPM */', 'image/x-xpixmap'],
    [0, 'string', '=', '<!DOCTYPE xbel', 'application/xbel'],
    [0, 'string', '=', '<xbel', 'application/xbel'],
    [0, 'string', '=', '<!DOCTYPE NETSCAPE-Bookmark-file-1>', 'application/x-mozilla-bookmarks'],
    # Same signature as the row above, so this row is never reached.
    [0, 'string', '=', '<!DOCTYPE NETSCAPE-Bookmark-file-1>', 'application/x-netscape-bookmarks'],
    [0, 'string', '=', '<ephy_bookmarks        ', 'application/x-epiphany-bookmarks'],
    [0, 'string', '=', '<!DOCTYPE svg', 'image/svg'],
    [0, 'string', '=', '<svg', 'image/svg'],
    [0, 'string', '=', '<?php', 'application/x-php'],
    [0, 'string', '=', '<smil>', 'application/x-smil'],
    [0, 'string', '=', '<SMIL>', 'application/x-smil'],
    [0, 'string', '=', '<!DOCTYPE HTML', 'text/html'],
    [0, 'string', '=', '<!DOCTYPE html', 'text/html'],
    [0, 'string', '=', '<!doctype html', 'text/html'],
    [0, 'string', '=', '<!doctype Html', 'text/html'],
    [0, 'string', '=', '<!doctype HTML', 'text/html'],
    [10, 'string', '=', '<HEAD', 'text/html'],
    [10, 'string', '=', '<head', 'text/html'],
    [16, 'string', '=', '<TITLE', 'text/html'],
    [16, 'string', '=', '<title', 'text/html'],
    [10, 'string', '=', '<html', 'text/html'],
    [0, 'string', '=', '<HTML', 'text/html'],
    [0, 'string', '=', '<dia:diagram', 'application/x-dia-diagram'],
    [0, 'string', '=', '<abiword', 'application/x-abiword'],
    # Was '<\!DOCTYPE abiword': Python keeps the backslash of an unknown
    # escape, so the pattern could never match a real document.
    [0, 'string', '=', '<!DOCTYPE abiword', 'application/x-abiword'],
    [0, 'string', '=', 'gmr:Workbook', 'application/x-gnumeric'],
    [0, 'string', '=', '<?xml', 'text/xml'],
    [0, 'string', '=', '{\\rtf', 'application/rtf'],
    [0, 'string', '=', '#!/bin/sh', 'text/x-sh'],
    [0, 'string', '=', '#!/bin/bash', 'text/x-sh'],
    [0, 'string', '=', '#!/bin/csh', 'text/x-csh'],
    [0, 'string', '=', '#!/bin/ksh', 'text/x-ksh'],
    [0, 'string', '=', '#!/bin/perl', 'text/x-perl'],
    [0, 'string', '=', '#!/bin/zsh', 'text/x-zsh'],
    [1, 'string', '=', '/bin/sh', 'text/x-sh'],
    [1, 'string', '=', '/bin/bash', 'text/x-sh'],
    [1, 'string', '=', '/bin/csh', 'text/x-csh'],
    [1, 'string', '=', '/bin/ksh', 'text/x-ksh'],
    [1, 'string', '=', '/bin/perl', 'text/x-perl'],
    [0, 'string', '=', 'BEGIN:VCARD', 'text/x-vcard'],
    [0, 'string', '=', 'BEGIN:VCALENDAR', 'text/calendar'],
    [8, 'string', '=', 'CDR vrsn', 'application/vnd.corel-draw'],
    [8, 'string', '=', 'AVI ', 'video/x-msvideo'],
    [0, 'string', '=', 'MOVI', 'video/x-sgi-movie'],
    [0, 'string', '=', '.snd', 'audio/basic'],
    [8, 'string', '=', 'AIFC', 'audio/x-aifc'],
    [8, 'string', '=', 'AIFF', 'audio/x-aiff'],
    [0, 'string', '=', '.ra\375', 'audio/x-pn-realaudio'],
    [0, 'belong', '=', '0x2e7261fd', 'audio/x-pn-realaudio'],
    [0, 'string', '=', '.RMF', 'audio/x-pn-realaudio'],
    [8, 'string', '=', 'WAVE', 'audio/x-wav'],
    [8, 'string', '=', 'WAV ', 'audio/x-wav'],
    [0, 'string', '=', 'ID3', 'audio/mpeg'],
    # NOTE(review): this matches the six literal characters "0xfff0", not
    # the bytes 0xff 0xf0 - presumably not what was intended; confirm.
    [0, 'string', '=', '0xfff0', 'audio/mpeg'],
    [0, 'string', '=', '\x00\x00\x01\xba', 'video/mpeg'],
    [8, 'string', '=', 'CDXA', 'video/mpeg'],
    [0, 'belong', '=', '0x000001ba', 'video/mpeg'],
    [0, 'belong', '=', '0x000001b3', 'video/mpeg'],
    [0, 'string', '=', 'RIFF', 'audio/x-riff'],
    [0, 'string', '=', 'OggS   ', 'application/ogg'],
    # Were 'pnm:\/\/' and 'rtsp:\/\/': the backslashes stayed in the
    # pattern, so real URLs never matched.
    [0, 'string', '=', 'pnm://', 'audio/x-real-audio'],
    [0, 'string', '=', 'rtsp://', 'audio/x-real-audio'],
    [0, 'string', '=', 'SIT!', 'application/x-stuffit'],
    [0, 'string', '=', '\312\376\272\276', 'application/x-java-byte-code'],
    [0, 'string', '=', 'Joy!', 'application/x-pef-executable'],
    [4, 'string', '=', '\x11\xAF', 'video/x-fli'],
    [4, 'string', '=', '\x12\xAF', 'video/x-flc'],
    [0, 'string', '=', '\x31\xbe\x00\x00', 'application/msword'],
    [0, 'string', '=', 'PO^Q`', 'application/msword'],
    [0, 'string', '=', '\376\067\0\043', 'application/msword'],
    [0, 'string', '=', '\320\317\021\340\241\261', 'application/msword'],
    [0, 'string', '=', '\333\245-\0\0\0', 'application/msword'],
    [0, 'string', '=', 'Microsoft Excel 5.0 Worksheet', 'application/vnd.ms-excel'],
    [0, 'string', '=', 'Biff5', 'application/vnd.ms-excel'],
    [0, 'string', '=', '*BEGIN SPREADSHEETS    ', 'application/x-applix-spreadsheet'],
    [0, 'string', '=', '*BEGIN SPREADSHEETS    ', 'application/x-applix-spreadsheet'],
    [0, 'string', '=', '\x00\x00\x02\x00', 'application/vnd.lotus-1-2-3'],
    [0, 'belong', '=', '0x00001a00', 'application/vnd.lotus-1-2-3'],
    [0, 'belong', '=', '0x00000200', 'application/vnd.lotus-1-2-3'],
    [0, 'string', '=', 'PSID', 'audio/prs.sid'],
    [31, 'string', '=', 'Oleo', 'application/x-oleo'],
    [0, 'string', '=', 'FFIL', 'application/x-font-ttf'],
    [65, 'string', '=', 'FFIL', 'application/x-font-ttf'],
    [0, 'string', '=', 'LWFN', 'application/x-font-type1'],
    [65, 'string', '=', 'LWFN', 'application/x-font-type1'],
    [0, 'string', '=', 'StartFont', 'application/x-font-sunos-news'],
    [0, 'string', '=', '\x13\x7A\x29', 'application/x-font-sunos-news'],
    [8, 'string', '=', '\x13\x7A\x2B', 'application/x-font-sunos-news'],
    [0, 'string', '=', '%!PS-AdobeFont-1.', 'application/x-font-type1'],
    [6, 'string', '=', '%!PS-AdobeFont-1.', 'application/x-font-type1'],
    [0, 'string', '=', '%!FontType1-1.', 'application/x-font-type1'],
    [6, 'string', '=', '%!FontType1-1.', 'application/x-font-type1'],
    [0, 'string', '=', 'STARTFONT\040', 'application/x-font-bdf'],
    [0, 'string', '=', '\001fcp', 'application/x-font-pcf'],
    [0, 'string', '=', 'D1.0\015', 'application/x-font-speedo'],
    [0, 'string', '=', '\x14\x02\x59\x19', 'application/x-font-libgrx'],
    [0, 'string', '=', '\xff\x46\x4f\x4e', 'application/x-font-dos'],
    [7, 'string', '=', '\x00\x45\x47\x41', 'application/x-font-dos'],
    [7, 'string', '=', '\x00\x56\x49\x44', 'application/x-font-dos'],
    [0, 'string', '=', '<MakerScreenFont', 'application/x-font-framemaker'],
    [0, 'string', '=', '\000\001\000\000\000', 'application/x-font-ttf'],
    [1, 'string', '=', 'WPC', 'application/x-wordperfect'],
    [0, 'string', '=', 'ID;', 'text/spreadsheet'],
    [0, 'string', '=', 'MZ', 'application/x-ms-dos-executable'],
    [0, 'string', '=', '%!', 'application/postscript'],
    [0, 'string', '=', 'BZh', 'application/x-bzip'],
    [0, 'string', '=', '\x1f\x8b', 'application/x-gzip'],
    [0, 'string', '=', '\037\235', 'application/x-compress'],
    [0, 'string', '=', '\367\002', 'application/x-dvi'],
    [0, 'string', '=', '\367\203', 'application/x-font-tex'],
    [0, 'string', '=', '\367\131', 'application/x-font-tex'],
    [0, 'string', '=', '\367\312', 'application/x-font-tex'],
    [2, 'string', '=', '\000\022', 'application/x-font-tex-tfm'],
    [0, 'string', '=', '\x36\x04', 'application/x-font-linux-psf'],
    [0, 'string', '=', 'FWS', 'application/x-shockwave-flash'],
    [0, 'string', '=', 'CWS', 'application/x-shockwave-flash'],
    [0, 'string', '=', 'NSVf', 'video/x-nsv'],
    # NOTE(review): the '&mask' suffix is part of the literal pattern here
    # too, so this row presumably never matches a real BMP file - verify.
    [0, 'string', '=', 'BMxxxx\000\000 &0xffff00000000ffff', 'image/bmp'],
    [0, 'string', '=', 'Return-Path:', 'message/rfc822'],
    [0, 'string', '=', 'Path:', 'message/news'],
    [0, 'string', '=', 'Xref:', 'message/news'],
    [0, 'string', '=', 'From:', 'message/rfc822'],
    [0, 'string', '=', 'Article', 'message/news'],
    [0, 'string', '=', 'Received:', 'message/rfc822'],
    [0, 'string', '=', '[playlist]', 'audio/x-scpls'],
    [0, 'string', '=', '[Reference]', 'video/x-ms-asf'],
    [0, 'string', '=', 'fLaC', 'application/x-flac'],
    [32769, 'string', '=', 'CD001', 'application/x-iso-image'],
    [37633, 'string', '=', 'CD001', 'application/x-iso-image'],
    [32776, 'string', '=', 'CDROM', 'application/x-iso-image'],
    [0, 'string', '=', 'OTTO', 'application/x-font-otf'],
    [54, 'string', '=', 'S T O P', 'application/x-ipod-firmware'],
    [0, 'string', '=', 'BLENDER', 'application/x-blender'],
    [20, 'string', '=', 'import', 'text/python-source'],
]
00245 
# magicTest instances built from the rows of `magic` at the bottom of this
# module; guessMime() walks this list in order.
magicNumbers = []
00247 
def strToNum(n):
    r"""Convert a magic-file style numeric string to an integer.

    Accepted notations:
      hexadecimal - '0x1f' or 'x1f' (hex digits are case-insensitive)
      octal       - a leading backslash, e.g. '\177'
      decimal     - anything else, e.g. '42'

    A bare prefix ('0x', 'x' or a lone backslash) yields 0.

    Raises ValueError for an empty string and for any character that is
    not a valid digit of the selected base.  (Previously an empty string
    raised IndexError and the octal branch accepted 8 and 9 and silently
    returned a partial value on other characters.)
    """
    if n[:1] == 'x':
        n = '0' + n

    if n[:2] == '0x':
        alphabet = string.hexdigits[:16]      # '0123456789abcdef'
        digits = n[2:].lower()
    elif n[:1] == '\\':
        alphabet = string.octdigits
        digits = n[1:]
    else:
        return int(n)

    # Left-to-right positional evaluation; str.index raises ValueError on
    # a character outside the alphabet.
    base = len(alphabet)
    val = 0
    for ch in digits:
        val = val * base + alphabet.index(ch)
    return val
00272 
class magicTest:
    """One magic-number rule: read a value at a fixed offset and compare it.

    Attributes set by the constructor:
      offset   - position in the data at which the value is read
      type     - 'string' or one of the integer type names in _formats
      op       - comparison operator; only '=' is implemented
      value    - expected value, stored exactly as passed in
      msg      - what test()/compare() return on a match
      mask     - optional integer AND-ed with the data before comparing
      subTests - initialised to an empty list; not used inside this class
    """

    # struct format and byte width for each integer type.  The '=' prefix
    # selects standard sizes: a native 'l' is 8 bytes on LP64 platforms, so
    # unpacking it from a 4-byte slice always failed there.
    _formats = {
        'short':   ('=h', 2),
        'leshort': ('<h', 2),
        'beshort': ('>H', 2),
        'long':    ('=l', 4),
        'lelong':  ('<l', 4),
        'belong':  ('>l', 4),
    }

    def __init__(self, offset, t, op, value, msg, mask = None):
        """Store the rule.

        `t` may carry an inline mask after '&' (for example
        'belong&0xff00'); it then overrides the `mask` argument.  A string
        `offset` is converted with strToNum().  `value` is kept as given, so
        integer types need an integer value for '=' to be able to match.
        """
        amp = t.find('&')
        if amp >= 0:
            mask = strToNum(t[amp + 1:])
            t = t[:amp]
        if isinstance(offset, str):
            offset = strToNum(offset)
        self.offset = offset
        self.type = t
        self.msg = msg
        self.subTests = []
        self.op = op
        self.mask = mask
        self.value = value

    def test(self, data):
        """Return self.msg if the extracted `data` satisfies the rule, else None.

        Only the '=' operator is implemented; '<', '>', '&' and '^' (and
        anything else) never match.
        """
        if self.mask:
            data = data & self.mask
        if self.op == '=' and self.value == data:
            return self.msg
        return None

    def compare(self, data):
        """Extract the value at self.offset from `data` and test it.

        Returns self.msg on a match and None otherwise, including when the
        data is too short or cannot be unpacked.  For a type that is neither
        'string' nor listed in _formats the whole of `data` is tested as is.
        """
        try:
            start = self.offset
            if self.type == 'string':
                # A plain slice also covers data that ends exactly at the
                # end of the pattern; the old byte-by-byte loop dropped the
                # final byte in that case and so never matched.
                data = data[start:start + len(self.value)]
            elif self.type in self._formats:
                fmt, width = self._formats[self.type]
                [data] = struct.unpack(fmt, data[start:start + width])
        except Exception:
            # Best effort: unreadable data means "no match".  Unlike the
            # former bare except, KeyboardInterrupt/SystemExit propagate on
            # Python versions where they do not derive from Exception.
            return None

        return self.test(data)
00334 
00335 
def guessMime(data):
    """Return the result of the first rule in magicNumbers that matches `data`.

    Rules are tried in list order.  None is returned when no rule matches.
    """
    for rule in magicNumbers:
        result = rule.compare(data)
        if not result:
            continue
        return result
    # Nothing matched.
    return None
00343 
00344 #import sys
# Compile every row of the table into a magicTest, keeping the table order.
magicNumbers.extend([magicTest(m[0], m[1], m[2], m[3], m[4]) for m in magic])