Back to index

plone3  3.1.7
MimeTypesRegistry.py
Go to the documentation of this file.
00001 import os
00002 import re
00003 import fnmatch
00004 from types import UnicodeType
00005 from zope.interface import implements
00006 from zope.contenttype import guess_content_type
00007 
00008 from OFS.Folder import Folder
00009 from Globals import InitializeClass
00010 from Acquisition import aq_base
00011 from Globals import PersistentMapping
00012 from AccessControl import ClassSecurityInfo
00013 from BTrees.OOBTree import OOBTree
00014 from Products.CMFCore.permissions import ManagePortal
00015 
00016 from Products.CMFCore.ActionProviderBase import ActionProviderBase
00017 from Products.CMFCore.utils import registerToolInterface
00018 from Products.CMFCore.utils import UniqueObject
00019 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
00020 
00021 from Products.MimetypesRegistry.interfaces import ISourceAdapter
00022 from Products.MimetypesRegistry.interfaces import IMimetypesRegistry
00023 from Products.MimetypesRegistry.interfaces import IMimetypesRegistryTool
00024 from Products.MimetypesRegistry.interfaces import IMimetype
00025 from Products.MimetypesRegistry.interfaces import IClassifier
00026 from Products.MimetypesRegistry.MimeTypeItem import MimeTypeItem
00027 from Products.MimetypesRegistry.mime_types import initialize
00028 from Products.MimetypesRegistry.mime_types import magic
00029 from Products.MimetypesRegistry.common import log
00030 from Products.MimetypesRegistry.common import MimeTypeException
00031 from Products.MimetypesRegistry.common import _www
00032 from Products.MimetypesRegistry.encoding import guess_encoding
00033 
# Shorthand archive extensions, each expanded to the compound suffix
# it stands for before the extension lookup is performed.
suffix_map = dict.fromkeys(('tgz', 'taz', 'tz'), '.tar.gz')

# Extensions that denote a content encoding (compression) wrapped
# around the real file type, mapped to the encoding's name.
encodings_map = dict(gz='gzip', Z='compress')
00044 
class MimeTypesRegistry(UniqueObject, ActionProviderBase, Folder):
    """Mimetype registry that deals with
    a) registering types
    b) wildcarding of rfc-2046 types
    c) classifying data into a given type

    Registered IMimetype objects are indexed three ways: by
    major/minor name (``_mimetypes``), by file extension
    (``extensions``) and by filename glob (``globs``).
    """

    __implements__ = (IMimetypesRegistry, ISourceAdapter)
    implements(IMimetypesRegistryTool)

    id        = 'mimetypes_registry'
    meta_type = 'MimeTypes Registry'
    isPrincipiaFolderish = 1 # Show up in the ZMI

    meta_types = all_meta_types = (
        { 'name'   : 'MimeType',
          'action' : 'manage_addMimeTypeForm'},
        )

    manage_options = (
        ( { 'label'   : 'MimeTypes',
            'action' : 'manage_main'},) +
        Folder.manage_options[2:]
        )

    manage_addMimeTypeForm = PageTemplateFile('addMimeType', _www)
    manage_main = PageTemplateFile('listMimeTypes', _www)
    manage_editMimeTypeForm = PageTemplateFile('editMimeType', _www)

    security = ClassSecurityInfo()

    # FIXME
    __allow_access_to_unprotected_subobjects__ = 1

    def __init__(self,):
        """Create the empty indexes and properties, then load the
        stock mime types through ``initialize``.
        """
        # Per-instance copies so edits never touch the module defaults
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        # Major key -> minor IMimetype objects
        self._mimetypes  = PersistentMapping()
        # ext -> IMimetype mapping
        self.extensions = PersistentMapping()
        # glob -> (regex, mimetype) mapping
        self.globs = OOBTree()
        self.manage_addProperty('defaultMimetype', 'text/plain', 'string')
        self.manage_addProperty('unicodePolicies', 'strict ignore replace',
                                'tokens')
        self.manage_addProperty('unicodePolicy', 'unicodePolicies', 'selection')
        self.manage_addProperty('fallbackEncoding', 'latin1', 'string')

        # initialize mime types
        initialize(self)
        self._new_style_mtr = 1

    security.declareProtected(ManagePortal, 'register')
    def register(self, mimetype):
        """ Register a new mimetype

        mimetype must implement IMimetype.  Every name, extension and
        glob it declares is added to the corresponding index.
        """
        mimetype = aq_base(mimetype)
        assert IMimetype.isImplementedBy(mimetype)
        for t in mimetype.mimetypes:
            self.register_mimetype(t, mimetype)
        for extension in mimetype.extensions:
            self.register_extension(extension, mimetype)
        for glob in mimetype.globs:
            self.register_glob(glob, mimetype)

    security.declareProtected(ManagePortal, 'register_mimetype')
    def register_mimetype(self, mt, mimetype):
        """ Associate the rfc-2046 name mt ('major/minor') to a IMimetype

        Raise MimeTypeException when mt has an empty major or minor
        part or a wildcard minor part.  Replacing an existing entry
        with a different object is allowed but logged.
        """
        major, minor = split(mt)
        if not major or not minor or minor == '*':
            raise MimeTypeException('Can\'t register mime type %s' % mt)
        group = self._mimetypes.setdefault(major, PersistentMapping())
        if group.has_key(minor):
            if group.get(minor) != mimetype:
                log('Warning: redefining mime type %s (%s)' % (
                    mt, mimetype.__class__))
        group[minor] = mimetype

    security.declareProtected(ManagePortal, 'register_extension')
    def register_extension(self, extension, mimetype):
        """ Associate a file's extension to a IMimetype

        extension is a string representing a file extension (not
        prefixed by a dot) mimetype must implement IMimetype.
        Replacing an existing entry with a different object is
        allowed but logged.
        """
        mimetype = aq_base(mimetype)
        if self.extensions.has_key(extension):
            if self.extensions.get(extension) != mimetype:
                log('Warning: redefining extension %s from %s to %s' % (
                    extension, self.extensions[extension], mimetype))
        # we don't validate fmt yet, but its ["txt", "html"]
        self.extensions[extension] = mimetype

    security.declareProtected(ManagePortal, 'register_glob')
    def register_glob(self, glob, mimetype):
        """ Associate a glob to a IMimetype

        glob is a shell-like glob that will be translated to a regex
        to match against whole filename.
        mimetype must implement IMimetype.
        """
        globs = getattr(self, 'globs', None)
        if globs is None:
            # The attribute may be absent on this instance; create it
            self.globs = globs = OOBTree()
        mimetype = aq_base(mimetype)
        existing = globs.get(glob)
        if existing is not None:
            regex, mt = existing
            if mt != mimetype:
                log('Warning: redefining glob %s from %s to %s' % (
                    glob, mt, mimetype))
        # the glob itself is not validated; it is compiled once here
        # so that globFilename only has to match
        pattern = re.compile(fnmatch.translate(glob))
        globs[glob] = (pattern, mimetype)

    security.declareProtected(ManagePortal, 'unregister')
    def unregister(self, mimetype):
        """ Unregister a previously registered mimetype

        mimetype must implement IMimetype.  Only index entries that
        still point to this very mimetype are removed.
        """
        # Strip acquisition wrappers, as register() does
        mimetype = aq_base(mimetype)
        assert IMimetype.isImplementedBy(mimetype)
        for t in mimetype.mimetypes:
            major, minor = split(t)
            group = self._mimetypes.get(major, {})
            if group.get(minor) == mimetype:
                del group[minor]
        for e in mimetype.extensions:
            if self.extensions.get(e) == mimetype:
                del self.extensions[e]
        globs = getattr(self, 'globs', None)
        if globs is not None:
            for glob in mimetype.globs:
                existing = globs.get(glob)
                if existing is None:
                    continue
                regex, mt = existing
                if mt == mimetype:
                    del globs[glob]

    security.declarePublic('mimetypes')
    def mimetypes(self):
        """Return all defined mime types, each one implements at least
        IMimetype

        An object registered under several names is returned once.
        """
        # Dictionary keys are used to drop duplicates
        unique = {}
        for group in self._mimetypes.values():
            for item in group.values():
                unique[item] = 1
        return [aq_base(item) for item in unique.keys()]


    security.declarePublic('list_mimetypes')
    def list_mimetypes(self):
        """Return all defined mime types, as string"""
        return [str(mt) for mt in self.mimetypes()]

    security.declarePublic('lookup')
    def lookup(self, mimetypestring):
        """Lookup for IMimetypes object matching mimetypestring

        mimetypestring may have an empty minor part or contain a
        wildcard (*), in which case every type of the major group
        matches.  mimetypestring may also be an IMimetype object, in
        which case it is returned unchanged (without acquisition
        wrapper).

        Return a tuple of mimetype objects associated with the
        RFC-2046 name, or an empty tuple if no one is known.
        """
        if IMimetype.isImplementedBy(mimetypestring):
            return (aq_base(mimetypestring), )
        __traceback_info__ = (repr(mimetypestring), str(mimetypestring))
        major, minor = split(str(mimetypestring))
        group = self._mimetypes.get(major, {})
        if not minor or minor == '*':
            res = group.values()
        else:
            res = group.get(minor)
            if res:
                res = (res,)
            else:
                return ()
        return tuple([aq_base(mtitem) for mtitem in res])

    security.declarePublic('lookupExtension')
    def lookupExtension(self, filename):
        """Lookup for IMimetypes object matching filename

        Filename maybe a file name like 'content.txt' or an extension
        like 'rest'

        Return an IMimetype object associated with the file's
        extension or None
        """
        if filename.find('.') != -1:
            base, ext = os.path.splitext(filename)
            ext = ext[1:] # remove the dot
            # Expand shorthands such as 'tgz' into '.tar.gz'
            while ext in self.suffix_map:
                base, ext = os.path.splitext(base + self.suffix_map[ext])
                ext = ext[1:] # remove the dot
        else:
            # A bare extension was given
            ext = filename
            base = None

        # Look through an encoding suffix ('gz', 'Z') to the extension
        # underneath it; a bare extension has nothing underneath.
        if base and ext in self.encodings_map:
            base, ext = os.path.splitext(base)
            ext = ext[1:] # remove the dot
        return aq_base(self.extensions.get(ext))

    security.declarePublic('globFilename')
    def globFilename(self, filename):
        """Lookup for IMimetypes object matching filename

        Filename must be a complete filename with extension.

        Return the IMimetype object of the first registered glob
        (in key order) matching filename, or None
        """
        globs = getattr(self, 'globs', None)
        if globs is None:
            return None
        for key in globs.keys():
            pattern, mimetype = globs[key]
            if pattern.match(filename):
                return aq_base(mimetype)
        return None

    security.declarePublic('lookupGlob')
    def lookupGlob(self, glob):
        """Return what is registered for the exact glob string, or None

        The value is the one stored by register_glob, that is a
        (compiled regex, mimetype) pair and not the bare mimetype.
        """
        globs = getattr(self, 'globs', None)
        if globs is None:
            return None
        return aq_base(globs.get(glob))

    def _classifiers(self):
        """Return the registered mimetypes that implement IClassifier"""
        return [mt for mt in self.mimetypes() if IClassifier.isImplementedBy(mt)]

    security.declarePublic('classify')
    def classify(self, data, mimetype=None, filename=None):
        """Classify works as follows:
        1) you tell me the rfc-2046 name and I give you an IMimetype
           object
        2) the filename includes an extension (or matches a glob) from
           which we can guess the mimetype; only tried when no
           mimetype name was given
        3) we can optionally introspect the data
        4) default to self.defaultMimetype if no data was provided,
           else to application/octet-stream if a filename was provided,
           else to what guess_content_type says about the data
           (text/plain when it cannot tell)

        Return an IMimetype object or None
        """
        mt = None
        if mimetype:
            mt = self.lookup(mimetype)
            if mt:
                mt = mt[0]
        elif filename:
            mt = self.lookupExtension(filename)
            if mt is None:
                mt = self.globFilename(filename)
        if data and not mt:
            for c in self._classifiers():
                if c.classify(data):
                    mt = c
                    break
            if not mt:
                mstr = magic.guessMime(data)
                if mstr:
                    # The guessed name may not be registered here, in
                    # which case lookup() returns an empty tuple.
                    guessed = self.lookup(mstr)
                    if guessed:
                        mt = guessed[0]
        if not mt:
            if not data:
                mtlist = self.lookup(self.defaultMimetype)
            elif filename:
                mtlist = self.lookup('application/octet-stream')
            else:
                # Here data is non-empty and there is no filename
                ct, enc = guess_content_type('', data, None)
                if ct == 'text/x-unknown-content-type':
                    ct = 'text/plain'
                mtlist = self.lookup(ct)
            if not mtlist:
                return None
            mt = mtlist[0]

        # Remove acquisition wrappers
        return aq_base(mt)

    def __call__(self, data, **kwargs):
        """ Return a triple (data, filename, mimetypeobject) given
        some raw data and optional parameters

        Recognised keyword parameters are mimetype, filename and
        encoding.  data may be a string or an object with a read
        method; a filename or name attribute on data overrides the
        filename parameter.  Non-binary byte data is decoded to
        unicode.  mimetypeobject is None, and data is left undecoded,
        when classification fails.

        method from the isourceAdapter interface
        """
        mimetype = kwargs.get('mimetype', None)
        filename = kwargs.get('filename', None)
        encoding = kwargs.get('encoding', None)
        if hasattr(data, 'filename'):
            filename = os.path.basename(data.filename)
        elif hasattr(data, 'name'):
            filename = os.path.basename(data.name)

        if hasattr(data, 'read'):
            _data = data.read()
            if hasattr(data, 'seek'):
                # Rewind so that the caller can read the file again
                data.seek(0)
            data = _data

        # We need to figure out if data is binary and skip encoding if
        # it is
        mt = self.classify(data, mimetype=mimetype, filename=filename)

        # classify() returns None when nothing matches; there is then
        # no ground to treat the data as text.
        is_text = mt is not None and not mt.binary
        if is_text and not isinstance(data, UnicodeType):
            # if no encoding specified, try to guess it from data
            if encoding is None:
                encoding = self.guess_encoding(data)

            # ugly workaround for
            # https://sourceforge.net/tracker/?func=detail&aid=1068001&group_id=75272&atid=543430
            # covered by
            # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=843590&group_id=5470
            # dont remove this code unless python is fixed.
            # (compared by value: identity of equal strings is not
            # guaranteed)
            if encoding == "macintosh":
                encoding = 'mac_roman'

            try:
                try:
                    data = unicode(data, encoding, self.unicodePolicy)
                except (ValueError, LookupError):
                    # wrong unicodePolicy
                    data = unicode(data, encoding)
            except Exception:
                # Best effort: whatever went wrong above, retry with
                # the configured fallback encoding.
                data = unicode(data, self.fallbackEncoding)

        return (data, filename, aq_base(mt))

    security.declarePublic('guess_encoding')
    def guess_encoding(self, data):
        """ Try to guess encoding from a text value if no encoding
        guessed, used the default charset from site properties (Zope)
        with a fallback to UTF-8 (should never happen with correct
        site_properties, but always raise Attribute error without
        Zope)
        """
        if isinstance(data, UnicodeType):
            # data maybe unicode but with another encoding specified
            data = data.encode('UTF-8')
        encoding = guess_encoding(data)
        if encoding is None:
            try:
                site_props = self.portal_properties.site_properties
                encoding = site_props.getProperty('default_charset', 'UTF-8')
            except AttributeError:
                # portal_properties / site_properties not reachable
                encoding = 'UTF-8'
        return encoding

    security.declareProtected(ManagePortal, 'manage_delObjects')
    def manage_delObjects(self, ids, REQUEST=None):
        """ delete the selected mime types

        ids are rfc-2046 names; the first match of each one is
        unregistered.  An id matching nothing raises IndexError.
        """
        for mt_id in ids:
            self.unregister(self.lookup(mt_id)[0])
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_addMimeType')
    def manage_addMimeType(self, id, mimetypes, extensions, icon_path,
                           binary=0, globs=None, REQUEST=None):
        """add a mime type to the tool"""
        mt = MimeTypeItem(id, mimetypes, extensions=extensions,
                          binary=binary, icon_path=icon_path, globs=globs)
        self.register(mt)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_editMimeType')
    def manage_editMimeType(self, name, new_name, mimetypes, extensions,
                            icon_path, binary=0, globs=None, REQUEST=None):
        """Edit a mime type by name

        The type is unregistered, edited, then registered again so
        that the indexes follow its new names, extensions and globs.
        An unknown name raises IndexError.
        """
        mt = self.lookup(name)[0]
        self.unregister(mt)
        mt.edit(new_name, mimetypes, extensions, icon_path=icon_path,
                binary=binary, globs=globs)
        self.register(mt)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')
00439 
# Finish the class set-up; presumably this applies the declarations
# collected in the class's ``security`` object -- confirm against the
# Zope version in use.
InitializeClass(MimeTypesRegistry)
# Tie the tool id 'mimetypes_registry' to its tool interface.
registerToolInterface('mimetypes_registry', IMimetypesRegistryTool)
00442 
00443 
def split(name):
    """ split a mime type in a (major / minor) 2-uple

    Only the first '/' separates, so 'a/b/c' gives ('a', 'b/c').
    Raise MimeTypeException when name has no '/' or is not a string.
    """
    try:
        major, minor = name.split('/', 1)
    except (AttributeError, TypeError, ValueError):
        # ValueError: no '/' so there is a single part to unpack;
        # AttributeError / TypeError: name is not string-like.
        # The 1-tuple keeps the formatting valid if name is a tuple.
        raise MimeTypeException('Malformed MIME type (%s)' % (name,))
    return major, minor