Back to index

plone3  3.1.7
validators.py
Go to the documentation of this file.
00001 #  ATContentTypes http://plone.org/products/atcontenttypes/
00002 #  Archetypes reimplementation of the CMF core types
00003 #  Copyright (c) 2003-2006 AT Content Types development team
00004 #
00005 #  This program is free software; you can redistribute it and/or modify
00006 #  it under the terms of the GNU General Public License as published by
00007 #  the Free Software Foundation; either version 2 of the License, or
00008 #  (at your option) any later version.
00009 #
00010 #  This program is distributed in the hope that it will be useful,
00011 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 #  GNU General Public License for more details.
00014 #
00015 #  You should have received a copy of the GNU General Public License
00016 #  along with this program; if not, write to the Free Software
00017 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018 #
00019 """
00020 
00021 
00022 """
00023 __author__  = 'Christian Heimes <tiran@cheimes.de>'
00024 __docformat__ = 'restructuredtext'
00025 
00026 from types import FileType
00027 from Acquisition import aq_base
00028 
00029 from Products.ATContentTypes.config import HAS_MX_TIDY
00030 from Products.ATContentTypes.config import MX_TIDY_ENABLED
00031 from Products.ATContentTypes.config import MX_TIDY_MIMETYPES
00032 from Products.ATContentTypes.config import MX_TIDY_OPTIONS
00033 
00034 from Products.validation.config import validation
00035 from Products.validation.interfaces.IValidator import IValidator
00036 
00037 import re
00038 import encodings
00039 import logging
00040 logger = logging.getLogger('ATCT')
00041 
00042 from ZPublisher.HTTPRequest import FileUpload
00043 
00044 from zope.tal.htmltalparser import HTMLTALParser
00045 from zope.tal.talgenerator import TALGenerator
00046 from Products.PageTemplates.Expressions import getEngine
00047 
00048 if HAS_MX_TIDY:
00049     from mx.Tidy import tidy as mx_tidy
00050 
# Matches a tidy warning line such as 'line 15 column 1 - Warning: missing ...'
# and captures the line number, the column number and the message text.
RE_MATCH_WARNING = re.compile(r'^line (\d+) column (\d+) - Warning: (.*)$')
# Format string used to rebuild a warning line from (line, column, text).
WARNING_LINE = 'line %d column %d - Warning: %s'

# Matches a tidy error line such as 'line 15 column 1 - Error: missing ...'
# and captures the line number, the column number and the message text.
RE_MATCH_ERROR = re.compile(r'^line (\d+) column (\d+) - Error: (.*)$')
# Format string used to rebuild an error line from (line, column, text).
ERROR_LINE = 'line %d column %d - Error: %s'

# The opening tag is matched minimally ([^>]*?) while the greedy (.*) runs up
# to the last </body> in the text, so any nested body tags end up inside the
# captured group instead of truncating it.
RE_BODY = re.compile('<body[^>]*?>(.*)</body>', re.DOTALL)

# Get the encoding from an uploaded html page,
# e.g. <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
# The charset (ISO-8859-1) lands in the second group, the text before and
# after it in the first and third groups.
RE_GET_HTML_ENCODING = re.compile(
    r'(<meta.*?content-type.*?charset[\s]*=[\s]*)([^"]*?)("[^>]*?>)',
    re.S | re.I)

# Number of lines subtracted from the line number of every warning/error.
# NOTE(review): wrapValueInHTML appears to put the wrapped value on line 9 of
# the generated document, which suggests an offset of 8 -- confirm that 11 is
# the intended value.
SUBTRACT_LINES = 11

# Validator instances are appended here and registered further down.
validatorList = []
00073 
00074 class TALValidator:
00075     """Validates a text to be valid TAL code
00076 
00077     """
00078     __implements__ = IValidator
00079 
00080     def __init__(self, name, title='', description=''):
00081         self.name = name
00082         self.title = title or name
00083         self.description = description
00084 
00085     def __call__(self, value, *args, **kw):
00086         gen = TALGenerator(getEngine(), xml=1, source_file=None)
00087         parser = HTMLTALParser(gen)
00088         try:
00089             parser.parseString(value)
00090         except Exception, err:
00091             return ("Validation Failed(%s): \n %s" % (self.name, err))
00092         return 1
00093 
00094 validatorList.append(TALValidator('isTAL', title='', description=''))
00095 
00096 
class TidyHtmlValidator:
    """Check HTML with mxTidy without modifying the value.

    Validation fails when tidy reports errors or warnings. When mxTidy is
    missing or disabled the value is always accepted.
    """

    __implements__ = IValidator

    def __init__(self, name, title='', description=''):
        self.name = name
        # fall back to the name when no title is given
        self.title = title or name
        self.description = description

    def __call__(self, value, *args, **kw):
        """Return 1 if `value` is accepted, else a failure message string.

        Expects the keyword arguments 'REQUEST' and 'field'.
        """
        if not HAS_MX_TIDY or not MX_TIDY_ENABLED:
            # mxTidy is not installed or switched off
            return 1

        request = kw['REQUEST']
        field = kw['field']

        tidied = doTidy(value, field, request)
        if tidied is None:
            # doTidy did nothing for this value
            return 1

        nerrors, nwarnings, outputdata, errordata = tidied

        # warnings count as failures for this validator
        if nerrors + nwarnings:
            return "Validation Failed(%s): \n %s" % (self.name, errordata)
        return 1

validatorList.append(TidyHtmlValidator('isTidyHtml', title='', description=''))
00132 
00133 
class TidyHtmlWithCleanupValidator:
    """Check HTML with mxTidy and keep the cleaned-up output.

    Validation fails only when tidy reports errors. The tidied output is
    stored in the request, and fields that produced warnings are collected
    in the request under 'tidiedFields'.
    """

    __implements__ = IValidator

    def __init__(self, name, title='', description=''):
        self.name = name
        # fall back to the name when no title is given
        self.title = title or name
        self.description = description

    def __call__(self, value, *args, **kw):
        """Return 1 if `value` is accepted, else a failure message string.

        Expects the keyword arguments 'REQUEST' and 'field'.
        """
        if not HAS_MX_TIDY or not MX_TIDY_ENABLED:
            # mxTidy is not installed or switched off
            return 1

        request = kw['REQUEST']
        field = kw['field']

        tidied = doTidy(value, field, request, cleanup=1)
        if tidied is None:
            # doTidy did nothing for this value
            return 1

        nerrors, nwarnings, outputdata, errordata = tidied

        # store the tidied output in the request under '<field>_tidier_data'
        request['%s_tidier_data' % field.getName()] = outputdata

        if nwarnings:
            # remember which fields produced warnings
            warned = list(request.get('tidiedFields', []))
            warned.append(field)
            request.set('tidiedFields', warned)

        # only real errors fail the validation
        if nerrors:
            return "Validation Failed(%s): \n %s" % (self.name, errordata)
        return 1


validatorList.append(TidyHtmlWithCleanupValidator('isTidyHtmlWithCleanup', title='', description=''))
00180 
class NonEmptyFileValidator:
    """Fail validation when the uploaded value is empty.

    File-like values are measured by seeking to their end; any other value
    is measured with len(). Values without a length are treated as
    non-empty.
    """

    __implements__ = IValidator

    def __init__(self, name, title='', description=''):
        self.name = name
        # fall back to the name when no title is given
        self.title = title or name
        self.description = description

    def __call__(self, value, *args, **kwargs):
        """Return True for a non-empty value, else a failure message string.

        Extra positional and keyword arguments are accepted for
        compatibility with the validator call convention but are not used.
        """
        is_file_like = (isinstance(value, FileUpload)
                        or type(value) is FileType
                        or hasattr(aq_base(value), 'tell'))

        if is_file_like:
            value.seek(0, 2)  # jump to the end of the file
            size = value.tell()
            value.seek(0)     # rewind so later readers start at the top
        else:
            try:
                size = len(value)
            except TypeError:
                # no length available: treat the value as non-empty
                size = 1

        if size == 0:
            return "Validation failed: Uploaded file is empty"
        return True


validatorList.append(NonEmptyFileValidator('isNonEmptyFile', title='', description=''))
00215 
# Register every validator collected in validatorList with the `validation`
# object imported from Products.validation.config.
for validator in validatorList:
    # register one validator instance
    validation.register(validator)
00219 
00220 
def doTidy(value, field, request, cleanup=0):
    """Run mxTidy over 'value' for the given field of the current request.

    With cleanup enabled:
      * the tidied output is reduced to the content of its body tag
      * parseErrorData is asked to remove warnings from the error data

    Returns None when nothing was done, otherwise the tuple
    (nerrors, nwarnings, outputdata, errordata).
    """
    # The field's mimetype is updated only *after* validation, so the text
    # format has to be read from the request instead.
    format_key = '%s_text_format' % field.getName()
    text_format = getattr(request, format_key, '')

    # A non-empty MX_TIDY_MIMETYPES restricts tidying to the listed formats.
    if MX_TIDY_MIMETYPES and text_format not in MX_TIDY_MIMETYPES:
        return None

    if not isinstance(value, FileUpload):
        markup = wrapValueInHTML(value)
    else:
        # An uploaded file can break the layout as well, so it is tidied
        # too. The validator may run several times, hence the rewind
        # before reading.
        value.seek(0)
        markup = correctEncoding(value.read())

    nerrors, nwarnings, outputdata, errordata = mx_tidy(markup,
                                                        **MX_TIDY_OPTIONS)

    # adjust the error report
    errordata = parseErrorData(errordata, removeWarnings=cleanup)
    if cleanup:
        # strip the html wrapper from the tidied output
        outputdata = unwrapValueFromHTML(outputdata)

    return nerrors, nwarnings, outputdata, errordata
00263 
def wrapValueInHTML(value):
    """Embed the value in a minimal XHTML document with an empty title.

    The wrapper gives tidy a complete document, so it does not complain
    about a missing title.
    """
    template = (
        '\n'
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        '<head>\n'
        '<title></title>\n'
        '</head>\n'
        '<body>\n'
        '%s\n'
        '</body>\n'
        '</html>\n'
    )
    return template % value
00279 
def unwrapValueFromHTML(value):
    """Return the text found between the body tags of an html string.

    Raises ValueError when no body element can be found in the value.
    """
    match = RE_BODY.search(value)
    if match is None:
        raise ValueError('%s is not a html string' % value)
    # group 1 holds everything between the opening and closing body tag
    return match.group(1)
00301 
def correctEncoding(value):
    """Re-encode an html page if its declared charset differs from the
    encoding mxTidy is configured to expect.

    The value is returned unchanged when
      * MX_TIDY_OPTIONS has no 'char_encoding' or it is 'raw',
      * the page declares no charset in a meta content-type tag,
      * one of the two encodings is unknown to Python, or both resolve to
        the same codec,
      * the conversion itself fails (the failure is logged).
    Otherwise the value is decoded from the declared charset and returned
    encoded in the configured char_encoding.
    """
    # nothing to do if mxTidy has no fixed char_encoding
    if 'char_encoding' not in MX_TIDY_OPTIONS \
           or MX_TIDY_OPTIONS['char_encoding'] == 'raw':
        return value

    match = RE_GET_HTML_ENCODING.search(value)
    if not match:
        return value

    # Normalize both names a bit, then look them up in the Python encodings
    # database; the lookup results can be compared with each other.
    html_encoding = match.group(2).strip().lower()
    char_encoding = MX_TIDY_OPTIONS['char_encoding'].lower().strip()
    h_enc = encodings.search_function(html_encoding)
    c_enc = encodings.search_function(char_encoding)

    # one encoding is missing or they are equal
    if not (h_enc and c_enc) or h_enc == c_enc:
        return value

    try:
        return unicode(value, html_encoding).encode(char_encoding)
    except Exception:
        # Best effort: keep the original value, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would.
        logger.info("Error correcting encoding from %s to %s",
                    html_encoding, char_encoding)
        return value
00334 
def parseErrorData(data, removeWarnings=0):
    """Rewrite the error report produced by tidy.

    The line number of every error and warning line is reduced by
    SUBTRACT_LINES. When removeWarnings is true, warning lines are dropped
    from the result. All other lines are passed through unchanged.

    Returns the rewritten report as a single newline-joined string.
    """
    nlines = []
    for line in data.split('\n'):
        error = RE_MATCH_ERROR.search(line)
        if error:
            # an error line: shift the line number
            lnum, cnum, text = error.groups()
            nlines.append(
                ERROR_LINE % (int(lnum) - SUBTRACT_LINES, int(cnum), text))
            continue

        warning = RE_MATCH_WARNING.search(line)
        if warning:
            if removeWarnings:
                # Drop the warning. Previously it fell through to the
                # pass-through branch and stayed in the output unchanged.
                continue
            # a warning line: shift the line number
            lnum, cnum, text = warning.groups()
            nlines.append(
                WARNING_LINE % (int(lnum) - SUBTRACT_LINES, int(cnum), text))
            continue

        # something else
        nlines.append(line)
    return '\n'.join(nlines)
00356                 nlines.append(WARNING_LINE % (lnum, cnum, text))
00357             else:
00358                 # something else
00359                 nlines.append(line)
00360     return '\n'.join(nlines)