Back to index

plone3  3.1.7
atxml.py
Go to the documentation of this file.
00001 ##################################################################
00002 # Marshall: A framework for pluggable marshalling policies
00003 # Copyright (C) 2004 ObjectRealms, LLC
00004 # Copyright @ 2004 Enfold Systems, LLC
00005 #
00006 # This program is free software; you can redistribute it and/or modify
00007 # it under the terms of the GNU General Public License as published by
00008 # the Free Software Foundation; either version 2 of the License, or
00009 # (at your option) any later version.
00010 #
00011 # This program is distributed in the hope that it will be useful,
00012 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 # GNU General Public License for more details.
00015 #
00016 # You should have received a copy of the GNU General Public License
00017 # along with this program; if not, write to the Free Software
00018 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019 ##################################################################
00020 
00021 """
00022 generic xml marshaller
00023 
00024  based on registering namespaces with the marshaller,
00025  the marshaller tries to delegate as much as possible
00026  to the namespaces, the default implementation of which
00027  delegates as much as possible to schema attributes
00028  within that namespace.
00029 
00030  see the Marshall.namespaces package for some sample and
00031  default namespaces.
00032  
00033 caveats
00034 
00035  - if you want to use multiple namespaces on the same
00036    xml node, then this isn't the parser for you. you
00037    can do some basic hacks around it w/ ParseContext
00038    namespace delegation.
00039 
00040 Authors: kapil thangavelu <k_vertigo@objectrealms.net> (current impl)
00041          sidnei da silva <sidnei@awkly.org>
00042          
00043 """
00044 
00045 #################################
00046 import sys
00047 import thread
00048 import traceback
00049 from cStringIO import StringIO
00050 from xml.dom import minidom
00051 try:
00052     from celementtree import ElementTree
00053 except ImportError:
00054     from elementtree import ElementTree
00055 from Products.Marshall.handlers.base import Marshaller
00056 from Products.Archetypes.debug import log
00057 from Products.Marshall import config
00058 from Products.Marshall.exceptions import MarshallingException
00059 from Products.Marshall import utils
00060 
00061 #################################
00062 
# Generic "no value supplied" sentinel.  NOTE: this name is rebound further
# down in this module to an ATXMLMarker instance.
_marker = object()

# Namespace URI reserved for ``xmlns`` namespace declarations.
XMLNS_NS = 'http://www.w3.org/2000/xmlns/'

# Numeric node-type codes for start-element, text and end-element nodes.
# They are not referenced anywhere in the visible part of this module;
# presumably left over from an xmlreader based parser -- TODO confirm.
(XMLREADER_START_ELEMENT_NODE_TYPE,
 XMLREADER_TEXT_ELEMENT_NODE_TYPE,
 XMLREADER_END_ELEMENT_NODE_TYPE) = 1, 3, 15
00069 
class SchemaAttributeDemarshallException(Exception):
    """Raised when demarshalling a single schema attribute fails.

    The message carries the attribute name, the offending data and the
    formatted traceback of the underlying error.
    """
00072 
class ErrorCallback:
    """Callable that collects error messages, kept separately per thread.

    Messages are stored in ``self.msgs``, a dict keyed by the identifier of
    the thread that reported them.
    """

    def __init__(self):
        # thread ident -> list of message strings
        self.msgs = {}

    def __call__(self, ctx, msg):
        """Record ``msg``; ``ctx`` is accepted but not used."""
        self.append(msg)

    def append(self, msg):
        """Add ``msg`` to the calling thread's message list."""
        self.msgs.setdefault(thread.get_ident(), []).append(msg)

    def get(self, clear=False):
        """Return the calling thread's messages concatenated into a string.

        When ``clear`` is true the thread's list is reset afterwards; the
        messages collected so far are still returned.
        """
        collected = self.msgs.setdefault(thread.get_ident(), [])
        if clear:
            # clear() rebinds the dict entry, so ``collected`` keeps the
            # messages that are about to be returned.
            self.clear()
        return ''.join(collected)

    def clear(self):
        """Drop all messages recorded by the calling thread."""
        self.msgs[thread.get_ident()] = []
00095 
00096 
class XmlNamespace(object):
    """Base class for an xml namespace handled by the marshaller.

    A namespace owns a sequence of schema attributes (``attributes``) and
    delegates serialization, deserialization and xml processing to them.
    Subclasses are expected to override ``attributes``, ``xmlns`` and
    ``prefix``.
    """

    #################################
    # the framework does a bit of introspection on
    # namespaces for some attributes, defined below

    # whether or not this namespace uses fields from an
    # object's at schema. if true then this namespace
    # should also define the getATFields method below
    uses_at_fields = False

    # the xml namespace uri
    xmlns = "http://example.com"

    # the xml namespace prefix
    prefix = "xxx"

    # the schema attributes belonging to this namespace; empty by default
    # so that a namespace without attributes can still be instantiated
    attributes = ()
    #################################

    def __init__(self):
        # let every attribute know which namespace it belongs to
        for attribute in self.attributes:
            attribute.setNamespace(self)

    def getAttributeByName(self, name):
        """ given an xml name return the schema attribute,
        or None when no attribute of this namespace has that name
        """
        for attribute in self.attributes:
            if attribute.name == name:
                return attribute
        return None

    def getRelaxNG(self):
        """ get the relaxng fragment that defines
        what's in the namespace

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError("Subclass Responsiblity")

    def getATFields(self):
        """ return the at schema field names which are
        handled by this namespace, this is utilized by
        the AT namespace so it doesn't also handle these
        fields.

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError("Subclass Responsiblity")

    def serialize(self, dom_node, parent_node, instance, options):
        """ serialize the instance values to xml
        based on attributes in this namespace

        ``options`` is accepted but not used by this default implementation.
        """
        for attribute in self.attributes:
            attribute.serialize(dom_node, parent_node, instance)

    def deserialize(self, instance, ns_data, options):
        """ given the instance and the namespace data for
        instance, reconstitute this namespace's attributes
        on the instance.

        Nothing happens when ``ns_data`` is empty.  Any exception raised by
        an attribute is re-raised as SchemaAttributeDemarshallException
        carrying the attribute name, its data and the original traceback.
        """
        if not ns_data:
            return
        for attribute in self.attributes:
            try:
                attribute.deserialize(instance, ns_data)
            except Exception:
                ec, e, tb = sys.exc_info()
                try:
                    ftb = traceback.format_tb(tb)
                finally:
                    # do not keep the traceback (and its frames) alive
                    del tb
                msg = "failure while demarshalling schema attribute %s\n" % \
                      attribute.name
                msg += "data: %s\n" % ns_data.get(attribute.name, None)
                msg += "original exception: %s\n" % str(ec)
                msg += "original traceback:\n%s" % '\n'.join(ftb)
                raise SchemaAttributeDemarshallException(msg)

    def processXml(self, context, node):
        """ handle the start of a xml tag with this namespace
        the namespace and the name of the tag are bound to node.

        if this method returns false then the node is assumed to
        be junk and it is discarded.
        """
        tag, ns = utils.fixtag(node.tag, context.ns_map)
        attribute = self.getAttributeByName(tag)
        if attribute is None:
            return False
        # remember the handling attribute on the node for the caller
        node.attribute = attribute
        return attribute.processXml(context, node)

    def processXmlEnd(self, name, context):
        """ callback invoked when the parser reaches the
        end of an xml node in this namespace.
        """

    def getSchemaInfo(self):
        """ return information on this namespace's rng schema

        should be an iterable of sets of ( 'defined_name', 'occurence', 'schema')
        where defined name is the name of any top level defined entities in
        the schema, occurence defines the rng occurence value for that entity
        in the object's xml representation, and schema is the rng schema
        definition for the defined entities
        """
        return ()
00198     
00199     
00200         
00201     
00202 
class SchemaAttribute(object):
    """Base class for one xml element handled inside a namespace.

    ``name`` is the xml name of the attribute, ``field_id`` the name of the
    corresponding field (defaults to ``name``).  ``set``, ``get`` and
    ``serialize`` must be provided by subclasses.
    """

    def __init__(self, name, field_name=None):
        self.name, self.field_id = name, field_name or name
        # assigned later through setNamespace()
        self.namespace = None

    def set(self, instance, data):
        """ set the attribute's value on the instance

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError

    def get(self, instance):
        """ retrieve the schema attribute's value from the instance

        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError

    def serialize(self, dom, parent_node, instance):
        """ serialize the attribute's instance value into the dom

        The signature matches the call made by XmlNamespace.serialize,
        which passes the dom, the parent node and the instance.
        Raises NotImplementedError; subclasses must override.
        """
        raise NotImplementedError

    def deserialize(self, instance, ns_data):
        """ given the instance and the namespace data for
        instance, reconstitute this attribute on the instance
        """
        self.set(instance, ns_data)

    def processXml(self, context, ctx_node):
        """ callback invoked with a node from the xml stream
        if false is returned the current node is assumed to be
        not interesting and is ignored.
        """
        return True

    def processXmlValue(self, context, value):
        """ callback to process text nodes

        The stripped text is stored under this attribute's name in the
        context data of the owning namespace; empty or whitespace-only
        values are ignored.
        """
        value = value and value.strip()
        if not value:
            return
        data = context.getDataFor(self.namespace.xmlns)
        data[self.name] = value

    def setNamespace(self, namespace):
        """ sets which namespace the attribute belongs to
        """
        self.namespace = namespace
00250 
class DataNode(object):
    """Small record pairing a namespace uri with a node name.

    ``attribute`` starts out as None and can be filled in later.
    """
    __slots__ = ('ns', 'name', 'attribute')

    def __init__(self, ns, name):
        self.attribute = None
        self.name = name
        self.ns = ns
00264 
class ParseContext(object):
    """State shared while parsing one xml document.

    Holds the target instance, the root node, the map of namespace uris to
    namespace objects and the data collected per namespace uri.
    """

    def __init__(self, instance, root, ns_map):
        self.instance, self.root, self.ns_map = instance, root, ns_map
        # ns_uri -> dict of values collected for that namespace
        self.data = {}
        # node currently being processed, if any
        self.node = None
        # when set, this namespace answers every getNamespaceFor() lookup
        self.ns_delegate = None

    def getDataFor(self, ns_uri):
        """Return the data dict for ``ns_uri``, creating it when missing."""
        try:
            return self.data[ns_uri]
        except KeyError:
            bucket = self.data[ns_uri] = {}
            return bucket

    def getNamespaceFor(self, ns_uri):
        """Return the delegate namespace if one is set, otherwise the
        namespace registered for ``ns_uri`` (None when unknown)."""
        delegate = self.ns_delegate
        if delegate is None:
            return self.ns_map.get(ns_uri)
        return delegate

    def setNamespaceDelegate(self, namespace):
        """Install (or, with None, remove) the delegate namespace."""
        self.ns_delegate = namespace
00286 
class ATXMLMarker:
    """Type of the sentinel meaning "no namespace argument was passed"."""


# Sentinel used as the default of ATXMLMarshaller's ``namespace`` argument;
# this replaces the plain object() bound to ``_marker`` earlier in the module.
_marker = ATXMLMarker()
00290 
class ATXMLMarshaller(Marshaller):
    """Marshaller that reads and writes an instance as namespaced xml.

    The actual work is delegated to the registered namespace objects: each
    one serializes its own elements under a common root tag and is handed
    back the data parsed for its namespace uri when demarshalling.
    """

    # Just a plain list of ns objects.  Deliberately shared at class level:
    # the module-level registerNamespace() appends to it.
    namespaces = []

    # options for a subclass
    use_validation = False

    def __init__(self, demarshall_hook=None, marshall_hook=None,
                 root_tag='metadata', namespace=_marker):
        """``root_tag`` names the document element; ``namespace`` is its
        default xml namespace and falls back to config.AT_NS when omitted."""
        Marshaller.__init__(self, demarshall_hook, marshall_hook)
        self.root_tag = root_tag
        self.namespace = namespace
        if namespace is _marker:
            self.namespace = config.AT_NS

    def getFieldNamespace(self, field):
        """ Mapping of AT field name -> (prefix, element name)

        ``field`` is accepted for backward compatibility but is not used;
        the complete mapping is returned.
        """
        field_map = {}
        for ns in self.namespaces:
            # not every namespace object needs to expose attributes
            for attribute in getattr(ns, 'attributes', ()):
                field_map[attribute.field_id] = (ns.prefix, attribute.name)
        return field_map

    def getNamespaceURIMap(self):
        """ Mapping of xmlns URI to ns object
        """
        return dict([(ns.xmlns, ns) for ns in self.namespaces])

    def getNamespacePrefixMap(self):
        """ Mapping of prefix -> xmlns URI
        """
        return dict([(ns.prefix, ns.xmlns) for ns in self.namespaces])

    def getNamespaces(self, namespaces=None):
        """ Generate the registered namespaces.

        With ``namespaces`` None every registered namespace is yielded,
        otherwise only those whose prefix or xmlns URI is contained in
        ``namespaces``.
        """
        for ns in getRegisteredNamespaces():
            if (namespaces is None
                    or ns.prefix in namespaces
                    or ns.xmlns in namespaces):
                yield ns

    def demarshall(self, instance, data, **kwargs):
        """ Parse the xml in ``data`` and apply it to ``instance``;
        ``kwargs`` is handed to the namespaces as options.
        """
        context = self.parseContext(instance, data)
        self.processContext(instance, context, kwargs)

    def marshall(self, instance, use_namespaces=None, **kwargs):
        """ Serialize ``instance`` to xml.

        ``use_namespaces`` optionally restricts the namespaces taking part
        (see getNamespaces).  Returns (content_type, length, data).
        """
        doc = minidom.Document()
        node = doc.createElementNS(self.namespace, self.root_tag)
        doc.appendChild(node)

        # setup default namespace
        attr = doc.createAttribute('xmlns')
        attr.value = self.namespace
        node.setAttributeNode(attr)

        for ns in self.getNamespaces(use_namespaces):
            ns.serialize(doc, node, instance, kwargs)
            if not ns.prefix:
                # nothing to declare for a namespace without a prefix
                continue
            attrname = 'xmlns:%s' % ns.prefix
            attr = doc.createAttribute(attrname)
            attr.value = ns.xmlns
            node.setAttributeNode(attr)

        content_type = 'text/xml'
        data = doc.toprettyxml()
        length = len(data)
        return (content_type, length, data)

    def parseContext(self, instance, data):
        """ Parse ``data`` and return the populated ParseContext.

        Raises MarshallingException when ``use_validation`` is set, because
        this parser has no validation step and no error collector.
        """
        root = ElementTree.fromstring(data)
        ns_map = self.getNamespaceURIMap()
        context = ParseContext(instance, root, ns_map)
        context.xmlsource = data
        self.parseXml(root, context, ns_map)

        if self.use_validation:
            # No error callback is installed in this module, so report the
            # situation explicitly instead of failing on an undefined name.
            errors = ("RelaxNG validation was requested (use_validation) "
                      "but is not supported by the ElementTree based parser.")
            log(errors)
            raise MarshallingException("Input failed to validate against "
                                       "the ATXML RelaxNG schema.\n"
                                       "%s" % errors)

        return context

    def parseXml(self, root, context, ns_map):
        """
        input read and dispatch loop

        Every direct child of ``root`` is offered to the namespace that
        utils.fixtag resolves for its tag; accepted nodes have their text
        passed to the attribute the namespace bound to the node.
        Always returns 1.
        """
        read_result = 1
        for node in root:
            tag, namespace = utils.fixtag(node.tag, context.ns_map)

            if namespace is None:
                # NOTE(review): assumes fixtag signals an unknown namespace
                # with None -- confirm; such nodes are skipped like junk.
                continue

            if namespace.processXml(context, node):
                context.node = node
                context.node.attribute.processXmlValue(context, node.text)
            else:
                ## XXX: raise a warning that the attribute isnt defined in the schema
                pass

        return read_result

    def processContext(self, instance, context, options):
        """ instantiate instance with data from context
        """
        for ns in getRegisteredNamespaces():
            ns_data = context.getDataFor(ns.xmlns)
            ns.deserialize(instance, ns_data, options)
00410 
00411 
class _NamespaceCatalog(object):
    """Ordered registry of namespace objects keyed by their xmlns uri."""

    def __init__(self):
        # xmlns uri -> namespace object
        self._namespaces = {}
        # xmlns uris in registration order
        self._order = []

    def registerNamespace(self, namespace, override=False, position=-1):
        """Register ``namespace`` under its ``xmlns`` uri.

        ``position`` is the index in the ordering at which the namespace is
        inserted; -1 (the default) appends it.  Registering a uri twice
        raises RuntimeError unless ``override`` is true, in which case the
        namespace object is replaced; it keeps its slot in the ordering
        unless an explicit ``position`` is given.
        """
        xmlns = namespace.xmlns
        already_registered = xmlns in self._namespaces
        if already_registered and not override:
            raise RuntimeError("Duplicate Namespace Registration %s" % xmlns)
        self._namespaces[xmlns] = namespace
        if already_registered:
            if position == -1:
                # replaced in place, ordering stays untouched
                return
            # moved: drop the old slot so the uri is listed only once
            self._order.remove(xmlns)
        if position == -1:
            position = len(self._order)
        self._order.insert(position, xmlns)

    def getRegisteredNamespaces(self):
        """Return the registered namespace objects as a list, in order."""
        return [self._namespaces[xmlns] for xmlns in self._order]
00428 
00429 
# Module-wide catalog instance and shortcuts to its bound methods.
# NOTE: both shortcut names are rebound by the plain functions of the same
# name defined right below, which work on ATXMLMarshaller.namespaces.
NamespaceCatalog = _NamespaceCatalog()

registerNamespace, getRegisteredNamespaces = (
    NamespaceCatalog.registerNamespace,
    NamespaceCatalog.getRegisteredNamespaces,
)
00434 
def registerNamespace( namespace ):
    """Add a namespace to ATXMLMarshaller.namespaces.

    ``namespace`` may be an XmlNamespace instance, which is used as is, or
    any other callable (typically a namespace class), which is called
    without arguments to obtain the object to register.
    """
    if isinstance( namespace, XmlNamespace ):
        ns_object = namespace
    else:
        ns_object = namespace()
    ATXMLMarshaller.namespaces.append( ns_object )
00439 
def getRegisteredNamespaces():
    """Return the namespaces registered on ATXMLMarshaller as a tuple
    (a snapshot, so callers cannot mutate the registry through it)."""
    registered = ATXMLMarshaller.namespaces
    return tuple( registered )
00442 
00443