Back to index

python-biopython  1.60
Public Member Functions | Public Attributes
Bio.SCOP.Astral Class Reference

List of all members.

Public Member Functions

def __init__
def domainsClusteredByEv
def domainsClusteredById
def getAstralDomainsFromFile
def getAstralDomainsFromSQL
def getSeqBySid
def getSeq
def hashedDomainsById
def hashedDomainsByEv
def isDomainInId
def isDomainInEv
def writeToSQL

Public Attributes

 scop
 db_handle
 version
 path
 fasta_dict
 astral_file
 EvDatasets
 EvDatahash
 IdDatasets
 IdDatahash

Detailed Description

Abstraction of the ASTRAL database, which holds sequences for all the SCOP domains,
as well as clusterings of those domains by percent sequence identity or E-value.

Definition at line 686 of file __init__.py.


Constructor & Destructor Documentation

def Bio.SCOP.Astral.__init__ (   self,
  dir_path = None,
  version = None,
  scop = None,
  astral_file = None,
  db_handle = None 
)
Initialise the astral database.  A Scop instance (scop) is always required.

You must provide either a directory of SCOP files:

dir_path - string, the path to the location of the scopseq-x.xx directory
   (not the directory itself), and
version - a version number.

or, a FASTA file:

astral_file - string, a path to a fasta file (which will be loaded in memory)

or, a MySQL database:

db_handle - a database handle for a MySQL database containing a table
    'astral' with the astral data in it.  This can be created
    using writeToSQL.

Definition at line 692 of file __init__.py.

00692 
00693                   astral_file=None, db_handle=None):
00694         """
00695         Initialise the astral database.
00696         
00697         You must provide either a directory of SCOP files:
00698                 
00699         dir_path - string, the path to location of the scopseq-x.xx directory
00700                    (not the directory itself), and
00701         version   -a version number.
00702         
00703         or, a FASTA file:
00704         
00705         astral_file - string, a path to a fasta file (which will be loaded in memory)
00706         
00707         or, a MYSQL database:
00708         
00709         db_handle - a database handle for a MYSQL database containing a table
00710                     'astral' with the astral data in it.  This can be created
00711                     using writeToSQL.
00712         """
00713 
00714         if astral_file==dir_path==db_handle==None:
00715             raise RuntimeError("Need either file handle, or (dir_path + "\
00716                        + "version) or database handle to construct Astral")
00717         if not scop:
00718             raise RuntimeError("Must provide a Scop instance to construct")
00719 
00720         self.scop = scop
00721         self.db_handle = db_handle 
00722 
00723         
00724         if not astral_file and not db_handle:
00725             if dir_path == None or version == None:
00726                 raise RuntimeError("must provide dir_path and version")
00727 
00728             self.version = version
00729             self.path = os.path.join( dir_path, "scopseq-%s" % version)
00730             astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
00731             astral_file = os.path.join (self.path, astral_file)
00732 
00733         if astral_file:
00734             #Build a dictionary of SeqRecord objects in the FASTA file, IN MEMORY
00735             self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
00736 
00737         self.astral_file = astral_file
00738         self.EvDatasets = {}
00739         self.EvDatahash = {}
00740         self.IdDatasets = {}
00741         self.IdDatahash = {}
00742                 
        

Member Function Documentation

def Bio.SCOP.Astral.domainsClusteredByEv (   self,
  id 
)
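Get domains clustered by E-value.  The set is loaded on first request (from the database if a db_handle was given, otherwise from the scopseq directory) and cached in EvDatasets.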

Definition at line 743 of file __init__.py.

00743 
00744     def domainsClusteredByEv(self,id):
00745         """get domains clustered by evalue"""
00746         if id not in self.EvDatasets:
00747             if self.db_handle:
00748                 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
00749                 
00750             else:
00751                 if not self.path:
00752                     raise RuntimeError("No scopseq directory specified")
00753                 
00754                 file_prefix = "astral-scopdom-seqres-sel-gs"
00755                 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
00756                                                   self.version)
00757                 filename = os.path.join(self.path,filename)
00758                 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
00759         return self.EvDatasets[id]
00760 

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SCOP.Astral.domainsClusteredById (   self,
  id 
)
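Get domains clustered by percent identity.  The set is loaded on first request (from the database if a db_handle was given, otherwise from the scopseq directory) and cached in IdDatasets.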

Definition at line 761 of file __init__.py.

00761 
00762     def domainsClusteredById(self,id):
00763         """get domains clustered by percent id"""
00764         if id not in self.IdDatasets:
00765             if self.db_handle:
00766                 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
00767                 
00768             else:
00769                 if not self.path:
00770                     raise RuntimeError("No scopseq directory specified")
00771                 
00772                 file_prefix = "astral-scopdom-seqres-sel-gs"
00773                 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
00774                 filename = os.path.join(self.path,filename)
00775                 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
00776         return self.IdDatasets[id]
00777 

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SCOP.Astral.getAstralDomainsFromFile (   self,
  filename = None,
  file_handle = None 
)
Get the scop domains from a file containing a list of sids

Definition at line 778 of file __init__.py.

00778 
00779     def getAstralDomainsFromFile(self,filename=None,file_handle=None):
00780         """Get the scop domains from a file containing a list of sids"""
00781         if file_handle == filename == None:
00782             raise RuntimeError("You must provide a filename or handle")
00783         if not file_handle:
00784             file_handle = open(filename)
00785         doms = []
00786         while 1:
00787             line = file_handle.readline()
00788             if not line:
00789                 break
00790             line = line.rstrip()
00791             doms.append(line)
00792         if filename:
00793             file_handle.close()
00794 
00795         doms = filter( lambda a: a[0]=='d', doms )
00796         doms = map( self.scop.getDomainBySid, doms )
00797         return doms

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SCOP.Astral.getAstralDomainsFromSQL (   self,
  column 
)
Load a set of astral domains from a column in the astral table of a MySQL
database (which can be created with writeToSQL(...)).

Definition at line 798 of file __init__.py.

00798 
00799     def getAstralDomainsFromSQL(self, column):
00800         """Load a set of astral domains from a column in the astral table of a MYSQL
00801         database (which can be created with writeToSQL(...)"""
00802         cur = self.db_handle.cursor()
00803         cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
00804         data = cur.fetchall()
00805         data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
00806         
00807         return data
00808     

Here is the caller graph for this function:

def Bio.SCOP.Astral.getSeq (   self,
  domain 
)
Return seq associated with domain

Definition at line 819 of file __init__.py.

00819 
00820     def getSeq(self,domain):
00821         """Return seq associated with domain"""
00822         return self.getSeqBySid(domain.sid)
00823 

Here is the call graph for this function:

def Bio.SCOP.Astral.getSeqBySid (   self,
  domain 
)
Get the sequence of a given domain from its sid.  Note that the argument named 'domain' is the sid itself, not a domain object.

Definition at line 809 of file __init__.py.

00809 
00810     def getSeqBySid(self,domain):
00811         """get the seq record of a given domain from its sid"""
00812         if self.db_handle is None:
00813             return self.fasta_dict[domain].seq
00814         
00815         else:
00816             cur = self.db_handle.cursor()
00817             cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
00818             return Seq(cur.fetchone()[0])

Here is the caller graph for this function:

def Bio.SCOP.Astral.hashedDomainsByEv (   self,
  id 
)
Get domains clustered by evalue in a dict

Definition at line 832 of file __init__.py.

00832 
00833     def hashedDomainsByEv(self,id):
00834         """Get domains clustered by evalue in a dict"""
00835         if id not in self.EvDatahash:
00836             self.EvDatahash[id] = {}
00837             for d in self.domainsClusteredByEv(id):
00838                 self.EvDatahash[id][d] = 1
00839         return self.EvDatahash[id]
00840                                                         

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SCOP.Astral.hashedDomainsById (   self,
  id 
)
Get domains clustered by sequence identity in a dict

Definition at line 824 of file __init__.py.

00824 
00825     def hashedDomainsById(self,id):
00826         """Get domains clustered by sequence identity in a dict"""
00827         if id not in self.IdDatahash:
00828             self.IdDatahash[id] = {}
00829             for d in self.domainsClusteredById(id):
00830                 self.IdDatahash[id][d] = 1
00831         return self.IdDatahash[id]

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SCOP.Astral.isDomainInEv (   self,
  dom,
  id 
)
Returns true if the domain is in the ASTRAL clusters for evalues

Definition at line 845 of file __init__.py.

00845 
00846     def isDomainInEv(self,dom,id):
00847         """Returns true if the domain is in the ASTRAL clusters for evalues"""
00848         return dom in self.hashedDomainsByEv(id)
00849             

Here is the call graph for this function:

def Bio.SCOP.Astral.isDomainInId (   self,
  dom,
  id 
)
Returns true if the domain is in the astral clusters for percent ID

Definition at line 841 of file __init__.py.

00841 
00842     def isDomainInId(self,dom,id):
00843         """Returns true if the domain is in the astral clusters for percent ID"""
00844         return dom in self.hashedDomainsById(id)

Here is the call graph for this function:

def Bio.SCOP.Astral.writeToSQL (   self,
  db_handle 
)
Write the ASTRAL database to a MySQL database.  Any existing 'astral' table is dropped and recreated.

Definition at line 850 of file __init__.py.

00850 
00851     def writeToSQL(self, db_handle):
00852         """Write the ASTRAL database to a MYSQL database"""
00853         cur = db_handle.cursor()
00854 
00855         cur.execute("DROP TABLE IF EXISTS astral")
00856         cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
00857 
00858         for dom in self.fasta_dict:
00859             cur.execute("INSERT INTO astral (sid,seq) values (%s,%s)",
00860                         (dom, self.fasta_dict[dom].seq.data))
00861         
00862         for i in astralBibIds:
00863             cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
00864             
00865             for d in self.domainsClusteredById(i):
00866                 cur.execute("UPDATE astral SET id"+str(i)+"=1  WHERE sid=%s",
00867                             d.sid)
00868 
00869         for ev in astralEvs:
00870             cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
00871 
00872             for d in self.domainsClusteredByEv(ev):
00873                 
00874                 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1  WHERE sid=%s",
00875                             d.sid)

Here is the call graph for this function:


Member Data Documentation

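Bio.SCOP.Astral.astral_file

Definition at line 736 of file __init__.py.

Bio.SCOP.Astral.db_handle

Definition at line 720 of file __init__.py.

Bio.SCOP.Astral.EvDatahash

Definition at line 738 of file __init__.py.

Bio.SCOP.Astral.EvDatasets

Definition at line 737 of file __init__.py.

Bio.SCOP.Astral.fasta_dict

Definition at line 734 of file __init__.py.

Bio.SCOP.Astral.IdDatahash

Definition at line 740 of file __init__.py.

Bio.SCOP.Astral.IdDatasets

Definition at line 739 of file __init__.py.

Bio.SCOP.Astral.path

Definition at line 728 of file __init__.py.

Bio.SCOP.Astral.scop

Definition at line 719 of file __init__.py.

Bio.SCOP.Astral.version

Definition at line 727 of file __init__.py.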


The documentation for this class was generated from the following file: