Back to index

python-biopython  1.60
Search.py
Go to the documentation of this file.
00001 # BLASTN 2.0a19MP-WashU [05-Feb-1998] [Build decunix3.2 01:53:29 05-Feb-1998]
00002 # BLASTP 2.0.4 [Feb-24-1998]
class Algorithm(object):
    """Identify the search program that produced a report.

    Attributes (each stored exactly as passed to the constructor):
      name        -- program name, e.g. 'blastx' or 'blastn'
      version     -- version string, e.g. '2.1.2' or '2.0a19MP-WashU'
      description -- remaining banner text, e.g.
                     '[05-Feb-1998] [Build dec ...1998]'; defaults to ""
    """

    def __init__(self, name, version, description=""):
        # Plain value holder: no validation or conversion is performed.
        self.name, self.version = name, version
        self.description = description
00008 
00009 # Query=  YAL001C YAL001C, Chr I from 147596 to 147665, and 147756 to 151168,
00010 #     reverse complement
00011 #         (3483 letters)
class Query(object):
    """Describe the query sequence of a search.

    Attributes (each stored exactly as passed to the constructor):
      name        -- query identifier, e.g. 'YAL001C'
      accession   -- accession, or None if missing
      description -- free text, e.g. 'YAL001C, Chr I from 147596 to ... '
      length      -- number of letters in the query, e.g. 3483
    """

    def __init__(self, name, accession, description, length):
        # Plain value holder: the arguments are kept without any checks.
        self.name, self.accession = name, accession
        self.description, self.length = description, length
00018 
00019 # Database:  ArabidopsisN
00020 #            66,211 sequences; 69,074,155 total letters.
class Database(object):
    """Describe the database that was searched.

    Attributes (each stored exactly as passed to the constructor):
      name    -- database name, e.g. 'ArabidopsisN'
      letters -- total number of letters, e.g. 69074155
      entries -- number of sequences, e.g. 66211
    """

    def __init__(self, name, letters, entries):
        # Note the argument order: letters comes before entries.
        self.name = name
        self.letters, self.entries = letters, entries
00026 
class TableInfo(object):
    """Expose the entries of ``info`` as instance attributes.

    Every key/value pair in ``info`` is copied into the instance
    namespace, and ``full_description`` is stored afterwards under the
    attribute of the same name.
    """

    def __init__(self, full_description, info):
        namespace = vars(self)
        namespace.update(info)
        # Assigned last so that a 'full_description' entry inside ``info``
        # never wins over the explicit argument.
        self.full_description = full_description
00031 
00032 
class Search(object):
    """Top-level container tying together the parts of one search result.

    Each constructor argument is stored unchanged under the attribute of
    the same name: algorithm, query, database, table, hits, parameters
    and statistics.
    """

    def __init__(self, algorithm, query, database, table, hits,
                 parameters, statistics):
        # What was run, and against what.
        self.algorithm, self.query, self.database = algorithm, query, database
        # What came back.
        self.table, self.hits = table, hits
        # Trailing run information.
        self.parameters, self.statistics = parameters, statistics
00043 
class Hit(object):
    """One database entry matched by the search.

    Attributes:
      name, description, accession -- stored as given
      length    -- stored as given; also the value returned by len(hit)
      algorithm -- stored as given
      hsps      -- the list passed in, or a fresh empty list when omitted
    """

    def __init__(self, name, description, accession, length,
                 algorithm, hsps=None):
        self.name, self.description = name, description
        self.accession, self.length = accession, length
        self.algorithm = algorithm
        # None is the "not supplied" marker so that every Hit gets its own
        # list instead of sharing one mutable default.
        self.hsps = [] if hsps is None else hsps

    def __len__(self):
        """Return the stored ``length`` attribute."""
        return self.length
00058 
00059 
00060 
00061 # >GB_PL:ATF18F4 AL021637 Arabidopsis thaliana DNA chromosome 4, BAC clone 
00062 #           F18F4 (ESSAII project). 2/98
00063 #             Length = 93,646
00064 #  
00065 #   Minus Strand HSPs:
00066 #  
00067 #  Score = 226 (33.9 bits), Expect = 0.80, P = 0.55
00068 #  Identities = 98/142 (69%), Positives = 98/142 (69%), Strand = Minus / Plus
00069 #    [...lines deleted...]
00070 # Query:  2486 ATATCAAGCAATTTGATAAGATCTAG 2461
00071 #              A AT  A C ATT GA AAGATC AG
00072 # Sbjct: 85387 AGATTTACCTATT-GAGAAGATCAAG 85411
00073 
00074 # computed from the strings
00075 class _SeqLength:
00076     def __init__(self, length, identical, positives, gaps):
00077         self.length = length
00078         self.identical = identical
00079         self.positives = positives
00080         self.gaps = gaps
00081     def __len__(self):
00082         return self.length
00083     def __getattr__(self, name):
00084         if name == "frac_identical":
00085             return float(self.identical) / self.length
00086         elif name == "frac_positives":
00087             return float(self.positives) / self.length
00088         raise AttributeError(name)
00089 
00090 
class HomologySeq(_SeqLength):
    """The homology (match) row of an alignment.

    The length handed to _SeqLength is len(seq); identical, positives
    and gaps are passed through unchanged.  The row itself is kept in
    the ``seq`` attribute.
    """

    def __init__(self, seq, identical, positives, gaps):
        row_length = len(seq)
        # Explicit base-class call, as used throughout this module.
        _SeqLength.__init__(self, row_length, identical, positives, gaps)
        self.seq = seq
00095 
class HSPSeq(_SeqLength):
    """One sequence row (query or subject) of an HSP alignment.

    The length handed to _SeqLength is len(seq); identical, positives
    and gaps are passed through unchanged.  ``name``, ``seq`` and
    ``location`` are stored as given.
    """

    def __init__(self, name, seq, location, identical, positives, gaps):
        row_length = len(seq)
        # Explicit base-class call, as used throughout this module.
        _SeqLength.__init__(self, row_length, identical, positives, gaps)
        self.name, self.seq, self.location = name, seq, location
00102         
00103 
class HSP(_SeqLength):
    """A high-scoring segment pair: query, homology and subject rows.

    Constructor arguments:
      query_seq        -- query row, e.g. 'ATATCAAGCAATTTGATAAGATCTAG'
      homology_seq     -- match row, e.g. 'A AT  A C ATT GA AAGATC AG'
      subject_seq      -- subject row, e.g. 'AGATTTACCTATT-GAGAAGATCAAG'
      query_location   -- e.g. (2486, 2461, negative strand)
      subject_location -- e.g. (85387, 85411)
      query_name       -- e.g. 'Query' (or None)
      subject_name     -- e.g. 'Sbjct' (or None)
      algorithm        -- an Algorithm
      info             -- key/value pairs; must contain "identical" and
                          may contain "positives" (falls back to the
                          "identical" value when absent)
      homology_gaps    -- gap count for the HSP as a whole; when None it
                          is the number of '-' characters in the query row
                          plus those in the subject row

    Attributes set: algorithm, info, the _SeqLength fields (length is
    len(query_seq)), and the three rows as ``query`` and ``subject``
    (HSPSeq) and ``homology`` (HomologySeq).

    Raises:
      AssertionError -- the three rows differ in length; the argument is
                        the tuple (query_seq, homology_seq, subject_seq)
      KeyError       -- ``info`` has no "identical" entry
    """

    def __init__(self,
                 query_seq,    # ATATCAAGCAATTTGATAAGATCTAG
                 homology_seq, # A AT  A C ATT GA AAGATC AG
                 subject_seq,  # AGATTTACCTATT-GAGAAGATCAAG

                 query_location,   # (2486, 2461, negative strand)
                 subject_location, # (85387, 85411)

                 query_name,     # Query (or None)
                 subject_name,   # Sbjct (or None)

                 algorithm,  # an Algorithm
                 info,       # contains Key/value pairs
                 homology_gaps = None,  # Is this needed?
                 ):
        # Raised explicitly rather than via an ``assert`` statement so the
        # check still runs under ``python -O``; the exception type and
        # payload are the same as the assert would have produced.
        if not (len(query_seq) == len(homology_seq) == len(subject_seq)):
            raise AssertionError((query_seq, homology_seq, subject_seq))
        self.algorithm = algorithm

        # Gaps are counted as '-' characters in each aligned row.
        query_gaps = query_seq.count("-")
        subject_gaps = subject_seq.count("-")
        if homology_gaps is None:
            homology_gaps = query_gaps + subject_gaps
        self.info = info

        identical = info["identical"]
        # bioperl calls this 'conserved'
        positives = info.get("positives", identical)

        _SeqLength.__init__(self, len(query_seq), identical,
                            positives, homology_gaps)

        # All three rows share the HSP-wide identical/positives counts;
        # only the gap count is specific to each row.
        self.query = HSPSeq(name = query_name,
                            seq = query_seq,
                            location = query_location,
                            identical = identical,
                            positives = positives,
                            gaps = query_gaps)

        self.subject = HSPSeq(name = subject_name,
                              seq = subject_seq,
                              location = subject_location,
                              identical = identical,
                              positives = positives,
                              gaps = subject_gaps)
        self.homology = HomologySeq(seq = homology_seq,
                                    identical = identical,
                                    positives = positives,
                                    gaps = homology_gaps)