Back to index

python-biopython  1.60
Public Member Functions | Private Attributes
Bio.Blast.NCBIStandalone._AlignmentConsumer Class Reference
Inheritance diagram for Bio.Blast.NCBIStandalone._AlignmentConsumer:
Inheritance graph
[legend]

List of all members.

Public Member Functions

def start_alignment
def title
def length
def multalign
def end_alignment

Private Attributes

 _alignment
 _multiple_alignment
 _start_index
 _seq_index
 _name_length
 _start_length
 _seq_length

Detailed Description

Definition at line 972 of file NCBIStandalone.py.


Member Function Documentation

end_alignment is reimplemented in Bio.Blast.NCBIStandalone._PSIBlastConsumer and Bio.Blast.NCBIStandalone._BlastConsumer.

Definition at line 1102 of file NCBIStandalone.py.

01102 
01103     def end_alignment(self):
01104         # Remove trailing newlines
01105         if self._alignment:
01106             self._alignment.title = self._alignment.title.rstrip()
01107 
01108         # This code is also obsolete.  See note above.
01109         # If there's a multiple alignment, I will need to make sure
01110         # all the sequences are aligned.  That is, I may need to
01111         # right-pad the sequences.
01112         # if self._multiple_alignment is not None:
01113         #     align = self._multiple_alignment.alignment
01114         #     seqlen = None
01115         #     for i in range(len(align)):
01116         #         name, start, seq = align[i]
01117         #         if seqlen is None:
01118         #             seqlen = len(seq)
01119         #         else:
01120         #             if len(seq) < seqlen:
01121         #                 seq = seq + ' '*(seqlen - len(seq))
01122         #                 align[i] = name, start, seq
01123         #             elif len(seq) > seqlen:
01124         #                 raise ValueError, \
01125         #                       "Sequence %s is longer than the query" % name
01126         
01127         # Clean up some variables, if they exist.
01128         try:
01129             del self._seq_index
01130             del self._seq_length
01131             del self._start_index
01132             del self._start_length
01133             del self._name_length
01134         except AttributeError:
01135             pass

Definition at line 986 of file NCBIStandalone.py.

00986 
00987     def length(self, line):
00988         #e.g. "Length = 81" or more recently, "Length=428"
00989         parts = line.replace(" ","").split("=")
00990         assert len(parts)==2, "Unrecognised format length line"
00991         self._alignment.length = parts[1]
00992         self._alignment.length = _safe_int(self._alignment.length)

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 993 of file NCBIStandalone.py.

00993 
00994     def multalign(self, line):
00995         # Standalone version uses 'QUERY', while WWW version uses blast_tmp.
00996         if line.startswith('QUERY') or line.startswith('blast_tmp'):
00997             # If this is the first line of the multiple alignment,
00998             # then I need to figure out how the line is formatted.
00999             
01000             # Format of line is:
01001             # QUERY 1   acttg...gccagaggtggtttattcagtctccataagagaggggacaaacg 60
01002             try:
01003                 name, start, seq, end = line.split()
01004             except ValueError:
01005                 raise ValueError("I do not understand the line\n%s" % line)
01006             self._start_index = line.index(start, len(name))
01007             self._seq_index = line.index(seq,
01008                                          self._start_index+len(start))
01009             # subtract 1 for the space
01010             self._name_length = self._start_index - 1
01011             self._start_length = self._seq_index - self._start_index - 1
01012             self._seq_length = line.rfind(end) - self._seq_index - 1
01013             
01014             #self._seq_index = line.index(seq)
01015             ## subtract 1 for the space
01016             #self._seq_length = line.rfind(end) - self._seq_index - 1
01017             #self._start_index = line.index(start)
01018             #self._start_length = self._seq_index - self._start_index - 1
01019             #self._name_length = self._start_index
01020 
01021         # Extract the information from the line
01022         name = line[:self._name_length]
01023         name = name.rstrip()
01024         start = line[self._start_index:self._start_index+self._start_length]
01025         start = start.rstrip()
01026         if start:
01027             start = _safe_int(start)
01028         end = line[self._seq_index+self._seq_length:].rstrip()
01029         if end:
01030             end = _safe_int(end)
01031         seq = line[self._seq_index:self._seq_index+self._seq_length].rstrip()
01032         # right pad the sequence with spaces if necessary
01033         if len(seq) < self._seq_length:
01034             seq = seq + ' '*(self._seq_length-len(seq))
01035             
01036         # I need to make sure the sequence is aligned correctly with the query.
01037         # First, I will find the length of the query.  Then, if necessary,
01038         # I will pad my current sequence with spaces so that they will line
01039         # up correctly.
01040 
01041         # Two possible things can happen:
01042         # QUERY
01043         # 504
01044         #
01045         # QUERY
01046         # 403
01047         #
01048         # Sequence 504 will need padding at the end.  Since I won't know
01049         # this until the end of the alignment, this will be handled in
01050         # end_alignment.
01051         # Sequence 403 will need padding before being added to the alignment.
01052 
01053         align = self._multiple_alignment.alignment  # for convenience
01054         align.append((name, start, seq, end))
01055 
01056         # This is old code that tried to line up all the sequences
01057         # in a multiple alignment by using the sequence titles as
01058         # identifiers.  The problem with this is that BLAST assigns
01059         # different HSPs from the same sequence the same id.  Thus,
01060         # in one alignment block, there may be multiple sequences with
01061         # the same id.  I'm not sure how to handle this, so I'm not
01062         # going to.
01063         
01064         # # If the sequence is the query, then just add it.
01065         # if name == 'QUERY':
01066         #     if len(align) == 0:
01067         #         align.append((name, start, seq))
01068         #     else:
01069         #         aname, astart, aseq = align[0]
01070         #         if name != aname:
01071         #             raise ValueError, "Query is not the first sequence"
01072         #         aseq = aseq + seq
01073         #         align[0] = aname, astart, aseq
01074         # else:
01075         #     if len(align) == 0:
01076         #         raise ValueError, "I could not find the query sequence"
01077         #     qname, qstart, qseq = align[0]
01078         #     
01079         #     # Now find my sequence in the multiple alignment.
01080         #     for i in range(1, len(align)):
01081         #         aname, astart, aseq = align[i]
01082         #         if name == aname:
01083         #             index = i
01084         #             break
01085         #     else:
01086         #         # If I couldn't find it, then add a new one.
01087         #         align.append((None, None, None))
01088         #         index = len(align)-1
01089         #         # Make sure to left-pad it.
01090         #         aname, astart, aseq = name, start, ' '*(len(qseq)-len(seq))
01091         # 
01092         #     if len(qseq) != len(aseq) + len(seq):
01093         #         # If my sequences are shorter than the query sequence,
01094         #         # then I will need to pad some spaces to make them line up.
01095         #         # Since I've already right padded seq, that means aseq
01096         #         # must be too short.
01097         #         aseq = aseq + ' '*(len(qseq)-len(aseq)-len(seq))
01098         #     aseq = aseq + seq
01099         #     if astart is None:
01100         #         astart = start
01101         #     align[index] = aname, astart, aseq

Definition at line 981 of file NCBIStandalone.py.

00981 
00982     def title(self, line):
00983         if self._alignment.title:
00984             self._alignment.title += " "
00985         self._alignment.title += line.strip()


Member Data Documentation

_alignment: Definition at line 978 of file NCBIStandalone.py.

_multiple_alignment: Definition at line 979 of file NCBIStandalone.py.

_name_length: Definition at line 1009 of file NCBIStandalone.py.

_seq_index: Definition at line 1006 of file NCBIStandalone.py.

_seq_length: Definition at line 1011 of file NCBIStandalone.py.

_start_index: Definition at line 1005 of file NCBIStandalone.py.

_start_length: Definition at line 1010 of file NCBIStandalone.py.


The documentation for this class was generated from the following file: