Back to index

python-biopython  1.60
Classes | Functions | Variables
test_SeqIO Namespace Reference

Classes

class  ForwardOnlyHandle

Functions

def send_warnings_to_stdout
def compare_record
def record_summary
def col_summary
def alignment_summary
def check_simple_write_read

Variables

 BytesIO = StringIO
list protein_alphas = [Alphabet.generic_protein]
list dna_alphas = [Alphabet.generic_dna]
list rna_alphas = [Alphabet.generic_rna]
list nucleotide_alphas
list no_alpha_formats
list possible_unknown_seq_formats = ["qual", "genbank", "gb", "embl", "imgt"]
list test_write_read_alignment_formats
list test_files
tuple handle = StringIO()
tuple records = list(SeqIO.parse(handle, t_format))
string mode = "rb"
tuple h = open(t_filename,mode)
list records2 = []
list records3 = []
tuple seq_iterator = SeqIO.parse(handle=h, format=t_format)
tuple record = seq_iterator.next()
list records4 = [record]
list records5 = [record]
list accs = record.annotations["accessions"]
tuple base_alpha = Alphabet._get_base_alphabet(record.seq.alphabet)
list good = []
list bad = []
 given_alpha = None
tuple given_base = Alphabet._get_base_alphabet(given_alpha)
tuple alignment
tuple alignment_len = alignment.get_alignment_length()

Function Documentation

def test_SeqIO.alignment_summary (   alignment,
  index = " " 
)
Returns a concise summary of an Alignment object as a string

Definition at line 289 of file test_SeqIO.py.

00289 
def alignment_summary(alignment, index=" "):
    """Returns a concise summary of an Alignment object as a string

    One line is produced for each of the first five columns.  When the
    alignment has more than five columns, a separator line of "|"
    characters (one per record) and a line for the final column follow.
    Every line is prefixed with index and the lines are joined with
    newlines.
    """
    width = alignment.get_alignment_length()
    depth = len(alignment)
    lines = [index + col_summary(alignment.get_column(pos))
             + " alignment column %i" % pos
             for pos in range(min(5, width))]
    if width > 5:
        last = width - 1
        #Stand-in for all the columns which are not shown
        lines.append(index + col_summary("|" * depth) + " ...")
        lines.append(index + col_summary(alignment.get_column(last))
                     + " alignment column %i" % last)
    return "\n".join(lines)
00305 

Here is the call graph for this function:

def test_SeqIO.check_simple_write_read (   records,
  indent = " " 
)

Definition at line 306 of file test_SeqIO.py.

00306 
00307 def check_simple_write_read(records, indent=" "):
00308     #print indent+"Checking we can write and then read back these records"
00309     for format in test_write_read_alignment_formats:
00310         if format not in possible_unknown_seq_formats \
00311         and isinstance(records[0].seq, UnknownSeq) \
00312         and len(records[0].seq) > 100:
00313            #Skipping for speed.  Some of the unknown sequences are
00314            #rather long, and it seems a bit pointless to record them.
00315            continue
00316         print indent+"Checking can write/read as '%s' format" % format
00317 
00318         #Going to write to a handle...
00319         if format in SeqIO._BinaryFormats:
00320             handle = BytesIO()
00321         else:
00322             handle = StringIO()
00323 
00324         try:
00325             c = SeqIO.write(sequences=records, handle=handle, format=format)
00326             assert c == len(records)
00327         except (TypeError, ValueError), e:
00328             #This is often expected to happen, for example when we try and
00329             #write sequences of different lengths to an alignment file.
00330             if "len()" in str(e):
00331                 #Python 2.4.3,
00332                 #>>> len(None)
00333                 #...
00334                 #TypeError: len() of unsized object
00335                 #
00336                 #Python 2.5.2,
00337                 #>>> len(None)
00338                 #...
00339                 #TypeError: object of type 'NoneType' has no len()
00340                 print "Failed: Probably len() of None"
00341             else:
00342                 print indent+"Failed: %s" % str(e)
00343             if records[0].seq.alphabet.letters is not None:
00344                 assert format != t_format, \
00345                        "Should be able to re-write in the original format!"
00346             #Carry on to the next format:
00347             continue
00348 
00349         handle.flush()
00350         handle.seek(0)
00351         #Now ready to read back from the handle...
00352         try:
00353             records2 = list(SeqIO.parse(handle=handle, format=format))
00354         except ValueError, e:
00355             #This is BAD.  We can't read our own output.
00356             #I want to see the output when called from the test harness,
00357             #run_tests.py (which can be funny about new lines on Windows)
00358             handle.seek(0)
00359             raise ValueError("%s\n\n%s\n\n%s" \
00360                               % (str(e), repr(handle.read()), repr(records)))
00361 
00362         assert len(records2) == t_count
00363         for r1, r2 in zip(records, records2):
00364             #Check the bare minimum (ID and sequence) as
00365             #many formats can't store more than that.
00366             assert len(r1) == len(r2)
00367 
00368             #Check the sequence
00369             if format in ["gb", "genbank", "embl", "imgt"]:
00370                 #The GenBank/EMBL parsers will convert to upper case.
00371                 if isinstance(r1.seq, UnknownSeq) \
00372                 and isinstance(r2.seq, UnknownSeq):
00373                     #Jython didn't like us comparing the string of very long
00374                     #UnknownSeq object (out of heap memory error)
00375                     assert r1.seq._character.upper() == r2.seq._character
00376                 else:
00377                     assert r1.seq.tostring().upper() == r2.seq.tostring()
00378             elif format == "qual":
00379                 assert isinstance(r2.seq, UnknownSeq)
00380                 assert len(r2) == len(r1)
00381             else:
00382                 assert r1.seq.tostring() == r2.seq.tostring()
00383             #Beware of different quirks and limitations in the
00384             #valid character sets and the identifier lengths!
00385             if format in ["phylip", "phylip-sequential"]:
00386                 assert r1.id.replace("[","").replace("]","")[:10] == r2.id, \
00387                        "'%s' vs '%s'" % (r1.id, r2.id)
00388             elif format=="phylip-relaxed":
00389                 assert r1.id.replace(" ", "").replace(':', '|') == r2.id, \
00390                         "'%s' vs '%s'" % (r1.id, r2.id)
00391             elif format=="clustal":
00392                 assert r1.id.replace(" ","_")[:30] == r2.id, \
00393                        "'%s' vs '%s'" % (r1.id, r2.id)
00394             elif format=="stockholm":
00395                 assert r1.id.replace(" ","_") == r2.id, \
00396                        "'%s' vs '%s'" % (r1.id, r2.id)
00397             elif format=="fasta":
00398                 assert r1.id.split()[0] == r2.id
00399             else:
00400                 assert r1.id == r2.id, \
00401                        "'%s' vs '%s'" % (r1.id, r2.id)
00402 
00403         if len(records)>1:
00404             #Try writing just one record (passing a SeqRecord, not a list)
00405             if format in SeqIO._BinaryFormats:
00406                 handle = BytesIO()
00407             else:
00408                 handle = StringIO()
00409             SeqIO.write(records[0], handle, format)
00410             assert handle.getvalue() == records[0].format(format)
00411 
00412 
#Check parsers can cope with an empty file
def test_SeqIO.col_summary (   col_text)

Definition at line 283 of file test_SeqIO.py.

00283 
def col_summary(col_text):
    """Returns the column text, abbreviated if it is 65 or more characters

    Long text is cut down to its first 60 characters, three dots, and its
    final 5 characters.  Shorter text is returned unchanged.
    """
    if len(col_text) >= 65:
        head, tail = col_text[:60], col_text[-5:]
        return head + "..." + tail
    return col_text

Here is the caller graph for this function:

def test_SeqIO.compare_record (   record_one,
  record_two 
)
This is meant to be a strict comparison for exact agreement...

Definition at line 237 of file test_SeqIO.py.

00237 
def compare_record(record_one, record_two):
    """This is meant to be a strict comparison for exact agreement...

    Returns False at the first difference found in the id, name,
    description, length, sequence, or any per-letter-annotation whose key
    is present in both records.  Returns True otherwise.  Both arguments
    must be SeqRecord objects with a sequence (checked by assertion).
    """
    for rec in (record_one, record_two):
        assert isinstance(rec, SeqRecord)
    for rec in (record_one, record_two):
        assert rec.seq is not None
    for attr in ("id", "name", "description"):
        if getattr(record_one, attr) != getattr(record_two, attr):
            return False
    if len(record_one) != len(record_two):
        return False
    seq_one, seq_two = record_one.seq, record_two.seq
    if isinstance(seq_one, UnknownSeq) and isinstance(seq_two, UnknownSeq):
        #Jython didn't like us comparing the string of very long UnknownSeq
        #object (out of heap memory error)
        if seq_one._character != seq_two._character:
            return False
    elif seq_one.tostring() != seq_two.tostring():
        return False
    #TODO - check features and annotation (see code for BioSQL tests)
    #Only the per-letter-annotation keys found in both records are compared
    shared_keys = set(record_one.letter_annotations).intersection(
                      record_two.letter_annotations)
    for key in shared_keys:
        if record_one.letter_annotations[key] != \
           record_two.letter_annotations[key]:
            return False
    return True

def test_SeqIO.record_summary (   record,
  indent = " " 
)
Returns a concise summary of a SeqRecord object as a string

Definition at line 267 of file test_SeqIO.py.

00267 
def record_summary(record, indent=" "):
    """Returns a concise summary of a SeqRecord object as a string

    The summary gives the ID and name (merged when they are equal) on one
    line, then the sequence and its length on the next, with each line
    prefixed by indent.  Sequences longer than 50 letters are shown as
    their first 40 letters, three dots, and their last 7 letters.
    """
    if record.id == record.name:
        header = "%sID and Name='%s',\n%sSeq='" % (indent, record.id, indent)
    else:
        header = "%sID = '%s', Name='%s',\n%sSeq='" \
                 % (indent, record.id, record.name, indent)
    seq = record.seq
    if seq is None:
        #No closing quote or length is given in this case
        return header + "None"
    if len(seq) > 50:
        shown = seq[:40].tostring() + "..." + seq[-7:].tostring()
    else:
        shown = seq.tostring()
    return header + shown + "', length=%i" % (len(seq))

Here is the call graph for this function:

def test_SeqIO.send_warnings_to_stdout (   message,
  category,
  filename,
  lineno,
  file = None,
  line = None 
)

Definition at line 24 of file test_SeqIO.py.

00024 
00025                                 file=None, line=None):
00026     #TODO - Have Biopython DataLossWarning?
00027     if category in [UserWarning]:
00028         print "%s - %s" % (category.__name__, message)
00029 warnings.showwarning = send_warnings_to_stdout
00030 


Variable Documentation

list test_SeqIO.accs = record.annotations["accessions"]

Definition at line 508 of file test_SeqIO.py.

tuple test_SeqIO.alignment

Initial value:
00001 MultipleSeqAlignment(SeqIO.parse( \
00002                     handle=t_filename, format=t_format))

Definition at line 608 of file test_SeqIO.py.

tuple test_SeqIO.alignment_len = alignment.get_alignment_length()

Definition at line 612 of file test_SeqIO.py.

list test_SeqIO.bad = []

Definition at line 558 of file test_SeqIO.py.

tuple test_SeqIO.base_alpha = Alphabet._get_base_alphabet(record.seq.alphabet)

Definition at line 550 of file test_SeqIO.py.

test_SeqIO.BytesIO = StringIO

Definition at line 20 of file test_SeqIO.py.

list test_SeqIO.dna_alphas = [Alphabet.generic_dna]

Definition at line 32 of file test_SeqIO.py.

test_SeqIO.given_alpha = None

Definition at line 559 of file test_SeqIO.py.

tuple test_SeqIO.given_base = Alphabet._get_base_alphabet(given_alpha)

Definition at line 579 of file test_SeqIO.py.

list test_SeqIO.good = []

Definition at line 557 of file test_SeqIO.py.

tuple test_SeqIO.h = open(t_filename,mode)

Definition at line 431 of file test_SeqIO.py.

tuple test_SeqIO.handle = StringIO()

Definition at line 417 of file test_SeqIO.py.

string test_SeqIO.mode = "rb"

Definition at line 423 of file test_SeqIO.py.

list test_SeqIO.no_alpha_formats

Initial value:
00001 ["fasta","clustal","phylip","phylip-relaxed",
00002                     "phylip-sequential","tab","ig",
00003                     "stockholm","emboss", "fastq","fastq-solexa",
00004                     "fastq-illumina","qual"]

Definition at line 36 of file test_SeqIO.py.

list test_SeqIO.nucleotide_alphas

Initial value:
00001 [Alphabet.generic_nucleotide,
00002                      Alphabet.Gapped(Alphabet.generic_nucleotide)]

Definition at line 34 of file test_SeqIO.py.

list test_SeqIO.possible_unknown_seq_formats = ["qual", "genbank", "gb", "embl", "imgt"]

Definition at line 40 of file test_SeqIO.py.

list test_SeqIO.protein_alphas = [Alphabet.generic_protein]

Definition at line 31 of file test_SeqIO.py.

tuple test_SeqIO.record = seq_iterator.next()

Definition at line 449 of file test_SeqIO.py.

tuple test_SeqIO.records = list(SeqIO.parse(handle, t_format))

Definition at line 418 of file test_SeqIO.py.

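list test_SeqIO.records2 = []

Definition at line 438 of file test_SeqIO.py.

list test_SeqIO.records3 = []

Definition at line 444 of file test_SeqIO.py.

list test_SeqIO.records4 = [record]

Definition at line 464 of file test_SeqIO.py.

list test_SeqIO.records5 = [record]

Definition at line 484 of file test_SeqIO.py.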

list test_SeqIO.rna_alphas = [Alphabet.generic_rna]

Definition at line 33 of file test_SeqIO.py.

tuple test_SeqIO.seq_iterator = SeqIO.parse(handle=h, format=t_format)

Definition at line 446 of file test_SeqIO.py.

list test_SeqIO.test_files

Definition at line 62 of file test_SeqIO.py.

list test_SeqIO.test_write_read_alignment_formats

Initial value:
00001 ["fasta","clustal","phylip","stockholm",
00002                                      "phylip-relaxed"]

Definition at line 45 of file test_SeqIO.py.