Back to index

python-biopython  1.60
Classes | Functions | Variables
test_SeqIO_convert Namespace Reference

Classes

class  ConvertTests

Functions

def truncation_expected
def check_convert
def check_convert_fails
def compare_record
def compare_records
def funct

Variables

list tests
tuple runner = unittest.TextTestRunner(verbosity = 2)

Function Documentation

def test_SeqIO_convert.check_convert (   in_filename,
  in_format,
  out_format,
  alphabet = None 
)

Definition at line 27 of file test_SeqIO_convert.py.

00027 
def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check SeqIO.convert() agrees with a SeqIO.parse()/SeqIO.write() round trip.

    The input file is parsed and written to an in-memory handle in
    ``out_format``.  That output is parsed back and compared with the
    original records using compare_records().  Finally the same conversion
    is done with SeqIO.convert() and its output must be identical to the
    parse/write output.

    Arguments:
     - in_filename - passed to SeqIO.parse() and SeqIO.convert() as the input
     - in_format   - format name of the input
     - out_format  - format name to write
     - alphabet    - optional alphabet, passed through unchanged

    Raises AssertionError if the two outputs differ; any exception from
    compare_records() or SeqIO is propagated.
    """
    records = list(SeqIO.parse(in_filename, in_format, alphabet))
    qual_truncate = truncation_expected(out_format)
    # Write it out...
    handle = StringIO()
    # catch_warnings() restores the filter list on exit, even on error.
    # (simplefilter() inserts at the FRONT of warnings.filters, so undoing
    # it with warnings.filters.pop() would remove an unrelated filter.)
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.write(records, handle, out_format)
    handle.seek(0)
    # Now load it back and check it agrees,
    records2 = list(SeqIO.parse(handle, out_format, alphabet))
    compare_records(records, records2, qual_truncate)
    # Finally, use the convert function, and check that agrees:
    handle2 = StringIO()
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    # We could re-parse this, but it is simpler and stricter:
    if handle.getvalue() != handle2.getvalue():
        # Explicit raise rather than assert, so the check survives python -O
        raise AssertionError("Output of SeqIO.convert() differs from "
                             "SeqIO.parse()/SeqIO.write() for %s (%s to %s)"
                             % (in_filename, in_format, out_format))

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_convert.check_convert_fails (   in_filename,
  in_format,
  out_format,
  alphabet = None 
)

Definition at line 51 of file test_SeqIO_convert.py.

00051 
def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check parse/write and SeqIO.convert() fail with the same ValueError.

    First the input is parsed and written to an in-memory handle; this must
    raise a ValueError.  Then SeqIO.convert() is called with the same
    arguments; it must raise a ValueError with the same message.

    Arguments are passed through to SeqIO.parse(), SeqIO.write() and
    SeqIO.convert() unchanged.

    Raises AssertionError if either step does not fail, or if the two error
    messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        # catch_warnings() restores the filters even though the write is
        # expected to raise (a manual pop() afterwards would be skipped).
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.write(records, handle, out_format)
    except ValueError as err:
        # Keep a reference; the "as" name is cleared after the except block.
        err1 = err
    else:
        raise AssertionError("Parse or write should have failed!")
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    except ValueError as err:
        err2 = err
    else:
        raise AssertionError("Convert should have failed!")
    if str(err1) != str(err2):
        raise AssertionError("Different failures, parse/write:\n%s\nconvert:\n%s"
                             % (err1, err2))
00082     
#TODO - move this to a shared test module...

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_convert.compare_record (   old,
  new,
  truncate = None 
)
Quality-aware SeqRecord comparison.

This will check the mapping between Solexa and PHRED scores.
It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

Definition at line 83 of file test_SeqIO_convert.py.

00083 
def _check_same_scale_quality(old, new, key, truncate):
    """Raise ValueError if both records hold ``key`` scores that disagree."""
    if key not in old.letter_annotations or key not in new.letter_annotations:
        return
    old_scores = old.letter_annotations[key]
    new_scores = new.letter_annotations[key]
    if old_scores == new_scores:
        return
    # When a cap is given, scores which only differ above the cap are
    # accepted, so compare again with both sides clipped to it.
    if truncate and [min(q, truncate) for q in old_scores] == \
                    [min(q, truncate) for q in new_scores]:
        return
    raise ValueError("Mismatch in %s" % key)


def _check_converted_quality(old, new, old_key, new_key, convert, truncate):
    """Raise ValueError if new's ``new_key`` scores are not old's converted scores."""
    if old_key not in old.letter_annotations \
    or new_key not in new.letter_annotations:
        return
    # Mapping from Solexa to PHRED is lossy, but so is PHRED to Solexa.
    # Assume "old" is the original, and "new" has been converted.
    converted = [round(convert(q)) for q in old.letter_annotations[old_key]]
    if truncate:
        converted = [min(q, truncate) for q in converted]
    if converted != new.letter_annotations[new_key]:
        # Show the three score lists to help debugging the mismatch.
        print(old.letter_annotations[old_key])
        print(converted)
        print(new.letter_annotations[new_key])
        raise ValueError("Mismatch in %s vs %s" % (old_key, new_key))


def compare_record(old, new, truncate=None):
    """Quality aware SeqRecord comparison.

    This will check the mapping between Solexa and PHRED scores.
    It knows to ignore UnknownSeq objects for string matching (i.e. QUAL files).

    Arguments:
     - old      - the original record
     - new      - the record to check against it
     - truncate - optional cap; when given, quality scores are compared
                  after clipping them to this value with min()

    Returns True if the records agree, otherwise raises a ValueError
    describing the first difference found (id, description, sequence
    length, sequence, then quality scores).
    """
    if old.id != new.id:
        raise ValueError("'%s' vs '%s' " % (old.id, new.id))
    if old.description != new.description \
    and (old.id+" "+old.description).strip() != new.description \
    and new.description != "<unknown description>" \
    and new.description != "" : #e.g. tab format
        raise ValueError("'%s' vs '%s' " % (old.description, new.description))
    if len(old.seq) != len(new.seq):
        raise ValueError("%i vs %i" % (len(old.seq), len(new.seq)))
    if isinstance(old.seq, UnknownSeq) or isinstance(new.seq, UnknownSeq):
        pass
    elif str(old.seq) != str(new.seq):
        if len(old.seq) < 200:
            raise ValueError("'%s' vs '%s'" % (old.seq, new.seq))
        else:
            # Only show the start of long sequences in the error message
            raise ValueError("'%s...' vs '%s...'" % (old.seq[:100], new.seq[:100]))
    # Same kind of score on both sides: must match (allowing for the cap).
    _check_same_scale_quality(old, new, "phred_quality", truncate)
    _check_same_scale_quality(old, new, "solexa_quality", truncate)
    # Different kinds of score: convert the old ones and compare.
    _check_converted_quality(old, new, "phred_quality", "solexa_quality",
                             QualityIO.solexa_quality_from_phred, truncate)
    _check_converted_quality(old, new, "solexa_quality", "phred_quality",
                             QualityIO.phred_quality_from_solexa, truncate)
    return True

Here is the caller graph for this function:

def test_SeqIO_convert.compare_records (   old_list,
  new_list,
  truncate_qual = None 
)
Check two lists of SeqRecords agree, raises a ValueError if mismatch.

Definition at line 150 of file test_SeqIO_convert.py.

00150 
def compare_records(old_list, new_list, truncate_qual=None):
    """Compare two lists of SeqRecords pairwise with compare_record().

    Raises a ValueError if the lists differ in length (compare_record()
    raises its own ValueError for a mismatching pair).  Returns True when
    every pair compares as true, otherwise False.
    """
    n_old, n_new = len(old_list), len(new_list)
    if n_old != n_new:
        raise ValueError("%i vs %i records" % (n_old, n_new))
    # all() stops at the first pair which does not compare as true.
    return all(compare_record(a, b, truncate_qual)
               for a, b in zip(old_list, new_list))

Here is the call graph for this function:

Here is the caller graph for this function:

def test_SeqIO_convert.funct (   fn,
  fmt1,
  fmt2,
  alpha 
)

Definition at line 182 of file test_SeqIO_convert.py.

00182 
00183         def funct(fn,fmt1, fmt2, alpha):
00184             f = lambda x : x.simple_check(fn, fmt1, fmt2, alpha)
00185             f.__doc__ = "Convert %s from %s to %s" % (fn, fmt1, fmt2)
            return f

def test_SeqIO_convert.truncation_expected (   format )

Definition at line 18 of file test_SeqIO_convert.py.

00018 
def truncation_expected(format):
    """Return the quality score cap to allow for with the given format name.

    Gives 62 for "fastq-solexa" and "fastq-illumina", 93 for "fastq" and
    "fastq-sanger", and None for any other format.
    """
    if format in ("fastq-solexa", "fastq-illumina"):
        return 62
    if format in ("fastq", "fastq-sanger"):
        return 93
    return None
00026 
#Top level function as this makes it easier to use for debugging:

Here is the caller graph for this function:


Variable Documentation

tuple test_SeqIO_convert.runner = unittest.TextTestRunner(verbosity = 2)

Definition at line 235 of file test_SeqIO_convert.py.

list test_SeqIO_convert.tests

Initial value:
00001 [
00002     ("Quality/example.fastq", "fastq", None),
00003     ("Quality/example.fastq", "fastq-sanger", generic_dna),
00004     ("Quality/tricky.fastq", "fastq", generic_nucleotide),
00005     ("Quality/sanger_93.fastq", "fastq-sanger", None),
00006     ("Quality/sanger_faked.fastq", "fastq-sanger", generic_dna),
00007     ("Quality/solexa_faked.fastq", "fastq-solexa", generic_dna),
00008     ("Quality/illumina_faked.fastq", "fastq-illumina", generic_dna),
00009     ("EMBL/U87107.embl", "embl", None),
00010     ("EMBL/TRBG361.embl", "embl", None),
00011     ("GenBank/NC_005816.gb", "gb", None),
00012     ("GenBank/cor6_6.gb", "genbank", None),
00013     ]

Definition at line 166 of file test_SeqIO_convert.py.