Back to index

python-biopython  1.60
Functions | Variables
Bio.SeqIO._convert Namespace Reference

Functions

def _genbank_convert_fasta
def _embl_convert_fasta
def _fastq_generic
def _fastq_generic2
def _fastq_sanger_convert_fastq_sanger
def _fastq_solexa_convert_fastq_solexa
def _fastq_illumina_convert_fastq_illumina
def _fastq_illumina_convert_fastq_sanger
def _fastq_sanger_convert_fastq_illumina
def _fastq_solexa_convert_fastq_sanger
def _fastq_sanger_convert_fastq_solexa
def _fastq_solexa_convert_fastq_illumina
def _fastq_illumina_convert_fastq_solexa
def _fastq_convert_fasta
def _fastq_convert_tab
def _fastq_convert_qual
def _fastq_sanger_convert_qual
def _fastq_solexa_convert_qual
def _fastq_illumina_convert_qual
def _handle_convert

Variables

dictionary _converter

Function Documentation

def Bio.SeqIO._convert._embl_convert_fasta (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast EMBL to FASTA (PRIVATE).

Definition at line 39 of file _convert.py.

00039 
00040 def _embl_convert_fasta(in_handle, out_handle, alphabet=None):
00041     """Fast EMBL to FASTA (PRIVATE)."""
00042     #We don't need to parse the features...
00043     from Bio.GenBank.Scanner import EmblScanner
00044     records = EmblScanner().parse_records(in_handle, do_features=False)
00045     #For FASTA output we can ignore the alphabet too
00046     return SeqIO.write(records, out_handle, "fasta")
00047 

def Bio.SeqIO._convert._fastq_convert_fasta (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast FASTQ to FASTA conversion (PRIVATE).

Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
Seq objects in order to speed up this conversion.

NOTE - This does NOT check that the characters used in the FASTQ quality
string are valid!

Definition at line 233 of file _convert.py.

00233 
00234 def _fastq_convert_fasta(in_handle, out_handle, alphabet=None):
00235     """Fast FASTQ to FASTA conversion (PRIVATE).
00236 
00237     Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
00238     Seq objects in order to speed up this conversion.
00239 
00240     NOTE - This does NOT check the characters used in the FASTQ quality string
00241     are valid!
00242     """
00243     from Bio.SeqIO.QualityIO import FastqGeneralIterator
00244     #For real speed, don't even make SeqRecord and Seq objects!
00245     count = 0
00246     for title, seq, qual in FastqGeneralIterator(in_handle):
00247         count += 1
00248         out_handle.write(">%s\n" % title)
00249         #Do line wrapping
00250         for i in range(0, len(seq), 60):
00251             out_handle.write(seq[i:i+60] + "\n")
00252     return count

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_convert_qual (   in_handle,
  out_handle,
  mapping 
) [private]
FASTQ helper function for QUAL output (PRIVATE).

Mapping should be a dictionary mapping expected ASCII characters from the
FASTQ quality string to PHRED quality scores (as strings).

Definition at line 270 of file _convert.py.

00270 
00271 def _fastq_convert_qual(in_handle, out_handle, mapping):
00272     """FASTQ helper function for QUAL output (PRIVATE).
00273 
00274     Mapping should be a dictionary mapping expected ASCII characters from the
00275     FASTQ quality string to PHRED quality scores (as strings).
00276     """
00277     from Bio.SeqIO.QualityIO import FastqGeneralIterator
00278     #For real speed, don't even make SeqRecord and Seq objects!
00279     count = 0
00280     for title, seq, qual in FastqGeneralIterator(in_handle):
00281         count += 1
00282         out_handle.write(">%s\n" % title)
00283         #map the qual...
00284         try:
00285             qualities_strs = [mapping[ascii] for ascii in qual]
00286         except KeyError:
00287             raise ValueError("Invalid character in quality string")
00288         data = " ".join(qualities_strs)
00289         while True:
00290             if len(data) <= 60:
00291                 out_handle.write(data + "\n")
00292                 break
00293             else:
00294                 #By construction there must be spaces in the first 60 chars
00295                 #(unless we have 60 digit or higher quality scores!)
00296                 i = data.rfind(" ", 0, 60)
00297                 out_handle.write(data[:i] + "\n")
00298                 data = data[i+1:]
00299     return count
00300 
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SeqIO._convert._fastq_convert_tab (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast FASTQ to simple tabbed conversion (PRIVATE).

Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
Seq objects in order to speed up this conversion.

NOTE - This does NOT check that the characters used in the FASTQ quality
string are valid!

Definition at line 253 of file _convert.py.

00253 
00254 def _fastq_convert_tab(in_handle, out_handle, alphabet=None):
00255     """Fast FASTQ to simple tabbed conversion (PRIVATE).
00256 
00257     Avoids dealing with the FASTQ quality encoding, and creating SeqRecord and
00258     Seq objects in order to speed up this conversion.
00259 
00260     NOTE - This does NOT check the characters used in the FASTQ quality string
00261     are valid!
00262     """
00263     from Bio.SeqIO.QualityIO import FastqGeneralIterator
00264     #For real speed, don't even make SeqRecord and Seq objects!
00265     count = 0
00266     for title, seq, qual in FastqGeneralIterator(in_handle):
00267         count += 1
00268         out_handle.write("%s\t%s\n" % (title.split(None, 1)[0], seq))
00269     return count

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_generic (   in_handle,
  out_handle,
  mapping 
) [private]
FASTQ helper function for conversions where there can't be data loss by truncation (PRIVATE).

Definition at line 48 of file _convert.py.

00048 
00049 def _fastq_generic(in_handle, out_handle, mapping):
00050     """FASTQ helper function where can't have data loss by truncation (PRIVATE)."""
00051     from Bio.SeqIO.QualityIO import FastqGeneralIterator
00052     #For real speed, don't even make SeqRecord and Seq objects!
00053     count = 0
00054     null = chr(0)
00055     for title, seq, old_qual in FastqGeneralIterator(in_handle):
00056         count += 1
00057         #map the qual...
00058         qual = old_qual.translate(mapping)
00059         if null in qual:
00060             raise ValueError("Invalid character in quality string")
00061         out_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
00062     return count
00063 
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SeqIO._convert._fastq_generic2 (   in_handle,
  out_handle,
  mapping,
  truncate_char,
  truncate_msg 
) [private]
FASTQ helper function where there could be data loss by truncation (PRIVATE).

Definition at line 64 of file _convert.py.

00064 
00065 def _fastq_generic2(in_handle, out_handle, mapping, truncate_char, truncate_msg):
00066     """FASTQ helper function where there could be data loss by truncation (PRIVATE)."""
00067     from Bio.SeqIO.QualityIO import FastqGeneralIterator
00068     #For real speed, don't even make SeqRecord and Seq objects!
00069     count = 0
00070     null = chr(0)
00071     for title, seq, old_qual in FastqGeneralIterator(in_handle):
00072         count += 1
00073         #map the qual...
00074         qual = old_qual.translate(mapping)
00075         if null in qual:
00076             raise ValueError("Invalid character in quality string")
00077         if truncate_char in qual:
00078             qual = qual.replace(truncate_char, chr(126))
00079             import warnings
00080             warnings.warn(truncate_msg)
00081         out_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
00082     return count
00083 

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SeqIO._convert._fastq_illumina_convert_fastq_illumina (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Illumina 1.3+ FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).

Useful for removing line wrapping and the redundant second identifier
on the plus lines. Will also check that the quality string is valid.
Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 117 of file _convert.py.

00117 
00118 def _fastq_illumina_convert_fastq_illumina(in_handle, out_handle, alphabet=None):
00119     """Fast Illumina 1.3+ FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
00120 
00121     Useful for removing line wrapping and the redundant second identifier
00122     on the plus lines. Will check also check the quality string is valid.
00123     Avoids creating SeqRecord and Seq objects in order to speed up this
00124     conversion.
00125     """
00126     #Map unexpected chars to null
00127     mapping = "".join([chr(0) for ascii in range(0, 64)] \
00128                      +[chr(ascii) for ascii in range(64,127)] \
00129                      +[chr(0) for ascii in range(127,256)])
00130     assert len(mapping)==256
00131     return _fastq_generic(in_handle, out_handle, mapping)
00132 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_illumina_convert_fastq_sanger (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Illumina 1.3+ FASTQ to Sanger FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 133 of file _convert.py.

00133 
00134 def _fastq_illumina_convert_fastq_sanger(in_handle, out_handle, alphabet=None):
00135     """Fast Illumina 1.3+ FASTQ to Sanger FASTQ conversion (PRIVATE).
00136 
00137     Avoids creating SeqRecord and Seq objects in order to speed up this
00138     conversion.
00139     """
00140     #Map unexpected chars to null
00141     mapping = "".join([chr(0) for ascii in range(0, 64)] \
00142                      +[chr(33+q) for q in range(0, 62+1)] \
00143                      +[chr(0) for ascii in range(127, 256)])
00144     assert len(mapping)==256
00145     return _fastq_generic(in_handle, out_handle, mapping)
00146 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_illumina_convert_fastq_solexa (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Illumina 1.3+ FASTQ to Solexa FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 216 of file _convert.py.

00216 
00217 def _fastq_illumina_convert_fastq_solexa(in_handle, out_handle, alphabet=None):
00218     """Fast Illumina 1.3+ FASTQ to Solexa FASTQ conversion (PRIVATE).
00219 
00220     Avoids creating SeqRecord and Seq objects in order to speed up this
00221     conversion.
00222     """
00223     #Map unexpected chars to null
00224     from Bio.SeqIO.QualityIO import solexa_quality_from_phred
00225     trunc_char = chr(1)
00226     mapping = "".join([chr(0) for ascii in range(0, 64)] \
00227                      +[chr(64+int(round(solexa_quality_from_phred(q)))) \
00228                        for q in range(0, 62+1)] \
00229                      +[chr(0) for ascii in range(127, 256)])
00230     assert len(mapping)==256
00231     return _fastq_generic(in_handle, out_handle, mapping)
00232 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_illumina_convert_qual (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Illumina 1.3+ FASTQ to QUAL conversion (PRIVATE).

Definition at line 315 of file _convert.py.

00315 
00316 def _fastq_illumina_convert_qual(in_handle, out_handle, alphabet=None):
00317     """Fast Illumina 1.3+ FASTQ to QUAL conversion (PRIVATE)."""
00318     mapping = dict((chr(q+64), str(q)) for q in range(0,62+1))
00319     return _fastq_convert_qual(in_handle, out_handle, mapping)
00320 
00321 
#TODO? - Handling aliases explicitly would let us shorten this list:

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_sanger_convert_fastq_illumina (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Sanger FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion. Will issue a warning if the scores had to be truncated at 62
(maximum possible in the Illumina 1.3+ FASTQ format).

Definition at line 147 of file _convert.py.

00147 
00148 def _fastq_sanger_convert_fastq_illumina(in_handle, out_handle, alphabet=None):
00149     """Fast Sanger FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
00150 
00151     Avoids creating SeqRecord and Seq objects in order to speed up this
00152     conversion. Will issue a warning if the scores had to be truncated at 62
00153     (maximum possible in the Illumina 1.3+ FASTQ format)
00154     """
00155     #Map unexpected chars to null
00156     trunc_char = chr(1)
00157     mapping = "".join([chr(0) for ascii in range(0, 33)] \
00158                      +[chr(64+q) for q in range(0, 62+1) ] \
00159                      +[trunc_char for ascii in range(96,127)] \
00160                      +[chr(0) for ascii in range(127, 256)])
00161     assert len(mapping)==256
00162     return _fastq_generic2(in_handle, out_handle, mapping, trunc_char,
00163                           "Data loss - max PHRED quality 62 in Illumina 1.3+ FASTQ")
00164 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_sanger_convert_fastq_sanger (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Sanger FASTQ to Sanger FASTQ conversion (PRIVATE).

Useful for removing line wrapping and the redundant second identifier
on the plus lines. Will also check that the quality string is valid.

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 84 of file _convert.py.

00084 
00085 def _fastq_sanger_convert_fastq_sanger(in_handle, out_handle, alphabet=None):
00086     """Fast Sanger FASTQ to Sanger FASTQ conversion (PRIVATE).
00087 
00088     Useful for removing line wrapping and the redundant second identifier
00089     on the plus lines. Will check also check the quality string is valid.
00090 
00091     Avoids creating SeqRecord and Seq objects in order to speed up this
00092     conversion.
00093     """
00094     #Map unexpected chars to null
00095     mapping = "".join([chr(0) for ascii in range(0, 33)] \
00096                      +[chr(ascii) for ascii in range(33, 127)] \
00097                      +[chr(0) for ascii in range(127, 256)])
00098     assert len(mapping)==256
00099     return _fastq_generic(in_handle, out_handle, mapping)
00100 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_sanger_convert_fastq_solexa (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Sanger FASTQ to Solexa FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion. Will issue a warning if the scores had to be truncated at 62
(maximum possible in the Solexa FASTQ format).

Definition at line 180 of file _convert.py.

00180 
00181 def _fastq_sanger_convert_fastq_solexa(in_handle, out_handle, alphabet=None):
00182     """Fast Sanger FASTQ to Solexa FASTQ conversion (PRIVATE).
00183 
00184     Avoids creating SeqRecord and Seq objects in order to speed up this
00185     conversion. Will issue a warning if the scores had to be truncated at 62
00186     (maximum possible in the Solexa FASTQ format)
00187     """
00188     #Map unexpected chars to null
00189     from Bio.SeqIO.QualityIO import solexa_quality_from_phred
00190     trunc_char = chr(1)
00191     mapping = "".join([chr(0) for ascii in range(0, 33)] \
00192                      +[chr(64+int(round(solexa_quality_from_phred(q)))) \
00193                        for q in range(0, 62+1)] \
00194                      +[trunc_char for ascii in range(96, 127)] \
00195                      +[chr(0) for ascii in range(127, 256)])
00196     assert len(mapping)==256
00197     return _fastq_generic2(in_handle, out_handle, mapping, trunc_char,
00198                           "Data loss - max Solexa quality 62 in Solexa FASTQ")
00199 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_sanger_convert_qual (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Sanger FASTQ to QUAL conversion (PRIVATE).

Definition at line 301 of file _convert.py.

00301 
00302 def _fastq_sanger_convert_qual(in_handle, out_handle, alphabet=None):
00303     """Fast Sanger FASTQ to QUAL conversion (PRIVATE)."""
00304     mapping = dict((chr(q+33), str(q)) for q in range(0,93+1))
00305     return _fastq_convert_qual(in_handle, out_handle, mapping)
00306 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_solexa_convert_fastq_illumina (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Solexa FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 200 of file _convert.py.

00200 
00201 def _fastq_solexa_convert_fastq_illumina(in_handle, out_handle, alphabet=None):
00202     """Fast Solexa FASTQ to Illumina 1.3+ FASTQ conversion (PRIVATE).
00203 
00204     Avoids creating SeqRecord and Seq objects in order to speed up this
00205     conversion.
00206     """
00207     #Map unexpected chars to null
00208     from Bio.SeqIO.QualityIO import phred_quality_from_solexa
00209     mapping = "".join([chr(0) for ascii in range(0, 59)] \
00210                      +[chr(64+int(round(phred_quality_from_solexa(q)))) \
00211                        for q in range(-5, 62+1)]\
00212                       +[chr(0) for ascii in range(127, 256)])
00213     assert len(mapping)==256
00214     return _fastq_generic(in_handle, out_handle, mapping)
00215 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_solexa_convert_fastq_sanger (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Solexa FASTQ to Sanger FASTQ conversion (PRIVATE).

Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 165 of file _convert.py.

00165 
00166 def _fastq_solexa_convert_fastq_sanger(in_handle, out_handle, alphabet=None):
00167     """Fast Solexa FASTQ to Sanger FASTQ conversion (PRIVATE).
00168 
00169     Avoids creating SeqRecord and Seq objects in order to speed up this
00170     conversion.
00171     """
00172     #Map unexpected chars to null
00173     from Bio.SeqIO.QualityIO import phred_quality_from_solexa
00174     mapping = "".join([chr(0) for ascii in range(0, 59)] \
00175                      +[chr(33+int(round(phred_quality_from_solexa(q)))) \
00176                        for q in range(-5, 62+1)]\
00177                       +[chr(0) for ascii in range(127, 256)])
00178     assert len(mapping)==256
00179     return _fastq_generic(in_handle, out_handle, mapping)

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_solexa_convert_fastq_solexa (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Solexa FASTQ to Solexa FASTQ conversion (PRIVATE).

Useful for removing line wrapping and the redundant second identifier
on the plus lines. Will also check that the quality string is valid.
Avoids creating SeqRecord and Seq objects in order to speed up this
conversion.

Definition at line 101 of file _convert.py.

00101 
00102 def _fastq_solexa_convert_fastq_solexa(in_handle, out_handle, alphabet=None):
00103     """Fast Solexa FASTQ to Solexa FASTQ conversion (PRIVATE).
00104 
00105     Useful for removing line wrapping and the redundant second identifier
00106     on the plus lines. Will check also check the quality string is valid.
00107     Avoids creating SeqRecord and Seq objects in order to speed up this
00108     conversion.
00109     """
00110     #Map unexpected chars to null
00111     mapping = "".join([chr(0) for ascii in range(0, 59)] \
00112                      +[chr(ascii) for ascii in range(59, 127)] \
00113                      +[chr(0) for ascii in range(127, 256)])
00114     assert len(mapping)==256
00115     return _fastq_generic(in_handle, out_handle, mapping)
00116 

Here is the call graph for this function:

def Bio.SeqIO._convert._fastq_solexa_convert_qual (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast Solexa FASTQ to QUAL conversion (PRIVATE).

Definition at line 307 of file _convert.py.

00307 
00308 def _fastq_solexa_convert_qual(in_handle, out_handle, alphabet=None):
00309     """Fast Solexa FASTQ to QUAL conversion (PRIVATE)."""
00310     from Bio.SeqIO.QualityIO import phred_quality_from_solexa
00311     mapping = dict((chr(q+64), str(int(round(phred_quality_from_solexa(q))))) \
00312                    for q in range(-5,62+1))
00313     return _fastq_convert_qual(in_handle, out_handle, mapping)
00314 

Here is the call graph for this function:

def Bio.SeqIO._convert._genbank_convert_fasta (   in_handle,
  out_handle,
  alphabet = None 
) [private]
Fast GenBank to FASTA (PRIVATE).

Definition at line 30 of file _convert.py.

00030 
00031 def _genbank_convert_fasta(in_handle, out_handle, alphabet=None):
00032     """Fast GenBank to FASTA (PRIVATE)."""
00033     #We don't need to parse the features...
00034     from Bio.GenBank.Scanner import GenBankScanner
00035     records = GenBankScanner().parse_records(in_handle, do_features=False)
00036     #For FASTA output we can ignore the alphabet too
00037     return SeqIO.write(records, out_handle, "fasta")
00038 

def Bio.SeqIO._convert._handle_convert (   in_handle,
  in_format,
  out_handle,
  out_format,
  alphabet = None 
) [private]
SeqIO conversion function (PRIVATE).

Definition at line 356 of file _convert.py.

00356 
00357 def _handle_convert(in_handle, in_format, out_handle, out_format, alphabet=None):
00358     """SeqIO conversion function (PRIVATE)."""
00359     try:
00360         f = _converter[(in_format, out_format)]
00361     except KeyError:
00362         f = None
00363     if f:
00364         return f(in_handle, out_handle, alphabet)
00365     else:
00366         records = SeqIO.parse(in_handle, in_format, alphabet)
00367         return SeqIO.write(records, out_handle, out_format)

Here is the caller graph for this function:


Variable Documentation

dictionary Bio.SeqIO._convert._converter

Definition at line 322 of file _convert.py.