Back to index

python-biopython  1.60
Classes | Functions
Bio.Sequencing.Ace Namespace Reference

Classes

class  rd
class  qa
class  ds
class  af
class  bs
class  rt
class  ct
class  wa
class  wr
class  Reads
class  Contig
class  ACEFileRecord

Functions

def parse
def read

Function Documentation

def Bio.Sequencing.Ace.parse (   handle)
parse(handle)
    
where handle is a file-like object.

This function returns an iterator that allows you to iterate
over the ACE file record by record:

    records = parse(handle)
    for record in records:
        # do something with the record

where each record is a Contig object.

Definition at line 243 of file Ace.py.

00243 
def parse(handle):
    """Iterate over the contigs of an ACE file.

    handle is a file-like object (any iterable yielding text lines).

    This function returns an iterator that allows you to iterate
    over the ACE file record by record:

        records = parse(handle)
        for record in records:
            # do something with the record

    where each record is a Contig object.

    Lines before the first line starting with 'CO' (e.g. the AS header)
    are skipped. If no such line is found the iterator is simply empty.

    Raises ValueError if a mandatory BQ, RD or QA line is missing, or if
    the input ends in the middle of an AF, BS, WA or CT block.
    """

    handle = iter(handle)

    # 'line' always holds the most recently read line; each section below
    # either consumes it or leaves it for the next section to inspect.
    line = ""
    while True:
        # at beginning, skip the AS and look for first CO command
        try:
            while True:
                if line.startswith('CO'):
                    break
                # next(handle) rather than handle.next(): the method form
                # only exists on Python 2 iterators.
                line = next(handle)
        except StopIteration:
            # no further contig in the input: end the iteration
            return

        record = Contig(line)

        # consensus sequence: every line up to the first blank one
        for line in handle:
            line = line.strip()
            if not line:
                break
            record.sequence += line

        # skip blank lines; the first non-blank line must be the BQ header
        for line in handle:
            if line.strip():
                break
        if not line.startswith("BQ"):
            raise ValueError("Failed to find BQ line")

        # base qualities: whitespace separated integers up to a blank line
        for line in handle:
            if not line.strip():
                break
            record.quality.extend(map(int, line.split()))

        # skip blank lines before the AF block
        for line in handle:
            if line.strip():
                break

        # one AF line per read; the block ends at the first non-AF line
        while True:
            if not line.startswith("AF "):
                break
            record.af.append(af(line))
            try:
                line = next(handle)
            except StopIteration:
                raise ValueError("Unexpected end of AF block")

        # skip blank lines between the AF and BS blocks
        while True:
            if line.strip():
                break
            try:
                line = next(handle)
            except StopIteration:
                raise ValueError("Unexpected end of file")

        # BS lines; the block ends at the first non-BS line
        while True:
            if not line.startswith("BS "):
                break
            record.bs.append(bs(line))
            try:
                line = next(handle)
            except StopIteration:
                raise ValueError("Failed to find end of BS block")

        # now read all the read data
        # it starts with a 'RD', and then a mandatory QA
        # then follows an optional DS
        # CT,RT,WA,WR may or may not be there in unlimited quantity. They might refer to the actual read or contig,
        # or, if encountered at the end of file, to any previous read or contig. the sort() method deals
        # with that later.
        while True:

            # each read must have a rd and qa
            try:
                while True:
                    # If I've met the condition, then stop reading the line.
                    if line.startswith("RD "):
                        break
                    line = next(handle)
            except StopIteration:
                raise ValueError("Failed to find RD line")

            record.reads.append(Reads(line))

            # read sequence: every line up to the first blank one
            for line in handle:
                line = line.strip()
                if not line:
                    break
                record.reads[-1].rd.sequence += line

            # skip blank lines; the first non-blank line must be QA
            for line in handle:
                if line.strip():
                    break
            if not line.startswith("QA "):
                raise ValueError("Failed to find QA line")
            record.reads[-1].qa = qa(line)

            # now one ds can follow
            for line in handle:
                if line.strip():
                    break
            else:
                # input exhausted right after the QA line: this contig is done
                break

            if line.startswith("DS "):
                record.reads[-1].ds = ds(line)
                # mark the line as consumed so the tag loop reads a new one
                line = ""
            # the file could just end, or there's some more stuff. In ace files, anything can happen.
            # the following tags are interspersed between reads and can appear multiple times.
            while True:
                # something left
                try:
                    while True:
                        if line.strip():
                            break
                        line = next(handle)
                except StopIteration:
                    # file ends here
                    break
                if line.startswith("RT{"):
                    # now if we're at the end of the file, this rt could
                    # belong to a previous read, not the actual one.
                    # we store it here were it appears, the user can sort later.
                    if record.reads[-1].rt is None:
                        record.reads[-1].rt = []
                    for line in handle:
                        line = line.strip()
                        # NOTE(review): the COMMENT branch indexes rt[-1], so it
                        # presumes an rt line was appended before the comment.
                        if line.startswith("COMMENT{"):
                            if line[8:].strip():
                                # MIRA 3.0.5 would miss the new line out :(
                                record.reads[-1].rt[-1].comment.append(line[8:])
                            for line in handle:
                                line = line.strip()
                                if line.endswith("C}"):
                                    break
                                record.reads[-1].rt[-1].comment.append(line)
                        elif line == '}':
                            break
                        else:
                            record.reads[-1].rt.append(rt(line))
                    line = ""
                elif line.startswith("WR{"):
                    if record.reads[-1].wr is None:
                        record.reads[-1].wr = []
                    for line in handle:
                        line = line.strip()
                        if line == '}':
                            break
                        record.reads[-1].wr.append(wr(line))
                    line = ""
                elif line.startswith("WA{"):
                    if record.wa is None:
                        record.wa = []
                    # the line after 'WA{' is the tag header
                    try:
                        line = next(handle)
                    except StopIteration:
                        raise ValueError("Failed to read WA block")
                    record.wa.append(wa(line))
                    for line in handle:
                        line = line.strip()
                        if line == '}':
                            break
                        record.wa[-1].info.append(line)
                    line = ""
                elif line.startswith("CT{"):
                    if record.ct is None:
                        record.ct = []
                    # the line after 'CT{' is the tag header
                    try:
                        line = next(handle)
                    except StopIteration:
                        raise ValueError("Failed to read CT block")
                    record.ct.append(ct(line))
                    for line in handle:
                        line = line.strip()
                        if line == "COMMENT{":
                            for line in handle:
                                line = line.strip()
                                if line.endswith("C}"):
                                    break
                                record.ct[-1].comment.append(line)
                        elif line == '}':
                            break
                        else:
                            record.ct[-1].info.append(line)
                    line = ""
                else:
                    # not a tag: leave the line for the RD / CO checks below
                    break

            if not line.startswith('RD'):  # another read?
                break

        yield record

Here is the caller graph for this function:

def Bio.Sequencing.Ace.read (   handle)
Parses the full ACE file and returns an ACEFileRecord holding the list of contigs.

Definition at line 509 of file Ace.py.

00509 
def read(handle):
    """Parse a complete ACE file into a single ACEFileRecord.

    handle is a file-like object (any iterable yielding text lines).

    The first line must be the 'AS' header; its two integer fields are
    stored as record.ncontigs and record.nreads. All contigs are then
    read with parse() into the list record.contigs, and record.sort()
    is called before the record is returned.

    Raises ValueError if the input is empty, does not start with 'AS',
    or the AS line does not carry two integer counts.
    """

    handle = iter(handle)

    # next(handle) rather than handle.next(): the method form only exists
    # on Python 2 iterators.
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Premature end of file")

    # check if the file starts correctly
    if not line.startswith('AS'):
        raise ValueError("File does not start with 'AS'.")

    words = line.split()
    try:
        ncontigs, nreads = map(int, words[1:3])
    except ValueError:
        # covers both missing fields and non-integer fields
        raise ValueError("Malformed AS line, expected 'AS <contigs> <reads>'.")

    # only build the record once the header is known to be valid
    record = ACEFileRecord()
    record.ncontigs = ncontigs
    record.nreads = nreads

    # now read all the records
    record.contigs = list(parse(handle))
    # wa, ct, rt tags are usually at the end of the file, but not necessarily (correct?).
    # If the iterator is used, the tags are returned with the contig or the read after which they appear,
    # if all tags are at the end, they are read with the last contig. The concept of an
    # iterator leaves no other choice. But if the user uses the ACEParser, we can check
    # them and put them into the appropriate contig/read instance.
    # Conclusion: An ACE file is not a filetype for which iteration is 100% suitable...
    record.sort()
    return record

Here is the call graph for this function: