Back to index

python-biopython  1.60
Classes | Functions
Bio.ExPASy.Prosite Namespace Reference

Classes

class  Record

Functions

def parse
def read
def __read

Function Documentation

def Bio.ExPASy.Prosite.__read (   handle) [private]

Definition at line 146 of file Prosite.py.

00146 
def __read(handle):
    """Read the next Prosite record from handle (private).

    Lines are consumed from handle one at a time.  The first two
    characters of each line are the keyword and everything from column 5
    onwards (right-stripped) is its value.  An ID line starts a new
    Record; a "//" line ends it.  A "//" seen before any ID line is taken
    to close the copyright header and is skipped.

    handle   - iterable yielding the lines of a Prosite file.

    Returns the populated Record, or None if the handle is exhausted
    before any record has been started.

    Raises ValueError for an unknown keyword, an unknown NR/CC qualifier,
    an unknown DR type flag, a malformed ID, DT or NR line, or when the
    handle runs out in the middle of a record.
    """
    import re

    # Trailing tags expected on the three fields of a DT line, in order.
    date_tags = ('(CREATED)', '(DATA UPDATE)', '(INFO UPDATE)')
    # NR qualifiers whose data looks like "hits(sequences)".
    nr_hit_attributes = {
        '/TOTAL': 'nr_total',
        '/POSITIVE': 'nr_positive',
        '/UNKNOWN': 'nr_unknown',
        '/FALSE_POS': 'nr_false_pos',
    }
    # CC qualifiers whose data is stored on the record unchanged.
    cc_attributes = {
        '/TAXO-RANGE': 'cc_taxo_range',
        '/MAX-REPEAT': 'cc_max_repeat',
        '/SKIP-FLAG': 'cc_skip_flag',
        '/MATRIX_TYPE': 'cc_matrix_type',
        '/SCALING_DB': 'cc_scaling_db',
        '/AUTHOR': 'cc_author',
        '/FT_KEY': 'cc_ft_key',
        '/FT_DESC': 'cc_ft_desc',
        '/VERSION': 'cc_version',
    }
    # DR type flag -> name of the Record list collecting those references.
    dr_lists = {
        'T': 'dr_positive',
        'F': 'dr_false_pos',
        'N': 'dr_false_neg',
        'P': 'dr_potential',
        '?': 'dr_unknown',
    }

    record = None
    for line in handle:
        keyword, value = line[:2], line[5:].rstrip()
        if keyword == 'ID':
            record = Record()
            cols = value.split("; ")
            if len(cols) != 2:
                raise ValueError("I don't understand identification line\n%s"
                                 % line)
            record.name = cols[0]
            record.type = cols[1].rstrip('.')    # don't want '.'
        elif keyword == 'AC':
            record.accession = value.rstrip(';')
        elif keyword == 'DT':
            dates = value.rstrip('.').split("; ")
            # Check the field count first so that a short line is reported
            # as a ValueError rather than escaping as an IndexError.
            if len(dates) < 3:
                raise ValueError("I don't understand date line\n%s" % line)
            for date, tag in zip(dates, date_tags):
                if not date.endswith(tag):
                    raise ValueError("I don't understand date line\n%s"
                                     % line)
            # Cut the tag off by length: str.rstrip(chars) treats its
            # argument as a set of characters, not as a suffix.
            stamps = [date[:-len(tag)].rstrip()
                      for date, tag in zip(dates, date_tags)]
            record.created = stamps[0]
            record.data_update = stamps[1]
            record.info_update = stamps[2]
        elif keyword == 'DE':
            record.description = value
        elif keyword == 'PA':
            record.pattern += value
        elif keyword == 'MA':
            record.matrix.append(value)
        elif keyword == 'PP':
            record.postprocessing.extend(value.split(";"))
        elif keyword == 'RU':
            record.rules.append(value)
        elif keyword == 'NR':
            for col in value.split(";"):
                if not col:
                    continue
                qual, data = [word.lstrip() for word in col.split("=")]
                if qual == '/RELEASE':
                    release, seqs = data.split(",")
                    record.nr_sp_release = release
                    record.nr_sp_seqs = int(seqs)
                elif qual == '/FALSE_NEG':
                    record.nr_false_neg = int(data)
                elif qual == '/PARTIAL':
                    record.nr_partial = int(data)
                elif qual in nr_hit_attributes:
                    m = re.match(r'(\d+)\((\d+)\)', data)
                    if not m:
                        # ValueError, like every other parse failure here.
                        raise ValueError("Broken data %s in comment line\n%s"
                                         % (repr(data), line))
                    hits = tuple(map(int, m.groups()))
                    setattr(record, nr_hit_attributes[qual], hits)
                else:
                    raise ValueError("Unknown qual %s in comment line\n%s"
                                     % (repr(qual), line))
        elif keyword == 'CC':
            #Expect CC lines like this:
            #CC   /TAXO-RANGE=??EPV; /MAX-REPEAT=2;
            #Can (normally) split on ";" and then on "="
            for col in value.split(";"):
                if not col or col.startswith('Automatic scaling'):
                    # DNAJ_2 in Release 15 has a non-standard comment line:
                    # CC   Automatic scaling using reversed database
                    # Throw it away.  (Should I keep it?)
                    continue
                if "=" not in col:
                    #Missing qualifier!  Can we recover gracefully?
                    #For example, from Bug 2403, in PS50293 have:
                    #CC /AUTHOR=K_Hofmann; N_Hulo
                    continue
                qual, data = [word.lstrip() for word in col.split("=")]
                if qual == '/SITE':
                    pos, desc = data.split(",")
                    record.cc_site.append((int(pos), desc))
                elif qual in cc_attributes:
                    setattr(record, cc_attributes[qual], data)
                else:
                    raise ValueError("Unknown qual %s in comment line\n%s"
                                     % (repr(qual), line))
        elif keyword == 'DR':
            for ref in value.split(";"):
                if not ref:
                    continue
                acc, name, flag = [word.strip() for word in ref.split(",")]
                if flag not in dr_lists:
                    raise ValueError("I don't understand type flag %s" % flag)
                getattr(record, dr_lists[flag]).append((acc, name))
        elif keyword == '3D':
            for pdb_id in value.split():
                record.pdb_structs.append(pdb_id.rstrip(';'))
        elif keyword == 'PR':
            record.prorules.extend(value.split(";"))
        elif keyword == 'DO':
            record.pdoc = value.rstrip(';')
        elif keyword == '//':
            if not record:
                # Then this was the copyright statement
                continue
            break
        else:
            raise ValueError("Unknown keyword %s found" % keyword)
    else:
        # The handle ran out without a terminating "//".
        if record is not None:
            # A record was started but never closed: report the truncation
            # instead of silently discarding the partial record.
            raise ValueError("Unexpected end of stream.")
        return None
    return record

Here is the caller graph for this function:

def Bio.ExPASy.Prosite.parse (   handle)
Parse Prosite records.

This function is for parsing Prosite files containing multiple
records.

handle   - handle to the file.

Definition at line 25 of file Prosite.py.

00025 
def parse(handle):
    """Iterate over the records of a multi-record Prosite file.

    Records are produced lazily: each one is read from the handle only
    when the caller asks for it, and iteration ends as soon as the
    private reader returns nothing.

    handle   - handle to the file."""
    record = __read(handle)
    while record:
        yield record
        record = __read(handle)

Here is the call graph for this function:

def Bio.ExPASy.Prosite.read (   handle)
Read one Prosite record.

This function is for parsing Prosite files containing
exactly one record.

handle   - handle to the file.

Definition at line 38 of file Prosite.py.

00038 
def read(handle):
    """Read a single Prosite record.

    Use this for Prosite files that hold exactly one record.  After the
    record has been read, the rest of the handle is read as well; a
    ValueError is raised if anything is left over.

    handle   - handle to the file."""
    only_record = __read(handle)
    # Nothing should follow the record we just consumed.
    if handle.read():
        raise ValueError("More than one Prosite record found")
    return only_record

Here is the call graph for this function: