Back to index

python-biopython  1.60
Functions | Variables
Bio.PDB.parse_pdb_header Namespace Reference

Functions

def _get_journal
def _get_references
def _format_date
def _chop_end_codes
def _chop_end_misc
def _nice_case
def parse_pdb_header
def _parse_pdb_header_list

Variables

list filename = sys.argv[1]
tuple handle = open(filename,'r')
tuple data_dict = parse_pdb_header(handle)

Function Documentation

def Bio.PDB.parse_pdb_header._chop_end_codes (   line) [private]
Chops lines ending with  '     1CSA  14' and the like.

Definition at line 83 of file parse_pdb_header.py.

00083 
00084 def _chop_end_codes(line):
00085     """Chops lines ending with  '     1CSA  14' and the like."""
00086     return re.sub("\s\s\s\s+[\w]{4}.\s+\d*\Z","",line)

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._chop_end_misc (   line) [private]
Chops lines ending with  '     14-JUL-97  1CSA' and the like.

Definition at line 87 of file parse_pdb_header.py.

00087 
00088 def _chop_end_misc(line):
00089     """Chops lines ending with  '     14-JUL-97  1CSA' and the like."""
00090     return re.sub("\s\s\s\s+.*\Z","",line)

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._format_date (   pdb_date) [private]
Converts dates from DD-Mon-YY to YYYY-MM-DD format.

Definition at line 65 of file parse_pdb_header.py.

00065 
00066 def _format_date(pdb_date):
00067     """Converts dates from DD-Mon-YY to YYYY-MM-DD format."""
00068     date=""
00069     year=int(pdb_date[7:])
00070     if year<50:
00071         century=2000
00072     else:
00073         century=1900            
00074     date=str(century+year)+"-"
00075     all_months=['xxx','Jan','Feb','Mar','Apr','May','Jun','Jul',\
00076     'Aug','Sep','Oct','Nov','Dec']        
00077     month=str(all_months.index(pdb_date[3:6]))
00078     if len(month)==1:
00079         month = '0'+month
00080     date = date+month+'-'+pdb_date[:2]
00081     return date
00082 

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._get_journal (   inl) [private]

Definition at line 32 of file parse_pdb_header.py.

00032 
00033 def _get_journal(inl):
00034     # JRNL        AUTH   L.CHEN,M.DOI,F.S.MATHEWS,A.Y.CHISTOSERDOV,           2BBK   7
00035     journal=""
00036     for l in inl:
00037         if re.search("\AJRNL",l):
00038             journal+=l[19:72].lower()
00039     journal=re.sub("\s\s+"," ",journal)
00040     return journal

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._get_references (   inl) [private]

Definition at line 41 of file parse_pdb_header.py.

00041 
00042 def _get_references(inl):
00043     # REMARK   1 REFERENCE 1                                                  1CSE  11
00044     # REMARK   1  AUTH   W.BODE,E.PAPAMOKOS,D.MUSIL                           1CSE  12
00045     references=[]
00046     actref=""
00047     for l in inl:        
00048         if re.search("\AREMARK   1",l):
00049             if re.search("\AREMARK   1 REFERENCE",l):
00050                 if actref!="":
00051                     actref=re.sub("\s\s+"," ",actref)
00052                     if actref!=" ":
00053                         references.append(actref)
00054                     actref=""
00055             else:
00056                 actref+=l[19:72].lower()
00057 
00058     if actref!="":
00059         actref=re.sub("\s\s+"," ",actref)
00060         if actref!=" ":
00061             references.append(actref)
00062     return references
00063     
00064       
# bring dates to format: 1909-01-08

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._nice_case (   line) [private]
Makes A Lowercase String With Capitals.

Definition at line 91 of file parse_pdb_header.py.

00091 
00092 def _nice_case(line):
00093     """Makes A Lowercase String With Capitals."""
00094     l=line.lower()
00095     s=""
00096     i=0
00097     nextCap=1
00098     while i<len(l):
00099         c=l[i]
00100         if c>='a' and c<='z' and nextCap:
00101             c=c.upper()
00102             nextCap=0
00103         elif c==' ' or c=='.' or c==',' or c==';' or c==':' or c=='\t' or\
00104             c=='-' or c=='_':
00105             nextCap=1            
00106         s+=c
00107         i+=1
00108     return s

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header._parse_pdb_header_list (   header) [private]

Definition at line 128 of file parse_pdb_header.py.

00128 
def _parse_pdb_header_list(header):
    """Parses a list of PDB header lines into a dictionary.

    header is an iterable of header lines (strings). The returned dictionary
    always contains the keys name, head, deposition_date, release_date,
    structure_method, resolution, structure_reference, journal_reference,
    author, compound and source. The keys keywords and journal are only
    present if KEYWDS or JRNL records were found.

    compound and source map a molecule id (string, default '1') to a
    dictionary of the fields given in the COMPND / SOURCE records; text that
    does not belong to a 'key: value' token is appended to the field seen
    last (initially 'misc').

    resolution is a float, or None if the REMARK 2 RESOLUTION value could
    not be converted to a number.
    """
    # database fields
    pdbh_dict = {
        'name': "",
        'head': '',
        'deposition_date': "1909-01-08",
        'release_date': "1909-01-08",
        'structure_method': "unknown",
        'resolution': 0.0,
        'structure_reference': "unknown",
        'journal_reference': "unknown",
        'author': "",
        'compound': {'1': {'misc': ''}},
        'source': {'1': {'misc': ''}},
    }

    pdbh_dict['structure_reference'] = _get_references(header)
    pdbh_dict['journal_reference'] = _get_journal(header)
    comp_molid = "1"
    src_molid = "1"
    last_comp_key = "misc"
    last_src_key = "misc"

    for hh in header:
        h = re.sub(r"[\s\n\r]*\Z", "", hh)  # chop linebreaks off
        key = h[:6].strip()
        tail = h[10:].strip()

        # From here, all the keys from the header are being parsed
        if key == "TITLE":
            name = _chop_end_codes(tail).lower()
            # 'name' is pre-seeded with "", so every title line (including
            # the first) is appended with a leading space.
            pdbh_dict['name'] += " " + name
        elif key == "HEADER":
            rr = re.search(r"\d\d-\w\w\w-\d\d", tail)
            if rr is not None:
                pdbh_dict['deposition_date'] = _format_date(
                    _nice_case(rr.group()))
            pdbh_dict['head'] = _chop_end_misc(tail).lower()
        elif key == "COMPND":
            tt = re.sub(r"\;\s*\Z", "", _chop_end_codes(tail)).lower()
            # look for E.C. numbers in COMPND lines
            rec = re.search(r'\d+\.\d+\.\d+\.\d+', tt)
            if rec:
                pdbh_dict['compound'][comp_molid]['ec_number'] = rec.group()
                tt = re.sub(r"\((e\.c\.)*\d+\.\d+\.\d+\.\d+\)", "", tt)
            tok = tt.split(":")
            if len(tok) >= 2:
                ckey = tok[0]
                cval = re.sub(r"\A\s*", "", tok[1])
                if ckey == 'mol_id':
                    # A new molecule starts; following fields belong to it.
                    pdbh_dict['compound'][cval] = {'misc': ''}
                    comp_molid = cval
                    last_comp_key = "misc"
                else:
                    pdbh_dict['compound'][comp_molid][ckey] = cval
                    last_comp_key = ckey
            else:
                # Continuation text: append to the field seen last.
                pdbh_dict['compound'][comp_molid][last_comp_key] += \
                    tok[0] + " "
        elif key == "SOURCE":
            tt = re.sub(r"\;\s*\Z", "", _chop_end_codes(tail)).lower()
            tok = tt.split(":")
            if len(tok) >= 2:
                ckey = tok[0]
                cval = re.sub(r"\A\s*", "", tok[1])
                if ckey == 'mol_id':
                    pdbh_dict['source'][cval] = {'misc': ''}
                    # Track the SOURCE molecule separately from the COMPND
                    # one; sharing comp_molid could index a molecule id that
                    # only exists under 'compound'.
                    src_molid = cval
                    last_src_key = "misc"
                else:
                    pdbh_dict['source'][src_molid][ckey] = cval
                    last_src_key = ckey
            else:
                pdbh_dict['source'][src_molid][last_src_key] += tok[0] + " "
        elif key == "KEYWDS":
            kwd = _chop_end_codes(tail).lower()
            if 'keywords' in pdbh_dict:
                pdbh_dict['keywords'] += " " + kwd
            else:
                pdbh_dict['keywords'] = kwd
        elif key == "EXPDTA":
            expd = _chop_end_codes(tail)
            # chop junk at end of lines for some structures
            expd = re.sub(r'\s\s\s\s\s\s\s.*\Z', '', expd)
            pdbh_dict['structure_method'] = expd.lower()
        elif key == "CAVEAT":
            # make Annotation entries out of these!!!
            pass
        elif key == "REVDAT":
            rr = re.search(r"\d\d-\w\w\w-\d\d", tail)
            if rr is not None:
                pdbh_dict['release_date'] = _format_date(
                    _nice_case(rr.group()))
        elif key == "JRNL":
            if 'journal' in pdbh_dict:
                pdbh_dict['journal'] += tail
            else:
                pdbh_dict['journal'] = tail
        elif key == "AUTHOR":
            # 'author' is pre-seeded with "", so plain appending suffices.
            pdbh_dict['author'] += _nice_case(_chop_end_codes(tail))
        elif key == "REMARK":
            if re.search(r"REMARK   2 RESOLUTION.", hh):
                r = _chop_end_codes(
                    re.sub(r"REMARK   2 RESOLUTION.", '', hh))
                r = re.sub(r"\s+ANGSTROM.*", "", r)
                try:
                    pdbh_dict['resolution'] = float(r)
                except ValueError:
                    # nonstandard resolution, e.g. 'NOT APPLICABLE.'
                    pdbh_dict['resolution'] = None
        else:
            # all other record types are ignored
            pass

    if pdbh_dict['structure_method'] == 'unknown':
        resolution = pdbh_dict['resolution']
        # resolution may be None (unparsable value); None cannot be
        # compared with a float on Python 3.
        if resolution is not None and resolution > 0.0:
            pdbh_dict['structure_method'] = 'x-ray diffraction'
    return pdbh_dict

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.PDB.parse_pdb_header.parse_pdb_header (   infile)
Returns the header lines of a pdb file as a dictionary.

Dictionary keys are: head, deposition_date, release_date, structure_method,
resolution, structure_reference, journal_reference, author and
compound.

Definition at line 109 of file parse_pdb_header.py.

00109 
def parse_pdb_header(infile):
    """
    Returns the header lines of a pdb file as a dictionary.

    infile is handed to File.as_handle and read line by line. Reading stops
    at the first ATOM, HETATM or MODEL record; the lines collected before it
    are passed to _parse_pdb_header_list, whose result is returned.

    Dictionary keys include: name, head, deposition_date, release_date,
    structure_method, resolution, structure_reference, journal_reference,
    author, compound and source.
    """
    # Record types that mark the end of the header section.
    coordinate_records = ('ATOM  ', 'HETATM', 'MODEL ')
    header_lines = []
    with File.as_handle(infile, 'r') as handle:
        for line in handle:
            if line[0:6] in coordinate_records:
                break
            header_lines.append(line)
    return _parse_pdb_header_list(header_lines)

Here is the call graph for this function:


Variable Documentation

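tuple Bio.PDB.parse_pdb_header.data_dict = parse_pdb_header(handle)

Definition at line 259 of file parse_pdb_header.py.

list Bio.PDB.parse_pdb_header.filename = sys.argv[1]

Definition at line 257 of file parse_pdb_header.py.

tuple Bio.PDB.parse_pdb_header.handle = open(filename,'r')

Definition at line 258 of file parse_pdb_header.py.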