Back to index

python-biopython  1.60
Public Member Functions | Public Attributes | Private Member Functions | Private Attributes
Bio.Motif._Motif.Motif Class Reference

List of all members.

Public Member Functions

def __init__
def add_instance
def set_mask
def pwm
def log_odds
def ic
def exp_score
def search_instances
def score_hit
def search_pwm
def dist_pearson
def dist_pearson_at
def dist_product
def dist_product_at
def dist_dpq
def dist_dpq_at
def __str__
def __len__
def reverse_complement
def make_instances_from_counts
def make_counts_from_instances
def __getitem__
def consensus
def anticonsensus
def max_score
def min_score
def weblogo
def format
def scanPWM

Public Attributes

 instances
 has_instances
 counts
 has_counts
 mask
 alphabet
 length
 background
 beta
 info
 name

Private Member Functions

def _check_length
def _check_alphabet
def _read
def _write
def _to_fasta
def _from_jaspar_pfm
def _from_vert_matrix
def _from_horiz_matrix
def _from_jaspar_sites
def _to_transfac
def _to_vertical_matrix
def _to_horizontal_matrix
def _to_jaspar_pfm
def _pwm_calculate

Private Attributes

 _pwm_is_current
 _pwm
 _log_odds_is_current
 _log_odds

Detailed Description

A class representing sequence motifs.

Definition at line 12 of file _Motif.py.


Constructor & Destructor Documentation

def Bio.Motif._Motif.Motif.__init__ (   self,
  alphabet = IUPAC.unambiguous_dna 
)

Definition at line 16 of file _Motif.py.

00016 
00017     def __init__(self,alphabet=IUPAC.unambiguous_dna):
00018         self.instances = []
00019         self.has_instances=False
00020         self.counts = {}
00021         self.has_counts=False
00022         self.mask = []
00023         self._pwm_is_current = False
00024         self._pwm = []
00025         self._log_odds_is_current = False
00026         self._log_odds = []
00027         self.alphabet=alphabet
00028         self.length=None
00029         self.background=dict((n, 1.0/len(self.alphabet.letters)) \
00030                              for n in self.alphabet.letters)
00031         self.beta=1.0
00032         self.info=None
00033         self.name=""

Here is the caller graph for this function:


Member Function Documentation

def Bio.Motif._Motif.Motif.__getitem__ (   self,
  index 
)
Returns the probability distribution over symbols at a given position, padding with background.

If the requested index is out of bounds, the returned distribution comes from background.

Definition at line 566 of file _Motif.py.

00566 
00567     def __getitem__(self,index):
00568         """Returns the probability distribution over symbols at a given position, padding with background.
00569 
00570         If the requested index is out of bounds, the returned distribution comes from background.
00571         """
00572         if index in range(self.length):
00573             return self.pwm()[index]
00574         else:
00575             return self.background

Here is the call graph for this function:

Here is the caller graph for this function:

Returns the length of a motif.

Please use this method (i.e. invoke len(m)) instead of referring to m.length directly.

Definition at line 377 of file _Motif.py.

00377 
00378     def __len__(self):
00379         """return the length of a motif
00380 
00381         Please use this method (i.e. invoke len(m)) instead of refering to the m.length directly.
00382         """
00383         if self.length==None:
00384             return 0
00385         else:
00386             return self.length
        
def Bio.Motif._Motif.Motif.__str__ (   self,
  masked = False 
)
string representation of a motif.

Definition at line 361 of file _Motif.py.

00361 
00362     def __str__(self,masked=False):
00363         """ string representation of a motif.
00364         """
00365         str = ""
00366         for inst in self.instances:
00367             str = str + inst.tostring() + "\n"
00368 
00369         if masked:
00370             for i in xrange(self.length):
00371                 if self.mask[i]:
00372                     str = str + "*"
00373                 else:
00374                     str = str + " "
00375             str = str + "\n"
00376         return str

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._check_alphabet (   self,
  alphabet 
) [private]

Definition at line 41 of file _Motif.py.

00041 
00042     def _check_alphabet(self,alphabet):
00043         if self.alphabet==None:
00044             self.alphabet=alphabet
00045         elif self.alphabet != alphabet:
00046                 raise ValueError("Wrong Alphabet")
        

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._check_length (   self,
  len 
) [private]

Definition at line 34 of file _Motif.py.

00034 
00035     def _check_length(self, len):
00036         if self.length==None:
00037             self.length = len
00038         elif self.length != len:
00039             print "len",self.length,self.instances, len
00040             raise ValueError("You can't change the length of the motif")

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._from_horiz_matrix (   self,
  stream,
  letters = None,
  make_instances = False 
) [private]
Reads a horizontal count matrix from stream and fills in the counts.

Definition at line 460 of file _Motif.py.

00460 
00461     def _from_horiz_matrix(self,stream,letters=None,make_instances=False):
00462         """reads a horizontal count matrix from stream and fill in the counts.
00463         """
00464         if letters==None:
00465             letters=self.alphabet.letters
00466         self.counts = {}
00467         self.has_counts=True
00468 
00469         for i in letters:
00470             ln = stream.readline().strip().split()
00471             #if there is a letter in the beginning, ignore it
00472             if ln[0]==i:
00473                 ln=ln[1:]
00474             #print ln
00475             try:
00476                 self.counts[i]=map(int,ln)
00477             except ValueError: #not integers
00478                 self.counts[i]=map(float,ln) #map(lambda s: int(100*float(s)),ln)
00479             #print counts[i]
00480         
00481         s = sum(self.counts[nuc][0] for nuc in letters)
00482         l = len(self.counts[letters[0]])
00483         self.length=l
00484         self.set_mask("*"*l)
00485         if make_instances==True:
00486             self.make_instances_from_counts()
00487         return self
00488         

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._from_jaspar_pfm (   self,
  stream,
  make_instances = False 
) [private]
Reads the motif from a JASPAR .pfm file.

The instances are fake, but the pwm is accurate.

Definition at line 431 of file _Motif.py.

00431 
00432     def _from_jaspar_pfm(self,stream,make_instances=False):
00433         """
00434         reads the motif from Jaspar .pfm file
00435 
00436         The instances are fake, but the pwm is accurate.
00437         """
00438         return self._from_horiz_matrix(stream,letters="ACGT",make_instances=make_instances)

Here is the call graph for this function:

def Bio.Motif._Motif.Motif._from_jaspar_sites (   self,
  stream 
) [private]
Reads the motif from a JASPAR .sites file.

The instances and pwm are OK.

Definition at line 539 of file _Motif.py.

00539 
00540     def _from_jaspar_sites(self,stream):
00541         """
00542         reads the motif from Jaspar .sites file
00543 
00544         The instances and pwm are OK.
00545         """
00546         
00547         while True:
00548             ln = stream.readline()# read the header "$>...."
00549             if ln=="" or ln[0]!=">":
00550                 break
00551             
00552             ln=stream.readline().strip()#read the actual sequence
00553             i=0
00554             while ln[i]==ln[i].lower():
00555                 i+=1
00556             inst=""
00557             while i<len(ln) and ln[i]==ln[i].upper():
00558                 inst+=ln[i]
00559                 i+=1
00560             inst=Seq(inst,self.alphabet)                
00561             self.add_instance(inst)
00562 
00563         self.set_mask("*"*len(inst))
00564         return self
00565 

Here is the call graph for this function:

def Bio.Motif._Motif.Motif._from_vert_matrix (   self,
  stream,
  letters = None,
  make_instances = False 
) [private]
Reads a vertical count matrix from stream and fills in the counts.

Definition at line 439 of file _Motif.py.

00439 
00440     def _from_vert_matrix(self,stream,letters=None,make_instances=False):
00441         """reads a vertical count matrix from stream and fill in the counts.
00442         """
00443 
00444         self.counts = {}
00445         self.has_counts=True
00446         if letters==None:
00447             letters=self.alphabet.letters
00448         self.length=0
00449         for i in letters:
00450             self.counts[i]=[]
00451         for ln in stream.readlines():
00452             rec=map(float,ln.strip().split())
00453             for k,v in zip(letters,rec):
00454                 self.counts[k].append(v)
00455             self.length+=1
00456         self.set_mask("*"*self.length)
00457         if make_instances==True:
00458             self.make_instances_from_counts()
00459         return self
        

Here is the call graph for this function:

def Bio.Motif._Motif.Motif._pwm_calculate (   self,
  sequence 
) [private]

Definition at line 789 of file _Motif.py.

00789 
00790     def _pwm_calculate(self, sequence):
00791         logodds = self.log_odds()
00792         m = len(logodds)
00793         s = len(sequence)
00794         n = s - m + 1
00795         result = [None] * n
00796         for i in xrange(n):
00797             score = 0.0
00798             for j in xrange(m):
00799                 c = sequence[i+j]
00800                 temp = logodds[j].get(c)
00801                 if temp==None:
00802                     break
00803                 score += temp
00804             else:
00805                 result[i] = score
00806         return result

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._read (   self,
  stream 
) [private]
Reads the motif from the stream (in AlignAce format).

the self.alphabet variable must be set beforehand.
If the last line contains asterisks it is used for setting mask

Definition at line 347 of file _Motif.py.

00347 
00348     def _read(self,stream):
00349         """Reads the motif from the stream (in AlignAce format).
00350 
00351         the self.alphabet variable must be set beforehand.
00352         If the last line contains asterisks it is used for setting mask
00353         """
00354         
00355         while 1:
00356             ln = stream.readline()
00357             if "*" in ln:
00358                 self.set_mask(ln.strip("\n\c"))
00359                 break
00360             self.add_instance(Seq(ln.strip(),self.alphabet))
        

Here is the call graph for this function:

def Bio.Motif._Motif.Motif._to_fasta (   self) [private]
FASTA representation of motif

Definition at line 396 of file _Motif.py.

00396 
00397     def _to_fasta(self):
00398         """
00399         FASTA representation of motif
00400         """
00401         if not self.has_instances:
00402             self.make_instances_from_counts()
00403         str = ""
00404         for i,inst in enumerate(self.instances):
00405             str = str + ">instance%d\n"%i + inst.tostring() + "\n"
00406             
00407         return str       

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._to_horizontal_matrix (   self,
  letters = None,
  normalized = True 
) [private]
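Returns a string representation of the motif as a horizontal matrix (one tab-separated row per letter): the PWM computed without pseudocounts when normalized is True, otherwise the raw counts.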

Definition at line 713 of file _Motif.py.

00713 
00714     def _to_horizontal_matrix(self,letters=None,normalized=True):
00715         """Return string representation of the motif as  a matrix.
00716         
00717         """
00718         if letters==None:
00719             letters=self.alphabet.letters
00720         res=""
00721         if normalized: #output PWM
00722             self._pwm_is_current=False
00723             mat=self.pwm(laplace=False)
00724             for a in letters:
00725                 res+="\t".join([str(mat[i][a]) for i in range(self.length)])
00726                 res+="\n"
00727         else: #output counts
00728             if not self.has_counts:
00729                 self.make_counts_from_instances()
00730             mat=self.counts
00731             for a in letters:
00732                 res+="\t".join([str(mat[a][i]) for i in range(self.length)])
00733                 res+="\n"
00734         return res

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._to_jaspar_pfm (   self) [private]
Returns the pfm representation of the motif

Definition at line 735 of file _Motif.py.

00735 
00736     def _to_jaspar_pfm(self):
00737         """Returns the pfm representation of the motif
00738         """
00739         return self._to_horizontal_matrix(normalized=False,letters="ACGT")

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._to_transfac (   self) [private]
Write the representation of a motif in TRANSFAC format

Definition at line 674 of file _Motif.py.

00674 
00675     def _to_transfac(self):
00676         """Write the representation of a motif in TRANSFAC format
00677         """
00678         res="XX\nTY Motif\n" #header
00679         try:
00680             res+="ID %s\n"%self.name
00681         except:
00682             pass
00683         res+="BF undef\nP0"
00684         for a in self.alphabet.letters:
00685             res+=" %s"%a
00686         res+="\n"
00687         if not self.has_counts:
00688             self.make_counts_from_instances()
00689         for i in range(self.length):
00690             if i<9:
00691                 res+="0%d"%(i+1)
00692             else:
00693                 res+="%d"%(i+1)
00694             for a in self.alphabet.letters:
00695                 res+=" %d"%self.counts[a][i]
00696             res+="\n"
00697         res+="XX\n"
00698         return res

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif._to_vertical_matrix (   self,
  letters = None 
) [private]
Returns a string representation of the motif's PWM (computed without pseudocounts) as a vertical matrix, with one tab-separated row per position.

Definition at line 699 of file _Motif.py.

00699 
00700     def _to_vertical_matrix(self,letters=None):
00701         """Return string representation of the motif as  a matrix.
00702         
00703         """
00704         if letters==None:
00705             letters=self.alphabet.letters
00706         self._pwm_is_current=False
00707         pwm=self.pwm(laplace=False)
00708         res=""
00709         for i in range(self.length):
00710             res+="\t".join([str(pwm[i][a]) for a in letters])
00711             res+="\n"
00712         return res
    

Here is the call graph for this function:

def Bio.Motif._Motif.Motif._write (   self,
  stream 
) [private]
writes the motif to the stream

Definition at line 387 of file _Motif.py.

00387 
00388     def _write(self,stream):
00389         """
00390         writes the motif to the stream
00391         """
00392 
00393         stream.write(self.__str__())
00394             
00395             

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.add_instance (   self,
  instance 
)
adds new instance to the motif

Definition at line 47 of file _Motif.py.

00047 
00048     def add_instance(self,instance):
00049         """
00050         adds new instance to the motif
00051         """
00052         self._check_alphabet(instance.alphabet)
00053         self._check_length(len(instance))
00054         if self.has_counts:
00055             for i in range(self.length):
00056                 let=instance[i]
00057                 self.counts[let][i]+=1
00058 
00059         if self.has_instances or not self.has_counts:
00060             self.instances.append(instance)
00061             self.has_instances=True
00062             
00063         self._pwm_is_current = False
00064         self._log_odds_is_current = False
00065 
 

Here is the call graph for this function:

Here is the caller graph for this function:

returns the least probable pattern to be generated from this motif.

Definition at line 590 of file _Motif.py.

00590 
00591     def anticonsensus(self):
00592         """returns the least probable pattern to be generated from this motif.
00593         """
00594         res=""
00595         for i in range(self.length):
00596             min_f=10.0
00597             min_n="X"
00598             for n in sorted(self[i]):
00599                 if self[i][n]<min_f:
00600                     min_f=self[i][n]
00601                     min_n=n
00602             res+=min_n
00603         return Seq(res,self.alphabet)

Here is the caller graph for this function:

Returns the consensus sequence of a motif.

Definition at line 576 of file _Motif.py.

00576 
00577     def consensus(self):
00578         """Returns the consensus sequence of a motif.
00579         """
00580         res=""
00581         for i in range(self.length):
00582             max_f=0
00583             max_n="X"
00584             for n in sorted(self[i]):
00585                 if self[i][n]>max_f:
00586                     max_f=self[i][n]
00587                     max_n=n
00588             res+=max_n
00589         return Seq(res,self.alphabet)

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.dist_dpq (   self,
  other 
)

Definition at line 286 of file _Motif.py.

00286 
00287     def dist_dpq(self,other):
00288         r"""Calculates the DPQ distance measure between motifs.
00289 
00290         It is calculated as a maximal value of DPQ formula (shown using LaTeX
00291         markup, familiar to mathematicians):
00292         
00293         \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \
00294         \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) +
00295            m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k]))
00296         }
00297         
00298         over possible non-spaced alignemts of two motifs.  See this reference:
00299 
00300         D. M Endres and J. E Schindelin, "A new metric for probability
00301         distributions", IEEE transactions on Information Theory 49, no. 7
00302         (July 2003): 1858-1860.
00303         """
00304         
00305         min_d=float("inf")
00306         min_o=-1
00307         d_s=[]
00308         for offset in range(-self.length+1,other.length):
00309             #print "%2.3d"%offset,
00310             if offset<0:
00311                 d = self.dist_dpq_at(other,-offset)
00312                 overlap = self.length+offset
00313             else: #offset>=0
00314                 d = other.dist_dpq_at(self,offset)
00315                 overlap = other.length-offset
00316             overlap = min(self.length,other.length,overlap)
00317             out = self.length+other.length-2*overlap
00318             #print d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap
00319             #d = d/(2*overlap)
00320             d = (d/(out+overlap))*(2*overlap+out)/(2*overlap)
00321             #print d
00322             d_s.append((offset,d))
00323             if min_d> d:
00324                 min_d=d
00325                 min_o=-offset
00326         return min_d,min_o#,d_s
            

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.dist_dpq_at (   self,
  other,
  offset 
)
calculates the dist_dpq measure with a given offset.

offset should satisfy 0<=offset<=len(self)

Definition at line 327 of file _Motif.py.

00327 
00328     def dist_dpq_at(self,other,offset):
00329         """
00330         calculates the dist_dpq measure with a given offset.
00331 
00332         offset should satisfy 0<=offset<=len(self)
00333         """
00334         def dpq (f1,f2,alpha):
00335             s=0
00336             for n in alpha.letters:
00337                 avg=(f1[n]+f2[n])/2
00338                 s+=f1[n]*math.log(f1[n]/avg,2)+f2[n]*math.log(f2[n]/avg,2)
00339             return math.sqrt(s)
00340                 
00341         s=0        
00342         for i in range(max(self.length,offset+other.length)):
00343             f1=self[i]
00344             f2=other[i-offset]
00345             s+=dpq(f1,f2,self.alphabet)
00346         return s
            

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.dist_pearson (   self,
  motif,
  masked = 0 
)
Returns a distance based on Pearson correlation between the given motif and self: a pair (1 - the maximal correlation found over all tested offsets, the offset of that best alignment).

We use the Pearson's correlation of the respective probabilities.

Definition at line 217 of file _Motif.py.

00217 
00218     def dist_pearson(self, motif, masked = 0):
00219         """
00220         return the similarity score based on pearson correlation for the given motif against self.
00221 
00222         We use the Pearson's correlation of the respective probabilities.
00223         """
00224 
00225         if self.alphabet != motif.alphabet:
00226             raise ValueError("Cannot compare motifs with different alphabets")
00227 
00228         max_p=-2
00229         for offset in range(-self.length+1,motif.length):
00230             if offset<0:
00231                 p = self.dist_pearson_at(motif,-offset)
00232             else: #offset>=0
00233                 p = motif.dist_pearson_at(self,offset)
00234             
00235             if max_p<p:
00236                 max_p=p
00237                 max_o=-offset
00238         return 1-max_p,max_o

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.dist_pearson_at (   self,
  motif,
  offset 
)

Definition at line 239 of file _Motif.py.

00239 
00240     def dist_pearson_at(self,motif,offset):
00241         sxx = 0 # \sum x^2
00242         sxy = 0 # \sum x \cdot y
00243         sx = 0  # \sum x
00244         sy = 0  # \sum y
00245         syy = 0 # \sum x^2
00246         norm=max(self.length,offset+motif.length)
00247         
00248         for pos in range(max(self.length,offset+motif.length)):
00249             for l in self.alphabet.letters:
00250                 xi = self[pos][l]
00251                 yi = motif[pos-offset][l]
00252                 sx = sx + xi
00253                 sy = sy + yi
00254                 sxx = sxx + xi * xi
00255                 syy = syy + yi * yi
00256                 sxy = sxy + xi * yi
00257 
00258         norm *= len(self.alphabet.letters)
00259         s1 = (sxy - sx*sy*1.0/norm)
00260         s2 = (norm*sxx - sx*sx*1.0)*(norm*syy- sy*sy*1.0)
00261         return s1/math.sqrt(s2)

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.dist_product (   self,
  other 
)
A similarity measure taking into account the product probability of generating overlapping instances of two motifs.

Definition at line 262 of file _Motif.py.

00262 
00263     def dist_product(self,other):
00264         """
00265         A similarity measure taking into account a product probability of generating overlaping instances of two motifs
00266         """
00267         max_p=0.0
00268         for offset in range(-self.length+1,other.length):
00269             if offset<0:
00270                 p = self.dist_product_at(other,-offset)
00271             else: #offset>=0
00272                 p = other.dist_product_at(self,offset)
00273             if max_p<p:
00274                 max_p=p
00275                 max_o=-offset
00276         return 1-max_p/self.dist_product_at(self,0),max_o
            

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.dist_product_at (   self,
  other,
  offset 
)

Definition at line 277 of file _Motif.py.

00277 
00278     def dist_product_at(self,other,offset):
00279         s=0
00280         for i in range(max(self.length,offset+other.length)):
00281             f1=self[i]
00282             f2=other[i-offset]
00283             for n,b in self.background.iteritems():
00284                 s+=b*f1[n]*f2[n]
00285         return s/i

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.exp_score (   self,
  st_dev = False 
)
Computes the expected score of a motif's instance and, when st_dev is True, also its standard deviation.

Definition at line 147 of file _Motif.py.

00147 
00148     def exp_score(self,st_dev=False):
00149         """
00150         Computes expected score of motif's instance and its standard deviation
00151         """
00152         exs=0.0
00153         var=0.0
00154         pwm=self.pwm()
00155         for i in range(self.length):
00156             ex1=0.0
00157             ex2=0.0
00158             for a in self.alphabet.letters:
00159                 if pwm[i][a]!=0:
00160                     ex1+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2))
00161                     ex2+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2))**2
00162             exs+=ex1
00163             var+=ex2-ex1**2
00164         if st_dev:
00165             return exs,math.sqrt(var)
00166         else:
00167             return exs

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.format (   self,
  format 
)
Returns a string representation of the Motif in a given format

Currently supported formats:
 - jaspar-pfm : JASPAR Position Frequency Matrix
 - transfac : TRANSFAC like files
 - fasta : FASTA file with instances

Definition at line 740 of file _Motif.py.

00740 
00741     def format(self,format):
00742         """Returns a string representation of the Motif in a given format
00743 
00744         Currently supported fromats:
00745          - jaspar-pfm : JASPAR Position Frequency Matrix
00746          - transfac : TRANSFAC like files
00747          - fasta : FASTA file with instances
00748         """
00749 
00750         formatters={
00751             "jaspar-pfm":   self._to_jaspar_pfm,
00752             "transfac":     self._to_transfac,
00753             "fasta" :       self._to_fasta,
00754             }
00755 
00756         try:
00757             return formatters[format]()
00758         except KeyError:
00759             raise ValueError("Wrong format type")

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.ic (   self)
Method returning the information content of a motif.

Definition at line 135 of file _Motif.py.

00135 
00136     def ic(self):
00137         """Method returning the information content of a motif.
00138         """
00139         res=0
00140         pwm=self.pwm()
00141         for i in range(self.length):
00142             res+=2
00143             for a in self.alphabet.letters:
00144                 if pwm[i][a]!=0:
00145                     res+=pwm[i][a]*math.log(pwm[i][a],2)
00146         return res

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.log_odds (   self,
  laplace = True 
)
Returns the log-odds matrix computed for the set of instances.

Definition at line 117 of file _Motif.py.

00117 
00118     def log_odds(self,laplace=True):
00119         """
00120         returns the logg odds matrix computed for the set of instances
00121         """
00122         
00123         if self._log_odds_is_current:
00124             return self._log_odds
00125         #we need to compute new pwm
00126         self._log_odds = []
00127         pwm=self.pwm(laplace)
00128         for i in xrange(self.length):
00129             d = {}
00130             for a in self.alphabet.letters:
00131                     d[a]=math.log(pwm[i][a]/self.background[a],2)
00132             self._log_odds.append(d)
00133         self._log_odds_is_current=1
00134         return self._log_odds

Here is the call graph for this function:

Here is the caller graph for this function:

Creates the count matrix for a motif with instances.

Definition at line 519 of file _Motif.py.

00519 
00520     def make_counts_from_instances(self):
00521         """Creates the count matrix for a motif with instances.
00522 
00523         """
00524         #make strings for "columns" of motifs
00525         #col[i] is a column taken from aligned motif instances
00526         counts={}
00527         for a in self.alphabet.letters:
00528             counts[a]=[]
00529         self.has_counts=True
00530         s = len(self.instances)
00531         for i in range(self.length):
00532             ci = dict((a,0) for a in self.alphabet.letters)
00533             for inst in self.instances:
00534                 ci[inst[i]]+=1
00535             for a in self.alphabet.letters:
00536                 counts[a].append(ci[a])
00537         self.counts=counts
00538         return counts

Here is the caller graph for this function:

Creates "fake" instances for a motif created from a count matrix.

In case the sums of counts are different for different columns, the
shorter columns are padded with background.

Definition at line 489 of file _Motif.py.

00489 
00490     def make_instances_from_counts(self):
00491         """Creates "fake" instances for a motif created from a count matrix.
00492 
00493         In case the sums of counts are different for different columnes, the
00494         shorter columns are padded with background.
00495         """
00496         alpha="".join(self.alphabet.letters)
00497         #col[i] is a column taken from aligned motif instances
00498         col=[]
00499         self.has_instances=True
00500         self.instances=[]
00501         s = sum(map(lambda nuc: self.counts[nuc][0],self.alphabet.letters))
00502         for i in range(self.length):
00503             col.append("")
00504             for n in self.alphabet.letters:
00505                 col[i] = col[i]+ (n*(self.counts[n][i]))
00506             if len(col[i])<s:
00507                 print "WARNING, column too short",len(col[i]),s
00508                 col[i]+=(alpha*s)[:(s-len(col[i]))]
00509             #print i,col[i]
00510         #iterate over instances
00511         for i in range(s): 
00512             inst="" #start with empty seq
00513             for j in range(self.length): #iterate over positions
00514                 inst+=col[j][i]
00515             #print i,inst
00516             inst=Seq(inst,self.alphabet)                
00517             self.add_instance(inst)
00518         return self.instances

Here is the call graph for this function:

Here is the caller graph for this function:

Maximal possible score for this motif.

returns the score computed for the consensus sequence.

Definition at line 604 of file _Motif.py.

00604 
00605     def max_score(self):
00606         """Maximal possible score for this motif.
00607 
00608         returns the score computed for the consensus sequence.
00609         """
00610         return self.score_hit(self.consensus(),0)
    

Here is the call graph for this function:

Minimal possible score for this motif.

returns the score computed for the anticonsensus sequence.

Definition at line 611 of file _Motif.py.

00611 
00612     def min_score(self):
00613         """Minimal possible score for this motif.
00614 
00615         returns the score computed for the anticonsensus sequence.
00616         """
00617         return self.score_hit(self.anticonsensus(),0)

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.pwm (   self,
  laplace = True 
)
returns the PWM computed for the set of instances

if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions.

Definition at line 82 of file _Motif.py.

00082 
def pwm(self, laplace=True):
    """Return the PWM computed for this motif.

    The result is a list holding one FreqTable (built with
    FreqTable.COUNT) per motif position.  Counts come from self.counts
    when has_counts is set, otherwise they are tallied from
    self.instances.

    laplace -- if true (default), every letter starts from a pseudocount
               of self.beta * self.background[letter]; otherwise every
               letter starts from 0.0.

    The matrix is cached in self._pwm.  The cached matrix is returned only
    if it was built with the same laplace setting as the one requested;
    otherwise it is rebuilt.
    """
    laplace = bool(laplace)
    # A cache whose laplace setting was never recorded is treated as a
    # match, so a cache primed by other code keeps being honoured.
    cached_laplace = getattr(self, "_pwm_laplace", laplace)
    if self._pwm_is_current and cached_laplace == laplace:
        return self._pwm

    # we need to compute a new pwm
    letters = self.alphabet.letters
    matrix = []
    for i in range(self.length):
        # start every letter from its pseudocount (or from zero)
        if laplace:
            column = dict((letter, self.beta * self.background[letter])
                          for letter in letters)
        else:
            column = dict((letter, 0.0) for letter in letters)
        if self.has_counts:
            # taking the raw counts
            for letter in letters:
                column[letter] += self.counts[letter][i]
        elif self.has_instances:
            # counting the occurrences of letters in instances
            for seq in self.instances:
                try:
                    column[seq[i]] += 1
                except KeyError:
                    # we need to ignore non-alphabet letters
                    pass
        matrix.append(FreqTable.FreqTable(column, FreqTable.COUNT,
                                          self.alphabet))
    self._pwm = matrix
    self._pwm_laplace = laplace
    self._pwm_is_current = True
    return self._pwm

Here is the caller graph for this function:

Gives the reverse complement of the motif

Definition at line 408 of file _Motif.py.

00408 
def reverse_complement(self):
    """Return a new Motif that is the reverse complement of this one.

    If the motif has instances, the reverse complement of each instance
    is added to the result.  Otherwise the count columns are swapped
    between complementary letters (A<->T, C<->G) and reversed.

    The mask is copied in reversed order, so that each mask entry keeps
    describing the same column after the columns have been reversed.
    The result never shares its mask list with this motif.
    """
    res = Motif()
    if self.has_instances:
        for instance in self.instances:
            res.add_instance(instance.reverse_complement())
    else:  # has counts
        res.has_counts = True
        complement_pairs = (("A", "T"), ("T", "A"), ("G", "C"), ("C", "G"))
        for letter, partner in complement_pairs:
            # reversed copy of the complementary letter's counts
            res.counts[letter] = self.counts[partner][::-1]
        res.length = self.length
    # The columns were reversed above, so the mask must be reversed too.
    res.mask = self.mask[::-1]
    return res
00430 
        

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.scanPWM (   self,
  seq 
)
Matrix of log-odds scores for a nucleotide sequence.
 
scans a nucleotide sequence and returns the matrix of log-odds
scores for all positions.

- the result is a one-dimensional list or numpy array
- the sequence can only be a DNA sequence
- the search is performed only on one strand

Definition at line 760 of file _Motif.py.

00760 
def scanPWM(self, seq):
    """Matrix of log-odds scores for a nucleotide sequence.

    Scans a nucleotide sequence and returns the matrix of log-odds
    scores for all positions.

    - the result is a one-dimensional list or numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError if either the motif or the sequence does not use
    the IUPAC.unambiguous_dna alphabet.
    """
    dna = IUPAC.unambiguous_dna
    if self.alphabet != dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if seq.alphabet != dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    text = seq.tostring()

    # Prefer the compiled _pwm module; fall back to the Python
    # implementation when it cannot be imported.
    try:
        import _pwm
    except ImportError:
        return self._pwm_calculate(text)

    # One row per motif position, values ordered by sorted letter
    # (A, C, G, T for the DNA alphabet).
    rows = []
    for column in self.log_odds():
        rows.append([value for _letter, value in sorted(column.items())])
    return _pwm.calculate(text, rows)

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.score_hit (   self,
  sequence,
  position,
  normalized = 0,
  masked = 0 
)
Gives the PWM score of the motif at a given position of the sequence.

Definition at line 180 of file _Motif.py.

00180 
def score_hit(self, sequence, position, normalized=0, masked=0):
    """Give the PWM (log-odds) score for a given position.

    sequence   -- indexable sequence of letters to score
    position   -- offset in sequence where the motif is aligned
    normalized -- if true, divide the score by the number of scored
                  columns (self.length, or the number of set mask
                  entries when masked is true)
    masked     -- if true, only columns whose self.mask entry is set
                  contribute to the score

    Letters missing from a log-odds column (e.g. non-alphabet letters)
    contribute nothing.  Any other error raised during the lookup
    propagates to the caller.
    """
    log_odds = self.log_odds()
    score = 0.0
    for offset in range(self.length):
        # Read the letter before looking at the mask, so a sequence that
        # is too short always raises IndexError.
        letter = sequence[position + offset]
        if masked and not self.mask[offset]:
            continue
        try:
            score += log_odds[offset][letter]
        except KeyError:
            # letter is not part of this column's alphabet: ignore it
            pass
    if normalized:
        if masked:
            score /= len([x for x in self.mask if x])
        else:
            score /= self.length
    return score
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.search_instances (   self,
  sequence 
)
A generator function that yields the positions at which instances of the motif are found in a given sequence.

Definition at line 168 of file _Motif.py.

00168 
def search_instances(self, sequence):
    """Generate the positions where motif instances occur in a sequence.

    This is a generator function.  For every start position whose window
    of self.length letters equals the text of one of self.instances, it
    yields a (position, instance) tuple.  At most one hit is reported
    per position: the first instance in self.instances with that text.

    Raises ValueError (on first iteration) if the motif has no instances.
    """
    if not self.has_instances:
        raise ValueError ("This motif has no instances")
    width = self.length
    # Convert every instance to text once; setdefault keeps the first
    # instance for each distinct text, matching first-match semantics.
    first_by_text = {}
    for instance in self.instances:
        first_by_text.setdefault(instance.tostring(), instance)
    # Convert the scanned sequence once instead of once per window.
    text = sequence.tostring()
    last_start = len(text) - width
    pos = 0
    # A counter loop avoids building a list of all positions.
    while pos <= last_start:
        instance = first_by_text.get(text[pos:pos + width])
        if instance is not None:
            yield (pos, instance)
        pos += 1

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.search_pwm (   self,
  sequence,
  normalized = 0,
  masked = 0,
  threshold = 0.0,
  both = True 
)
A generator function that yields the hits found in a given sequence whose PWM score is higher than the threshold.

Definition at line 200 of file _Motif.py.

00200 
def search_pwm(self, sequence, normalized=0, masked=0, threshold=0.0, both=True):
    """Generate the PWM hits in a sequence that score above a threshold.

    This is a generator function.  The sequence is converted to an
    upper-case string and every window of self.length letters is scored
    with score_hit(); normalized and masked are passed through to it.

    For each position, a (position, score) tuple is yielded when the
    score is strictly greater than threshold.  If both is true, the
    reverse complement of the motif is scored against the same window
    and a hit is yielded as (-position, score).

    NOTE: a reverse-strand hit at position 0 is yielded with position 0,
    so it cannot be told apart from a forward hit by its position.
    """
    reverse_motif = self.reverse_complement() if both else None

    text = sequence.tostring().upper()
    last_start = len(text) - self.length
    start = 0
    while start <= last_start:
        forward = self.score_hit(text, start, normalized, masked)
        if forward > threshold:
            yield (start, forward)
        if both:
            backward = reverse_motif.score_hit(text, start, normalized, masked)
            if backward > threshold:
                yield (-start, backward)
        start += 1

Here is the call graph for this function:

def Bio.Motif._Motif.Motif.set_mask (   self,
  mask 
)
sets the mask for the motif

The mask should be a string containing asterisks in the position of significant columns and spaces in other columns

Definition at line 66 of file _Motif.py.

00066 
def set_mask(self, mask):
    """Set the mask for the motif.

    The mask should be a string containing asterisks in the position of
    significant columns and spaces in other columns.  It is stored in
    self.mask as a list with 1 for each '*' and 0 for each ' '.

    The length of the mask is first passed to self._check_length().
    Raises ValueError if the mask contains any other character; in that
    case the previously stored mask is left untouched.
    """
    self._check_length(len(mask))
    flags = {"*": 1, " ": 0}
    # Build the new mask aside and store it only once fully validated, so
    # a bad character cannot leave a truncated mask on the motif.
    new_mask = []
    for char in mask:
        if char not in flags:
            raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
        new_mask.append(flags[char])
    self.mask = new_mask
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Motif._Motif.Motif.weblogo (   self,
  fname,
  format = "PNG",
  kwds 
)
uses the Berkeley weblogo service to download and save a weblogo of itself

requires an internet connection.
The parameters from **kwds are passed directly to the weblogo server.

Definition at line 618 of file _Motif.py.

00618 
def weblogo(self, fname, format="PNG", **kwds):
    """Download a weblogo of this motif and save it to a file.

    Uses the Berkeley weblogo service; requires an internet connection.

    fname  -- path of the file the returned image is written to
    format -- image format requested from the server (default "PNG")
    kwds   -- passed directly to the weblogo server; each value is
              converted with str() and overrides the default of the same
              name

    The response is read completely before fname is opened, so a failed
    download does not leave an empty file behind.  The file is written in
    binary mode because the payload is image data.
    """
    import urllib
    import urllib2
    al = self._to_fasta()
    url = 'http://weblogo.berkeley.edu/logo.cgi'
    # 'color1' used to be listed twice ('green', then 'black'); only the
    # last value of a duplicated key takes effect, so 'black' is kept.
    values = {'sequence' : al,
              'format' : format,
              'logowidth' : '18',
              'logoheight' : '5',
              'logounits' : 'cm',
              'kind' : 'AUTO',
              'firstnum' : "1",
              'command' : 'Create Logo',
              'smallsamplecorrection' : "on",
              'symbolsperline' : 32,
              'res' : '96',
              'res_units' : 'ppi',
              'antialias' : 'on',
              'title' : '',
              'barbits' : '',
              'xaxis': 'on',
              'xaxis_label'  : '',
              'yaxis': 'on',
              'yaxis_label' : '',
              'showends' : 'on',
              'shrink' : '0.5',
              'fineprint' : 'on',
              'ticbits' : '1',
              'colorscheme' : 'DEFAULT',
              'color1' : 'black',
              'color2' : 'blue',
              'color3' : 'red',
              'color4' : 'black',
              'color5' : 'purple',
              'color6' : 'orange',
              }
    for k, v in kwds.items():
        values[k] = str(v)

    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    response = urllib2.urlopen(req)
    try:
        im = response.read()
    finally:
        response.close()

    # try/finally guarantees the handle is closed even if the write fails
    f = open(fname, "wb")
    try:
        f.write(im)
    finally:
        f.close()
00673   

Here is the call graph for this function:


Member Data Documentation

Definition at line 25 of file _Motif.py.

Definition at line 24 of file _Motif.py.

Definition at line 23 of file _Motif.py.

Definition at line 22 of file _Motif.py.

Definition at line 26 of file _Motif.py.

Definition at line 28 of file _Motif.py.

Definition at line 30 of file _Motif.py.

Definition at line 19 of file _Motif.py.

Definition at line 20 of file _Motif.py.

Definition at line 18 of file _Motif.py.

Definition at line 31 of file _Motif.py.

Definition at line 17 of file _Motif.py.

Definition at line 27 of file _Motif.py.

Definition at line 21 of file _Motif.py.

Definition at line 32 of file _Motif.py.


The documentation for this class was generated from the following file: