Back to index

python-biopython  1.60
Functions
Bio.SeqUtils.lcc Namespace Reference

Functions

def lcc_mult
def lcc_simp

Function Documentation

def Bio.SeqUtils.lcc.lcc_mult (   seq,
  wsize 
)
Local Composition Complexity (LCC) values over sliding window.

Returns a list of floats, the LCC values for a sliding window over
the sequence.

seq - an unambiguous DNA sequence (a string or Seq object)
wsize - window size, integer

The result is the same as applying lcc_simp multiple times, but this
version is optimized for speed. The optimization works by using the
value of the previous window as a base to compute the next one.

Definition at line 9 of file lcc.py.

00009 
def lcc_mult(seq, wsize):
    """Local Composition Complexity (LCC) values over sliding window.

    Returns a list with the LCC value of every window of length wsize
    over the sequence.  The list starts with a placeholder 0 (kept so
    that existing callers see the same list shape), followed by one
    value per window position, i.e. 1 + (len(seq) - wsize + 1) entries
    when wsize <= len(seq).

    seq - an unambiguous DNA sequence (a string or Seq object); the
          sequence is upper-cased first, so case is ignored
    wsize - window size, positive integer

    Raises ValueError if wsize is smaller than 1.

    Every window value is computed with the same formula as lcc_simp,
    but this version is optimized for speed: the base counts of the
    previous window are updated incrementally (one base leaves, one
    base enters) instead of being recounted for every window.
    """
    if wsize < 1:
        raise ValueError("wsize must be a positive integer")
    l2 = math.log(2)
    tamseq = len(seq)
    try:
        # Assume it's a string
        upper = seq.upper()
    except AttributeError:
        # Should be a Seq object then
        upper = seq.tostring().upper()

    # compone[k] is the entropy term for a base seen k times in a window:
    # (k/wsize) * log2(k/wsize).  compone[0] is 0 so that absent bases
    # contribute nothing and log(0) is never evaluated.
    compone = [0]
    for i in range(wsize):
        frac = (i + 1) / float(wsize)
        compone.append(frac * (math.log(frac) / l2))

    # Count the bases on the upper-cased sequence so that lower-case
    # input is handled the same way as upper-case input.
    window = upper[0:wsize]
    counts = {}
    for base in 'ACTG':
        counts[base] = window.count(base)

    def current_lcc():
        # Fixed A, C, T, G summation order keeps the floating point
        # result stable from window to window.
        return -(compone[counts['A']] + compone[counts['C']]
                 + compone[counts['T']] + compone[counts['G']])

    lccsal = [0]
    lccsal.append(current_lcc())
    for x in range(tamseq - wsize):
        outgoing = upper[x]            # base that leaves the window
        incoming = upper[x + wsize]    # base that enters the window
        if outgoing != incoming:
            # Characters other than A, C, G, T are not counted; a value
            # is still emitted for the window so positions stay aligned.
            if outgoing in counts:
                counts[outgoing] -= 1
            if incoming in counts:
                counts[incoming] += 1
        lccsal.append(current_lcc())
    return lccsal

Here is the caller graph for this function:

def Bio.SeqUtils.lcc.lcc_simp (   seq )
Local Composition Complexity (LCC) for a sequence.

seq - an unambiguous DNA sequence (a string or Seq object)

Returns the Local Composition Complexity (LCC) value for the entire
sequence (as a float).

Reference:
Andrzej K Konopka (2005) Sequence Complexity and Composition
DOI: 10.1038/npg.els.0005260

Definition at line 120 of file lcc.py.

00120 
def lcc_simp(seq):
    """Local Composition Complexity (LCC) for a sequence.

    seq - an unambiguous DNA sequence (a string or Seq object); the
          sequence is upper-cased first, so case is ignored

    Returns the Local Composition Complexity (LCC) value for the entire
    sequence (as a float): minus the sum, over the bases A, C, T and G
    present in the sequence, of p * log2(p), where p is the count of
    the base divided by the sequence length.  A sequence containing
    none of these bases (including the empty sequence) yields 0.

    Reference:
    Andrzej K Konopka (2005) Sequence Complexity and Composition
    DOI: 10.1038/npg.els.0005260
    """
    wsize = len(seq)
    try:
        # Assume it's a string
        upper = seq.upper()
    except AttributeError:
        # Should be a Seq object then
        upper = seq.tostring().upper()
    l2 = math.log(2)
    total = 0
    for base in 'ACTG':
        # Count on the upper-cased sequence so lower-case bases are
        # not mistaken for absent ones.
        count = upper.count(base)
        if count:
            # Only bases that occur contribute; this also avoids
            # calculating the log of 0.
            frac = count / float(wsize)
            total += frac * (math.log(frac) / l2)
    return -total

Here is the caller graph for this function: