
python-biopython  1.60
Bio.LogisticRegression Namespace Reference

Classes

class  LogisticRegression

Functions

def train
def calculate
def classify

Function Documentation

def Bio.LogisticRegression.calculate(lr, x)

calculate(lr, x) -> list of probabilities

Calculate the probability for each class.  lr is a
LogisticRegression object.  x is the observed data.  Returns a
list of the probabilities that the observation belongs to each class.

Definition at line 113 of file LogisticRegression.py.

def calculate(lr, x):
    """calculate(lr, x) -> list of probabilities

    Calculate the probability for each class.  lr is a
    LogisticRegression object.  x is the observed data.  Returns a
    list of the probabilities that the observation belongs to each
    class.

    """
    # Insert a constant term for x.
    x = numpy.asarray([1.0] + x)
    # Calculate the probability.  p = e^(beta X) / (1+e^(beta X))
    ebetaX = numpy.exp(numpy.dot(lr.beta, x))
    p = ebetaX / (1 + ebetaX)
    return [1 - p, p]
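As a quick sketch of the calling convention: the coefficients below are invented purely for illustration (they would normally come from train), and x must have one element fewer than lr.beta, since the constant term is prepended internally.

from Bio.LogisticRegression import LogisticRegression, calculate

lr = LogisticRegression()
lr.beta = [0.5, -1.2, 2.0]         # hypothetical coefficients; constant term first

probs = calculate(lr, [1.0, 0.5])  # a two-dimensional observation
print(probs)                       # [P(class 0), P(class 1)]; the two sum to 1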


def Bio.LogisticRegression.classify(lr, x)

classify(lr, x) -> 1 or 0

Classify an observation into a class.

Definition at line 128 of file LogisticRegression.py.

def classify(lr, x):
    """classify(lr, x) -> 1 or 0

    Classify an observation into a class.

    """
    probs = calculate(lr, x)
    if probs[0] > probs[1]:
        return 0
    return 1
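Since calculate returns [1-p, p], classify amounts to thresholding the class-1 probability at 0.5 (ties go to class 1). A minimal sketch, reusing the hypothetical coefficients from the calculate example above:

from Bio.LogisticRegression import LogisticRegression, classify

lr = LogisticRegression()
lr.beta = [0.5, -1.2, 2.0]       # hypothetical coefficients, as above

print(classify(lr, [1.0, 0.5]))  # -> 1 here, since P(class 1) comes out above 0.5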


def Bio.LogisticRegression.train(xs, ys, update_fn=None, typecode=None)

train(xs, ys[, update_fn]) -> LogisticRegression

Train a logistic regression classifier on a training set.  xs is a
list of observations and ys is a list of the class assignments,
which should be 0 or 1.  xs and ys should contain the same number
of elements.  update_fn is an optional callback function that
takes as parameters the iteration number and the log likelihood.
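
Internally, train fits beta by Newton-Raphson iteration on the log likelihood. As a sketch of the step the listing below implements (with X carrying a leading constant column, and W = diag(p) in this implementation):

\[
\beta \leftarrow \beta + (X^{\top} W X)^{-1}\, X^{\top} (y - p),
\qquad
p_i = \frac{e^{\beta^{\top} x_i}}{1 + e^{\beta^{\top} x_i}}
\]

Whenever the log likelihood decreases from one iteration to the next, the previous beta is restored and the step size applied to the update is halved.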

Definition at line 34 of file LogisticRegression.py.

def train(xs, ys, update_fn=None, typecode=None):
    """train(xs, ys[, update_fn]) -> LogisticRegression

    Train a logistic regression classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments,
    which should be 0 or 1.  xs and ys should contain the same number
    of elements.  update_fn is an optional callback function that
    takes as parameters the iteration number and the log likelihood.

    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys should be the same length.")
    classes = set(ys)
    if classes != set([0, 1]):
        raise ValueError("Classes should be 0's and 1's")
    if typecode is None:
        typecode = 'd'

    # Dimensionality of the data is the dimensionality of the
    # observations plus a constant dimension.
    N, ndims = len(xs), len(xs[0]) + 1
    if N == 0 or ndims == 1:
        raise ValueError("No observations or observation of 0 dimension.")

    # Make an X array, with a constant first dimension.
    X = numpy.ones((N, ndims), typecode)
    X[:, 1:] = xs
    Xt = numpy.transpose(X)
    y = numpy.asarray(ys, typecode)

    # Initialize the beta parameter to 0.
    beta = numpy.zeros(ndims, typecode)

    MAX_ITERATIONS = 500
    CONVERGE_THRESHOLD = 0.01
    stepsize = 1.0
    # Now iterate using Newton-Raphson until the log-likelihoods
    # converge.
    i = 0
    old_beta = old_llik = None
    while i < MAX_ITERATIONS:
        # Calculate the probabilities.  p = e^(beta X) / (1+e^(beta X))
        ebetaX = numpy.exp(numpy.dot(beta, Xt))
        p = ebetaX / (1 + ebetaX)

        # Find the log likelihood score and see if I've converged.
        logp = y * numpy.log(p) + (1 - y) * numpy.log(1 - p)
        llik = sum(logp)
        if update_fn is not None:
            update_fn(i, llik)
        if old_llik is not None:
            # Check to see if the likelihood decreased.  If it did, then
            # restore the old beta parameters and halve the step size.
            if llik < old_llik:
                stepsize = stepsize / 2.0
                beta = old_beta
            # If I've converged, then stop.
            if numpy.fabs(llik - old_llik) <= CONVERGE_THRESHOLD:
                break
        old_llik, old_beta = llik, beta
        i += 1

        W = numpy.identity(N) * p
        Xtyp = numpy.dot(Xt, y - p)              # Calculate the first derivative.
        XtWX = numpy.dot(numpy.dot(Xt, W), X)    # Calculate the second derivative.
        delta = numpy.linalg.solve(XtWX, Xtyp)
        if numpy.fabs(stepsize - 1.0) > 0.001:
            delta = delta * stepsize
        beta = beta + delta                      # Update beta.
    else:
        raise RuntimeError("Didn't converge.")

    lr = LogisticRegression()
    lr.beta = map(float, beta)   # Convert back to a plain list of Python floats.
    return lr
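
As an end-to-end sketch, here is a toy run; the observations, labels, and the show_progress callback are made up purely for illustration:

from Bio.LogisticRegression import train, calculate, classify

# Toy training set: two 2-dimensional observations per class (illustrative only).
xs = [[0.1, 0.2], [0.3, 0.1], [2.1, 2.4], [2.3, 1.9]]
ys = [0, 0, 1, 1]

def show_progress(iteration, loglik):
    # Optional callback, invoked once per Newton-Raphson iteration.
    print("iteration %d: log likelihood %f" % (iteration, loglik))

lr = train(xs, ys, update_fn=show_progress)

print(calculate(lr, [0.2, 0.15]))  # [P(class 0), P(class 1)]
print(classify(lr, [2.0, 2.2]))    # expected to be 1 for this toy data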