Back to index

python-biopython  1.60
FreqTable.py
Go to the documentation of this file.
00001 from Bio import Alphabet
# Selector values for the dict_type argument of FreqTable.
COUNT = 1  # the input dictionary holds raw letter counts
FREQ = 2  # the input dictionary already holds letter frequencies
00004 ##################################################################
00005 # A class to handle frequency tables
00006 # Copyright Iddo Friedberg idoerg@cc.huji.ac.il
00007 # Biopython (http://biopython.org) license applies
00008 # Methods to read a letter frequency or a letter count file:
00009 # Example files for a DNA alphabet:
00010 #
00011 # A count file (whitespace separated):
00012 #
00013 # A  50
00014 # C  37
00015 # G  23
00016 # T  58
00017 #
00018 # The same info as a frequency file:
00019 #
00020 # A 0.2976
00021 # C 0.2202
00022 # G 0.1369
00023 # T 0.3452
00024 # 
00025 # Functions:
00026 #   read_count(f): read a count file from stream f. Then convert to
00027 #   frequencies
00028 #   read_freq(f): read a frequency data file from stream f. Of course, we then
00029 #   don't have the counts, but it is usually the letter frequencies which are
00030 #   interesting.
00031 #
00032 # Methods:
00033 #   (all internal)
00034 # Attributes:
00035 #   alphabet: The IUPAC alphabet set (or any other) whose letters you are
00036 #   using. Common sets are: IUPAC.protein (20-letter protein),
00037 #   IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more.
00038 #   data: frequency dictionary.
00039 #   count: count dictionary. Empty if no counts are provided.
00040 #
00041 # Example of use:
00042 #   >>> from SubsMat import FreqTable
00043 #   >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ)
00044 #   >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT)
00045 #   >>> ftab = FreqTable.read_count(open('myDNACountFile'))
00046 #
00047 #  
00048 ##################################################################
class FreqTable(dict):
    """Letter-frequency table: a dict mapping each letter to its frequency.

    Attributes:
      alphabet: the alphabet passed to the constructor or, when none was
        given, a Bio.Alphabet.Alphabet whose ``letters`` attribute is the
        table's keys sorted and joined into one string.
      count: the count dictionary the table was built from (the very
        object passed in, not a copy); an empty dict when the table was
        built from frequencies.
    """

    def _freq_from_count(self):
        """Fill the table with count / total for every letter in self.count.

        An empty self.count leaves the table empty. A non-empty self.count
        whose values sum to zero raises ZeroDivisionError.
        """
        total = float(sum(self.count.values()))
        # items() instead of the Python 2-only iteritems(), so the loop
        # runs unchanged on both Python 2 and Python 3.
        for letter, n in self.count.items():
            self[letter] = n / total

    def _alphabet_from_input(self):
        """Return the table's keys, sorted and concatenated into a string."""
        return ''.join(sorted(self))

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build the table from a count or a frequency dictionary.

        Arguments:
          in_dict: dictionary keyed by letter; values are counts when
            dict_type is COUNT, frequencies when dict_type is FREQ.
          dict_type: COUNT or FREQ, saying how to interpret in_dict.
          alphabet: alphabet object to attach to the table. When omitted
            (or falsy) one is derived from the table's own keys.

        Raises:
          ValueError: if dict_type is neither COUNT nor FREQ.
        """
        self.alphabet = alphabet
        if dict_type == COUNT:
            # The caller's dictionary is kept by reference as self.count.
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if not alphabet:
            # No alphabet supplied: synthesize one from the letters seen.
            self.alphabet = Alphabet.Alphabet()
            self.alphabet.letters = self._alphabet_from_input()
00075 
def read_count(f):
    """Read a letter-count file and return the matching FreqTable.

    Every non-blank line of ``f`` must hold a letter and an integer count
    separated by whitespace, e.g. ``A  50``. Blank lines are skipped.

    Arguments:
      f: an iterable of text lines, such as an open file.

    Returns:
      A FreqTable built with dict_type COUNT from the counts read.

    Raises:
      ValueError: if a non-blank line does not have exactly two fields,
        or its second field is not an integer.
    """
    count = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Blank line (for instance a trailing empty line): there is
            # nothing to unpack, so skip it instead of failing.
            continue
        key, value = fields
        count[key] = int(value)
    return FreqTable(count, COUNT)
00083 
def read_freq(f):
    """Read a letter-frequency file and return the matching FreqTable.

    Every non-blank line of ``f`` must hold a letter and a frequency
    separated by whitespace, e.g. ``A 0.2976``. Blank lines are skipped.

    Arguments:
      f: an iterable of text lines, such as an open file.

    Returns:
      A FreqTable built with dict_type FREQ from the frequencies read.

    Raises:
      ValueError: if a non-blank line does not have exactly two fields,
        or its second field cannot be converted to a float.
    """
    freq_dict = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Blank line (for instance a trailing empty line): there is
            # nothing to unpack, so skip it instead of failing.
            continue
        key, value = fields
        freq_dict[key] = float(value)
    return FreqTable(freq_dict, FREQ)
00090