Package Bio :: Package SubsMat :: Module FreqTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.SubsMat.FreqTable

 1  from Bio import Alphabet 
 2  COUNT = 1 
 3  FREQ = 2 
 4  ################################################################## 
 5  # A class to handle frequency tables 
 6  # Copyright Iddo Friedberg idoerg@cc.huji.ac.il 
 7  # Biopython (http://biopython.org) license applies 
 8  # Methods to read a letter frequency or a letter count file: 
 9  # Example files for a DNA alphabet: 
10  # 
11  # A count file (whitespace separated): 
12  # 
13  # A  50 
14  # C  37 
15  # G  23 
16  # T  58 
17  # 
18  # The same info as a frequency file: 
19  # 
20  # A 0.2976 
21  # C 0.2202 
22  # G 0.1369 
23  # T 0.3452 
24  #  
25  # Functions: 
26  #   read_count(f): read a count file from stream f. Then convert to 
27  #   frequencies 
28  #   read_freq(f): read a frequency data file from stream f. Of course, we then 
29  #   don't have the counts, but it is usually the letter frequencies which are 
30  #   interesting. 
31  # 
32  # Methods: 
33  #   (all internal) 
34  # Attributes: 
35  #   alphabet: The IUPAC alphabet set (or any other) whose letters you are 
36  #   using. Common sets are: IUPAC.protein (20-letter protein), 
37  #   IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more. 
38  #   data: frequency dictionary. 
39  #   count: count dictionary. Empty if no counts are provided. 
40  # 
41  # Example of use: 
42  #   >>> from Bio.SubsMat import FreqTable 
43  #   >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ) 
44  #   >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT) 
45  #   >>> ftab = FreqTable.read_count(open('myDNACountFile')) 
46  # 
47  #   
48  ################################################################## 
class FreqTable(dict):
    """Letter-frequency table: a dict mapping each letter to its frequency.

    Attributes:
        alphabet: the alphabet given to the constructor. When none is given
            (or it is falsy), a generic ``Alphabet.Alphabet()`` whose
            ``letters`` attribute is the table's keys joined in sorted order.
        count: the count dictionary the table was built from (the caller's
            own object, not a copy), or an empty dict when the table was
            built directly from frequencies.
    """

    def _freq_from_count(self):
        """Store, for every letter in ``self.count``, count / total count.

        Raises:
            ZeroDivisionError: if ``self.count`` is non-empty but its values
                add up to zero.
        """
        # Starting the sum at 0.0 makes the total a float, so the division
        # below is a true division even where "/" on two ints would truncate.
        total = sum(self.count.values(), 0.0)
        for letter, occurrences in self.count.items():
            self[letter] = occurrences / total

    def _alphabet_from_input(self):
        """Return the table's keys concatenated into one string, sorted."""
        # sorted() accepts any iterable of keys; calling .sort() on the
        # result of keys() only works where keys() returns a list.
        return "".join(sorted(self))

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build the table from a count or a frequency dictionary.

        Args:
            in_dict: dictionary keyed by letter, holding counts or
                frequencies depending on ``dict_type``.
            dict_type: ``COUNT`` if ``in_dict`` holds counts (frequencies are
                then computed from them), ``FREQ`` if it already holds
                frequencies (they are copied in as-is).
            alphabet: optional alphabet object to attach to the table. If
                omitted, one is derived from the table's own keys.

        Raises:
            ValueError: if ``dict_type`` is neither ``COUNT`` nor ``FREQ``.
        """
        self.alphabet = alphabet
        if dict_type == COUNT:
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if not alphabet:
            # No alphabet supplied: fall back to a generic one whose letters
            # are exactly the keys now present in the table.
            self.alphabet = Alphabet.Alphabet()
            self.alphabet.letters = self._alphabet_from_input()
79
def read_count(f):
    """Read a letter-count file and return the corresponding FreqTable.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a letter and its integer count. Blank or whitespace-only lines are
    skipped, so a trailing empty line no longer aborts the read.

    Args:
        f: an iterable of text lines, e.g. an open file.

    Returns:
        A FreqTable built with ``COUNT``, i.e. with frequencies computed
        from the counts read.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields, or
            its second field is not a valid integer.
    """
    count = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Blank line: nothing to parse.
            continue
        key, value = fields
        count[key] = int(value)
    return FreqTable(count, COUNT)
87
def read_freq(f):
    """Read a letter-frequency file and return the corresponding FreqTable.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a letter and its frequency as a float. Blank or whitespace-only lines
    are skipped, so a trailing empty line no longer aborts the read.

    Args:
        f: an iterable of text lines, e.g. an open file.

    Returns:
        A FreqTable built with ``FREQ``; the frequencies are stored as read.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields, or
            its second field is not a valid float.
    """
    freq_dict = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Blank line: nothing to parse.
            continue
        key, value = fields
        freq_dict[key] = float(value)
    return FreqTable(freq_dict, FREQ)
94