Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames] | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14  RecordParser     Parses a GenePop record (file) into a Record object. 
 15   
 16  _Scanner         Scans a GenePop record. 
 17  _RecordConsumer  Consumes GenePop data to a Record object. 
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23  from types import * 
 24   
 25  from Bio import File 
 26  from Bio.ParserSupport import * 
 27   
 28   
class Record:
    """Holds information from a GenePop record.

    Members:
    marker_len       The marker length (2 or 3 digit code per allele).

    comment_line     Comment line.

    loci_list        List of loci names.

    populations      List of population data.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    A missing allele is stored as None and written back out as zeros.
    """

    def __init__(self):
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.populations = []

    def __str__(self):
        """Returns the record as text: comment line, one locus name per
        line, then a 'Pop' line followed by one line per individual for
        each population.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            # Missing alleles are written as all zeros
                            al = '0'
                        # Left-pad every allele with zeros to marker_len
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, one per population and in the same
                    order as self.populations. An IndexError is raised
                    if there are fewer names than populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            # Deep copies keep the new records independent from this one
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused; accepted only so that existing callers keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Keep every individual, but only its marker for locus i
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).
        """
        del self.populations[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the marker at the
        same position is dropped from every individual.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. An unknown name
        is silently ignored.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            # Locus does not exist: nothing to remove
            return
        self.remove_locus_by_position(pos)
class RecordParser(AbstractParser):
    """Parser that turns GenePop data into a Record object.

    """

    def __init__(self):
        # The scanner reads the file and reports events; the consumer
        # receives those events and builds the record.
        self._consumer = _RecordConsumer()
        self._scanner = _Scanner()

    def parse(self, handle):
        """Scans handle and returns the data held by the consumer."""
        scanner, consumer = self._scanner, self._consumer
        scanner.feed(handle, consumer)
        return consumer.data
162
def parse(handle):
    """Parses a handle containing a GenePop file.

    Convenience wrapper: builds a RecordParser and returns whatever its
    parse method returns for handle.
    """
    return RecordParser().parse(handle)
168
class _Scanner:
    """Scans a GenePop record.

    There is only one record per file.

    """

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in a GenePop unit record for scanning.  handle is a file-like
        object that contains a Genepop record.  consumer is a
        Consumer object that will receive events as the report is scanned.

        Raises ValueError if an empty line is read before the first
        'Pop' line.
        """
        uhandle = handle
        if not isinstance(uhandle, File.UndoHandle):
            uhandle = File.UndoHandle(handle)

        consumer.start_record()

        # First line of the file is the comment line
        consumer.comment(uhandle.readline().rstrip())

        # Loci come either one per line or all on a single line separated
        # by space or comma+space. Commas are dropped from that first
        # loci line before splitting.
        first_loci_line = uhandle.readline().rstrip().replace(',', '')
        loci_on_line = first_loci_line.split(' ')
        if len(loci_on_line) > 1:
            # All loci in one line
            for locus_name in loci_on_line:
                consumer.loci_name(locus_name)
        else:
            consumer.loci_name(first_loci_line)

        # Every further line up to the first 'Pop' is one more locus name
        header = uhandle.readline().rstrip()
        while header.upper() != 'POP':
            if header == '':
                raise ValueError('No population data found, file probably not GenePop related')
            consumer.loci_name(header)
            header = uhandle.readline().rstrip()
        consumer.start_pop()

        # The marker length is decided once, from the first individual
        marker_len = None
        line = uhandle.readline().rstrip()
        while line != '':
            if line.upper() != 'POP':
                (indiv_name, marker_line) = line.split(',')
                # Markers are separated by spaces or tabs; drop the empty
                # strings produced by repeated separators
                fields = marker_line.replace('\t', ' ').split(' ')
                markers = [field for field in fields if field != '']
                if marker_len is None:
                    if len(markers[0]) == 4:
                        # 2 digits per allele
                        marker_len = 2
                    else:
                        marker_len = 3
                    consumer.marker_len(marker_len)
                # Each marker holds two alleles of marker_len digits each
                allele_list = [
                    (int(marker[0:marker_len]), int(marker[marker_len:]))
                    for marker in markers
                ]
                consumer.individual(indiv_name, allele_list)
            else:
                consumer.start_pop()
            line = uhandle.readline().rstrip()
        consumer.end_record()
239
class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a GenePop record to a Record object.

    Members:
    data    Record with GenePop data.

    """

    def __init__(self):
        self.data = None

    def start_record(self):
        """Starts a new, empty Record."""
        self.data = Record()

    def end_record(self):
        """Replaces every allele equal to 0 with None.

        Only the first len(loci_list) markers of each individual are
        converted; each converted marker is stored back as a tuple.
        """
        loci_count = len(self.data.loci_list)
        for pop in self.data.populations:
            for indiv_name, markers in pop:
                for mk_i in range(loci_count):
                    mk_real = []
                    for al in markers[mk_i]:
                        if al == 0:
                            mk_real.append(None)
                        else:
                            mk_real.append(al)
                    markers[mk_i] = tuple(mk_real)

    def comment(self, comment_line):
        """Stores the comment line of the record."""
        # NOTE(review): the body of this method was missing from the
        # listing; restored to fill Record.comment_line - confirm against
        # the upstream source.
        self.data.comment_line = comment_line

    def loci_name(self, locus):
        """Appends a locus name to the record."""
        self.data.loci_list.append(locus)

    def marker_len(self, marker_len):
        """Stores the number of digits used per allele."""
        # NOTE(review): the body of this method was missing from the
        # listing; restored to fill Record.marker_len - confirm against
        # the upstream source.
        self.data.marker_len = marker_len

    def start_pop(self):
        """Opens a new, empty population and makes it the current one."""
        self.current_pop = []
        self.data.populations.append(self.current_pop)

    def individual(self, indiv_name, allele_list):
        """Adds an individual to the current population."""
        self.current_pop.append((indiv_name, allele_list))
283