1
2
3
4
5
6 """
7 This module provides code to work with GenePop.
8
9 See http://wbiomed.curtin.edu.au/genepop/ , the format is documented
10 here: http://wbiomed.curtin.edu.au/genepop/help_input.html .
11
12 Classes:
13 Record Holds GenePop data.
14 RecordParser Parses a GenePop record (file) into a Record object.
15
16 _Scanner Scans a GenePop record.
17 _RecordConsumer Consumes GenePop data to a Record object.
18
19 Partially inspired by the MedLine code.
20
21 """
22 from copy import deepcopy
23 from types import *
24
25 from Bio import File
26 from Bio.ParserSupport import *
27
28
30 """Holds information from a GenePop record.
31
32 Members:
33 marker_len The marker length (2 or 3 digit code per allele).
34
35 comment_line Comment line.
36
37 loci_list List of loci names.
38
39 populations List of population data.
40
41 populations has one element per population. Each element is itself
42 a list of individuals; each individual is a pair composed of the
43 individual's name and a list of alleles (2 per marker). Example:
44 [
45 [
46 ('Ind1', [(1,2), (3,3), (200,201)]),
47 ('Ind2', [(2,None), (3,3), (None,None)]),
48 ],
49 [
50 ('Other1', [(1,1), (4,3), (200,200)]),
51 ]
52 ]
53
54
55 """
61
63 rep = [self.comment_line + '\n']
64 rep.append('\n'.join(self.loci_list) + '\n')
65 for pop in self.populations:
66 rep.append('Pop\n')
67 for indiv in pop:
68 name, markers = indiv
69 rep.append(name)
70 rep.append(',')
71 for marker in markers:
72 rep.append(' ')
73 for al in marker:
74 if al == None:
75 al = '0'
76 aStr = str(al)
77 while len(aStr)<self.marker_len:
78 aStr = "".join(['0', aStr])
79 rep.append(aStr)
80 rep.append('\n')
81 return "".join(rep)
82
84 """Splits a GP record in a dictionary with 1 pop per entry.
85
86 Given a record with n pops and m loci returns a dictionary
87 of records (key pop_name) where each item is a record
88 with a single pop and m loci.
89
90 Parameters:
91 pop_names - Population names
92 """
93 gp_pops = {}
94 for i in range(len(self.populations)):
95 gp_pop = GenePop.Record()
96 gp_pop.marker_len = self.marker_len
97 gp_pop.comment_line = self.comment_line
98 gp_pop.loci_list = deepcopy(self.loci_list)
99 gp_pop.populations = [deepcopy(self.populations[i])]
100 gp_pops[pop_names[i]] = gp_pop
101 return gp_pops
102
104 """Splits a GP record in a dictionary with 1 locus per entry.
105
106 Given a record with n pops and m loci returns a dictionary
107 of records (key locus name) where each item is a record
108 with a single locus and n pops.
109 """
110 gp_loci = {}
111 for i in range(len(self.loci_list)):
112 gp_pop = GenePop.Record()
113 gp_pop.marker_len = self.marker_len
114 gp_pop.comment_line = self.comment_line
115 gp_pop.loci_list = [self.loci_list[i]]
116 gp_pop.populations = []
117 for pop in self.populations:
118 my_pop = []
119 for indiv in pop:
120 my_pop.append((indiv[0], [indiv[1][i]]))
121 gp_pop.populations.append(my_pop)
122 gp_loci[gp_pop.loci_list[0]] = gp_pop
123 return gp_loci
124
125
127 """Removes a population (by position).
128 """
129 del self.populations[pos]
130
132 """Removes a locus by position.
133 """
134 del self.loci_list[pos]
135 for pop in self.populations:
136 for indiv in pop:
137 name, loci = indiv
138 del loci[pos]
139
147
148
149
150
152 """Parses GenePop data into a Record object.
153
154 """
158
def parse(self, handle):
    """Scan handle and return the data gathered by the consumer.

    Passes handle, together with self._consumer, to self._scanner.feed()
    and then returns the consumer's ``data`` attribute.

    Parameters:
    handle - Object handed unchanged to the scanner's feed() method.
    """
    self._scanner.feed(handle, self._consumer)
    return self._consumer.data
162
164 """Parses a handle containing a GenePop file.
165 """
166 parser = RecordParser()
167 return parser.parse(handle)
168
170 """Scans a GenePop record.
171
172 There is only one record per file.
173
174 """
175
def feed(self, handle, consumer):
    """feed(self, handle, consumer)

    Feed in a GenePop unit record for scanning. handle is a file-like
    object that contains a Genepop record. consumer is a
    Consumer object that will receive events as the report is scanned.

    Only handle.readline() is used. The events sent to consumer are,
    in order:
    start_record()             once, before anything is read
    comment(line)              the first line of the record
    loci_name(name)            once per locus name
    start_pop()                for every 'Pop' line (any letter case)
    marker_len(n)              once, before the first individual
    individual(name, alleles)  once per individual; alleles is a list
                               of (int, int) pairs, one per marker
    end_record()               once, at end of input

    Raises ValueError if the input ends (or an empty line is met) before
    the first 'Pop' line, or if an individual line has no comma.
    """
    # Lines are only ever read forwards and nothing is pushed back, so the
    # handle is used directly; any object with readline() works.
    consumer.start_record()

    consumer.comment(handle.readline().rstrip())

    # The first loci line may hold several names separated by commas
    # and/or spaces. Commas are turned into separators (not deleted) so
    # that 'a,b' and 'a, b' both give two loci, and split() with no
    # argument never produces empty names from repeated spaces.
    sample_loci_line = handle.readline().rstrip()
    all_loci = sample_loci_line.replace(',', ' ').split()
    # A line with no name at all still registers one empty locus name.
    for locus in all_loci or ['']:
        consumer.loci_name(locus)

    # Any further lines up to the first 'Pop' hold one locus name each.
    next_line = handle.readline().rstrip()
    while next_line.upper() != 'POP':
        if next_line == '':
            raise ValueError('No population data found, file probably not GenePop related')
        consumer.loci_name(next_line)
        next_line = handle.readline().rstrip()
    consumer.start_pop()

    marker_len = None  # digits per allele, taken from the first individual
    while True:
        raw_line = handle.readline()
        if raw_line == '':
            break  # readline() returns '' only at end of input
        line = raw_line.rstrip()
        if line == '':
            # A blank line inside the data is skipped instead of being
            # mistaken for end of input, which would drop what follows.
            continue
        if line.upper() == 'POP':
            consumer.start_pop()
            continue
        if ',' not in line:
            raise ValueError('Individual line without a comma: %r' % line)
        # Genotypes never contain commas, so the last comma is the
        # separator; this lets the individual's name contain commas.
        indiv_name, marker_line = line.rsplit(',', 1)
        markers = marker_line.split()
        if marker_len is None:
            # A 4-character genotype means 2 digits per allele; anything
            # else is read as 3 digits per allele.
            if len(markers[0]) == 4:
                marker_len = 2
            else:
                marker_len = 3
            consumer.marker_len(marker_len)
        allele_list = [
            (int(marker[:marker_len]), int(marker[marker_len:]))
            for marker in markers
        ]
        consumer.individual(indiv_name, allele_list)
    consumer.end_record()
239
241 """Consumer that converts a GenePop record to a Record object.
242
243 Members:
244 data Record with GenePop data.
245
246 """
249
252
254 pops = self.data.populations
255 loci = self.data.loci_list
256 for pop_i in range(len(pops)):
257 for indiv_i in range(len(pops[pop_i])):
258 for mk_i in range(len(loci)):
259 mk_orig = pops[pop_i][indiv_i][1][mk_i]
260 mk_real = []
261 for al in mk_orig:
262 if al == 0:
263 mk_real.append(None)
264 else:
265 mk_real.append(al)
266 pops[pop_i][indiv_i][1][mk_i] = tuple(mk_real)
267
270
273
276
278 self.current_pop = []
279 self.data.populations.append(self.current_pop)
280
282 self.current_pop.append((indiv_name, allele_list))
283