1 """Code to interact with and run various EMBOSS programs.
2
3 These classes follow the AbstractCommandline interfaces for running
4 programs.
5 """
6
7 from Bio import Application
8 from Bio.Application import _Option, _Argument
9
11 """Commandline object for the Primer3 interface from EMBOSS.
12 """
# Constructor body of the Primer3 (EMBOSS) command-line wrapper: initialise
# the base command line, remember the executable name passed in as ``cmd``
# and register every supported option.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# The two options that carry help text; both are created with a 1 in the
# fourth position (presumably "required" -- confirm against
# Bio.Application._Option).
options = [
    _Option(["-sequence"], ["input"], None, 1,
            "Sequence to choose primers from"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file name"),
]

# Remaining switches are all plain "input" options with a 0 in the fourth
# position and no help text.  Each entry is the list of accepted names.
optional_names = (
    ["-task"],
    ["-numreturn"],
    ["-includedregion"],
    ["-target"],
    ["-excludedregion"],
    ["-forwardinput"],
    ["-reverseinput"],
    ["-gcclamp"],
    ["-osize"],
    ["-minsize"],
    ["-maxsize"],
    ["-otm"],
    ["-mintm"],
    ["-maxtm"],
    ["-maxdifftm"],
    ["-ogcpercent"],
    ["-mingc"],
    ["-maxgc"],
    ["-saltconc"],
    ["-dnaconc"],
    # Fix: this was spelled "-maxployx", which is not an eprimer3
    # qualifier (compare "-oligomaxpolyx" below).  The old spelling is
    # kept as a second name so existing callers keep working.
    # NOTE(review): assumes _Option emits names[0] on the command line
    # and treats the other names as aliases -- confirm in Bio.Application.
    ["-maxpolyx", "-maxployx"],
    ["-productosize"],
    ["-productsizerange"],
    ["-productotm"],
    ["-productmintm"],
    ["-productmaxtm"],
    ["-oligoexcluderegion"],
    ["-oligoinput"],
    ["-oligosize"],
    ["-oligominsize"],
    ["-oligomaxsize"],
    ["-oligotm"],
    ["-oligomintm"],
    ["-oligomaxtm"],
    ["-oligoogcpercent"],
    ["-oligomingc"],
    ["-oligomaxgc"],
    ["-oligosaltconc"],
    ["-oligodnaconc"],
    ["-oligoselfany"],
    ["-oligoselfend"],
    ["-oligomaxpolyx"],
)
options.extend([_Option(names, ["input"], None, 0)
                for names in optional_names])

self.parameters = options
64
66 """Commandline object for the primersearch program from EMBOSS.
67 """
def __init__(self, cmd="primersearch"):
    """Set up the primersearch command line.

    cmd is stored as the program name; it defaults to "primersearch".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # Columns: flag, type tags, 1/0 switch (presumably "required" --
    # confirm against Bio.Application._Option), help text.
    option_table = (
        ("-sequences", ("input",), 1,
         "Sequence to look for the primer pairs in."),
        ("-primers", ("input", "file"), 1,
         "File containing the primer pairs to search for."),
        ("-out", ("output", "file"), 1,
         "Name of the output file."),
        ("-mismatchpercent", ("input",), 1,
         "Allowed percentage mismatch."),
    )
    self.parameters = [
        _Option([flag], list(tags), None, switch, help_text)
        for flag, tags, switch, help_text in option_table
    ]
81
83 """Commandline object for the eprotdist program from EMBOSS.
84
85 This is an EMBOSS wrapper around protdist from PHYLIP.
86 """
# Constructor body of the eprotdist wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-msf", ("input",), 1, "File containing sequences"),
    ("-outfile", ("output",), 1, "Output file name"),
    ("-method", ("input",), 1, "Choose the method to use"),
    ("-categ", ("input",), 0, "Choose the categorie to use"),
    ("-gencode", ("input",), 0, "Which genetic code"),
    ("-prob", ("input",), 0, "Prob change category (1.0=easy)"),
    ("-tranrate", ("input",), 0, "Transition/transversion ratio"),
    ("-freqa", ("input",), 0, "Frequency for A"),
    ("-freqc", ("input",), 0, "Frequency for C"),
    ("-freqg", ("input",), 0, "Frequency for G"),
    ("-freqt", ("input",), 0, "Frequency for T"),
    ("-printdata", ("input",), 0, "Print out the data at start of run"),
    ("-progress", ("input",), 0, "Print indications of progress of run"),
    ("-basefrequency", ("input",), 0, "Use empirical base frequencies"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
120
122 """Commandline object for the eneighbor program from EMBOSS.
123
124 This is an EMBOSS wrapper around neighbor from PHYLIP.
125 """
# Constructor body of the eneighbor wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-infile", ("input",), 1, "infile value"),
    ("-outfile", ("output",), 1, "Output file name"),
    ("-trout", ("input",), 1, "Create a tree file"),
    ("-treefile", ("input",), 1, "Tree file name"),
    ("-nj", ("input",), 1, "Neighbor-joining"),
    ("-noog", ("input",), 1, "Outgroup root"),
    ("-outgnum", ("input",), 0, "number of the outgroup"),
    ("-randseed", ("input",), 0, "Random number seed (must be odd)"),
    ("-datasets", ("input",), 0, "How many data sets"),
    ("-drawtree", ("input",), 0, "Draw tree"),
    ("-lt", ("input",), 0, "Lower-triangular data matrix"),
    ("-ut", ("input",), 0, "Upper-triangular data matrix"),
    ("-sr", ("input",), 0, "Subreplicates"),
    ("-random", ("input",), 0, "Randomize input order of species"),
    ("-multsets", ("input",), 0, "Analyze multiple data sets"),
    ("-printdata", ("input",), 0, "Print out the data at start of run"),
    ("-progress", ("input",), 0, "Print indications of progress of run"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
165
167 """Commandline object for the eprotpars program from EMBOSS.
168
169 This is an EMBOSS wrapper around protpars from PHYLIP.
170 """
# Constructor body of the eprotpars wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-msf", ("input", "file"), 1, "Sequences file to be read in"),
    ("-outfile", ("output", "file"), 1, "Output file"),
    ("-besttree", ("input",), 0, "Search for the best tree"),
    ("-random", ("input",), 0, "Randomize input order of species"),
    ("-norandom", ("input",), 0,
     "Do not randomize input order of species"),
    ("-randseed", ("input",), 0, "Random number seed (must be odd)"),
    ("-randtimes", ("input",), 0, "How many times to randomize"),
    ("-og", ("input",), 0, "Use an outgroup root"),
    ("-noog", ("input",), 0, "Do not use an outgroup root"),
    ("-outgnum", ("input",), 0, "Number of the outgroup"),
    ("-thresh", ("input",), 0, "Use Threshold parsimony"),
    ("-valthresh", ("input",), 0, "threshold value"),
    ("-printdata", ("input",), 0, "Print out the data at start of run"),
    ("-progress", ("input",), 0, "Print indications of progress of run"),
    ("-steps", ("input",), 0, "Print out steps in each site"),
    ("-seqatnodes", ("input",), 0, "Print sequences at all nodes of tree"),
    ("-drawtree", ("input",), 0, "Draw tree"),
    ("-trout", ("input",), 0, "Create a tree file"),
    ("-notrout", ("input",), 0, "Do not create a tree file"),
    ("-treefile", ("output", "file"), 0, "Output treefile name"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
216
218 """Commandline object for the econsense program from EMBOSS.
219
220 This is an EMBOSS wrapper around consense from PHYLIP.
221 """
# Constructor body of the econsense wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-infile", ("input", "file"), 1,
     "file to read in (New Hampshire standard form)"),
    ("-outfile", ("output", "file"), 1, "Output file name"),
    ("-notrout", ("input",), 0, "Do not create a tree file"),
    ("-trout", ("input",), 0, "Create a tree file"),
    ("-treefile", ("output", "file"), 0, "tree file name"),
    ("-noog", ("input",), 0, "Do not use an outgroup"),
    ("-og", ("input",), 0, "Use an outgroup"),
    ("-outgnum", ("input",), 0, "number of the outgroup"),
    ("-nodrawtree", ("input",), 0, "Do not draw a tree"),
    ("-drawtree", ("input",), 0, "Draw tree"),
    ("-root", ("input",), 0, "Trees to be treated as Rooted"),
    ("-progress", ("input",), 0,
     "Print indications of the progress of run"),
    ("-noprintsets", ("input",), 0,
     "Do not print out the sets of species"),
    ("-printsets", ("input",), 0, "Print out the sets of species"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
255
257 """Commandline object for the eseqboot program from EMBOSS.
258
259 This is an EMBOSS wrapper around seqboot from PHYLIP.
260 """
# Constructor body of the eseqboot wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-datafile", ("input", "file"), 1, "Input file"),
    ("-outfile", ("output", "file"), 1, "Output file name"),
    ("-randseed", ("input",), 1, "Random number seed (must be odd)"),
    ("-method", ("input",), 1, "Choose the method"),
    ("-test", ("input",), 1, "Choose test"),
    ("-reps", ("input",), 1, "How many replicates"),
    ("-inter", ("input",), 0, "Interleaved input"),
    ("-enzymes", ("input",), 0, "Present in input file"),
    ("-all", ("input",), 0, "All alleles present at each locus"),
    ("-printdata", ("input",), 0, "Print out the data at start of run"),
    ("-progress", ("input",), 0, "Print indications of progress of run"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
288
290 """Commandline object for the water program from EMBOSS.
291 """
# Constructor body of the water wrapper: initialise the base command line,
# store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-asequence", ("input", "file"), 1, "First sequence to align"),
    ("-bsequence", ("input", "file"), 1, "Second sequence to align"),
    ("-gapopen", ("input",), 1, "Gap open penalty"),
    ("-gapextend", ("input",), 1, "Gap extension penalty"),
    ("-outfile", ("output", "file"), 1, "Output file for the alignment"),
    ("-datafile", ("input", "file"), 0, "Matrix file"),
    ("-similarity", ("input",), 0,
     "Display percent identity and similarity"),
    ("-nosimilarity", ("input",), 0,
     "Do not display percent identity and similarity"),
    ("-aformat", ("input",), 0,
     "Display output in a different specified output format"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
315
317 """Commandline object for the fuzznuc program from EMBOSS.
318 """
# Constructor body of the fuzznuc wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-sequence", ("input",), 1, "Sequence database USA"),
    ("-pattern", ("input",), 1,
     "Search pattern, using standard IUPAC one-letter codes"),
    ("-mismatch", ("input",), 1, "Number of mismatches"),
    ("-outfile", ("output", "file"), 1, "Output report file name"),
    ("-complement", ("input",), 0, "Search complementary strand"),
    ("-rformat", ("input",), 0,
     "Specify the report format to output in."),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
336
338 """Commandline object for the est2genome program from EMBOSS.
339 """
def __init__(self, cmd="est2genome"):
    """Set up the est2genome command line.

    cmd is stored as the program name; it defaults to "est2genome".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # Columns: flag, type tags, 1/0 switch (presumably "required" --
    # confirm against Bio.Application._Option), help text.  Long help
    # texts rely on adjacent string literals being joined.
    option_table = (
        ("-est", ("input",), 1, "EST sequence(s)"),
        ("-genome", ("input",), 1, "Genomic sequence"),
        ("-outfile", ("output", "file"), 1, "Output file name"),
        ("-match", ("input",), 0, "Score for matching two bases"),
        ("-mismatch", ("input",), 0, "Cost for mismatching two bases"),
        ("-gappenalty", ("input",), 0,
         "Cost for deleting a single base in either sequence, "
         "excluding introns"),
        ("-intronpenalty", ("input",), 0,
         "Cost for an intron, independent of length."),
        ("-splicepenalty", ("input",), 0,
         "Cost for an intron, independent of length "
         "and starting/ending on donor-acceptor sites"),
        ("-minscore", ("input",), 0,
         "Exclude alignments with scores below this threshold score."),
        ("-reverse", ("input",), 0,
         "Reverse the orientation of the EST sequence"),
        ("-splice", ("input",), 0,
         "Use donor and acceptor splice sites."),
        ("-mode", ("input",), 0,
         "This determines the comparion mode. 'both', 'forward' "
         "'reverse'"),
        ("-best", ("input",), 0,
         "You can print out all comparisons instead of just the best"),
        ("-space", ("input",), 0, "for linear-space recursion."),
        ("-shuffle", ("input",), 0, "Shuffle"),
        ("-seed", ("input",), 0, "Random number seed"),
        ("-align", ("input",), 0, "Show the alignment."),
        ("-width", ("input",), 0, "Alignment width"),
    )
    self.parameters = [
        _Option([flag], list(tags), None, switch, help_text)
        for flag, tags, switch, help_text in option_table
    ]
385
387 """Commandline object for the etandem program from EMBOSS.
388 """
# Constructor body of the etandem wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-sequence", ("input", "file"), 1, "Sequence"),
    ("-minrepeat", ("input",), 1, "Minimum repeat size"),
    ("-maxrepeat", ("input",), 1, "Maximum repeat size"),
    ("-outfile", ("output", "file"), 1, "Output report file name"),
    ("-threshold", ("input",), 0, "Threshold score"),
    ("-mismatch", ("input",), 0, "Allow N as a mismatch"),
    ("-uniform", ("input",), 0, "Allow uniform consensus"),
    ("-rformat", ("output",), 0, "Output report format"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
410
412 """Commandline object for the einverted program from EMBOSS.
413 """
# Constructor body of the einverted wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
# NOTE(review): "-gap" is tagged as a "file" although its help text calls
# it a penalty; this looks copied from "-sequence" -- confirm before
# changing, it is reproduced here unchanged.
option_table = (
    ("-sequence", ("input", "file"), 1, "Sequence"),
    ("-gap", ("input", "file"), 1, "Gap penalty"),
    ("-threshold", ("input",), 1, "Minimum score threshold"),
    ("-match", ("input",), 1, "Match score"),
    ("-mismatch", ("input",), 1, "Mismatch score"),
    ("-outfile", ("output", "file"), 1, "Output report file name"),
    ("-maxrepeat", ("input",), 0,
     "Maximum separation between the start and end of repeat"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
434
436 """Commandline object for the palindrome program from EMBOSS.
437 """
def __init__(self, cmd="palindrome"):
    """Set up the palindrome command line.

    cmd is stored as the program name; it defaults to "palindrome".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # Columns: flag, type tags, help text.  Every option here is created
    # with a 1 in the fourth position (presumably "required" -- confirm
    # against Bio.Application._Option).
    option_table = (
        ("-sequence", ("input", "file"), "Sequence"),
        ("-minpallen", ("input",), "Minimum palindrome length"),
        ("-maxpallen", ("input",), "Maximum palindrome length"),
        ("-gaplimit", ("input",), "Maximum gap between repeats"),
        ("-nummismatches", ("input",), "Number of mismatches allowed"),
        ("-overlap", ("input",), "Report overlapping matches"),
        ("-outfile", ("output", "file"), "Output report file name"),
    )
    self.parameters = [
        _Option([flag], list(tags), None, 1, help_text)
        for flag, tags, help_text in option_table
    ]
458
460 """Commandline object for the tranalign program from EMBOSS.
461 """
# Constructor body of the tranalign wrapper: initialise the base command
# line, store the program name given as ``cmd`` and register the options.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Columns: flag, type tags, 1/0 switch (presumably "required" -- confirm
# against Bio.Application._Option), help text.
option_table = (
    ("-asequence", ("input", "file"), 1,
     "Nucleotide sequences to be aligned."),
    ("-bsequence", ("input", "file"), 1, "Protein sequence alignment"),
    ("-outseq", ("output", "file"), 1, "Output sequence file."),
    ("-table", ("input",), 0, "Code to use"),
)
self.parameters = [
    _Option([flag], list(tags), None, switch, help_text)
    for flag, tags, switch, help_text in option_table
]
475