1 """Code to interact with and run various EMBOSS programs.
2
3 These classes follow the AbstractCommandline interfaces for running
4 programs.
5 """
6
7 from Bio import Application
8 from Bio.Application import _Option
9
11 """Commandline object for the Primer3 interface from EMBOSS.
12 """
# Run the shared AbstractCommandline set-up, then record the executable
# name supplied by the caller.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably the
# "required" flag -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-sequence"], ["input"], None, 1,
            "Sequence to choose primers from"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file name"),
]

# Every remaining qualifier is declared identically (type "input", no
# checker, fourth argument 0, no description), so only the flag names
# are listed.  The order is kept exactly as it was declared before.
tuning_flags = (
    "-task", "-numreturn", "-includedregion", "-target",
    "-excludedregion", "-forwardinput", "-reverseinput", "-gcclamp",
    "-osize", "-minsize", "-maxsize", "-otm", "-mintm", "-maxtm",
    "-maxdifftm", "-ogcpercent", "-mingc", "-maxgc", "-saltconc",
    "-dnaconc", "-maxpolyx", "-productosize", "-productsizerange",
    "-productotm", "-productmintm", "-productmaxtm",
    "-oligoexcluderegion", "-oligoinput", "-oligosize",
    "-oligominsize", "-oligomaxsize", "-oligotm", "-oligomintm",
    "-oligomaxtm", "-oligoogcpercent", "-oligomingc", "-oligomaxgc",
    "-oligosaltconc", "-oligodnaconc", "-oligoselfany",
    "-oligoselfend", "-oligomaxpolyx", "-mispriminglibraryfile",
    "-maxmispriming", "-oligomishyblibraryfile", "-oligomaxmishyb",
    "-explainflag",
)

# This option was previously registered only under the misspelt name
# "-maxployx"; the qualifier is spelt "-maxpolyx" (compare
# "-oligomaxpolyx").  The old spelling stays as a second name so existing
# callers keep working.
# NOTE(review): assumes _Option emits names[0] on the command line and
# accepts any listed name on lookup -- confirm in Bio.Application.
legacy_aliases = {"-maxpolyx": ["-maxployx"]}

self.parameters = mandatory + [
    _Option([flag] + legacy_aliases.get(flag, []), ["input"], None, 0)
    for flag in tuning_flags
]
70
72 """Commandline object for the primersearch program from EMBOSS.
73 """
def __init__(self, cmd = "primersearch"):
    """Register the primersearch options on this command line object.

    cmd is stored as the program name; it defaults to "primersearch".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # All four options carry 1 as the fourth _Option argument (presumably
    # "required" -- confirm against Bio.Application._Option).
    option_table = []
    option_table.append(
        _Option(["-sequences"], ["input"], None, 1,
                "Sequence to look for the primer pairs in."))
    option_table.append(
        _Option(["-primers"], ["input", "file"], None, 1,
                "File containing the primer pairs to search for."))
    option_table.append(
        _Option(["-out"], ["output", "file"], None, 1,
                "Name of the output file."))
    option_table.append(
        _Option(["-mismatchpercent"], ["input"], None, 1,
                "Allowed percentage mismatch."))
    self.parameters = option_table
87
89 """Commandline object for the eprotdist program from EMBOSS.
90
91 This is an EMBOSS wrapper around protdist from PHYLIP.
92 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-msf"], ["input"], None, 1,
            "File containing sequences"),
    _Option(["-outfile"], ["output"], None, 1,
            "Output file name"),
    _Option(["-method"], ["input"], None, 1,
            "Choose the method to use"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-categ"], ["input"], None, 0,
            "Choose the categorie to use"),
    _Option(["-gencode"], ["input"], None, 0,
            "Which genetic code"),
    _Option(["-prob"], ["input"], None, 0,
            "Prob change category (1.0=easy)"),
    _Option(["-tranrate"], ["input"], None, 0,
            "Transition/transversion ratio"),
    _Option(["-freqa"], ["input"], None, 0,
            "Frequency for A"),
    _Option(["-freqc"], ["input"], None, 0,
            "Frequency for C"),
    _Option(["-freqg"], ["input"], None, 0,
            "Frequency for G"),
    _Option(["-freqt"], ["input"], None, 0,
            "Frequency for T"),
    _Option(["-printdata"], ["input"], None, 0,
            "Print out the data at start of run"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of progress of run"),
    _Option(["-basefrequency"], ["input"], None, 0,
            "Use empirical base frequencies"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
126
128 """Commandline object for the eneighbor program from EMBOSS.
129
130 This is an EMBOSS wrapper around neighbor from PHYLIP.
131 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-infile"], ["input"], None, 1,
            "infile value"),
    _Option(["-outfile"], ["output"], None, 1,
            "Output file name"),
    _Option(["-trout"], ["input"], None, 1,
            "Create a tree file"),
    _Option(["-treefile"], ["input"], None, 1,
            "Tree file name"),
    _Option(["-nj"], ["input"], None, 1,
            "Neighbor-joining"),
    _Option(["-noog"], ["input"], None, 1,
            "Outgroup root"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-outgnum"], ["input"], None, 0,
            "number of the outgroup"),
    _Option(["-randseed"], ["input"], None, 0,
            "Random number seed (must be odd)"),
    _Option(["-datasets"], ["input"], None, 0,
            "How many data sets"),
    _Option(["-drawtree"], ["input"], None, 0,
            "Draw tree"),
    _Option(["-lt"], ["input"], None, 0,
            "Lower-triangular data matrix"),
    _Option(["-ut"], ["input"], None, 0,
            "Upper-triangular data matrix"),
    _Option(["-sr"], ["input"], None, 0,
            "Subreplicates"),
    _Option(["-random"], ["input"], None, 0,
            "Randomize input order of species"),
    _Option(["-multsets"], ["input"], None, 0,
            "Analyze multiple data sets"),
    _Option(["-printdata"], ["input"], None, 0,
            "Print out the data at start of run"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of progress of run"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
171
173 """Commandline object for the eprotpars program from EMBOSS.
174
175 This is an EMBOSS wrapper around protpars from PHYLIP.
176 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-msf"], ["input", "file"], None, 1,
            "Sequences file to be read in"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-besttree"], ["input"], None, 0,
            "Search for the best tree"),
    _Option(["-random"], ["input"], None, 0,
            "Randomize input order of species"),
    _Option(["-norandom"], ["input"], None, 0,
            "Do not randomize input order of species"),
    _Option(["-randseed"], ["input"], None, 0,
            "Random number seed (must be odd)"),
    _Option(["-randtimes"], ["input"], None, 0,
            "How many times to randomize"),
    _Option(["-og"], ["input"], None, 0,
            "Use an outgroup root"),
    _Option(["-noog"], ["input"], None, 0,
            "Do not use an outgroup root"),
    _Option(["-outgnum"], ["input"], None, 0,
            "Number of the outgroup"),
    _Option(["-thresh"], ["input"], None, 0,
            "Use Threshold parsimony"),
    _Option(["-valthresh"], ["input"], None, 0,
            "threshold value"),
    _Option(["-printdata"], ["input"], None, 0,
            "Print out the data at start of run"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of progress of run"),
    _Option(["-steps"], ["input"], None, 0,
            "Print out steps in each site"),
    _Option(["-seqatnodes"], ["input"], None, 0,
            "Print sequences at all nodes of tree"),
    _Option(["-drawtree"], ["input"], None, 0,
            "Draw tree"),
    _Option(["-trout"], ["input"], None, 0,
            "Create a tree file"),
    _Option(["-notrout"], ["input"], None, 0,
            "Do not create a tree file"),
    _Option(["-treefile"], ["output", "file"], None, 0,
            "Output treefile name"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
222
224 """Commandline object for the econsense program from EMBOSS.
225
226 This is an EMBOSS wrapper around consense from PHYLIP.
227 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-infile"], ["input", "file"], None, 1,
            "file to read in (New Hampshire standard form)"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file name"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-notrout"], ["input"], None, 0,
            "Do not create a tree file"),
    _Option(["-trout"], ["input"], None, 0,
            "Create a tree file"),
    _Option(["-treefile"], ["output", "file"], None, 0,
            "tree file name"),
    _Option(["-noog"], ["input"], None, 0,
            "Do not use an outgroup"),
    _Option(["-og"], ["input"], None, 0,
            "Use an outgroup"),
    _Option(["-outgnum"], ["input"], None, 0,
            "number of the outgroup"),
    _Option(["-nodrawtree"], ["input"], None, 0,
            "Do not draw a tree"),
    _Option(["-drawtree"], ["input"], None, 0,
            "Draw tree"),
    _Option(["-root"], ["input"], None, 0,
            "Trees to be treated as Rooted"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of the progress of run"),
    _Option(["-noprintsets"], ["input"], None, 0,
            "Do not print out the sets of species"),
    _Option(["-printsets"], ["input"], None, 0,
            "Print out the sets of species"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
261
263 """Commandline object for the eseqboot program from EMBOSS.
264
265 This is an EMBOSS wrapper around seqboot from PHYLIP.
266 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-datafile"], ["input", "file"], None, 1,
            "Input file"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file name"),
    _Option(["-randseed"], ["input"], None, 1,
            "Random number seed (must be odd)"),
    _Option(["-method"], ["input"], None, 1,
            "Choose the method"),
    _Option(["-test"], ["input"], None, 1,
            "Choose test"),
    _Option(["-reps"], ["input"], None, 1,
            "How many replicates"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-inter"], ["input"], None, 0,
            "Interleaved input"),
    _Option(["-enzymes"], ["input"], None, 0,
            "Present in input file"),
    _Option(["-all"], ["input"], None, 0,
            "All alleles present at each locus"),
    _Option(["-printdata"], ["input"], None, 0,
            "Print out the data at start of run"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of progress of run"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
294
296 """Commandline object for the water program from EMBOSS.
297 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-asequence"], ["input", "file"], None, 1,
            "First sequence to align"),
    _Option(["-bsequence"], ["input", "file"], None, 1,
            "Second sequence to align"),
    _Option(["-gapopen"], ["input"], None, 1,
            "Gap open penalty"),
    _Option(["-gapextend"], ["input"], None, 1,
            "Gap extension penalty"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file for the alignment"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-datafile"], ["input", "file"], None, 0,
            "Matrix file"),
    _Option(["-similarity"], ["input"], None, 0,
            "Display percent identity and similarity"),
    _Option(["-nosimilarity"], ["input"], None, 0,
            "Do not display percent identity and similarity"),
    _Option(["-aformat"], ["input"], None, 0,
            "Display output in a different specified output format"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
321
323 """Commandline object for the fuzznuc program from EMBOSS.
324 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-sequence"], ["input"], None, 1,
            "Sequence database USA"),
    _Option(["-pattern"], ["input"], None, 1,
            "Search pattern, using standard IUPAC one-letter codes"),
    _Option(["-mismatch"], ["input"], None, 1,
            "Number of mismatches"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-complement"], ["input"], None, 0,
            "Search complementary strand"),
    _Option(["-rformat"], ["input"], None, 0,
            "Specify the report format to output in."),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
342
344 """Commandline object for the est2genome program from EMBOSS.
345 """
def __init__(self, cmd = "est2genome"):
    """Register the est2genome options on this command line object.

    cmd is stored as the program name; it defaults to "est2genome".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # Options declared with 1 as the fourth _Option argument (presumably
    # "required" -- confirm against Bio.Application._Option).
    mandatory = [
        _Option(["-est"], ["input"], None, 1,
                "EST sequence(s)"),
        _Option(["-genome"], ["input"], None, 1,
                "Genomic sequence"),
        _Option(["-outfile"], ["output", "file"], None, 1,
                "Output file name"),
    ]

    # Options declared with 0 in that position.
    optional = [
        _Option(["-match"], ["input"], None, 0,
                "Score for matching two bases"),
        _Option(["-mismatch"], ["input"], None, 0,
                "Cost for mismatching two bases"),
        _Option(["-gappenalty"], ["input"], None, 0,
                "Cost for deleting a single base in either sequence, "
                "excluding introns"),
        _Option(["-intronpenalty"], ["input"], None, 0,
                "Cost for an intron, independent of length."),
        _Option(["-splicepenalty"], ["input"], None, 0,
                "Cost for an intron, independent of length "
                "and starting/ending on donor-acceptor sites"),
        _Option(["-minscore"], ["input"], None, 0,
                "Exclude alignments with scores below this threshold "
                "score."),
        _Option(["-reverse"], ["input"], None, 0,
                "Reverse the orientation of the EST sequence"),
        _Option(["-splice"], ["input"], None, 0,
                "Use donor and acceptor splice sites."),
        # Help text repaired: "comparion" was a typo and the comma
        # between 'forward' and 'reverse' was missing.
        _Option(["-mode"], ["input"], None, 0,
                "This determines the comparison mode. 'both', 'forward', "
                "'reverse'"),
        _Option(["-best"], ["input"], None, 0,
                "You can print out all comparisons instead of just the "
                "best"),
        _Option(["-space"], ["input"], None, 0,
                "for linear-space recursion."),
        _Option(["-shuffle"], ["input"], None, 0,
                "Shuffle"),
        _Option(["-seed"], ["input"], None, 0,
                "Random number seed"),
        _Option(["-align"], ["input"], None, 0,
                "Show the alignment."),
        _Option(["-width"], ["input"], None, 0,
                "Alignment width"),
    ]

    # Declaration order is preserved: mandatory options, then optional.
    self.parameters = mandatory + optional
391
393 """Commandline object for the etandem program from EMBOSS.
394 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-sequence"], ["input", "file"], None, 1,
            "Sequence"),
    _Option(["-minrepeat"], ["input"], None, 1,
            "Minimum repeat size"),
    _Option(["-maxrepeat"], ["input"], None, 1,
            "Maximum repeat size"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
]

# Options declared with 0 in that position.
optional = [
    _Option(["-threshold"], ["input"], None, 0,
            "Threshold score"),
    _Option(["-mismatch"], ["input"], None, 0,
            "Allow N as a mismatch"),
    _Option(["-uniform"], ["input"], None, 0,
            "Allow uniform consensus"),
    _Option(["-rformat"], ["output"], None, 0,
            "Output report format"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
416
418 """Commandline object for the einverted program from EMBOSS.
419 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-sequence"], ["input", "file"], None, 1,
            "Sequence"),
    # The gap penalty is a number, not a path, so it is typed plain
    # "input" like -threshold/-match/-mismatch below.  It used to be
    # tagged "file" as well.
    # NOTE(review): presumably anything that scans for "file"-typed
    # options treated the penalty as a filename -- verify in
    # Bio.Application.
    _Option(["-gap"], ["input"], None, 1,
            "Gap penalty"),
    _Option(["-threshold"], ["input"], None, 1,
            "Minimum score threshold"),
    _Option(["-match"], ["input"], None, 1,
            "Match score"),
    _Option(["-mismatch"], ["input"], None, 1,
            "Mismatch score"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
]

# The single option declared with 0 in that position.
optional = [
    _Option(["-maxrepeat"], ["input"], None, 0,
            "Maximum separation between the start and end of repeat"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
440
442 """Commandline object for the palindrome program from EMBOSS.
443 """
def __init__(self, cmd = "palindrome"):
    """Register the palindrome options on this command line object.

    cmd is stored as the program name; it defaults to "palindrome".
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # All seven options carry 1 as the fourth _Option argument
    # (presumably "required" -- confirm against Bio.Application._Option).
    option_table = []
    option_table.append(
        _Option(["-sequence"], ["input", "file"], None, 1,
                "Sequence"))
    option_table.append(
        _Option(["-minpallen"], ["input"], None, 1,
                "Minimum palindrome length"))
    option_table.append(
        _Option(["-maxpallen"], ["input"], None, 1,
                "Maximum palindrome length"))
    option_table.append(
        _Option(["-gaplimit"], ["input"], None, 1,
                "Maximum gap between repeats"))
    option_table.append(
        _Option(["-nummismatches"], ["input"], None, 1,
                "Number of mismatches allowed"))
    option_table.append(
        _Option(["-overlap"], ["input"], None, 1,
                "Report overlapping matches"))
    option_table.append(
        _Option(["-outfile"], ["output", "file"], None, 1,
                "Output report file name"))
    self.parameters = option_table
464
466 """Commandline object for the tranalign program from EMBOSS.
467 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-asequence"], ["input", "file"], None, 1,
            "Nucleotide sequences to be aligned."),
    _Option(["-bsequence"], ["input", "file"], None, 1,
            "Protein sequence alignment"),
    _Option(["-outseq"], ["output", "file"], None, 1,
            "Output sequence file."),
]

# The single option declared with 0 in that position.
optional = [
    _Option(["-table"], ["input"], None, 0,
            "Code to use"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
481
483 """Commandline object for the diffseq program from EMBOSS.
484 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-asequence"], ["input", "file"], None, 1,
            "First sequence to compare"),
    _Option(["-bsequence"], ["input", "file"], None, 1,
            "Second sequence to compare"),
    _Option(["-wordsize"], ["input"], None, 1,
            "Word size to use for comparisons (10 default)"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
    _Option(["-aoutfeat"], ["output", "file"], None, 1,
            "File for output of first sequence's features"),
    _Option(["-boutfeat"], ["output", "file"], None, 1,
            "File for output of second sequence's features"),
]

# The single option declared with 0 in that position.
optional = [
    _Option(["-rformat"], ["output"], None, 0,
            "Output report file format"),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
505
507 """Commandline for EMBOSS iep: calculated isoelectric point and charge.
508 """
# Base-class set-up first, then remember which executable to run.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options declared with 1 as the fourth _Option argument (presumably
# "required" -- confirm against Bio.Application._Option).
mandatory = [
    _Option(["-sequence"], ["input", "file"], None, 1,
            "Protein sequence(s) filename"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
]

# Options declared with 0 in that position; none of them is given a
# description string.
optional = [
    _Option(["-amino"], ["input"], None, 0),
    _Option(["-lysinemodified"], ["input"], None, 0),
    _Option(["-disulphides"], ["input"], None, 0),
    _Option(["-notermini"], ["input"], None, 0),
]

# Declaration order is preserved: mandatory options, then optional ones.
self.parameters = mandatory + optional
523