Package Bio :: Package Align :: Package Applications :: Module _Dialign
[hide private]
[frames] | no frames]

Source Code for Module Bio.Align.Applications._Dialign

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  Bio.Application command line for the multiple alignment program DIALIGN2-2. 
  8   
  9  http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html 
 10   
 11  Citations: 
 12   
 13  B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence Alignment 
 14  at BiBiServ. Nucleic Acids Research 32, W33-W36. 
 15   
 16  Last checked against version: 2.2 
 17  """ 
 18  import os 
 19  import types 
 20  from Bio import Application 
 21  from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline 
 22   
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    The wrapper declares the DIALIGN2-2 switches, options and the input
    file argument in ``self.parameters`` and then hands control to
    ``AbstractCommandline.__init__``.

    Last checked against DIALIGN version 2.2 (see the module docstring).
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN2-2 parameters and initialise the base class.

        Arguments:
         - cmd    -- name (or path) of the executable; stored as
                     ``self.program_name`` and passed to the base class.
         - kwargs -- forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        # NOTE(review): the positional arguments of _Switch/_Option/_Argument
        # are presumably (names, types, [checker, is_required,] description
        # [, equate]) -- confirm against Bio.Application before reordering.
        self.parameters = [
            _Switch(["-afc", "afc"], ["input"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"], ["input"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"], ["input"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"], ["input"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"], ["input"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"], ["input"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"], ["input"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"], ["input"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"], ["input"],
                    None,
                    0,
                    "Output files are named <out_file>.<extension>.",
                    0),
            _Switch(["-fop", "fop"], ["input"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"], ["input"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"], ["input"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"], ["input"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"], ["input"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"], ["input"],
                    # Built-in ``int`` instead of the Python-2-only
                    # ``types.IntType`` (the two are the same object on
                    # Python 2).
                    lambda x: isinstance(x, int),
                    0,
                    "Maximum fragment length = x (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    0),
            _Switch(["-lo", "lo"], ["input"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"], ["input"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"], ["input"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"], ["input"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text refers to a threshold 't', yet this
            # is declared as a value-less switch -- confirm against DIALIGN.
            _Switch(["-mat_thr", "mat_thr"], ["input"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"], ["input"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"], ["input"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"], ["input"],
                    None,
                    0,
                    "'motif' option.",
                    0),
            _Switch(["-msf", "msf"], ["input"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"], ["input"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"], ["input"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"], ["input"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"], ["input"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"], ["input"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"], ["input"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run. This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # NOTE(review): described as a "minimum similarity value" but
            # declared as a value-less switch -- confirm against DIALIGN.
            _Switch(["-smin", "smin"], ["input"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity."),
            _Option(["-stars", "stars"], ["input"],
                    # Accepts any value that compares equal to 0..9.
                    lambda x: x in range(0, 10),
                    0,
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    0),
            _Switch(["-stdo", "stdo"], ["input"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"], ["input"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"], ["input"],
                    # Built-in ``int`` instead of the Python-2-only
                    # ``types.IntType``.
                    lambda x: isinstance(x, int),
                    0,
                    "Threshold T = x.",
                    0),
            _Switch(["-xfr", "xfr"], ["input"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"], ["input", "file"], None, 1,
                      "Input file name. Must be FASTA format"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
172