Package Bio :: Package Blast :: Module Applications
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Applications

  1  # Copyright 2001 Brad Chapman. 
  2  # Revisions copyright 2009 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """Definitions for interacting with Blast related applications. 
  8  """ 
  9  from Bio.Application import _Option, AbstractCommandline 
 10   
class FastacmdCommandline(AbstractCommandline):
    """Create a commandline for the fastacmd program from NCBI.

    Two options are declared:

     - database (-d): the database to retrieve from.
     - search_string (-s): the id to search for.
    """

    def __init__(self, cmd="fastacmd", **kwargs):
        """Declare the fastacmd options, then initialise the base class.

        cmd is the executable to run (default "fastacmd"); all keyword
        arguments are forwarded unchanged to AbstractCommandline.__init__.
        """
        # The trailing False was missing here although almost every other
        # option in this module passes it.
        # NOTE(review): presumably this is _Option's "equate" flag, where
        # False renders "-d value" rather than "-d=value" - confirm against
        # Bio.Application._Option.
        self.parameters = [
            _Option(["-d", "database"], ["input"], None, 1,
                    "The database to retrieve from.", False),
            _Option(["-s", "search_string"], ["input"], None, 1,
                    "The id to search for.", False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
24 25
class _BlastCommandLine(AbstractCommandline):
    """Base Commandline object for NCBI BLAST wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastall, rpsblast, pgpblast).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared BLAST options and initialise the base class.

        cmd is the executable to run and must not be None (checked with an
        assert).  If a subclass has already stored a list in
        self.parameters, the shared options are placed in front of it;
        otherwise the shared options become the whole list.  All keyword
        arguments are forwarded to AbstractCommandline.__init__.
        """
        assert cmd is not None
        extra_parameters = [
            _Option(["-d", "database"], ["input"], None, 1,
                    "The database to BLAST against.", False),
            _Option(["-i", "infile"], ["input", "file"], None, 1,
                    "The sequence to search with.", False),
            _Option(["-e", "expectation"], ["input"], None, 0,
                    "Expectation value cutoff.", False),
            _Option(["-m", "align_view"], ["input"], None, 0,
                    "Alignment view.  Integer 0-11.  Use 7 for XML output.",
                    False),
            _Option(["-o", "align_outfile", "outfile"], ["output", "file"],
                    None, 0,
                    "Output file for alignment.", False),
            _Option(["-y", "xdrop_extension"], ["input"], None, 0,
                    "Dropoff for blast extensions.", False),
            _Option(["-F", "filter"], ["input"], None, 0,
                    "Filter query sequence with SEG?  T/F", False),
            # The trailing False was missing for -X only, which made it the
            # one shared option built differently from all of its siblings.
            # NOTE(review): presumably the flag is _Option's "equate"
            # (False renders "-X value", not "-X=value") - confirm.
            _Option(["-X", "xdrop"], ["input"], None, 0,
                    "Dropoff value (bits) for gapped alignments.", False),
            _Option(["-I", "show_gi"], ["input"], None, 0,
                    "Show GI's in deflines?  T/F", False),
            _Option(["-J", "believe_query"], ["input"], None, 0,
                    "Believe the query defline?  T/F", False),
            _Option(["-Z", "xdrop_final"], ["input"], None, 0,
                    "X dropoff for final gapped alignment.", False),
            _Option(["-z", "db_length"], ["input"], None, 0,
                    "Effective database length.", False),
            _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
                    "seqalign file to output.", False),
            _Option(["-v", "descriptions"], ["input"], None, 0,
                    "Number of one-line descriptions.", False),
            _Option(["-b", "alignments"], ["input"], None, 0,
                    "Number of alignments.", False),
            _Option(["-Y", "search_length"], ["input"], None, 0,
                    "Effective length of search space (use zero for the "
                    "real size).", False),
            _Option(["-T", "html"], ["input"], None, 0,
                    "Produce HTML output?  T/F", False),
            _Option(["-U", "case_filter"], ["input"], None, 0,
                    "Use lower case filtering of FASTA sequence?  T/F",
                    False),

            _Option(["-a", "nprocessors"], ["input"], None, 0,
                    "Number of processors to use.", False),
            _Option(["-g", "gapped"], ["input"], None, 0,
                    "Whether to do a gapped alignment.  T/F", False),
        ]
        try:
            # Insert extra parameters - at the start just in case there
            # are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            # Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
87 88
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Base Commandline object for NCBI BLAST wrappers (PRIVATE).

    Intended for subclassing only: it contributes the options which the
    blastall and pgpblast tools have in common (but which rpsblast lacks).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the blastall/blastpgp options, then defer to the parent.

        cmd must not be None (checked with an assert).  Any option list a
        subclass has already stored in self.parameters is kept and placed
        after the options added here.  Keyword arguments are forwarded to
        _BlastCommandLine.__init__.
        """
        assert cmd is not None
        # (names, description) pairs; every entry is built with the same
        # remaining _Option arguments.
        shared_specs = [
            (["-G", "gap_open"], "Gap open penalty"),
            (["-E", "gap_extend"], "Gap extension penalty"),
            (["-A", "window_size"], "Multiple hits window size"),
            (["-f", "hit_extend"], "Threshold for extending hits."),
            (["-K", "keep_hits"],
             " Number of best hits from a region to keep."),
            (["-W", "wordsize"], "Word size"),
            (["-P", "passes"],
             "Hits/passes.  Integer 0-2. 0 for multiple hit, "
             "1 for single hit (does not apply to blastn)"),
        ]
        shared_options = [
            _Option(names, ["input"], None, 0, description, False)
            for names, description in shared_specs
        ]
        try:
            # Shared options go first, in case a subclass has arguments
            # which must come last on the command line.
            subclass_options = self.parameters
        except AttributeError:
            # The subclass did not set anything up; use the shared options
            # on their own (the very same list object).
            self.parameters = shared_options
        else:
            self.parameters = shared_options + subclass_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
122 123
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI."""
    # TODO - This could use more checking for valid parameters to the program.

    def __init__(self, cmd="blastall", **kwargs):
        """Declare the blastall-only options, then defer to the parent.

        cmd is the executable to run (default "blastall"); keyword
        arguments are forwarded to _BlastAllOrPgpCommandLine.__init__.
        """
        # Kept in the same order as the output from blastall --help, which
        # should make it easier to keep them up to date in future.  Note
        # that some arguments are defined in the base classes.
        blastall_options = [
            _Option(["-p", "program"], ["input"], None, 1,
                    "The blast program to use (e.g. blastp, blastn).",
                    False),
            _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                    "Penalty for a nucleotide mismatch (blastn only).",
                    False),
            _Option(["-r", "nuc_match"], ["input"], None, 0,
                    "Reward for a nucleotide match (blastn only).",
                    False),
            _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                    "Query Genetic code to use.",
                    False),
            _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                    "DB Genetic code (for tblast[nx] only).",
                    False),
            _Option(["-M", "matrix"], ["input"], None, 0,
                    "Matrix to use",
                    False),
            _Option(["-S", "strands"], ["input"], None, 0,
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                    False),
            _Option(["-l", "restrict_gi"], ["input"], None, 0,
                    "Restrict search of database to list of GI's.",
                    False),
            _Option(["-R"], ["input", "file"], None, 0,
                    "PSI-TBLASTN checkpoint input file.",
                    False),
            _Option(["-n", "megablast"], ["input"], None, 0,
                    "MegaBlast search T/F.",
                    False),
            # The old name "region_length" is retained for consistency with
            # our old blastall function wrapper:
            _Option(["-L", "region_length", "range_restriction"], ["input"],
                    None, 0,
                    """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter).""",
                    False),
            _Option(["-w"], ["input"], None, 0,
                    "Frame shift penalty (OOF algorithm for blastx).",
                    False),
            _Option(["-t"], ["input"], None, 0,
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)",
                    False),
            _Option(["-B"], ["input"], None, 0,
                    "Number of concatenated queries, for blastn and tblastn.",
                    False),
            _Option(["-V", "oldengine"], ["input"], None, 0,
                    "Force use of the legacy BLAST engine.",
                    False),
            _Option(["-C"], ["input"], None, 0,
                    """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0.""",
                    False),
            _Option(["-s"], ["input"], None, 0,
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F",
                    False),
        ]
        self.parameters = blastall_options
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
189 190
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastpgp program from NCBI."""

    def __init__(self, cmd="blastpgp", **kwargs):
        """Declare the blastpgp-only options, then defer to the parent.

        cmd is the executable to run (default "blastpgp"); keyword
        arguments are forwarded to _BlastAllOrPgpCommandLine.__init__.
        """
        # Rows are (names, types, fourth _Option argument, description).
        # NOTE(review): the fourth argument is presumably "is_required";
        # only -p sets it to 1 - confirm against Bio.Application._Option.
        option_table = [
            (["-C", "checkpoint_outfile"], ["output", "file"], 0,
             "Output file for PSI-BLAST checkpointing."),
            (["-R", "restart_infile"], ["input", "file"], 0,
             "Input file for PSI-BLAST restart."),
            (["-k", "hit_infile"], ["input", "file"], 0,
             "Hit file for PHI-BLAST."),
            (["-Q", "matrix_outfile"], ["output", "file"], 0,
             "Output file for PSI-BLAST matrix in ASCII."),
            (["-B", "align_infile"], ["input", "file"], 0,
             "Input alignment file for PSI-BLAST restart."),
            (["-S", "required_start"], ["input"], 0,
             "Start of required region in query."),
            (["-H", "required_end"], ["input"], 0,
             "End of required region in query."),
            (["-j", "npasses"], ["input"], 0,
             "Number of passes"),
            (["-N", "nbits_gapping"], ["input"], 0,
             "Number of bits to trigger gapping."),
            (["-c", "pseudocounts"], ["input"], 0,
             "Pseudocounts constants for multiple passes."),
            (["-h", "model_threshold"], ["input"], 0,
             "E-value threshold to include in multipass model."),
            # Does the old name "region_length" for -L make sense?
            (["-L", "region_length"], ["input"], 0,
             "Cost to decline alignment (disabled when zero)."),
            (["-M", "matrix"], ["input"], 0,
             "Matrix (string, default BLOSUM62)."),
            (["-p", "program"], ["input"], 1,
             "The blast program to use (e.g blastpgp, patseedp or seedp)."),
        ]
        self.parameters = [
            _Option(names, types, None, flag, description, False)
            for names, types, flag, description in option_table
        ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
226 227
class RpsBlastCommandline(_BlastCommandLine):
    """Create a commandline for the rpsblast program from NCBI."""

    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-only options, then defer to the parent.

        cmd is the executable to run (default "rpsblast"); keyword
        arguments are forwarded to _BlastCommandLine.__init__.
        """
        rps_options = [
            # Note -N is also in blastpgp, but not blastall
            _Option(
                ["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.", False),
            # Note blastall and blastpgp wrappers have -P with name "passes".
            # If this is the same thing, we should be consistent!
            _Option(
                ["-P", "multihit"], ["input"], None, 0,
                "0 for multiple hit, 1 for single hit", False),
            _Option(
                ["-l", "logfile"], ["output", "file"], None, 0,
                "Logfile name.", False),
            _Option(
                ["-p", "protein"], ["input"], None, 0,
                "Query sequence is protein. T/F", False),
            _Option(
                ["-L", "range_restriction"], ["input"], None, 0,
                "Location on query sequence (string format start,end).",
                False),
        ]
        self.parameters = rps_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
248