Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Record classes to hold BLAST output. 
  7   
  8  Classes: 
  9  Blast              Holds all the information from a blast search. 
 10  PSIBlast           Holds all the information from a psi-blast search. 
 11   
 12  Header             Holds information from the header. 
 13  Description        Holds information about one hit description. 
 14  Alignment          Holds information about one alignment hit. 
 15  HSP                Holds information about one HSP. 
 16  MultipleAlignment  Holds information about a multiple alignment. 
 17  DatabaseReport     Holds information from the database report. 
 18  Parameters         Holds information from the parameters. 
 19   
 20  """ 
 21  # XXX finish printable BLAST output 
 22   
 23  import string 
 24   
 25  from Bio.Align import Generic 
 26   
class Header:
    """Holds the fields read from the header of a BLAST report.

    Members:
    application         The name of the BLAST flavor that generated this data.
    version             Version of blast used.
    date                Date this data was generated.
    reference           Reference for blast.

    query               Name of query sequence.
    query_letters       Number of letters in the query sequence.  (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database.  (int)
    database_letters    Number of letters in the database.  (int)

    """
    def __init__(self):
        """Start with empty text fields and unset (None) counts."""
        # Program identification.
        self.application = self.version = self.date = self.reference = ''

        # Query sequence.
        self.query = ''
        self.query_letters = None

        # Searched database.
        self.database = ''
        self.database_sequences = self.database_letters = None
56
class Description:
    """Holds one hit from the descriptions section of a report.

    Members:
    title           Title of the hit.
    score           Number of bits.  (int)
    bits            Bit score.  (float)
    e               E value.  (float)
    num_alignments  Number of alignments for the same subject.  (int)

    """
    def __init__(self):
        """Start with an empty title and every numeric field unset."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return a one-line summary: padded title, score, then E value."""
        columns = (self.title, self.score, self.e)
        return "%-66s %5s %s" % columns
76
class Alignment:
    """Stores information about one hit in the alignments section.

    Members:
    title      Name.
    hit_id     Hit identifier.  (str)
    hit_def    Hit definition.  (str)
    length     Length.  (int)
    hsps       A list of HSP objects.

    """
    def __init__(self):
        """Start with empty text fields, no length and no HSPs."""
        self.title = ''
        self.hit_id = ''
        self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title followed by a "Length = ..." line.

        The title is split on newlines.  Every title line after the first,
        and the length line, is prefixed with the same padding.  The result
        always ends with a newline.
        """
        # Use str methods instead of string.split()/string.join(): those
        # module-level functions do not exist in Python 3, while the methods
        # behave the same on both Python 2 and Python 3.
        # NOTE(review): the padding below is a single space in this copy of
        # the file; confirm the intended width against the upstream source.
        titles = self.title.split('\n')
        header = "\n ".join(titles) + "\n"
        return header + " Length = %s\n" % self.length
103
class HSP:
    """Holds the data for a single HSP within an alignment hit.

    Members:
    score           BLAST score of hit.  (float)
    bits            Number of bits for that score.  (float)
    expect          Expect value.  (float)
    num_alignments  Number of alignments for same subject.  (int)
    identities      Number of identities/total aligned.  tuple of (int, int)
    positives       Number of positives/total aligned.  tuple of (int, int)
    gaps            Number of gaps/total aligned.  tuple of (int, int)
    align_length    Length of the alignment.  (int)
    strand          Tuple of (query, target) strand.
    frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

    query           The query sequence.
    query_start     The start residue for the query sequence.  (1-based)
    query_end       The end residue for the query sequence.  (1-based)
    match           The match sequence.
    sbjct           The sbjct sequence.
    sbjct_start     The start residue for the sbjct sequence.  (1-based)
    sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute:
              score  expect  identities  positives  strand  frame
    BLASTP      X      X         X           X
    BLASTN      X      X         X           X         X
    BLASTX      X      X         X           X                 X
    TBLASTN     X      X         X           X                 X
    TBLASTX     X      X         X           X                X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """
    def __init__(self):
        """Start with every statistic unset and every sequence empty."""
        # Scores and statistics.
        self.score = self.bits = self.expect = self.num_alignments = None
        self.identities = self.positives = self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        # Aligned sequences and their coordinates.
        self.query = ''
        self.query_start = self.query_end = None
        self.match = self.sbjct = ''
        self.sbjct_start = self.sbjct_end = None
164
class MultipleAlignment:
    """Container for the rows of a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """
    def __init__(self):
        """Start with no alignment rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Build a Bio.Align.Generic Alignment from the stored tuples.

        Rather than the raw tuples, the caller gets an Alignment object
        from Bio.Align.Generic, which can be manipulated and queried.

        alphabet is the alphabet to use for the sequences (for example
        IUPAC.IUPACProtein); it is passed straight to Generic.Alignment.

        Thanks to James Casbon for the code.
        """
        names = []
        pieces = []
        block_count = 0
        slot = 0
        for label, _start, residues, _end in self.alignment:
            # A 'QUERY' row opens each alignment block, so restart the
            # per-block position counter whenever one is seen.
            if label == 'QUERY':
                block_count = block_count + 1
                slot = 0

            if block_count == 1:
                # First block: register every row.
                names.append(label)
                pieces.append(residues)
            else:
                # Later blocks: extend the row at the same position.
                pieces[slot] = pieces[slot] + residues
                slot = slot + 1

        result = Generic.Alignment(alphabet)
        for label, residues in zip(names, pieces):
            result.add_sequence(label, residues)

        return result
211
class Round:
    """Holds the results of one PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of Description
                        objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.

    """
    def __init__(self):
        """Start with no round number, empty lists and no alignment."""
        self.number = None
        # Three separate list objects, one per attribute.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
229
class DatabaseReport:
    """Holds the fields read from a database report.

    Members:
    database_name              List of database names.  (can have multiple
                               dbs)
    num_letters_in_database    Number of letters in the database.  (starts
                               out as an empty list)
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """
    def __init__(self):
        """Start with empty lists, unset ka parameters and gapped = 0."""
        unset_ka = (None, None, None)

        # Every list attribute receives its own list object.
        self.database_name, self.posted_date = [], []
        self.num_letters_in_database, self.num_sequences_in_database = [], []
        self.ka_params = unset_ka
        self.gapped = 0
        self.ka_params_gap = unset_ka
251
class Parameters:
    """Holds the fields read from the parameters section.

    Members:
    matrix              Name of the matrix.
    gap_penalties       Tuple of (open, extend) penalties.  (floats)
    sc_match            Match score for nucleotide-nucleotide comparison
    sc_mismatch         Mismatch penalty for nucleotide-nucleotide comparison
    num_hits            Number of hits to the database.  (int)
    num_sequences       Number of sequences.  (int)
    num_good_extends    Number of extensions.  (int)
    num_seqs_better_e   Number of sequences better than e-value.  (int)
    hsps_no_gap         Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped  Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped         Total number of HSP's gapped.  (int)
    query_length        Length of the query.  (int)
    query_id            Identifier of the query sequence.  (str)
    database_length     Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift          Frameshift window.  Tuple of (int, float)
    threshold           Threshold.  (int)
    window_size         Window size.  (int)
    dropoff_1st_pass    Tuple of (score, bits).  (int, float)
    gap_x_dropoff       Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final Tuple of (score, bits).  (int, float)
    gap_trigger         Tuple of (score, bits).  (int, float)
    blast_cutoff        Tuple of (score, bits).  (int, float)
    """
    def __init__(self):
        """Start with an empty matrix name and every other field unset.

        Scalar fields start as None and pair fields as (None, None).
        """
        unset_pair = (None, None)

        self.matrix = ''
        self.gap_penalties = unset_pair
        for field in ('sc_match', 'sc_mismatch',
                      'num_hits', 'num_sequences',
                      'num_good_extends', 'num_seqs_better_e',
                      'hsps_no_gap', 'hsps_prelim_gapped',
                      'hsps_prelim_gapped_attemped', 'hsps_gapped',
                      'query_id', 'query_length', 'database_length',
                      'effective_hsp_length', 'effective_query_length',
                      'effective_database_length',
                      'effective_search_space',
                      'effective_search_space_used'):
            setattr(self, field, None)
        self.frameshift = unset_pair
        for field in ('threshold', 'window_size'):
            setattr(self, field, None)
        for field in ('dropoff_1st_pass', 'gap_x_dropoff',
                      'gap_x_dropoff_final', 'gap_trigger', 'blast_cutoff'):
            setattr(self, field, unset_pair)
314
class Blast(Header, DatabaseReport, Parameters):
    """Holds the complete results of a blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """
    def __init__(self):
        """Initialise each base class in turn, then the result containers."""
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
332
class PSIBlast(Header, DatabaseReport, Parameters):
    """Holds the complete results of a blastpgp search.

    Members:
    rounds       A list of Round objects.
    converged    Whether the search converged.
    + members inherited from base classes

    """
    def __init__(self):
        """Initialise each base class in turn, then the round bookkeeping."""
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        self.converged = 0
348