Package Bio :: Package Blast :: Module Record
[hide private]
[frames] | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
"""Record classes to hold BLAST output.

Classes:
Blast              Holds all the information from a blast search.
PSIBlast           Holds all the information from a psi-blast search.

Header             Holds information from the header.
Description        Holds information about one hit description.
Alignment          Holds information about one alignment hit.
HSP                Holds information about one HSP.
MultipleAlignment  Holds information about a multiple alignment.
Round              Holds information from a PSI-BLAST round.
DatabaseReport     Holds information from the database report.
Parameters         Holds information from the parameters.

"""
 21  # XXX finish printable BLAST output 
 22   
 23  from Bio.Align import Generic 
 24   
class Header:
    """Hold the fields taken from the header of a BLAST report.

    Members:
    application         The name of the BLAST flavor that generated this data.
    version             Version of blast used.
    date                Date this data was generated.
    reference           Reference for blast.

    query               Name of query sequence.
    query_letters       Number of letters in the query sequence.  (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database.  (int)
    database_letters    Number of letters in the database.  (int)

    A new instance carries placeholders only: text members are empty
    strings and the count members are None.

    """

    def __init__(self):
        """Set every member to its empty placeholder value."""
        # Text members share the empty-string placeholder.
        self.application = self.version = self.date = self.reference = ''
        self.query, self.query_letters = '', None
        self.database = ''
        # Counts stay None until something assigns them.
        self.database_sequences = self.database_letters = None
54
class Description:
    """Hold one hit from the descriptions section of a BLAST report.

    Members:
    title           Title of the hit.
    score           Number of bits.  (int)
    bits            Bit score.  (float)
    e               E value.  (float)
    num_alignments  Number of alignments for the same subject.  (int)

    The title starts as an empty string; every other member starts as None.

    """

    def __init__(self):
        """Set every member to its empty placeholder value."""
        self.title = ''
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return a one-line summary: padded title, score and E value."""
        # The title is left-justified to 66 columns, the score
        # right-justified to 5; bits and num_alignments are not shown.
        shown = (self.title, self.score, self.e)
        return "%-66s %5s %s" % shown
74
class Alignment:
    """Hold one hit from the alignments section of a BLAST report.

    Members:
    title    Name.
    hit_id   Hit identifier.  (str)
    hit_def  Hit definition.  (str)
    length   Length.  (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Set text members to '', length to None and hsps to a new list."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        # A fresh list per instance, so hits never share their HSPs.
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line.

        Every line after the first is prefixed with a single space, and the
        result ends with a newline.
        """
        length_line = "Length = %s\n" % self.length
        return '\n '.join(self.title.split('\n') + [length_line])
96
class HSP:
    """Stores information about one hsp in an alignment hit.

    Members:
    score           BLAST score of hit.  (float)
    bits            Number of bits for that score.  (float)
    expect          Expect value.  (float)
    num_alignments  Number of alignments for same subject.  (int)
    identities      Number of identities (int) if using the XML parser.
                    Tuple of number of identities/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    positives       Number of positives (int) if using the XML parser.
                    Tuple of number of positives/total aligned (int, int)
                    if using the (obsolete) plain text parser.
    gaps            Number of gaps (int) if using the XML parser.
                    Tuple of number of gaps/total aligned (int, int) if
                    using the (obsolete) plain text parser.
    align_length    Length of the alignment.  (int)
    strand          Tuple of (query, target) strand.
    frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

    query           The query sequence.
    query_start     The start residue for the query sequence.  (1-based)
    query_end       The end residue for the query sequence.  (1-based)
    match           The match sequence.
    sbjct           The sbjct sequence.
    sbjct_start     The start residue for the sbjct sequence.  (1-based)
    sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute:
              score     expect     identities   positives    strand  frame
    BLASTP     X          X            X            X
    BLASTN     X          X            X            X          X
    BLASTX     X          X            X            X                  X
    TBLASTN    X          X            X            X                  X
    TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    def __init__(self):
        """Set every member to its empty placeholder value."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    def __str__(self):
        """Return a four-line, human readable summary of the HSP.

        The first line reports score, bits, expectation and alignment
        length; the next three show the query, match and sbjct rows.
        Alignments of 50 or more columns are abbreviated to their first 45
        and last 3 characters, joined by "...".

        Raises TypeError if score, bits, expect or align_length is still
        None, because the numeric format codes cannot render None.
        """
        lines = ["Score %i (%i bits), expectation %0.1e, alignment length %i"
                 % (self.score, self.bits, self.expect, self.align_length)]

        rows = [str(self.query), str(self.match), str(self.sbjct)]
        if not self.align_length < 50:
            # Too wide to print in full: keep both ends of every row.
            rows = ["%s...%s" % (row[:45], row[-3:]) for row in rows]
        query, match, sbjct = rows

        # The Query/Sbjct rows put a 6-character label, an 8-character
        # right-justified start position and one space before the sequence.
        # The match row needs the same 15 columns of padding so that each
        # match symbol sits under the residues it describes.
        match_indent = " " * (6 + 8 + 1)

        lines.append("Query:%s %s %s" % (str(self.query_start).rjust(8),
                                         query,
                                         str(self.query_end)))
        lines.append("%s%s" % (match_indent, match))
        lines.append("Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
                                         sbjct,
                                         str(self.sbjct_end)))
        return "\n".join(lines)
191
class MultipleAlignment:
    """Hold the rows of a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Start with an empty list of alignment rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Retrieve generic alignment object for the given alignment.

        Instead of the tuples, this returns an Alignment object from
        Bio.Align.Generic, through which you can manipulate and query
        the object.

        alphabet is the specified alphabet for the sequences in the code
        (for example IUPAC.IUPACProtein).

        The rows are read block by block: a row named 'QUERY' opens a new
        block.  Rows of the first block define the sequences; rows of later
        blocks are appended, by position within the block, to the sequence
        at the same position in the first block.

        Thanks to James Casbon for the code.
        """
        names = []
        pieces = []
        block_count = 0
        row = 0
        for name, _start, seq, _end in self.alignment:
            # QUERY is the first row in each alignment block.
            if name == 'QUERY':
                block_count = block_count + 1
                row = 0

            if block_count != 1:
                # Later blocks extend the sequence at the same row position.
                pieces[row] = pieces[row] + seq
                row = row + 1
                continue

            # First block: create the entries.
            pieces.append(seq)
            names.append(name)

        generic = Generic.Alignment(alphabet)
        for name, seq in zip(names, pieces):
            generic.add_sequence(name, seq)

        return generic
238
class Round:
    """Hold the results of one PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of
                        Description objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.

    """

    def __init__(self):
        """Start with no round number, empty lists and no alignment."""
        self.number = None
        # Three separate list objects; they must not be shared.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
256
class DatabaseReport:
    """Hold the values taken from the database report of a BLAST run.

    Members:
    database_name              List of database names.  (can have multiple
                               dbs)
    num_letters_in_database    Number of letters in the database.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    On a new instance the first four members above are empty lists, both
    ka tuples are (None, None, None) and gapped is 0.

    """

    def __init__(self):
        """Set every member to its empty placeholder value."""
        # Each per-database member gets its own empty list.
        for member in ('database_name', 'posted_date',
                       'num_letters_in_database',
                       'num_sequences_in_database'):
            setattr(self, member, [])
        self.ka_params = (None, None, None)
        self.gapped = 0
        self.ka_params_gap = (None, None, None)
278
class Parameters:
    """Hold the values taken from the parameters section of a BLAST run.

    Members:
    matrix              Name of the matrix.
    gap_penalties       Tuple of (open, extend) penalties.  (floats)
    sc_match            Match score for nucleotide-nucleotide comparison
    sc_mismatch         Mismatch penalty for nucleotide-nucleotide comparison
    num_hits            Number of hits to the database.  (int)
    num_sequences       Number of sequences.  (int)
    num_good_extends    Number of extensions.  (int)
    num_seqs_better_e   Number of sequences better than e-value.  (int)
    hsps_no_gap         Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped  Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped         Total number of HSP's gapped.  (int)
    query_length        Length of the query.  (int)
    query_id            Identifier of the query sequence.  (str)
    database_length     Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift          Frameshift window.  Tuple of (int, float)
    threshold           Threshold.  (int)
    window_size         Window size.  (int)
    dropoff_1st_pass    Tuple of (score, bits).  (int, float)
    gap_x_dropoff       Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final Tuple of (score, bits).  (int, float)
    gap_trigger         Tuple of (score, bits).  (int, float)
    blast_cutoff        Tuple of (score, bits).  (int, float)

    On a new instance matrix is an empty string, every tuple member is
    (None, None) and every other member is None.
    """

    def __init__(self):
        """Set every member to its empty placeholder value."""
        unset_pair = (None, None)

        self.matrix = ''
        self.gap_penalties = unset_pair

        # Scalar values, all unknown until assigned.
        for member in ('sc_match', 'sc_mismatch',
                       'num_hits', 'num_sequences',
                       'num_good_extends', 'num_seqs_better_e',
                       'hsps_no_gap', 'hsps_prelim_gapped',
                       'hsps_prelim_gapped_attemped', 'hsps_gapped',
                       'query_id', 'query_length', 'database_length',
                       'effective_hsp_length', 'effective_query_length',
                       'effective_database_length',
                       'effective_search_space',
                       'effective_search_space_used'):
            setattr(self, member, None)

        self.frameshift = unset_pair
        self.threshold = self.window_size = None

        # Two-element values; tuples are immutable, so one placeholder
        # object can safely back all of them.
        for member in ('dropoff_1st_pass', 'gap_x_dropoff',
                       'gap_x_dropoff_final', 'gap_trigger',
                       'blast_cutoff'):
            setattr(self, member, unset_pair)
341
class Blast(Header, DatabaseReport, Parameters):
    """Hold the complete results of a blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialize the members of every base class, then this class's."""
        # Each base initializer is called explicitly, in declaration order;
        # the bases do not chain to one another.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
359
class PSIBlast(Header, DatabaseReport, Parameters):
    """Hold the complete results of a blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialize the members of every base class, then this class's."""
        # Each base initializer is called explicitly, in declaration order;
        # the bases do not chain to one another.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        # Starts at 0, i.e. not converged.
        self.converged = 0
375