Package Bio :: Package Blast :: Module Record
[hide private]
[frames | no frames]

Source Code for Module Bio.Blast.Record

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Record classes to hold BLAST output. 
  7   
  8  Classes: 
  9  Blast              Holds all the information from a blast search. 
 10  PSIBlast           Holds all the information from a psi-blast search. 
 11   
 12  Header             Holds information from the header. 
 13  Description        Holds information about one hit description. 
 14  Alignment          Holds information about one alignment hit. 
 15  HSP                Holds information about one HSP. 
 16  MultipleAlignment  Holds information about a multiple alignment. 
 17  DatabaseReport     Holds information from the database report. 
 18  Parameters         Holds information from the parameters. 
 19   
 20  """ 
 21  # XXX finish printable BLAST output 
 22   
 23  from Bio.Align import Generic 
 24   
class Header:
    """Container for the header section of a BLAST report.

    Every attribute starts out empty ('' for text, None for counts).

    Members:
    application         The name of the BLAST flavor that generated this data.
    version             Version of blast used.
    date                Date this data was generated.
    reference           Reference for blast.

    query               Name of query sequence.
    query_letters       Number of letters in the query sequence.  (int)

    database            Name of the database.
    database_sequences  Number of sequences in the database.  (int)
    database_letters    Number of letters in the database.  (int)

    """

    def __init__(self):
        """Create an empty header record."""
        # Chained targets are bound left to right, so the attributes are
        # created in the same order as one-assignment-per-line would give.
        self.application = self.version = self.date = self.reference = ''

        self.query = ''
        self.query_letters = None

        self.database = ''
        self.database_sequences = self.database_letters = None
54
class Description:
    """One hit from the descriptions section of a BLAST report.

    Members:
    title           Title of the hit.
    score           Number of bits.  (int)
    bits            Bit score.  (float)
    e               E value.  (float)
    num_alignments  Number of alignments for the same subject.  (int)

    """

    def __init__(self):
        """Create an empty description: title is '', everything else None."""
        self.title = ''
        # Bound left to right: score, bits, e, num_alignments.
        self.score = self.bits = self.e = self.num_alignments = None

    def __str__(self):
        """Return one summary row: title (padded to 66), score, E value."""
        columns = (self.title, self.score, self.e)
        return "%-66s %5s %s" % columns
74
class Alignment:
    """One hit from the alignments section of a BLAST report.

    Members:
    title    Name.
    hit_id   Hit identifier.  (str)
    hit_def  Hit definition.  (str)
    length   Length.  (int)
    hsps     A list of HSP objects.

    """

    def __init__(self):
        """Create an empty alignment with its own, initially empty, HSP list."""
        self.title = self.hit_id = self.hit_def = ''
        self.length = None
        self.hsps = []

    def __str__(self):
        """Return the title lines followed by a "Length = ..." line.

        The title is split on newlines; every line after the first, and the
        trailing length line, is prefixed by the join separator.
        """
        length_line = "Length = %s\n" % self.length
        return '\n '.join(self.title.split('\n') + [length_line])
96
class HSP:
    """Stores information about one hsp in an alignment hit.

    Members:
    score           BLAST score of hit.  (float)
    bits            Number of bits for that score.  (float)
    expect          Expect value.  (float)
    num_alignments  Number of alignments for same subject.  (int)
    identities      Number of identities/total aligned.  tuple of (int, int)
    positives       Number of positives/total aligned.  tuple of (int, int)
    gaps            Number of gaps/total aligned.  tuple of (int, int)
    align_length    Length of the alignment.  (int)
    strand          Tuple of (query, target) strand.
    frame           Tuple of 1 or 2 frame shifts, depending on the flavor.

    query           The query sequence.
    query_start     The start residue for the query sequence.  (1-based)
    query_end       The end residue for the query sequence.  (1-based)
    match           The match sequence.
    sbjct           The sbjct sequence.
    sbjct_start     The start residue for the sbjct sequence.  (1-based)
    sbjct_end       The end residue for the sbjct sequence.  (1-based)

    Not all flavors of BLAST return values for every attribute:
              score     expect     identities   positives    strand  frame
    BLASTP     X          X            X            X
    BLASTN     X          X            X            X          X
    BLASTX     X          X            X            X                  X
    TBLASTN    X          X            X            X                  X
    TBLASTX    X          X            X            X                 X/X

    Note: for BLASTX, the query sequence is shown as a protein sequence,
    but the numbering is based on the nucleotides.  Thus, the numbering
    is 3x larger than the number of amino acid residues.  A similar effect
    can be seen for the sbjct sequence in TBLASTN, and for both sequences
    in TBLASTX.

    Also, for negative frames, the sequence numbering starts from
    query_start and counts down.

    """

    def __init__(self):
        """Create an empty HSP: numbers are None, sequences are ''."""
        self.score = None
        self.bits = None
        self.expect = None
        self.num_alignments = None
        self.identities = (None, None)
        self.positives = (None, None)
        self.gaps = (None, None)
        self.align_length = None
        self.strand = (None, None)
        self.frame = ()

        self.query = ''
        self.query_start = None
        self.query_end = None
        self.match = ''
        self.sbjct = ''
        self.sbjct_start = None
        self.sbjct_end = None

    def __str__(self):
        """Return a four-line, human-readable summary of the HSP.

        Line 1 reports score, bits, expectation and alignment length; lines
        2-4 show the query, match and sbjct rows.  Alignments of 50 or more
        columns are abbreviated to their first 45 and last 3 characters
        joined by "...".

        Fields that have not been set are printed as "None" rather than
        raising TypeError, so a partially filled HSP can still be printed.
        If align_length is unset, the length of the query text decides
        whether the rows are abbreviated.
        """
        def number(template, value):
            # Numeric conversions (%i, %e) reject None; show unset fields
            # the same way the start/end columns already do via str(None).
            if value is None:
                return "None"
            return template % value

        lines = ["Score %s (%s bits), expectation %s, alignment length %s"
                 % (number("%i", self.score),
                    number("%i", self.bits),
                    number("%0.1e", self.expect),
                    number("%i", self.align_length))]

        query = str(self.query)
        match = str(self.match)
        sbjct = str(self.sbjct)

        length = self.align_length
        if length is None:
            length = len(query)
        if length >= 50:
            query = "%s...%s" % (query[:45], query[-3:])
            match = "%s...%s" % (match[:45], match[-3:])
            sbjct = "%s...%s" % (sbjct[:45], sbjct[-3:])

        # The sequence column starts after the 6-character label, the
        # 8-wide right-justified start position and one space; the match
        # row gets the same prefix width so it lines up under the residues.
        indent = " " * (len("Query:") + 8 + 1)

        lines.append("Query:%s %s %s" % (str(self.query_start).rjust(8),
                                         query,
                                         str(self.query_end)))
        lines.append(indent + match)
        lines.append("Sbjct:%s %s %s" % (str(self.sbjct_start).rjust(8),
                                         sbjct,
                                         str(self.sbjct_end)))
        return "\n".join(lines)
185
class MultipleAlignment:
    """Holds information about a multiple alignment.

    Members:
    alignment  A list of tuples (name, start residue, sequence, end residue).

    The start residue is 1-based.  It may be blank, if that sequence is
    not aligned in the multiple alignment.

    """

    def __init__(self):
        """Create a multiple alignment with no rows."""
        self.alignment = []

    def to_generic(self, alphabet):
        """Retrieve generic alignment object for the given alignment.

        Instead of the tuples, this returns an Alignment object from
        Bio.Align.Generic, through which you can manipulate and query
        the object.

        alphabet is the specified alphabet for the sequences in the code
        (for example IUPAC.IUPACProtein).

        A row named 'QUERY' marks the start of each alignment block.  Rows
        of the first block define the sequences and their names; rows of
        every later block are appended, by position within the block, to
        the sequence collected at that position so far.

        Thanks to James Casbon for the code.
        """
        names = []
        pieces = []
        block_count = 0
        slot = 0
        for name, _start, seq, _end in self.alignment:
            if name == 'QUERY':
                # QUERY is the first row in each alignment block.
                block_count = block_count + 1
                slot = 0

            if block_count == 1:
                # First block: register a new sequence.
                names.append(name)
                pieces.append(seq)
                continue

            # Any other block: extend the sequence at the same position.
            pieces[slot] = pieces[slot] + seq
            slot = slot + 1

        generic = Generic.Alignment(alphabet)
        for index, name in enumerate(names):
            generic.add_sequence(name, pieces[index])

        return generic
232
class Round:
    """Holds information from a PSI-BLAST round.

    Members:
    number              Round number.  (int)
    reused_seqs         Sequences in model, found again.  List of Description
                        objects.
    new_seqs            Sequences not found, or below threshold.  List of
                        Description.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.

    """

    def __init__(self):
        """Create an empty round; each list attribute gets its own new list."""
        self.number = None
        # Three separate list objects, bound left to right.
        self.reused_seqs, self.new_seqs, self.alignments = [], [], []
        self.multiple_alignment = None
250
class DatabaseReport:
    """Holds information about a database report.

    Members:
    database_name              List of database names.  (can have multiple
                               dbs)
    num_letters_in_database    Number of letters in the database.
                               Initialised to an empty list.
    num_sequences_in_database  List of number of sequences in the database.
    posted_date                List of the dates the databases were posted.
    ka_params                  A tuple of (lambda, k, h) values.  (floats)
    gapped                     Initialised to 0.
                               # XXX this isn't set right!
    ka_params_gap              A tuple of (lambda, k, h) values.  (floats)

    """

    def __init__(self):
        """Create an empty report: new lists, 0 for gapped, unset ka tuples."""
        unset_ka = (None, None, None)

        # Each list attribute receives its own list object.
        self.database_name, self.posted_date = [], []
        self.num_letters_in_database, self.num_sequences_in_database = [], []
        self.ka_params = unset_ka
        self.gapped = 0
        self.ka_params_gap = unset_ka
272
class Parameters:
    """Holds information about the parameters.

    Members:
    matrix              Name of the matrix.
    gap_penalties       Tuple of (open, extend) penalties.  (floats)
    sc_match            Match score for nucleotide-nucleotide comparison
    sc_mismatch         Mismatch penalty for nucleotide-nucleotide comparison
    num_hits            Number of hits to the database.  (int)
    num_sequences       Number of sequences.  (int)
    num_good_extends    Number of extensions.  (int)
    num_seqs_better_e   Number of sequences better than e-value.  (int)
    hsps_no_gap         Number of HSP's better, without gapping.  (int)
    hsps_prelim_gapped  Number of HSP's gapped in prelim test.  (int)
    hsps_prelim_gapped_attemped  Number of HSP's attempted in prelim.  (int)
    hsps_gapped         Total number of HSP's gapped.  (int)
    query_length        Length of the query.  (int)
    query_id            Identifier of the query sequence.  (str)
    database_length     Number of letters in the database.  (int)
    effective_hsp_length         Effective HSP length.  (int)
    effective_query_length       Effective length of query.  (int)
    effective_database_length    Effective length of database.  (int)
    effective_search_space       Effective search space.  (int)
    effective_search_space_used  Effective search space used.  (int)
    frameshift          Frameshift window.  Tuple of (int, float)
    threshold           Threshold.  (int)
    window_size         Window size.  (int)
    dropoff_1st_pass    Tuple of (score, bits).  (int, float)
    gap_x_dropoff       Tuple of (score, bits).  (int, float)
    gap_x_dropoff_final Tuple of (score, bits).  (int, float)
    gap_trigger         Tuple of (score, bits).  (int, float)
    blast_cutoff        Tuple of (score, bits).  (int, float)
    """

    def __init__(self):
        """Create an empty parameter set.

        matrix starts as '', scalar values as None and two-part values as
        (None, None).
        """
        # Related attributes share a line; chained targets are bound left
        # to right, so the attribute creation order is unchanged.
        self.matrix = ''
        self.gap_penalties = (None, None)
        self.sc_match = self.sc_mismatch = None
        self.num_hits = self.num_sequences = None
        self.num_good_extends = self.num_seqs_better_e = None
        self.hsps_no_gap = self.hsps_prelim_gapped = None
        self.hsps_prelim_gapped_attemped = self.hsps_gapped = None
        self.query_id = self.query_length = None
        self.database_length = None
        self.effective_hsp_length = None
        self.effective_query_length = self.effective_database_length = None
        self.effective_search_space = self.effective_search_space_used = None
        self.frameshift = (None, None)
        self.threshold = self.window_size = None
        self.dropoff_1st_pass = (None, None)
        self.gap_x_dropoff = self.gap_x_dropoff_final = (None, None)
        self.gap_trigger = self.blast_cutoff = (None, None)
335
class Blast(Header, DatabaseReport, Parameters):
    """Saves the results from a blast search.

    Members:
    descriptions        A list of Description objects.
    alignments          A list of Alignment objects.
    multiple_alignment  A MultipleAlignment object.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise every base class in turn, then the search results."""
        # Each base is initialised explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.descriptions, self.alignments = [], []
        self.multiple_alignment = None
353
class PSIBlast(Header, DatabaseReport, Parameters):
    """Saves the results from a blastpgp search.

    Members:
    rounds     A list of Round objects.
    converged  Whether the search converged.  Initialised to 0.
    + members inherited from base classes

    """

    def __init__(self):
        """Initialise every base class in turn, then the round bookkeeping."""
        # Each base is initialised explicitly, in declaration order.
        for base in (Header, DatabaseReport, Parameters):
            base.__init__(self)
        self.rounds = []
        self.converged = 0
369