1
2
3
4
5
6
7
8
9 """Connect with a BioSQL database and load Biopython like objects from it.
10
11 This provides interfaces for loading biological objects from a relational
12 database, and is compatible with the BioSQL standards.
13 """
14 import BioSeq
15 import Loader
16 import DBUtils
17
19 """Main interface for loading an existing BioSQL-style database.
20
21 This function is the easiest way to retrieve a connection to a
22 database, doing something like:
23
24 >>> from BioSeq import BioSeqDatabase
25 >>> server = BioSeqDatabase.open_database(user = "root", db="minidb")
26
27 the various options are:
28 driver -> The name of the database driver to use for connecting. The
29 driver should implement the python DB API. By default, the MySQLdb
30 driver is used.
31 user -> the username to connect to the database with.
32 password, passwd -> the password to connect with
33 host -> the hostname of the database
34 database or db -> the name of the database
35 """
36 module = __import__(driver)
37 connect = getattr(module, "connect")
38
39
40 kw = kwargs.copy()
41 if driver == "MySQLdb":
42 if "database" in kw:
43 kw["db"] = kw["database"]
44 del kw["database"]
45 if "password" in kw:
46 kw["passwd"] = kw["password"]
47 del kw["password"]
48 else:
49
50 if "db" in kw:
51 kw["database"] = kw["db"]
52 del kw["db"]
53 if "passwd" in kw:
54 kw["password"] = kw["passwd"]
55 del kw["passwd"]
56 if driver in ["psycopg", "psycopg2"] and not kw.get("database"):
57 kw["database"] = "template1"
58 try:
59 conn = connect(**kw)
60 except module.InterfaceError:
61
62
63 if "database" in kw:
64 kw["dbname"] = kw["database"]
65 del kw["database"]
66 elif "db" in kw:
67 kw["dbname"] = kw["db"]
68 del kw["db"]
69
70 dsn = ' '.join(['='.join(i) for i in kw.items()])
71 conn = connect(dsn)
72
73 return DBServer(conn, module)
74
def __init__(self, conn, module, module_name=None):
    """Wrap an open database connection and its driver module.

    conn is handed to the Adaptor together with the driver-specific
    helper returned by DBUtils.get_dbutils().

    module is the driver module object; it is kept on self.module.

    module_name selects the DBUtils helper and is stored on
    self.module_name.  When it is None, module.__name__ is used.
    """
    self.module = module
    # Fall back to the driver module's own name when none was given.
    resolved_name = module.__name__ if module_name is None else module_name
    dbutils = DBUtils.get_dbutils(resolved_name)
    self.adaptor = Adaptor(conn, dbutils)
    self.module_name = resolved_name
82
84 return self.__class__.__name__ + "(%r)" % self.adaptor.conn
93
100
def new_database(self, db_name, authority=None, description=None):
    """Insert a new biodatabase row and return a handle to it.

    db_name, authority and description are written to the name,
    authority and description columns of the biodatabase table.

    Returns a BioSeqDatabase built from this server's adaptor and
    db_name.
    """
    # Values are bound by the driver through placeholders rather than
    # being interpolated into the statement text.
    insert_sql = (r"INSERT INTO biodatabase (name, authority, description)"
                  r" VALUES (%s, %s, %s)")
    row_values = (db_name, authority, description)
    self.adaptor.execute(insert_sql, row_values)
    return BioSeqDatabase(self.adaptor, db_name)
109
111 """Load a database schema into the given database.
112
113 This is used to create tables, etc when a database is first created.
114 sql_file should specify the complete path to a file containing
115 SQL entries for building the tables.
116 """
117
118
119
120
121
122 sql_handle = open(sql_file, "rb")
123 sql = r""
124 for line in sql_handle.xreadlines():
125 if line.find("--") == 0:
126 pass
127 elif line.find("#") == 0:
128 pass
129 elif line.strip():
130 sql += line.strip()
131 sql += ' '
132
133
134
135
136
137 if self.module_name in ["psycopg", "psycopg2"]:
138 self.adaptor.cursor.execute(sql)
139
140
141 elif self.module_name in ["MySQLdb"]:
142 sql_parts = sql.split(";")
143 for sql_line in sql_parts[:-1]:
144 self.adaptor.cursor.execute(sql_line)
145 else:
146 raise ValueError("Module %s not supported by the loader." %
147 (self.module_name))
148
150 """Commits the current transaction to the database."""
151 return self.adaptor.commit()
152
154 """Rolls back the current transaction."""
155 return self.adaptor.rollback()
156
158 """Close the connection. No further activity possible."""
159 return self.adaptor.close()
160
163 self.conn = conn
164 self.cursor = conn.cursor()
165 self.dbutils = dbutils
166
169
171 """Set the autocommit mode. True values enable; False values disable."""
172 return self.dbutils.autocommit(self.conn, y)
173
175 """Commits the current transaction."""
176 return self.conn.commit()
177
179 """Rolls back the current transaction."""
180 return self.conn.rollback()
181
183 """Close the connection. No further activity possible."""
184 return self.conn.close()
185
187 self.cursor.execute(
188 r"select biodatabase_id from biodatabase where name = %s",
189 (dbname,))
190 rv = self.cursor.fetchall()
191 if not rv:
192 raise KeyError("Cannot find biodatabase with name %r" % dbname)
193
194
195 return rv[0][0]
196
198 sql = r"select bioentry_id from bioentry where name = %s"
199 fields = [name]
200 if dbid:
201 sql += " and biodatabase_id = %s"
202 fields.append(dbid)
203 self.cursor.execute(sql, fields)
204 rv = self.cursor.fetchall()
205 if not rv:
206 raise IndexError("Cannot find display id %r" % name)
207 if len(rv) > 1:
208 raise IndexError("More than one entry with display id %r" % name)
209 return rv[0][0]
210
212 sql = r"select bioentry_id from bioentry where accession = %s"
213 fields = [name]
214 if dbid:
215 sql += " and biodatabase_id = %s"
216 fields.append(dbid)
217 self.cursor.execute(sql, fields)
218 rv = self.cursor.fetchall()
219 if not rv:
220 raise IndexError("Cannot find accession %r" % name)
221 if len(rv) > 1:
222 raise IndexError("More than one entry with accession %r" % name)
223 return rv[0][0]
224
226 sql = r"select bioentry_id from bioentry where accession = %s"
227 fields = [name]
228 if dbid:
229 sql += " and biodatabase_id = %s"
230 fields.append(dbid)
231 return self.execute_and_fetch_col0(sql, fields)
232
234 acc_version = name.split(".")
235 if len(acc_version) > 2:
236 raise IndexError("Bad version %r" % name)
237 acc = acc_version[0]
238 if len(acc_version) == 2:
239 version = acc_version[1]
240 else:
241 version = "0"
242 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \
243 r" AND version = %s"
244 fields = [acc, version]
245 if dbid:
246 sql += " and biodatabase_id = %s"
247 fields.append(dbid)
248 self.cursor.execute(sql, fields)
249 rv = self.cursor.fetchall()
250 if not rv:
251 raise IndexError("Cannot find version %r" % name)
252 if len(rv) > 1:
253 raise IndexError("More than one entry with version %r" % name)
254 return rv[0][0]
255
257
258 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s"
259 fields = [identifier]
260 if dbid:
261 sql += " and biodatabase_id = %s"
262 fields.append(dbid)
263 self.cursor.execute(sql, fields)
264 rv = self.cursor.fetchall()
265 if not rv:
266 raise IndexError("Cannot find display id %r" % identifier)
267 return rv[0][0]
268
272
def list_bioentry_ids(self, dbid):
    """Return the bioentry_id values stored under one biodatabase.

    dbid is bound to the biodatabase_id placeholder of the query.
    The result is whatever self.execute_and_fetch_col0() returns for
    that query.
    """
    query = "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s"
    params = (dbid,)
    return self.execute_and_fetch_col0(query, params)
277
279 return self.execute_and_fetch_col0(
280 "SELECT name FROM bioentry WHERE biodatabase_id = %s",
281 (dbid,))
282
284 """Return ids given a SQL statement to select for them.
285
286 This assumes that the given SQL does a SELECT statement that
287 returns a list of items. This parses them out of the 2D list
288 they come as and just returns them in a list.
289 """
290 return self.cursor.execute_and_fetch_col0(sql, args)
291
293 self.cursor.execute(sql, args or ())
294 rv = self.cursor.fetchall()
295 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
296 return rv[0]
297
def execute(self, sql, args=None):
    """Run one SQL statement on this adaptor's cursor.

    sql is the statement text; args holds the values for its
    placeholders.  A false args value (None, empty) is replaced by an
    empty tuple before the call.  Nothing is returned.
    """
    params = args or ()
    self.cursor.execute(sql, params)
302
309
311 self.cursor.execute(sql, args or ())
312 return [field[0] for field in self.cursor.fetchall()]
313
317
# Maps each keyword accepted by a lookup to the name of the adaptor
# method that resolves it to a bioentry id.  Several keywords are
# aliases for the same method.
_allowed_lookups = dict(
    # identifier-based lookups
    primary_id="fetch_seqid_by_identifier",
    gi="fetch_seqid_by_identifier",
    # display-name lookups
    display_id="fetch_seqid_by_display_id",
    name="fetch_seqid_by_display_id",
    # accession and accession.version lookups
    accession="fetch_seqid_by_accession",
    version="fetch_seqid_by_version",
)
327
334 return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)
335
344
353
362
364 """Gets a *list* of Bio::Seq objects by accession number
365
366 Example: seqs = db.get_Seq_by_acc('X77802')
367
368 """
369 seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
370 return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]
371
373
374
375
376
377 raise NotImplementedError("waiting for Python 2.2's iter")
378
380 """Array of all the primary_ids of the sequences in the database.
381
382 These may be ids (display style) or accession numbers or
383 something else completely different - they *are not*
384 meaningful outside of this database implementation.
385 """
386 return self.adaptor.list_bioentry_ids(self.dbid)
387
396
398 if len(kwargs) != 1:
399 raise TypeError("single key/value parameter expected")
400 k, v = kwargs.items()[0]
401 if k not in _allowed_lookups:
402 raise TypeError("lookup() expects one of %s, not %r" % \
403 (repr(_allowed_lookups.keys())[1:-1], repr(k)))
404 lookup_name = _allowed_lookups[k]
405 lookup_func = getattr(self.adaptor, lookup_name)
406 seqid = lookup_func(self.dbid, v)
407 return BioSeq.DBSeqRecord(self.adaptor, seqid)
408
410 """Gets a Bio::Seq object by the primary (internal) id.
411
412 The primary id in these cases has to come from
413 $db->get_all_primary_ids. There is no other way to get (or
414 guess) the primary_ids in a database.
415 """
416 return self[seqid]
417
def load(self, record_iterator, fetch_NCBI_taxonomy=False):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator is any iterable of SeqRecord objects: a list, or
    an iterator such as the output of the Bio.SeqIO.parse() function.
    Every record it yields is passed to a Loader.DatabaseLoader bound
    to this database's adaptor and dbid.

    fetch_NCBI_taxonomy is a boolean flag allowing or preventing
    connection to the taxonomic database on the NCBI server
    (via Bio.Entrez) to fetch a detailed taxonomy for each
    SeqRecord.  It is passed unchanged to the DatabaseLoader.

    Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.
    """
    seq_loader = Loader.DatabaseLoader(
        self.adaptor, self.dbid, fetch_NCBI_taxonomy)
    loaded = 0
    for seq_record in record_iterator:
        # Count first, then load, so the tally tracks records taken
        # from the iterator.
        loaded = loaded + 1
        seq_loader.load_seqrecord(seq_record)
    return loaded
444