Back to index

python-biopython  1.60
BioSeqDatabase.py
Go to the documentation of this file.
00001 # Copyright 2002 by Andrew Dalke.  All rights reserved.
00002 # Revisions 2007-2009 copyright by Peter Cock.  All rights reserved.
00003 # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved.
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 #
00008 # Note that BioSQL (including the database schema and scripts) is
00009 # available and licensed separately.  Please consult www.biosql.org
00010 """Connect with a BioSQL database and load Biopython like objects from it.
00011 
00012 This provides interfaces for loading biological objects from a relational
00013 database, and is compatible with the BioSQL standards.
00014 """
00015 import BioSeq
00016 import Loader
00017 import DBUtils
00018 
# Module-level flag for the BioSQL Bug 2839 workaround.  open_database()
# sets it to True when its pg_rewrite query finds the rule_bioentry_i1 /
# rule_bioentry_i2 rules in a PostgreSQL schema; BioSeqDatabase.load()
# reads it to decide whether to look for a duplicate record before each
# insert.
_POSTGRES_RULES_PRESENT = False # Hack for BioSQL Bug 2839
00020 
def open_database(driver="MySQLdb", **kwargs):
    """Main interface for loading an existing BioSQL-style database.

    This function is the easiest way to retrieve a connection to a
    database, doing something like:

        >>> from BioSeq import BioSeqDatabase
        >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

    The various options are:
    driver -> The name of the database driver to use for connecting. The
    driver should implement the python DB API. By default, the MySQLdb
    driver is used. Dotted module names (e.g. "package.driver") are
    accepted.
    user -> the username to connect to the database with.
    password, passwd -> the password to connect with
    host -> the hostname of the database
    database or db -> the name of the database

    The "database"/"db" and "password"/"passwd" spellings are translated to
    whichever form the chosen driver is given here; any other keyword
    arguments are handed to the driver's connect() unchanged. For sqlite3
    only the database name is passed to connect().

    Returns a DBServer wrapping the new connection.

    Raises ValueError if the "psycopg" (version one) driver is requested.
    """
    global _POSTGRES_RULES_PRESENT

    if driver == "psycopg":
        raise ValueError("Using BioSQL with psycopg (version one) is no "
                         "longer supported. Use psycopg2 instead.")

    # With an empty fromlist __import__ returns the top-level package for
    # a dotted name, which would not have the driver's connect(); a
    # non-empty fromlist makes it return the named module itself.
    module = __import__(driver, fromlist=["connect"])
    connect = getattr(module, "connect")

    # Different drivers use different keywords...
    kw = kwargs.copy()
    if driver == "MySQLdb":
        if "database" in kw:
            kw["db"] = kw.pop("database")
        if "password" in kw:
            kw["passwd"] = kw.pop("password")
    else:
        # DB-API recommendations
        if "db" in kw:
            kw["database"] = kw.pop("db")
        if "passwd" in kw:
            kw["password"] = kw.pop("passwd")
    if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
        kw["database"] = "template1"

    # SQLite connect takes the database name as input
    if driver in ["sqlite3"]:
        conn = connect(kw["database"])
    else:
        try:
            conn = connect(**kw)
        except module.InterfaceError:
            # Ok, so let's try building a DSN
            # (older releases of psycopg need this)
            if "database" in kw:
                kw["dbname"] = kw.pop("database")
            elif "db" in kw:
                kw["dbname"] = kw.pop("db")
            # Format with %s rather than str.join so that non-string
            # values (an integer port, say) do not raise TypeError.
            dsn = " ".join(["%s=%s" % item for item in kw.items()])
            conn = connect(dsn)

    server = DBServer(conn, module)

    # TODO - Remove the following once BioSQL Bug 2839 is fixed.
    # Test for RULES in PostgreSQL schema, see also Bug 2833.
    if driver in ["psycopg2", "pgdb"]:
        sql = "SELECT ev_class FROM pg_rewrite WHERE " + \
              "rulename='rule_bioentry_i1' OR " + \
              "rulename='rule_bioentry_i2';"
        if server.adaptor.execute_and_fetchall(sql):
            import warnings
            warnings.warn("Your BioSQL PostgreSQL schema includes some "
                          "rules currently required for bioperl-db but "
                          "which may cause problems loading data using "
                          "Biopython (see BioSQL Bug 2839). If you do not "
                          "use BioPerl, please remove these rules. "
                          "Biopython should cope with the rules present, "
                          "but with a performance penalty when loading "
                          "new records.")
            _POSTGRES_RULES_PRESENT = True

    return server
00105 
class DBServer:
    """Represents a BioSQL database containing namespaces (sub-databases).

    This acts like a Python dictionary, giving access to each namespace
    (defined by a row in the biodatabase table) as a BioSeqDatabase object.
    """
    def __init__(self, conn, module, module_name=None):
        """Wrap an open DB-API connection.

        conn -> the open connection object.
        module -> the driver module the connection came from.
        module_name -> name used to pick the DBUtils helper and the schema
        loading strategy; defaults to module.__name__.
        """
        self.module = module
        if module_name is None:
            module_name = module.__name__
        self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name))
        self.module_name = module_name

    def __repr__(self):
        return self.__class__.__name__ + "(%r)" % self.adaptor.conn

    def __getitem__(self, name):
        """Return the BioSeqDatabase for the named namespace."""
        return BioSeqDatabase(self.adaptor, name)

    def __len__(self):
        """Number of namespaces (sub-databases) in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase;"
        return int(self.adaptor.execute_and_fetch_col0(sql)[0])

    def __contains__(self, value):
        """Check if a namespace (sub-database) is in this database."""
        sql = "SELECT COUNT(name) FROM biodatabase WHERE name=%s;"
        return bool(self.adaptor.execute_and_fetch_col0(sql, (value,))[0])

    def __iter__(self):
        """Iterate over namespaces (sub-databases) in the database."""
        #TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_biodatabase_names())

    if hasattr(dict, "iteritems"):
        #Python 2, use iteritems etc
        def keys(self):
            """List of namespaces (sub-databases) in the database."""
            return self.adaptor.list_biodatabase_names()

        def values(self):
            """List of BioSeqDatabase objects in the database."""
            return [self[key] for key in self.keys()]

        def items(self):
            """List of (namespace, BioSeqDatabase) for entries in the database."""
            return [(key, self[key]) for key in self.keys()]

        def iterkeys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def itervalues(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]
    else:
        #Python 3, items etc are all iterators
        def keys(self):
            """Iterate over namespaces (sub-databases) in the database."""
            return iter(self)

        def values(self):
            """Iterate over BioSeqDatabase objects in the database."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (namespace, BioSeqDatabase) in the database."""
            for key in self:
                yield key, self[key]

    def _remove_namespace(self, db_name):
        """Delete the named namespace and its entries via Loader.DatabaseRemover."""
        db_id = self.adaptor.fetch_dbid_by_dbname(db_name)
        remover = Loader.DatabaseRemover(self.adaptor, db_id)
        remover.remove()

    def __delitem__(self, name):
        """Remove a namespace and all its entries.

        Raises KeyError if there is no namespace with this name.
        """
        if name not in self:
            raise KeyError(name)
        # Go straight to the removal helper: routing through the obsolete
        # remove_database() would warn callers to use the very syntax
        # they are already using.
        self._remove_namespace(name)

    def remove_database(self, db_name):
        """Remove a namespace and all its entries (OBSOLETE).

        Try to remove all references to items in a database.

        server.remove_database(name)

        In keeping with the dictionary interface, you can now do this:

        del server[name]
        """
        import warnings
        warnings.warn("This method is obsolete.  In keeping with the dictionary interface, you can now use 'del server[name]' instead", PendingDeprecationWarning)
        self._remove_namespace(db_name)

    def new_database(self, db_name, authority=None, description=None):
        """Add a new database to the server and return it.

        Inserts one row into the biodatabase table and returns the
        matching BioSeqDatabase object.
        """
        # make the database
        sql = r"INSERT INTO biodatabase (name, authority, description)" \
              r" VALUES (%s, %s, %s)"
        self.adaptor.execute(sql, (db_name, authority, description))
        return BioSeqDatabase(self.adaptor, db_name)

    def load_database_sql(self, sql_file):
        """Load a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.

        Lines starting with "--" or "#" and blank lines are dropped; the
        remaining lines are stripped and joined with single spaces.

        Raises ValueError if self.module_name is not one of the drivers
        handled below.
        """
        # Not sophisticated enough for PG schema. Is it needed by MySQL?
        # Looks like we need this more complicated way for both. Leaving it
        # the default and removing the simple-minded approach.

        # "rU" asks Python 2 for universal newlines.  Python 3 text mode
        # does that by default, and Python 3.11 rejects the "U" flag.
        if hasattr(dict, "iteritems"):
            mode = "rU"
        else:
            mode = "r"

        # read the file with all comment lines removed
        kept = []
        sql_handle = open(sql_file, mode)
        try:
            for line in sql_handle:
                # don't include SQL ("--") or MySQL ("#") comment lines
                if line.startswith(("--", "#")):
                    continue
                stripped = line.strip()
                if stripped: # only include non-blank lines
                    kept.append(stripped)
        finally:
            # always release the file, even if reading fails part-way
            sql_handle.close()
        sql = "".join([part + " " for part in kept])

        # two ways to load the SQL
        # 1. PostgreSQL can load it all at once and actually needs to
        # due to FUNCTION defines at the end of the SQL which mess up
        # the splitting by semicolons
        if self.module_name in ["psycopg2", "pgdb"]:
            self.adaptor.cursor.execute(sql)
        # 2. MySQL needs the database loading split up into single lines of
        # SQL executed one at a time
        elif self.module_name in ["MySQLdb", "sqlite3"]:
            sql_parts = sql.split(";") # one line per sql command
            for sql_line in sql_parts[:-1]: # don't use the last item, it's blank
                self.adaptor.cursor.execute(sql_line)
        else:
            raise ValueError("Module %s not supported by the loader." %
                    (self.module_name))

    def commit(self):
        """Commits the current transaction to the database."""
        return self.adaptor.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.adaptor.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.adaptor.close()
00265 
class Adaptor:
    """Pairs a DB-API connection and one cursor on it with a dbutils helper.

    SQL is never sent to the cursor directly from here: execute() hands the
    cursor, statement and arguments to the dbutils object.
    """
    def __init__(self, conn, dbutils):
        self.conn = conn
        self.dbutils = dbutils
        self.cursor = conn.cursor()

    def last_id(self, table):
        """Ask the dbutils helper for the last id of the given table."""
        return self.dbutils.last_id(self.cursor, table)

    def autocommit(self, y=True):
        """Set the autocommit mode. True values enable; False value disable."""
        return self.dbutils.autocommit(self.conn, y)

    def commit(self):
        """Commits the current transaction."""
        return self.conn.commit()

    def rollback(self):
        """Rolls backs the current transaction."""
        return self.conn.rollback()

    def close(self):
        """Close the connection. No further activity possible."""
        return self.conn.close()

    def _fetch_bioentry_rows(self, sql, fields, dbid):
        """Run a bioentry query, restricted to one namespace when dbid is true.

        Appends the biodatabase_id condition and value only for a true
        dbid, executes the statement and returns all rows.
        """
        if dbid:
            sql += " and biodatabase_id = %s"
            fields.append(dbid)
        self.execute(sql, fields)
        return self.cursor.fetchall()

    def fetch_dbid_by_dbname(self, dbname):
        """Return the biodatabase_id of the named namespace.

        Raises KeyError if no biodatabase row has that name.
        """
        query = r"select biodatabase_id from biodatabase where name = %s"
        self.execute(query, (dbname,))
        rows = self.cursor.fetchall()
        if rows:
            # The first row is used; more than one is not checked for.
            return rows[0][0]
        raise KeyError("Cannot find biodatabase with name %r" % dbname)

    def fetch_seqid_by_display_id(self, dbid, name):
        """Return the bioentry_id whose name column equals name.

        Raises IndexError if there is no match or more than one.
        """
        rows = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where name = %s",
            [name], dbid)
        if not rows:
            raise IndexError("Cannot find display id %r" % name)
        if len(rows) > 1:
            raise IndexError("More than one entry with display id %r" % name)
        return rows[0][0]

    def fetch_seqid_by_accession(self, dbid, name):
        """Return the bioentry_id whose accession column equals name.

        Raises IndexError if there is no match or more than one.
        """
        rows = self._fetch_bioentry_rows(
            r"select bioentry_id from bioentry where accession = %s",
            [name], dbid)
        if not rows:
            raise IndexError("Cannot find accession %r" % name)
        if len(rows) > 1:
            raise IndexError("More than one entry with accession %r" % name)
        return rows[0][0]

    def fetch_seqids_by_accession(self, dbid, name):
        """Return a list of every bioentry_id with the given accession."""
        query = r"select bioentry_id from bioentry where accession = %s"
        params = [name]
        if dbid:
            query += " and biodatabase_id = %s"
            params.append(dbid)
        return self.execute_and_fetch_col0(query, params)

    def fetch_seqid_by_version(self, dbid, name):
        """Return the bioentry_id for an "accession.version" string.

        A name without a dot is looked up with version "0".
        Raises IndexError for more than one dot, no match, or several
        matches.
        """
        pieces = name.split(".")
        if len(pieces) > 2:
            raise IndexError("Bad version %r" % name)
        acc = pieces[0]
        version = pieces[1] if len(pieces) == 2 else "0"
        rows = self._fetch_bioentry_rows(
            "SELECT bioentry_id FROM bioentry WHERE accession = %s"
            " AND version = %s",
            [acc, version], dbid)
        if not rows:
            raise IndexError("Cannot find version %r" % name)
        if len(rows) > 1:
            raise IndexError("More than one entry with version %r" % name)
        return rows[0][0]

    def fetch_seqid_by_identifier(self, dbid, identifier):
        """Return the first bioentry_id whose identifier column matches.

        Raises IndexError if there is no match.
        """
        # YB: was fetch_seqid_by_seqid
        rows = self._fetch_bioentry_rows(
            "SELECT bioentry_id FROM bioentry WHERE identifier = %s",
            [identifier], dbid)
        if rows:
            return rows[0][0]
        raise IndexError("Cannot find display id %r" % identifier)

    def list_biodatabase_names(self):
        """Return the name of every row in the biodatabase table."""
        query = "SELECT name FROM biodatabase"
        return self.execute_and_fetch_col0(query)

    def list_bioentry_ids(self, dbid):
        """Return every bioentry_id belonging to the given namespace id."""
        query = "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s"
        return self.execute_and_fetch_col0(query, (dbid,))

    def list_bioentry_display_ids(self, dbid):
        """Return the name column of every bioentry in the given namespace id."""
        query = "SELECT name FROM bioentry WHERE biodatabase_id = %s"
        return self.execute_and_fetch_col0(query, (dbid,))

    def list_any_ids(self, sql, args):
        """Return ids given a SQL statement to select for them.

        The statement is expected to be a SELECT; the first column of
        each returned row is collected into a flat list.
        """
        return self.execute_and_fetch_col0(sql, args)

    def execute_one(self, sql, args=None):
        """Execute a statement that must yield exactly one row; return that row."""
        self.execute(sql, args or ())
        rows = self.cursor.fetchall()
        assert len(rows) == 1, "Expected 1 response, got %d" % len(rows)
        return rows[0]

    def execute(self, sql, args=None):
        """Just execute an sql command.
        """
        self.dbutils.execute(self.cursor, sql, args)

    def get_subseq_as_string(self, seqid, start, end):
        """Return seq[start:end] of the given bioentry as a str.

        start and end are zero-based with end exclusive; the query is
        given start+1 and the length.
        """
        # XXX Check this on MySQL and PostgreSQL. substr should be general,
        # does it need dbutils?  (A SUBSTRING(seq FROM .. FOR ..) form was
        # used here previously.)
        row = self.execute_one(
            """select SUBSTR(seq, %s, %s)
                     from biosequence where bioentry_id = %s""",
            (start+1, end - start, seqid))
        # Convert to a string on returning for databases that give back
        # unicode. Shouldn't need unicode for sequences so this seems safe.
        return str(row[0])

    def execute_and_fetch_col0(self, sql, args=None):
        """Execute a statement and return the first column of every row."""
        self.execute(sql, args or ())
        rows = self.cursor.fetchall()
        return [row[0] for row in rows]

    def execute_and_fetchall(self, sql, args=None):
        """Execute a statement and return every row."""
        self.execute(sql, args or ())
        return self.cursor.fetchall()
00431 
# Maps each keyword accepted by BioSeqDatabase.lookup() to the name of the
# Adaptor method that resolves a value of that kind to a bioentry_id.
# Several keywords are aliases for the same method ('primary_id' and 'gi';
# 'display_id' and 'name').
_allowed_lookups = {
    # Lookup name / name of the Adaptor method used to get the id
    'primary_id': "fetch_seqid_by_identifier",
    'gi':         "fetch_seqid_by_identifier",
    'display_id': "fetch_seqid_by_display_id",
    'name':       "fetch_seqid_by_display_id",
    'accession':  "fetch_seqid_by_accession",
    'version':    "fetch_seqid_by_version",
    }
00441 
class BioSeqDatabase:
    """Represents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all rows in the bioentry
    table associated with it.

    Acts like a dictionary keyed on the bioentry_id primary key, with
    DBSeqRecord objects as the values.
    """
    def __init__(self, adaptor, name):
        """Bind to the namespace called name.

        The namespace id is looked up immediately through
        adaptor.fetch_dbid_by_dbname(name), which raises KeyError when no
        such namespace exists.
        """
        self.adaptor = adaptor
        self.name = name
        self.dbid = self.adaptor.fetch_dbid_by_dbname(name)

    def __repr__(self):
        return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)

    def get_Seq_by_id(self, name):
        """Gets a DBSeqRecord object by its name

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_acc(self, name):
        """Gets a DBSeqRecord object by accession number

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_ver(self, name):
        """Gets a DBSeqRecord object by version number

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seqs_by_acc(self, name):
        """Gets a list of DBSeqRecord objects by accession number

        Example: seq_recs = db.get_Seqs_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of DBSeq objects, and presumably
        was to mirror BioPerl.
        """
        seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
        return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]

    def get_all_primary_ids(self):
        """All the primary_ids of the sequences in the database (OBSOLETE).

        These maybe ids (display style) or accession numbers or
        something else completely different - they *are not*
        meaningful outside of this database implementation.

        Please use .keys() instead of .get_all_primary_ids()
        """
        import warnings
        warnings.warn("Use bio_seq_database.keys() instead of "
                      "bio_seq_database.get_all_primary_ids()",
                      PendingDeprecationWarning)
        return self.keys()

    def __getitem__(self, key):
        """Return a DBSeqRecord for the given primary (internal) id.

        NOTE: no check is made here that the id belongs to this namespace.
        """
        return BioSeq.DBSeqRecord(self.adaptor, key)

    def __delitem__(self, key):
        """Remove an entry and all its annotation.

        Raises KeyError if the id is not in this namespace.
        """
        if key not in self:
            raise KeyError(key)
        #Assuming this will automatically cascade to the other tables...
        sql = "DELETE FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        self.adaptor.execute(sql, (self.dbid, key))

    def __len__(self):
        """Number of records in this namespace (sub database)."""
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s;"
        return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid,))[0])

    def __contains__(self, value):
        """Check if a primary (internal) id is this namespace (sub database).

        Values that cannot be converted to an integer (including None)
        are reported as absent rather than raising.
        """
        sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \
              "WHERE biodatabase_id=%s AND bioentry_id=%s;"
        #The bioentry_id field is an integer in the schema.
        #PostgreSQL will throw an error if we use a non integer in the query.
        try:
            bioentry_id = int(value)
        except (TypeError, ValueError):
            # int() raises TypeError for None and other non-numeric types,
            # ValueError for non-numeric strings; neither can be a key.
            return False
        return bool(self.adaptor.execute_and_fetch_col0(sql,
                                                  (self.dbid, bioentry_id))[0])

    def __iter__(self):
        """Iterate over ids (which may not be meaningful outside this database)."""
        #TODO - Iterate over the cursor, much more efficient
        return iter(self.adaptor.list_bioentry_ids(self.dbid))

    if hasattr(dict, "iteritems"):
        #Python 2, use iteritems etc
        def keys(self):
            """List of ids which may not be meaningful outside this database."""
            return self.adaptor.list_bioentry_ids(self.dbid)

        def values(self):
            """List of DBSeqRecord objects in the namespace (sub database)."""
            return [self[key] for key in self.keys()]

        def items(self):
            """List of (id, DBSeqRecord) for the namespace (sub database)."""
            return [(key, self[key]) for key in self.keys()]

        def iterkeys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def itervalues(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def iteritems(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]
    else:
        #Python 3, items etc are all iterators
        def keys(self):
            """Iterate over ids (which may not be meaningful outside this database)."""
            return iter(self)

        def values(self):
            """Iterate over DBSeqRecord objects in the namespace (sub database)."""
            for key in self:
                yield self[key]

        def items(self):
            """Iterate over (id, DBSeqRecord) for the namespace (sub database)."""
            for key in self:
                yield key, self[key]

    def lookup(self, **kwargs):
        """Return the DBSeqRecord matching a single key=value criterion.

        Exactly one keyword argument is expected, and its name must be a
        key of the module's _allowed_lookups table, e.g.

        seq_rec = db.lookup(accession='X77802')

        Raises TypeError for a wrong number of arguments or an unknown
        lookup name; the Adaptor method doing the lookup may raise
        IndexError.
        """
        if len(kwargs) != 1:
            raise TypeError("single key/value parameter expected")
        # list() is needed on Python 3, where items() is not indexable
        k, v = list(kwargs.items())[0]
        if k not in _allowed_lookups:
            allowed = ", ".join([repr(name)
                                 for name in sorted(_allowed_lookups)])
            raise TypeError("lookup() expects one of %s, not %r" % \
                            (allowed, k))
        lookup_name = _allowed_lookups[k]
        lookup_func = getattr(self.adaptor, lookup_name)
        seqid = lookup_func(self.dbid, v)
        return BioSeq.DBSeqRecord(self.adaptor, seqid)

    def get_Seq_by_primary_id(self, seqid):
        """Get a DBSeqRecord by the primary (internal) id (OBSOLETE).

        Rather than db.get_Seq_by_primary_id(my_id) use db[my_id]

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a DBSeq object, and presumably was to mirror BioPerl.
        """
        import warnings
        warnings.warn("Use bio_seq_database[my_id] instead of "
                      "bio_seq_database.get_Seq_by_primary_id(my_id)",
                      PendingDeprecationWarning)
        return self[seqid]

    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.

        When the module flag _POSTGRES_RULES_PRESENT is set, each record
        is first checked against the bioentry table and the connection's
        IntegrityError is raised if a matching row already exists.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid, \
                                          fetch_NCBI_taxonomy)
        num_records = 0
        for cur_record in record_iterator:
            num_records += 1
            #Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            #the RULES are present check for a duplicate record before loading
            if _POSTGRES_RULES_PRESENT:
                #Recreate what the Loader's _load_bioentry_table will do:
                if cur_record.id.count(".") == 1:
                    accession, version = cur_record.id.split('.')
                    try:
                        version = int(version)
                    except ValueError:
                        accession = cur_record.id
                        version = 0
                else:
                    accession = cur_record.id
                    version = 0
                gi = cur_record.annotations.get("gi", None)
                if gi is not None:
                    # Keep the value textual, as the previous quoted '%s'
                    # formatting did.  A missing gi is sent as NULL and so
                    # matches nothing, instead of matching the text 'None'.
                    gi = "%s" % (gi,)
                # Values go to the driver as parameters rather than being
                # pasted into the statement, so quotes in a record id
                # cannot alter the SQL.
                sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \
                      "= %s AND biodatabase_id = %s) OR (accession = " + \
                      "%s AND version = %s AND biodatabase_id = %s)"
                self.adaptor.execute(sql, (gi, self.dbid, accession,
                                           version, self.dbid))
                if self.adaptor.cursor.fetchone():
                    raise self.adaptor.conn.IntegrityError("Duplicate record "
                                     "detected: record has not been inserted")
            #End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records