Back to index

python-biopython  1.60
Classes | Functions | Variables
BioSQL.BioSeqDatabase Namespace Reference

Classes

class  DBServer
class  Adaptor
class  BioSeqDatabase

Functions

def open_database
def fetch_seqid_by_display_id
 assert len(rv) == 1, "More than one biodatabase with name %r" % dbname
def fetch_seqid_by_accession
def fetch_seqids_by_accession
def fetch_seqid_by_version
def fetch_seqid_by_identifier
def list_biodatabase_names
def list_bioentry_ids
def list_bioentry_display_ids
def list_any_ids
def execute_one
def execute
def get_subseq_as_string
def execute_and_fetch_col0
def execute_and_fetchall

Variables

 _POSTGRES_RULES_PRESENT = False
dictionary _allowed_lookups

Function Documentation

def BioSQL.BioSeqDatabase.execute (   self,
  sql,
  args = None 
)
Just execute an sql command.

Definition at line 403 of file BioSeqDatabase.py.

00403 
00404     def execute(self, sql, args=None):
00405         """Just execute an sql command.
00406         """
00407         self.dbutils.execute(self.cursor, sql, args)

def BioSQL.BioSeqDatabase.execute_and_fetch_col0 (   self,
  sql,
  args = None 
)

Definition at line 424 of file BioSeqDatabase.py.

00424 
00425     def execute_and_fetch_col0(self, sql, args=None):
00426         self.execute(sql, args or ())
00427         return [field[0] for field in self.cursor.fetchall()]

def BioSQL.BioSeqDatabase.execute_and_fetchall (   self,
  sql,
  args = None 
)

Definition at line 428 of file BioSeqDatabase.py.

00428 
00429     def execute_and_fetchall(self, sql, args=None):
00430         self.execute(sql, args or ())
00431         return self.cursor.fetchall()

def BioSQL.BioSeqDatabase.execute_one (   self,
  sql,
  args = None 
)

Definition at line 397 of file BioSeqDatabase.py.

00397 
00398     def execute_one(self, sql, args=None):
00399         self.execute(sql, args or ())
00400         rv = self.cursor.fetchall()
00401         assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
00402         return rv[0]

def BioSQL.BioSeqDatabase.fetch_seqid_by_accession (   self,
  dbid,
  name 
)

Definition at line 316 of file BioSeqDatabase.py.

00316 
00317     def fetch_seqid_by_accession(self, dbid, name):
00318         sql = r"select bioentry_id from bioentry where accession = %s"
00319         fields = [name]
00320         if dbid:
00321             sql += " and biodatabase_id = %s"
00322             fields.append(dbid)
00323         self.execute(sql, fields)
00324         rv = self.cursor.fetchall()
00325         if not rv:
00326             raise IndexError("Cannot find accession %r" % name)
00327         if len(rv) > 1:
00328             raise IndexError("More than one entry with accession %r" % name)
00329         return rv[0][0]

def BioSQL.BioSeqDatabase.fetch_seqid_by_display_id (   self,
  dbid,
  name 
)

assert len(rv) == 1, "More than one biodatabase with name %r" % dbname

Definition at line 302 of file BioSeqDatabase.py.

00302 
00303     def fetch_seqid_by_display_id(self, dbid, name):
00304         sql = r"select bioentry_id from bioentry where name = %s"
00305         fields = [name]
00306         if dbid:
00307             sql += " and biodatabase_id = %s"
00308             fields.append(dbid)
00309         self.execute(sql, fields)
00310         rv = self.cursor.fetchall()
00311         if not rv:
00312             raise IndexError("Cannot find display id %r" % name)
00313         if len(rv) > 1:
00314             raise IndexError("More than one entry with display id %r" % name)
00315         return rv[0][0]

def BioSQL.BioSeqDatabase.fetch_seqid_by_identifier (   self,
  dbid,
  identifier 
)

Definition at line 361 of file BioSeqDatabase.py.

00361 
00362     def fetch_seqid_by_identifier(self, dbid, identifier):
00363         # YB: was fetch_seqid_by_seqid
00364         sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s"
00365         fields = [identifier]
00366         if dbid:
00367             sql += " and biodatabase_id = %s"
00368             fields.append(dbid)
00369         self.execute(sql, fields)
00370         rv = self.cursor.fetchall()
00371         if not rv:
00372             raise IndexError("Cannot find display id %r" % identifier)
00373         return rv[0][0]

def BioSQL.BioSeqDatabase.fetch_seqid_by_version (   self,
  dbid,
  name 
)

Definition at line 338 of file BioSeqDatabase.py.

00338 
00339     def fetch_seqid_by_version(self, dbid, name):
00340         acc_version = name.split(".")
00341         if len(acc_version) > 2:
00342             raise IndexError("Bad version %r" % name)
00343         acc = acc_version[0]
00344         if len(acc_version) == 2:
00345             version = acc_version[1]
00346         else:
00347             version = "0"
00348         sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \
00349               r" AND version = %s"
00350         fields = [acc, version]
00351         if dbid:
00352             sql += " and biodatabase_id = %s"
00353             fields.append(dbid)
00354         self.execute(sql, fields)
00355         rv = self.cursor.fetchall()
00356         if not rv:
00357             raise IndexError("Cannot find version %r" % name)
00358         if len(rv) > 1:
00359             raise IndexError("More than one entry with version %r" % name)
00360         return rv[0][0]

def BioSQL.BioSeqDatabase.fetch_seqids_by_accession (   self,
  dbid,
  name 
)

Definition at line 330 of file BioSeqDatabase.py.

00330 
00331     def fetch_seqids_by_accession(self, dbid, name):
00332         sql = r"select bioentry_id from bioentry where accession = %s"
00333         fields = [name]
00334         if dbid:
00335             sql += " and biodatabase_id = %s"
00336             fields.append(dbid)
00337         return self.execute_and_fetch_col0(sql, fields)

def BioSQL.BioSeqDatabase.get_subseq_as_string (   self,
  seqid,
  start,
  end 
)

Definition at line 408 of file BioSeqDatabase.py.

00408 
00409     def get_subseq_as_string(self, seqid, start, end):
00410         length = end - start
00411         # XXX Check this on MySQL and PostgreSQL. substr should be general,
00412         # does it need dbutils?
00413         #return self.execute_one(
00414         #    """select SUBSTRING(seq FROM %s FOR %s)
00415         #             from biosequence where bioentry_id = %s""",
00416         #    (start+1, length, seqid))[0]
00417         # 
00418         # Convert to a string on returning for databases that give back
00419         # unicode. Shouldn't need unicode for sequences so this seems safe.
00420         return str(self.execute_one(
00421             """select SUBSTR(seq, %s, %s)
00422                      from biosequence where bioentry_id = %s""",
00423             (start+1, length, seqid))[0])

def BioSQL.BioSeqDatabase.list_any_ids (   self,
  sql,
  args 
)
Return ids given a SQL statement to select for them.

This assumes that the given SQL does a SELECT statement that
returns a list of items. This parses them out of the 2D list
they come as and just returns them in a list.

Definition at line 388 of file BioSeqDatabase.py.

00388 
00389     def list_any_ids(self, sql, args):
00390         """Return ids given a SQL statement to select for them.
00391         
00392         This assumes that the given SQL does a SELECT statement that
00393         returns a list of items. This parses them out of the 2D list
00394         they come as and just returns them in a list.
00395         """
00396         return self.execute_and_fetch_col0(sql, args)

def BioSQL.BioSeqDatabase.list_biodatabase_names (   self )

Definition at line 374 of file BioSeqDatabase.py.

00374 
00375     def list_biodatabase_names(self):
00376         return self.execute_and_fetch_col0(
00377             "SELECT name FROM biodatabase")

def BioSQL.BioSeqDatabase.list_bioentry_display_ids (   self,
  dbid 
)

Definition at line 383 of file BioSeqDatabase.py.

00383 
00384     def list_bioentry_display_ids(self, dbid):
00385         return self.execute_and_fetch_col0(
00386             "SELECT name FROM bioentry WHERE biodatabase_id = %s",
00387             (dbid,))

def BioSQL.BioSeqDatabase.list_bioentry_ids (   self,
  dbid 
)

Definition at line 378 of file BioSeqDatabase.py.

00378 
00379     def list_bioentry_ids(self, dbid):
00380         return self.execute_and_fetch_col0(
00381             "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
00382             (dbid,))

def BioSQL.BioSeqDatabase.open_database (   driver = "MySQLdb",
  **kwargs 
)
Main interface for loading an existing BioSQL-style database.

This function is the easiest way to retrieve a connection to a
database, doing something like:
    
    >>> from BioSQL import BioSeqDatabase
    >>> server = BioSeqDatabase.open_database(user="root", db="minidb")

The various options are:
driver -> The name of the database driver to use for connecting. The
driver should implement the Python DB API. By default, the MySQLdb
driver is used.
user -> The username to connect to the database with.
password, passwd -> The password to connect with.
host -> The hostname of the database.
database or db -> The name of the database.

Definition at line 21 of file BioSeqDatabase.py.

00021 
00022 def open_database(driver = "MySQLdb", **kwargs):
00023     """Main interface for loading a existing BioSQL-style database.
00024 
00025     This function is the easiest way to retrieve a connection to a
00026     database, doing something like:
00027         
00028         >>> from BioSeq import BioSeqDatabase
00029         >>> server = BioSeqDatabase.open_database(user="root", db="minidb")
00030 
00031     the various options are:
00032     driver -> The name of the database driver to use for connecting. The
00033     driver should implement the python DB API. By default, the MySQLdb
00034     driver is used.
00035     user -> the username to connect to the database with.
00036     password, passwd -> the password to connect with
00037     host -> the hostname of the database
00038     database or db -> the name of the database
00039     """
00040     if driver == "psycopg":
00041         raise ValueError("Using BioSQL with psycopg (version one) is no "
00042                          "longer supported. Use psycopg2 instead.")
00043 
00044     module = __import__(driver)
00045     connect = getattr(module, "connect")
00046 
00047     # Different drivers use different keywords...
00048     kw = kwargs.copy()
00049     if driver == "MySQLdb":
00050         if "database" in kw:
00051             kw["db"] = kw["database"]
00052             del kw["database"]
00053         if "password" in kw:
00054             kw["passwd"] = kw["password"]
00055             del kw["password"]
00056     else:
00057         # DB-API recommendations
00058         if "db" in kw:
00059             kw["database"] = kw["db"]
00060             del kw["db"]
00061         if "passwd" in kw:
00062             kw["password"] = kw["passwd"]
00063             del kw["passwd"]
00064     if driver in ["psycopg2", "pgdb"] and not kw.get("database"):
00065         kw["database"] = "template1"
00066     # SQLite connect takes the database name as input
00067     if driver in ["sqlite3"]:
00068         conn = connect(kw["database"])
00069     else:
00070         try:
00071             conn = connect(**kw)
00072         except module.InterfaceError:
00073             # Ok, so let's try building a DSN
00074             # (older releases of psycopg need this)
00075             if "database" in kw:
00076                 kw["dbname"] = kw["database"]
00077                 del kw["database"]
00078             elif "db" in kw:
00079                 kw["dbname"] = kw["db"]
00080                 del kw["db"]
00081             dsn = ' '.join(['='.join(i) for i in kw.items()])
00082             conn = connect(dsn)
00083 
00084     server = DBServer(conn, module)
00085 
00086     # TODO - Remove the following once BioSQL Bug 2839 is fixed.
00087     # Test for RULES in PostgreSQL schema, see also Bug 2833.
00088     if driver in ["psycopg2", "pgdb"]:
00089         sql = "SELECT ev_class FROM pg_rewrite WHERE " + \
00090               "rulename='rule_bioentry_i1' OR " + \
00091               "rulename='rule_bioentry_i2';"
00092         if server.adaptor.execute_and_fetchall(sql):
00093             import warnings
00094             warnings.warn("Your BioSQL PostgreSQL schema includes some "
00095                           "rules currently required for bioperl-db but "
00096                           "which may cause problems loading data using "
00097                           "Biopython (see BioSQL Bug 2839). If you do not "
00098                           "use BioPerl, please remove these rules. "
00099                           "Biopython should cope with the rules present, "
00100                           "but with a performance penalty when loading "
00101                           "new records.")
00102             global _POSTGRES_RULES_PRESENT
00103             _POSTGRES_RULES_PRESENT = True
00104 
00105     return server


Variable Documentation

dictionary BioSQL.BioSeqDatabase._allowed_lookups

Initial value:
00001 {
00002     # Lookup name / function name to get id, function to list all ids
00003     'primary_id': "fetch_seqid_by_identifier",
00004     'gi':         "fetch_seqid_by_identifier",
00005     'display_id': "fetch_seqid_by_display_id",
00006     'name':       "fetch_seqid_by_display_id",
00007     'accession':  "fetch_seqid_by_accession",
00008     'version':    "fetch_seqid_by_version",
00009     }

Definition at line 432 of file BioSeqDatabase.py.

BioSQL.BioSeqDatabase._POSTGRES_RULES_PRESENT = False

Definition at line 19 of file BioSeqDatabase.py.