Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 1999 by Jeffrey Chang.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """
00007 This module provides code to work with Medline.
00008 
00009 Classes:
00010 Record           A dictionary holding Medline data.
00011 
00012 Functions:
00013 read             Reads one Medline record
00014 parse            Allows you to iterate over a bunch of Medline records
00015 """
00016 
class Record(dict):
    """Dictionary of the fields found in a single Medline record.

    Each entry is keyed by the field mnemonic exactly as it appears in
    the Medline file.  The mnemonics have the following meanings:

    Mnemonic  Description
    --------  -----------
    AB        Abstract
    CI        Copyright Information
    AD        Affiliation
    IRAD      Investigator Affiliation
    AID       Article Identifier
    AU        Author
    FAU       Full Author
    CN        Corporate Author
    DCOM      Date Completed
    DA        Date Created
    LR        Date Last Revised
    DEP       Date of Electronic Publication
    DP        Date of Publication
    EDAT      Entrez Date
    GS        Gene Symbol
    GN        General Note
    GR        Grant Number
    IR        Investigator Name
    FIR       Full Investigator Name
    IS        ISSN
    IP        Issue
    TA        Journal Title Abbreviation
    JT        Journal Title
    LA        Language
    LID       Location Identifier
    MID       Manuscript Identifier
    MHDA      MeSH Date
    MH        MeSH Terms
    JID       NLM Unique ID
    RF        Number of References
    OAB       Other Abstract
    OCI       Other Copyright Information
    OID       Other ID
    OT        Other Term
    OTO       Other Term Owner
    OWN       Owner
    PG        Pagination
    PS        Personal Name as Subject
    FPS       Full Personal Name as Subject
    PL        Place of Publication
    PHST      Publication History Status
    PST       Publication Status
    PT        Publication Type
    PUBM      Publishing Model
    PMC       PubMed Central Identifier
    PMID      PubMed Unique Identifier
    RN        Registry Number/EC Number
    NM        Substance Name
    SI        Secondary Source ID
    SO        Source
    SFM       Space Flight Mission
    STAT      Status
    SB        Subset
    TI        Title
    TT        Transliterated Title
    VI        Volume
    CON       Comment on
    CIN       Comment in
    EIN       Erratum in
    EFR       Erratum for
    CRI       Corrected and Republished in
    CRF       Corrected and Republished from
    PRIN      Partial retraction in
    PROF      Partial retraction of
    RPI       Republished in
    RPF       Republished from
    RIN       Retraction in
    ROF       Retraction of
    UIN       Update in
    UOF       Update of
    SPIN      Summary for patients in
    ORI       Original report in
    """
00096 
00097 
def _join_text_fields(record, textkeys):
    """Replace the list stored under each text key with one joined string."""
    for textkey in textkeys:
        if textkey in record:
            record[textkey] = " ".join(record[textkey])


def parse(handle):
    """Read Medline records one by one from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing one or more Medline records.  Records are
    separated by one or more blank lines; leading, trailing and repeated
    blank lines are ignored.

    This is a generator: each item yielded is a Record.  Values stored
    under the keys listed in ``textkeys`` below are joined into a single
    space-separated string; values under every other key are lists with
    one string per line of the field.

    Raises ValueError (while iterating) if a continuation line, i.e. a
    line starting with six spaces, appears before any field line of the
    record it belongs to.

    Typical usage:

        from Bio import Medline
        handle = open("mymedlinefile")
        records = Medline.parse(handle)
        for record in records:
            print(record['TI'])

    """
    # These keys point to string values
    textkeys = ("ID", "PMID", "SO", "RF", "NI", "JC", "TA", "IS", "CY", "TT",
                "CA", "IP", "VI", "DP", "YR", "PG", "LID", "DA", "LR", "OWN",
                "STAT", "DCOM", "PUBM", "DEP", "PL", "JID", "SB", "PMC",
                "EDAT", "MHDA", "PST", "AB", "AD", "EA", "TI", "JT")
    record = Record()
    # Mnemonic of the field currently being read; None until the first
    # field line of the current record has been seen.
    key = None
    for line in handle:
        line = line.rstrip()
        if line[:6] == "      ":  # continuation line
            if key is None:
                raise ValueError("Continuation line found before any "
                                 "field line: %r" % line)
            record[key].append(line[6:])
        elif line:
            # Field line: mnemonic in the first four columns, value from
            # column seven onwards.
            key = line[:4].rstrip()
            if key not in record:
                record[key] = []
            record[key].append(line[6:])
        elif record:
            # A blank line terminates the record collected so far.
            _join_text_fields(record, textkeys)
            yield record
            record = Record()
            key = None
    # The last record need not be followed by a blank line.
    if record:
        _join_text_fields(record, textkeys)
        yield record
00151 
def read(handle):
    """Read a single Medline record from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing a Medline record.

    Returns the first record produced by parse(handle); any further
    records in the handle are not read.

    Raises StopIteration if the handle contains no record.

    Typical usage:

        from Bio import Medline
        handle = open("mymedlinefile")
        record = Medline.read(handle)
        print(record['TI'])

    """
    records = parse(handle)
    # Use the builtin next() rather than the Python 2-only .next() method
    # so this works on both Python 2.6+ and Python 3.
    return next(records)