Back to index

python-biopython  1.60
utils.py
Go to the documentation of this file.
00001 """Useful utilities for helping in parsing GenBank files.
00002 """
class FeatureValueCleaner(object):
    """Provide specialized capabilities for cleaning up values in features.

    This class provides a mechanism to clean up and process the values in
    the key/value pairs of GenBank features. This is useful because a
    value that is wrapped over several lines in the file, such as:

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    ends up as a single string with embedded newlines and spaces, like
    "MED", a newline, some spaces, and then "YDPWNL...", which you
    probably don't want.

    The cleaning has to be done on a key by key basis, since it is
    impossible to know in general whether the pieces should be
    concatenated directly (as in translations) or joined with spaces (as
    might be the case with /note values).

    To support another key, add its name to the list handed to the
    constructor and define a method named ``_clean_<key>`` that takes the
    raw value and returns the cleaned one.
    """

    # Keys cleaned when the constructor is called without arguments.
    keys_to_process = ["translation"]

    def __init__(self, to_process = keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - container of key names whose values should be
           cleaned. It is stored as given (not copied).
        """
        self._to_process = to_process

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Returns the cleaned value if key_name is one of the keys this
        instance processes; otherwise the original value is returned
        untouched.

        Raises AssertionError if key_name is set to be processed but no
        matching ``_clean_<key_name>`` method exists. Exceptions raised by
        the cleaning method itself are propagated unchanged.
        """
        if key_name not in self._to_process:
            return value

        # Guard only the lookup: an AttributeError raised *inside* the
        # cleaner (e.g. a non-string value) must not be misreported as a
        # missing cleaning function.
        try:
            cleaner = getattr(self, "_clean_%s" % key_name)
        except AttributeError:
            raise AssertionError("No function to clean key: %s"
                                 % key_name)
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string.

        All whitespace (spaces, tabs, newlines) is removed, so the pieces
        of a line-wrapped translation are joined directly together.
        """
        return "".join(value.split())