'''
Module to define the mixins that support different persistence
patterns for external objects
- Dataframe saving (as tables in dedicated schema)
- Pickled Object saving
- In database as a binary blob
- To local filestore
- HDF5 object saving
- In database as a binary blob
- To local filestore
- Remote filestore saving
- S3
- Google Cloud
- Azure
'''
__author__ = 'Elisha Yadgaran'
from simpleml.persistables.binary_blob import BinaryBlob
from simpleml.utils.system_path import PICKLED_FILESTORE_DIRECTORY, HDF5_FILESTORE_DIRECTORY
from simpleml.persistables.meta_registry import KERAS_REGISTRY
from abc import ABCMeta, abstractmethod
import dill as pickle
from os.path import join
# Python 2/3 compatibility
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
from future.utils import with_metaclass
# Import optional dependencies
from simpleml import load_model, hickle
[docs]class ExternalSaveMixin(with_metaclass(ABCMeta, object)):
@abstractmethod
def _save_external_files(self):
'''
Define pattern for saving external files
'''
@abstractmethod
def _load_external_files(self):
'''
Define pattern for loading external files
should set the self._external_file attribute
'''
[docs]class DatabasePickleSaveMixin(ExternalSaveMixin):
'''
Mixin class to save binary objects to a database table
Expects the following available attributes:
- self._external_file
- self.id
- self.object_type
Sets the following attributes:
- self.filepaths
- self.unloaded_externals
'''
def _save_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._save_pickle_to_database()
def _load_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._load_pickle_from_database()
def _save_pickle_to_database(self):
'''
Shared method to save files into binary schema
Hardcoded to only store pickled objects in database so overwrite to use
other storage mechanism
'''
pickled_file = pickle.dumps(self._external_file, protocol=pickle.HIGHEST_PROTOCOL)
pickled_record = BinaryBlob.create(
object_type=self.object_type, object_id=self.id, binary_blob=pickled_file)
self.filepaths = {"database_pickled": [str(pickled_record.id)]}
def _load_pickle_from_database(self):
'''
Shared method to load files from database
Hardcoded to only pull from pickled so overwrite to use
other storage mechanism
'''
pickled_id = self.filepaths['database_pickled'][0]
pickled_file = BinaryBlob.find(pickled_id).binary_blob
self._external_file = pickle.loads(pickled_file)
# Indicate externals were loaded
self.unloaded_externals = False
[docs]class DiskPickleSaveMixin(ExternalSaveMixin):
'''
Mixin class to save objects to disk in pickled format
Expects the following available attributes:
- self._external_file
- self.id
Sets the following attributes:
- self.filepaths
- self.unloaded_externals
'''
def _save_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._save_pickle_to_disk()
def _load_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._load_pickle_from_disk()
def _save_pickle_to_disk(self):
'''
Shared method to save files to disk in pickled format
'''
with open(join(PICKLED_FILESTORE_DIRECTORY, str(self.id)) + '.pkl', 'wb') as pickled_file:
pickle.dump(self._external_file, pickled_file, protocol=pickle.HIGHEST_PROTOCOL)
self.filepaths = {"disk_pickled": [str(self.id) + '.pkl']}
def _load_pickle_from_disk(self):
'''
Shared method to load files from disk in pickled format
'''
pickled_id = self.filepaths['disk_pickled'][0]
with open(join(PICKLED_FILESTORE_DIRECTORY, pickled_id), 'rb') as pickled_file:
self._external_file = pickle.load(pickled_file)
# Indicate externals were loaded
self.unloaded_externals = False
[docs]class DiskHDF5SaveMixin(ExternalSaveMixin):
'''
Mixin class to save objects to disk in HDF5 format with hickle
Expects the following available attributes:
- self._external_file
- self.id
Sets the following attributes:
- self.filepaths
- self.unloaded_externals
'''
def _save_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._save_hdf5_to_disk()
def _load_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._load_hdf5_from_disk()
def _save_hdf5_to_disk(self):
'''
Shared method to save files to disk in hickle's HDF5 format
'''
filepath = join(HDF5_FILESTORE_DIRECTORY, str(self.id) + '.h5')
hickle.dump(self._external_file, filepath, compression='gzip', compression_opts=9)
self.filepaths = {"disk_hdf5": [str(self.id) + '.h5']}
def _load_hdf5_from_disk(self):
'''
Shared method to load files from disk in hickle's HDF5 format
'''
object_id = self.filepaths['disk_hdf5'][0]
self._external_file = hickle.load(join(HDF5_FILESTORE_DIRECTORY, object_id))
# Indicate externals were loaded
self.unloaded_externals = False
[docs]class KerasDiskHDF5SaveMixin(ExternalSaveMixin):
'''
Mixin class to save objects to disk in Keras's HDF5 format
Keras's internal persistence mechanism utilizes HDF5 and implements a custom pattern
Expects the following available attributes:
- self._external_file
- self.id
Sets the following attributes:
- self.filepaths
- self.unloaded_externals
'''
def _save_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._save_keras_hdf5_to_disk()
def _load_external_files(self):
'''
Unless overwritten only use this mixin's paradigm
'''
self._load_keras_hdf5_from_disk()
def _save_keras_hdf5_to_disk(self):
'''
Shared method to save files to disk in Keras's HDF5 format
'''
filepath = join(HDF5_FILESTORE_DIRECTORY, str(self.id) + '.h5')
self._external_file.save(filepath)
self.filepaths = {"disk_keras_hdf5": [str(self.id) + '.h5']}
def _load_keras_hdf5_from_disk(self):
'''
Shared method to load files from disk in Keras's HDF5 format
'''
object_id = self.filepaths['disk_keras_hdf5'][0]
self._external_file = load_model(
str(join(HDF5_FILESTORE_DIRECTORY, object_id)),
custom_objects=KERAS_REGISTRY.registry)
# Indicate externals were loaded
self.unloaded_externals = False
[docs]class AllSaveMixin(DataframeTableSaveMixin, DatabasePickleSaveMixin, DiskPickleSaveMixin,
DiskHDF5SaveMixin, KerasDiskHDF5SaveMixin):
def _save_external_files(self):
'''
Wrapper method around save mixins for different persistence patterns
'''
save_method = self.state['save_method']
if save_method == 'database':
self._save_dataframe_to_table()
elif save_method == 'database_pickled':
self._save_pickle_to_database()
elif save_method == 'disk_pickled':
self._save_pickle_to_disk()
elif save_method == 'disk_hdf5':
self._save_hdf5_to_disk()
elif save_method == 'disk_keras_hdf5':
self._save_keras_hdf5_to_disk()
else:
raise ValueError('Unsupported Save Method: {}'.format(save_method))
def _load_external_files(self):
'''
Wrapper method around save mixins for different persistence patterns
'''
save_method = self.state['save_method']
if save_method == 'database':
self._load_dataframe_from_table()
elif save_method == 'database_pickled':
self._load_pickle_from_database()
elif save_method == 'disk_pickled':
self._load_pickle_from_disk()
elif save_method == 'disk_hdf5':
self._load_hdf5_from_disk()
elif save_method == 'disk_keras_hdf5':
self._load_keras_hdf5_from_disk()
else:
raise ValueError('Unsupported Load Method: {}'.format(save_method))