Source code for simpleml.save_patterns.libcloud

'''
Module for cloud save pattern definitions
Uses Apache Libcloud as a universal engine
'''

[docs]__author__ = 'Elisha Yadgaran'
from os.path import join, isfile from typing import Any from simpleml.save_patterns.base import BaseSavePattern from simpleml.save_patterns.decorators import SavePatternDecorators from simpleml.utils.configuration import PICKLED_FILESTORE_DIRECTORY,\ HDF5_FILESTORE_DIRECTORY, PICKLE_DIRECTORY, HDF5_DIRECTORY, CONFIG, CLOUD_SECTION from simpleml.imports import Provider from simpleml.imports import get_driver
[docs]class CloudSavePatternMixin(object): ''' Mixin class to save/load objects via Apache Libcloud Generic api for all cloud providers so naming convention is extremely important to follow in the config. Please reference libcloud documentation for supported input parameters ``` [cloud] section = `name of the config section to use, ex: s3` [s3] param = value --> normal key:value syntax. match these to however they are referenced later, examples: key = abc123 secret = superSecure region = us-east-1 something_specific_to_s3 = s3_parameter --- Below are internally referenced SimpleML params --- driver = S3 --> this must be the Apache Libcloud provider (https://github.com/apache/libcloud/blob/trunk/libcloud/storage/types.py) connection_params = key,secret,region,something_specific_to_s3 --> this determines the key: value params passed to the constructor (it can be different for each provider) path = simpleml/specific/root --> similar to disk based home directory, cloud home directory will start relative to here container = simpleml --> the cloud bucket or container name ``` How this gets used: ``` from libcloud.storage.types import Provider from libcloud.storage.providers import get_driver cloud_section = CONFIG.get(CLOUD_SECTION, 'section') connection_params = CONFIG.getlist(cloud_section, 'connection_params') root_path = CONFIG.get(cloud_section, 'path', fallback='') driver_cls = get_driver(getattr(Provider, CONFIG.get(cloud_section, 'driver'))) driver = driver_cls(**{param: CONFIG.get(cloud_section, param) for param in connection_params}) container = driver.get_container(container_name=CONFIG.get(cloud_section, 'container')) extra = {'content_type': 'application/octet-stream'} obj = driver.upload_object(LOCAL_FILE_PATH, container=container, object_name=root_path + simpleml_folder_path + filename, extra=extra) obj = driver.download_object(CLOUD_OBJECT, destination_path=LOCAL_FILE_PATH, overwrite_existing=True, delete_on_failure=True) ``` ''' # Global initialization of cloud provider. Avoids reauthentication per persistable
[docs] CLOUD_DRIVER = None
@property
[docs] def driver(self): ''' "classproperty" to return and optionally globally set the cloud provider ''' if self.__class__.CLOUD_DRIVER is None: cloud_section = CONFIG.get(CLOUD_SECTION, 'section') connection_params = CONFIG.getlist(cloud_section, 'connection_params') driver_cls = get_driver(getattr(Provider, CONFIG.get(cloud_section, 'driver'))) driver = driver_cls(**{param: CONFIG.get(cloud_section, param) for param in connection_params}) self.__class__.CLOUD_DRIVER = driver return self.__class__.CLOUD_DRIVER
@classmethod
[docs] def reset_driver(cls): ''' Convenience method to set parsed driver back to None. Forces a config reread on next invocation ''' cls.CLOUD_DRIVER = None
[docs] def upload_to_cloud(self, folder: str, filename: str) -> None: ''' Upload any file from disk to cloud ''' cloud_section = CONFIG.get(CLOUD_SECTION, 'section') root_path = CONFIG.get(cloud_section, 'path', fallback='') container = self.driver.get_container(container_name=CONFIG.get(cloud_section, 'container')) extra = {'content_type': 'application/octet-stream'} if folder == 'pickle': filepath = join(PICKLED_FILESTORE_DIRECTORY, filename) object_name = join(root_path, PICKLE_DIRECTORY, filename) else: filepath = join(HDF5_FILESTORE_DIRECTORY, filename) object_name = join(root_path, HDF5_DIRECTORY, filename) self.driver.upload_object(filepath, container=container, object_name=object_name, extra=extra)
[docs] def download_from_cloud(self, folder: str, filename: str) -> None: ''' Download any file from cloud to disk ''' cloud_section = CONFIG.get(CLOUD_SECTION, 'section') root_path = CONFIG.get(cloud_section, 'path', fallback='') container = CONFIG.get(cloud_section, 'container') if folder == 'pickle': filepath = join(PICKLED_FILESTORE_DIRECTORY, filename) # Check if file was already downloaded before initiating cloud connection if isfile(filepath): return obj = self.driver.get_object( container_name=container, object_name=join(root_path, PICKLE_DIRECTORY, filename)) else: filepath = join(HDF5_FILESTORE_DIRECTORY, filename) # Check if file was already downloaded before initiating cloud connection if isfile(filepath): return obj = self.driver.get_object( container_name=container, object_name=join(root_path, HDF5_DIRECTORY, filename)) self.driver.download_object(obj, destination_path=filepath, overwrite_existing=True, delete_on_failure=True)
[docs]class CloudBase(BaseSavePattern, CloudSavePatternMixin):
''' Implementation class for extended base save patterns to/from the cloud via Apache Libcloud '''
[docs]@SavePatternDecorators.register_save_pattern class CloudPickleSavePattern(CloudBase): ''' Save pattern implementation to save objects to Cloud in pickled format '''
[docs] SAVE_PATTERN = 'cloud_pickled'
@classmethod
[docs] def save(cls, obj: Any, persistable_id: str, **kwargs) -> str: ''' Save method to save files to disk in pickled format Then upload pickled file from disk to cloud ''' filename = f'{persistable_id}.pkl' folder = 'pickle' cls.pickle_object(obj, filename) cls.upload_to_cloud(folder, filename) return filename
@classmethod
[docs] def load(cls, filename: str, **kwargs) -> Any: ''' Download pickled file from cloud to disk Then load files from disk in pickled format ''' folder = 'pickle' cls.download_from_cloud(folder, filename) return cls.load_pickled_object(filename)
[docs]@SavePatternDecorators.register_save_pattern class CloudHDF5SavePattern(CloudBase): ''' Save pattern implementation to save objects to Cloud in HDF5 format '''
[docs] SAVE_PATTERN = 'cloud_hdf5'
@classmethod
[docs] def save(cls, obj: Any, persistable_id: str, **kwargs) -> str: ''' Save method to save files to disk in HDF5 format Then upload HDF5 file from disk to cloud ''' filename = f'{persistable_id}.h5' folder = 'hdf5' cls.hickle_object(obj, filename) cls.upload_to_cloud(folder, filename) return filename
@classmethod
[docs] def load(cls, filename: str, **kwargs) -> Any: ''' Download HDF5 file from cloud to disk Then load files from disk in HDF5 format ''' folder = 'hdf5' cls.download_from_cloud(folder, filename) return cls.load_hickled_object(filename)
[docs]@SavePatternDecorators.register_save_pattern class CloudKerasHDF5SavePattern(CloudBase): ''' Save pattern implementation to save objects to Cloud in Keras HDF5 format '''
[docs] SAVE_PATTERN = 'cloud_keras_hdf5'
@classmethod
[docs] def save(cls, obj: Any, persistable_id: str, **kwargs) -> str: ''' Save method to save files to disk in Keras HDF5 format Then upload HDF5 file from disk to cloud ''' filename = f'{persistable_id}.h5' folder = 'hdf5' cls.save_keras_object(obj, filename) cls.upload_to_cloud(folder, filename) return filename
@classmethod
[docs] def load(cls, filename: str, **kwargs) -> Any: ''' Download HDF5 file from cloud to disk Then load files from disk in HDF5 format ''' folder = 'hdf5' cls.download_from_cloud(folder, filename) return cls.load_keras_object(filename)