Source code for simpleml.save_patterns

"""
Package for artifact persistence. Bindings are automatically included for SimpleML
persistables, but patterns can be used for any objects or frameworks.

Patterns are loaded into the global registry on import, and more can be added
externally by decorating a class with `SavePatternDecorators.register_save_pattern`.

Patterns can be named anything since they are only mappings in the registry.
Convention is -> Location : Serializer : Format(s)



- Database Storage
    - database_table: Dataframe saving (as tables in dedicated schema)
    - database_pickled: In database as a binary blob
    - database_hdf5: In database as a binary blob
- Local Filesystem Storage
    - disk_pickled: Pickled file on local disk
    - disk_hdf5: HDF5 file on local disk
    - disk_keras_hdf5: Keras formatted HDF5 file on local disk
- Cloud Storage
    - cloud_pickled: Pickled file on cloud backend
    - cloud_hdf5: HDF5 file on cloud backend
    - cloud_keras_hdf5: Keras formatted HDF5 file on cloud backend
  Supported Backends:
    - Amazon S3
    - Google Cloud Platform
    - Microsoft Azure
    - Microsoft Onedrive
    - Aurora
    - Backblaze B2
    - DigitalOcean Spaces
    - OpenStack Swift
  Backend is determined by `cloud_section` in the configuration file
- Remote filestore saving
    - SCP to remote server
"""


[docs]__author__ = "Elisha Yadgaran"
import logging from os.path import join from typing import Any, Optional from simpleml.utils.configuration import PICKLE_DIRECTORY # Auto import all submodules to ensure registration on library import from .base import BaseSavePattern, BaseSerializer from .decorators import ( SavePatternDecorators, deregister_save_pattern, register_save_pattern, ) from .locations.disk import ( FilestoreCopyFileLocation, FilestoreCopyFilesLocation, FilestoreCopyFolderLocation, FilestorePassthroughLocation, ) from .locations.libcloud import ( LibcloudCopyFileLocation, LibcloudCopyFilesLocation, LibcloudCopyFolderLocation, ) from .serializers.cloudpickle import CloudpickleFileSerializer from .serializers.dask import ( DaskCSVSerializer, DaskJSONSerializer, DaskParquetSerializer, ) from .serializers.keras import KerasH5Serializer, KerasSavedModelSerializer from .serializers.pandas import ( PandasCSVSerializer, PandasJSONSerializer, PandasParquetSerializer, )
[docs]LOGGER = logging.getLogger(__name__)
""" (Cloud)Pickle Save Patterns """
@SavePatternDecorators.register_save_pattern
class CloudpickleDiskSavePattern(BaseSavePattern):
    """
    Save pattern implementation that saves objects to disk in pickled format
    """

    # Name of this save pattern
    SAVE_PATTERN = "disk_pickled"
    # Save side: serializer class first, then the location class
    serializers = (
        CloudpickleFileSerializer,
        FilestoreCopyFileLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        CloudpickleFileSerializer,
    )

    @classmethod
    def load(cls, legacy: Optional[str] = None, **kwargs):
        """
        Load via the parent implementation, first translating legacy
        filepath data to the new convention.

        :param legacy: optional legacy filename (no directory info). When
            given, it is joined onto PICKLE_DIRECTORY and injected into
            kwargs as `filepath`, with `source_directory` set to "filestore"
        :param kwargs: forwarded to the parent `load`
        :return: whatever the parent `load` returns
        """
        # Nothing to translate - delegate straight to the parent
        if legacy is None:
            return super().load(**kwargs)

        # legacy behavior for filename without directory info
        source_directory = "filestore"
        filepath = join(PICKLE_DIRECTORY, legacy)
        LOGGER.debug(
            f"Overwriting legacy filepath param with {filepath} and source_directory with {source_directory}"
        )
        # Overrides any caller-supplied values for these two keys
        kwargs.update(filepath=filepath, source_directory=source_directory)
        return super().load(**kwargs)
@SavePatternDecorators.register_save_pattern
class CloudpickleLibcloudSavePattern(BaseSavePattern):
    """
    Save pattern implementation to save objects to cloud via apache-libcloud
    in pickled format
    """

    # Name of this save pattern
    SAVE_PATTERN = "cloud_pickled"
    # Save side: serializer class first, then the libcloud location class
    serializers = (CloudpickleFileSerializer, LibcloudCopyFileLocation)
    # Load side: libcloud location class first, then the serializer class
    deserializers = (LibcloudCopyFileLocation, CloudpickleFileSerializer)

    @classmethod
    def load(cls, legacy: Optional[str] = None, **kwargs):
        """
        Catch for legacy filepath data to dynamically update to new convention

        :param legacy: optional legacy filename (no directory info). When
            given, it is joined onto PICKLE_DIRECTORY and injected into
            kwargs as `filepath`, with `source_directory` set to
            "libcloud_root_path"
        :param kwargs: forwarded to the parent `load`
        :return: whatever the parent `load` returns
        """
        if legacy is not None:
            # legacy behavior for filename without directory info
            filepath = join(PICKLE_DIRECTORY, legacy)
            source_directory = "libcloud_root_path"
            LOGGER.debug(
                f"Overwriting legacy filepath param with {filepath} and source_directory with {source_directory}"
            )
            # Overrides any caller-supplied values for these two keys
            kwargs["filepath"] = filepath
            kwargs["source_directory"] = source_directory

        return super().load(**kwargs)
""" Dask Save Patterns """
@SavePatternDecorators.register_save_pattern
class DaskDiskParquetSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to disk in parquet format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_disk_parquet"
    # Save side: serializer class first, then the location class
    serializers = (
        DaskParquetSerializer,
        FilestoreCopyFolderLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        DaskParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudParquetSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to cloud via
    apache-libcloud in parquet format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_libcloud_parquet"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        DaskParquetSerializer,
        LibcloudCopyFolderLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFolderLocation,
        DaskParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskDiskCSVSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to disk in csv format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_disk_csv"
    # Save side: serializer class first, then the location class
    serializers = (
        DaskCSVSerializer,
        FilestoreCopyFilesLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        DaskCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudCSVSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to cloud via
    apache-libcloud in csv format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_libcloud_csv"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        DaskCSVSerializer,
        LibcloudCopyFilesLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFilesLocation,
        DaskCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskDiskJSONSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to disk in json format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_disk_json"
    # Save side: serializer class first, then the location class
    serializers = (
        DaskJSONSerializer,
        FilestoreCopyFilesLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        DaskJSONSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudJSONSavePattern(BaseSavePattern):
    """
    Save pattern implementation for dask objects saved to cloud via
    apache-libcloud in json format
    """

    # Name of this save pattern
    SAVE_PATTERN = "dask_libcloud_json"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        DaskJSONSerializer,
        LibcloudCopyFilesLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFilesLocation,
        DaskJSONSerializer,
    )
""" Pandas Save Patterns """
@SavePatternDecorators.register_save_pattern
class PandasDiskParquetSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to disk in parquet format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_disk_parquet"
    # Save side: serializer class first, then the location class
    serializers = (
        PandasParquetSerializer,
        FilestoreCopyFileLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        PandasParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudParquetSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to cloud via
    apache-libcloud in parquet format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_libcloud_parquet"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        PandasParquetSerializer,
        LibcloudCopyFileLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFileLocation,
        PandasParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasDiskCSVSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to disk in csv format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_disk_csv"
    # Save side: serializer class first, then the location class
    serializers = (
        PandasCSVSerializer,
        FilestoreCopyFileLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        PandasCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudCSVSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to cloud via
    apache-libcloud in csv format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_libcloud_csv"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        PandasCSVSerializer,
        LibcloudCopyFileLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFileLocation,
        PandasCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasDiskJSONSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to disk in json format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_disk_json"
    # Save side: serializer class first, then the location class
    serializers = (
        PandasJSONSerializer,
        FilestoreCopyFileLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        PandasJSONSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudJSONSavePattern(BaseSavePattern):
    """
    Save pattern implementation for pandas objects saved to cloud via
    apache-libcloud in json format
    """

    # Name of this save pattern
    SAVE_PATTERN = "pandas_libcloud_json"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        PandasJSONSerializer,
        LibcloudCopyFileLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFileLocation,
        PandasJSONSerializer,
    )
""" Keras Save Patterns """
@SavePatternDecorators.register_save_pattern
class KerasDiskSavedModelSavePattern(BaseSavePattern):
    """
    Save pattern implementation for keras objects saved to disk in
    savedModel format
    """

    # Name of this save pattern
    SAVE_PATTERN = "keras_disk_saved_model"
    # Save side: serializer class first, then the location class
    serializers = (
        KerasSavedModelSerializer,
        FilestoreCopyFolderLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        KerasSavedModelSerializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasLibcloudSavedModelSavePattern(BaseSavePattern):
    """
    Save pattern implementation for keras objects saved to cloud via
    apache-libcloud in savedModel format
    """

    # Name of this save pattern
    SAVE_PATTERN = "keras_libcloud_saved_model"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        KerasSavedModelSerializer,
        LibcloudCopyFolderLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFolderLocation,
        KerasSavedModelSerializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasDiskH5SavePattern(BaseSavePattern):
    """
    Save pattern implementation for keras objects saved to disk in h5 format
    """

    # Name of this save pattern
    SAVE_PATTERN = "keras_disk_h5"
    # Save side: serializer class first, then the location class
    serializers = (
        KerasH5Serializer,
        FilestoreCopyFileLocation,
    )
    # Load side: location class first, then the serializer class
    deserializers = (
        FilestorePassthroughLocation,
        KerasH5Serializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasLibcloudH5SavePattern(BaseSavePattern):
    """
    Save pattern implementation for keras objects saved to cloud via
    apache-libcloud in h5 format
    """

    # Name of this save pattern
    SAVE_PATTERN = "keras_libcloud_h5"
    # Save side: serializer class first, then the libcloud location class
    serializers = (
        KerasH5Serializer,
        LibcloudCopyFileLocation,
    )
    # Load side: libcloud location class first, then the serializer class
    deserializers = (
        LibcloudCopyFileLocation,
        KerasH5Serializer,
    )
""" Hickle Save Patterns """ """ Database Save Patterns """ """ Onedrive Save Patterns """