"""
Package for artifact persistence. Bindings are automatically included for SimpleML
persistables, but patterns can be used for any objects or frameworks.
Patterns are loaded into the global registry on import, and more can be added
externally by decorating classes with `SavePatternDecorators.register_save_pattern`.
Patterns can be named anything since they are only mappings in the registry.
Convention is -> Location : Serializer : Format(s)
- Database Storage
- database_table: Dataframe saving (as tables in dedicated schema)
- database_pickled: In database as a binary blob
- database_hdf5: In database as a binary blob
- Local Filesystem Storage
- disk_pickled: Pickled file on local disk
- disk_hdf5: HDF5 file on local disk
- disk_keras_hdf5: Keras formatted HDF5 file on local disk
- Cloud Storage
- cloud_pickled: Pickled file on cloud backend
- cloud_hdf5: HDF5 file on cloud backend
- cloud_keras_hdf5: Keras formatted HDF5 file on cloud backend
Supported Backends:
- Amazon S3
- Google Cloud Platform
- Microsoft Azure
- Microsoft OneDrive
- Aurora
- Backblaze B2
- DigitalOcean Spaces
- OpenStack Swift
Backend is determined by `cloud_section` in the configuration file
- Remote filestore saving
- SCP to remote server
"""
[docs]__author__ = "Elisha Yadgaran"
import logging
from os.path import join
from typing import Any, Optional
from simpleml.utils.configuration import PICKLE_DIRECTORY
# Auto import all submodules to ensure registration on library import
from .base import BaseSavePattern, BaseSerializer
from .decorators import (
SavePatternDecorators,
deregister_save_pattern,
register_save_pattern,
)
from .locations.disk import (
FilestoreCopyFileLocation,
FilestoreCopyFilesLocation,
FilestoreCopyFolderLocation,
FilestorePassthroughLocation,
)
from .locations.libcloud import (
LibcloudCopyFileLocation,
LibcloudCopyFilesLocation,
LibcloudCopyFolderLocation,
)
from .serializers.cloudpickle import CloudpickleFileSerializer
from .serializers.dask import (
DaskCSVSerializer,
DaskJSONSerializer,
DaskParquetSerializer,
)
from .serializers.keras import KerasH5Serializer, KerasSavedModelSerializer
from .serializers.pandas import (
PandasCSVSerializer,
PandasJSONSerializer,
PandasParquetSerializer,
)
# Module-level logger, named after this module via __name__
LOGGER = logging.getLogger(__name__)
"""
(Cloud)Pickle Save Patterns
"""
@SavePatternDecorators.register_save_pattern
class CloudpickleDiskSavePattern(BaseSavePattern):
    """
    Save pattern that persists objects to disk in pickled format.

    Registered under the ``disk_pickled`` pattern name.
    """

    SAVE_PATTERN = "disk_pickled"
    # Save direction: serializer class first, location class second
    serializers = (
        CloudpickleFileSerializer,
        FilestoreCopyFileLocation,
    )
    # Load direction: location class first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        CloudpickleFileSerializer,
    )

    @classmethod
    def load(cls, legacy: Optional[str] = None, **kwargs):
        """
        Translate legacy filepath data to the new convention, then delegate
        to the parent class ``load``.

        :param legacy: optional legacy filename without directory info. When
            given, it is joined onto PICKLE_DIRECTORY and overwrites the
            ``filepath`` and ``source_directory`` entries of kwargs
        :param kwargs: keyword arguments passed on to the parent ``load``
        :return: the result of the parent ``load`` call
        """
        if legacy is None:
            # Nothing to translate; pass the params through unchanged
            return super().load(**kwargs)

        # legacy value is a bare filename, so prepend the pickle directory
        filepath = join(PICKLE_DIRECTORY, legacy)
        source_directory = "filestore"
        LOGGER.debug(
            f"Overwriting legacy filepath param with {filepath} and source_directory with {source_directory}"
        )
        kwargs.update(filepath=filepath, source_directory=source_directory)
        return super().load(**kwargs)
@SavePatternDecorators.register_save_pattern
class CloudpickleLibcloudSavePattern(BaseSavePattern):
    """
    Save pattern that persists objects through the libcloud file location
    in pickled format.

    Registered under the ``cloud_pickled`` pattern name.
    """

    SAVE_PATTERN = "cloud_pickled"
    # Save direction: serializer class first, location class second
    serializers = (
        CloudpickleFileSerializer,
        LibcloudCopyFileLocation,
    )
    # Load direction: location class first, serializer class second
    deserializers = (
        LibcloudCopyFileLocation,
        CloudpickleFileSerializer,
    )

    @classmethod
    def load(cls, legacy: Optional[str] = None, **kwargs):
        """
        Translate legacy filepath data to the new convention, then delegate
        to the parent class ``load``.

        :param legacy: optional legacy filename without directory info. When
            given, it is joined onto PICKLE_DIRECTORY and overwrites the
            ``filepath`` and ``source_directory`` entries of kwargs
        :param kwargs: keyword arguments passed on to the parent ``load``
        :return: the result of the parent ``load`` call
        """
        if legacy is None:
            # Nothing to translate; pass the params through unchanged
            return super().load(**kwargs)

        # legacy value is a bare filename, so prepend the pickle directory
        filepath = join(PICKLE_DIRECTORY, legacy)
        source_directory = "libcloud_root_path"
        LOGGER.debug(
            f"Overwriting legacy filepath param with {filepath} and source_directory with {source_directory}"
        )
        kwargs.update(filepath=filepath, source_directory=source_directory)
        return super().load(**kwargs)
"""
Dask Save Patterns
"""
@SavePatternDecorators.register_save_pattern
class DaskDiskParquetSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to disk in parquet format.

    Registered under the ``dask_disk_parquet`` pattern name.
    """

    SAVE_PATTERN = "dask_disk_parquet"
    # Save direction: serializer class first, folder-copy location second
    serializers = (
        DaskParquetSerializer,
        FilestoreCopyFolderLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        DaskParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudParquetSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to cloud via apache-libcloud
    in parquet format.

    Registered under the ``dask_libcloud_parquet`` pattern name.
    """

    SAVE_PATTERN = "dask_libcloud_parquet"
    # Save direction: serializer class first, folder-copy location second
    serializers = (
        DaskParquetSerializer,
        LibcloudCopyFolderLocation,
    )
    # Load direction: folder-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFolderLocation,
        DaskParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskDiskCSVSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to disk in csv format.

    Registered under the ``dask_disk_csv`` pattern name.
    """

    SAVE_PATTERN = "dask_disk_csv"
    # Save direction: serializer class first, multi-file copy location second
    serializers = (
        DaskCSVSerializer,
        FilestoreCopyFilesLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        DaskCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudCSVSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to cloud via apache-libcloud
    in csv format.

    Registered under the ``dask_libcloud_csv`` pattern name.
    """

    SAVE_PATTERN = "dask_libcloud_csv"
    # Save direction: serializer class first, multi-file copy location second
    serializers = (
        DaskCSVSerializer,
        LibcloudCopyFilesLocation,
    )
    # Load direction: multi-file copy location first, serializer class second
    deserializers = (
        LibcloudCopyFilesLocation,
        DaskCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskDiskJSONSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to disk in json format.

    Registered under the ``dask_disk_json`` pattern name.
    """

    SAVE_PATTERN = "dask_disk_json"
    # Save direction: serializer class first, multi-file copy location second
    serializers = (
        DaskJSONSerializer,
        FilestoreCopyFilesLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        DaskJSONSerializer,
    )
@SavePatternDecorators.register_save_pattern
class DaskLibcloudJSONSavePattern(BaseSavePattern):
    """
    Save pattern that persists dask objects to cloud via apache-libcloud
    in json format.

    Registered under the ``dask_libcloud_json`` pattern name.
    """

    SAVE_PATTERN = "dask_libcloud_json"
    # Save direction: serializer class first, multi-file copy location second
    serializers = (
        DaskJSONSerializer,
        LibcloudCopyFilesLocation,
    )
    # Load direction: multi-file copy location first, serializer class second
    deserializers = (
        LibcloudCopyFilesLocation,
        DaskJSONSerializer,
    )
"""
Pandas Save Patterns
"""
@SavePatternDecorators.register_save_pattern
class PandasDiskParquetSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to disk in parquet format.

    Registered under the ``pandas_disk_parquet`` pattern name.
    """

    SAVE_PATTERN = "pandas_disk_parquet"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasParquetSerializer,
        FilestoreCopyFileLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        PandasParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudParquetSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to cloud via apache-libcloud
    in parquet format.

    Registered under the ``pandas_libcloud_parquet`` pattern name.
    """

    SAVE_PATTERN = "pandas_libcloud_parquet"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasParquetSerializer,
        LibcloudCopyFileLocation,
    )
    # Load direction: file-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFileLocation,
        PandasParquetSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasDiskCSVSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to disk in csv format.

    Registered under the ``pandas_disk_csv`` pattern name.
    """

    SAVE_PATTERN = "pandas_disk_csv"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasCSVSerializer,
        FilestoreCopyFileLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        PandasCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudCSVSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to cloud via apache-libcloud
    in csv format.

    Registered under the ``pandas_libcloud_csv`` pattern name.
    """

    SAVE_PATTERN = "pandas_libcloud_csv"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasCSVSerializer,
        LibcloudCopyFileLocation,
    )
    # Load direction: file-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFileLocation,
        PandasCSVSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasDiskJSONSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to disk in json format.

    Registered under the ``pandas_disk_json`` pattern name.
    """

    SAVE_PATTERN = "pandas_disk_json"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasJSONSerializer,
        FilestoreCopyFileLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        PandasJSONSerializer,
    )
@SavePatternDecorators.register_save_pattern
class PandasLibcloudJSONSavePattern(BaseSavePattern):
    """
    Save pattern that persists pandas objects to cloud via apache-libcloud
    in json format.

    Registered under the ``pandas_libcloud_json`` pattern name.
    """

    SAVE_PATTERN = "pandas_libcloud_json"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        PandasJSONSerializer,
        LibcloudCopyFileLocation,
    )
    # Load direction: file-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFileLocation,
        PandasJSONSerializer,
    )
"""
Keras Save Patterns
"""
@SavePatternDecorators.register_save_pattern
class KerasDiskSavedModelSavePattern(BaseSavePattern):
    """
    Save pattern that persists keras objects to disk in savedModel format.

    Registered under the ``keras_disk_saved_model`` pattern name.
    """

    SAVE_PATTERN = "keras_disk_saved_model"
    # Save direction: serializer class first, folder-copy location second
    serializers = (
        KerasSavedModelSerializer,
        FilestoreCopyFolderLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        KerasSavedModelSerializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasLibcloudSavedModelSavePattern(BaseSavePattern):
    """
    Save pattern that persists keras objects to cloud via apache-libcloud
    in savedModel format.

    Registered under the ``keras_libcloud_saved_model`` pattern name.
    """

    SAVE_PATTERN = "keras_libcloud_saved_model"
    # Save direction: serializer class first, folder-copy location second
    serializers = (
        KerasSavedModelSerializer,
        LibcloudCopyFolderLocation,
    )
    # Load direction: folder-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFolderLocation,
        KerasSavedModelSerializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasDiskH5SavePattern(BaseSavePattern):
    """
    Save pattern that persists keras objects to disk in h5 format.

    Registered under the ``keras_disk_h5`` pattern name.
    """

    SAVE_PATTERN = "keras_disk_h5"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        KerasH5Serializer,
        FilestoreCopyFileLocation,
    )
    # Load direction: passthrough location first, serializer class second
    deserializers = (
        FilestorePassthroughLocation,
        KerasH5Serializer,
    )
@SavePatternDecorators.register_save_pattern
class KerasLibcloudH5SavePattern(BaseSavePattern):
    """
    Save pattern that persists keras objects to cloud via apache-libcloud
    in h5 format.

    Registered under the ``keras_libcloud_h5`` pattern name.
    """

    SAVE_PATTERN = "keras_libcloud_h5"
    # Save direction: serializer class first, file-copy location second
    serializers = (
        KerasH5Serializer,
        LibcloudCopyFileLocation,
    )
    # Load direction: file-copy location first, serializer class second
    deserializers = (
        LibcloudCopyFileLocation,
        KerasH5Serializer,
    )
"""
Hickle Save Patterns
"""
"""
Database Save Patterns
"""
"""
Onedrive Save Patterns
"""