Source code for simpleml.utils.configuration

"""
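Module to load the SimpleML configuration and set a reference for the SimpleML home directory

The configuration file location is read from the `SIMPLEML_CONFIGURATION_FILE` environment variable and defaults to `~/.simpleml/simpleml.conf` in the user's home directory if the variable is not set. The home directory itself defaults to `~/.simpleml` unless overridden in the `[path]` section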

example config file:

```
    [path]
    home_directory = ~/.simpleml  <-- This details where all binaries are stored on the local disk. Only necessary if different than the default

    [libcloud]  <-- This section is used for any persistable that specifies an apache-libcloud persistence location
    section = gcp-read-only  <-- The name of the heading for the cloud credentials.
                                 In this example this line is the only value that needs to be changed to move
                                 from saving in GCP to, say, S3. No code would have to change whatsoever because
                                 the save location is "cloud" which does a lookup in the config

    [onedrive]  <--- This section outlines an example authorization scheme with onedrive personal
    client_secret = aaaaaabbbbbbbbbcccccc  <--- Put your client secret here
    root_id = xxxxxyyyyyyzzzzzzzz  <--- Put the item id of the root filestore bucket here
    client_id = abcdefg-hijk-lmno-pqrs-tuvwxyz  <--- Put your client_id here
    scopes = onedrive.readwrite  <--- Mark the scopes here (reference the onedrive api for examples)
    redirect_uri = http://localhost:8000/example/callback  <--- Put the callback url here to return the auth token

    [gcp-read-write]  <--- This section outlines an example for a read/write iam in GCP
    driver = GOOGLE_STORAGE  <--- Apache-libcloud driver used (can be any of the supported ones)
    connection_params = key,secret  <--- Which parameters in this section to pass to apache-libcloud
    key = read-write@iam.gserviceaccount.com  <--- The gcp iam account
    secret = ./gcp-read-write.json  <--- The token for that gcp account
    container = simpleml  <--- The gcp container (or "bucket") that houses the files

    [gcp-read-only]  <--- Duplicate example with a read only IAM -- recommended practice to train with the
                          cloud section = gcp-read-write and deploy in production with read only access
    driver = GOOGLE_STORAGE
    connection_params = key,secret
    key = read-only@iam.gserviceaccount.com
    secret = ./gcp-read-only.json
    container = simpleml

    [s3]
    param = value --> normal key:value syntax. match these to however they are referenced later, examples:
    key = abc123
    secret = superSecure
    region = us-east-1
    something_specific_to_s3 = s3_parameter
    --- Below are internally referenced SimpleML params ---
    driver = S3 --> this must be the Apache Libcloud provider (https://github.com/apache/libcloud/blob/trunk/libcloud/storage/types.py)
    connection_params = key,secret,region,something_specific_to_s3 --> this determines the key: value params passed to the constructor (it can be different for each provider)
    path = simpleml/specific/root --> similar to disk based home directory, cloud home directory will start relative to here
    container = simpleml --> the cloud bucket or container name

    [simpleml-database]  <--- Database credentials for the simpleml models
                              (used by specifying Database(configuration_section='simpleml-database'))
    database=SimpleML
    username=simpleml
    password=simpleml
    drivername=postgresql
    host=localhost
    port=5432

    [app-database]  <--- Database credentials for application logs (used by
                         specifying Database(configuration_section='app-database'))
    database=APPLICATION_DB
    username=simpleml
    password=simpleml
    drivername=postgresql
    host=localhost
    port=5432
```
"""

[docs]__author__ = "Elisha Yadgaran"
import errno # os.errno deprecated in python 3.7+ import logging import os import tempfile from configparser import ConfigParser from simpleml.registries import FILEPATH_REGISTRY
# Module-level logger, named after this module
LOGGER = logging.getLogger(__name__)

# Configuration
# An explicit path from the environment wins; otherwise fall back to the
# conventional file under the user's home directory.
CONFIGURATION_FILE = os.getenv("SIMPLEML_CONFIGURATION_FILE")
if CONFIGURATION_FILE is None:
    LOGGER.debug(
        "Configuration File Environment Variable Not Set (`SIMPLEML_CONFIGURATION_FILE`), using default"
    )
    CONFIGURATION_FILE = "~/.simpleml/simpleml.conf"

# Expand a leading `~` in both cases. Values that reach the environment without
# passing through a shell (env files, container specs) are not expanded for us,
# and the `home_directory` config option already receives the same treatment.
CONFIGURATION_FILE = os.path.expanduser(CONFIGURATION_FILE)
def _comma_separated(raw_value):
    """Split a raw option value on commas and trim whitespace around each piece."""
    return [piece.strip() for piece in raw_value.split(",")]


# Shared parser for the whole package; the "list" converter handles
# comma separated option values
CONFIG = ConfigParser(converters={"list": _comma_separated})

# Only parse when the file is actually present; otherwise the parser stays
# empty and every lookup resolves to its default
if not os.path.isfile(CONFIGURATION_FILE):
    LOGGER.warning("No Configuration File Found, Falling Back to Default Values")
else:
    CONFIG.read(CONFIGURATION_FILE)

# Config Sections
PATH_SECTION = "path"
LIBCLOUD_SECTION = "libcloud"
# Local Filestore
# Default home, used whenever the config does not supply a usable directory
_DEFAULT_SIMPLEML_DIRECTORY = os.path.expanduser("~/.simpleml")

# `fallback=None` covers both a missing `[path]` section and a section that
# lacks `home_directory`; a plain `get` would raise NoOptionError at import
# time in the latter case.
_configured_home = CONFIG.get(PATH_SECTION, "home_directory", fallback=None)
if _configured_home is not None:
    SIMPLEML_DIRECTORY = os.path.expanduser(_configured_home)
    if not os.path.isdir(SIMPLEML_DIRECTORY):
        # A configured directory that does not exist is reported and replaced
        # by the default rather than being created
        LOGGER.error(
            "Invalid Home Directory Specified: {}, using ~/.simpleml".format(
                SIMPLEML_DIRECTORY
            )
        )
        SIMPLEML_DIRECTORY = _DEFAULT_SIMPLEML_DIRECTORY
else:
    LOGGER.debug("Home Directory Path Not Set (`[path]`), using default")
    LOGGER.debug("Expected Configuration Section as Follows:")
    LOGGER.debug("[path]")
    LOGGER.debug("home_directory = ~/.simpleml")
    SIMPLEML_DIRECTORY = _DEFAULT_SIMPLEML_DIRECTORY

# Libcloud configs
# `[libcloud] section` names the config section that holds the cloud
# credentials. It resolves to None when either the section or the option is
# absent, instead of raising at import time.
LIBCLOUD_CONFIG_SECTION = CONFIG.get(LIBCLOUD_SECTION, "section", fallback=None)
if LIBCLOUD_CONFIG_SECTION is not None:
    # Optional root path inside the referenced section; empty when not set
    LIBCLOUD_ROOT_PATH = CONFIG.get(LIBCLOUD_CONFIG_SECTION, "path", fallback="")
else:
    LOGGER.debug(
        "Libcloud config parameters not set. Attempts to use persistence patterns with the library will fail"
    )
    LIBCLOUD_ROOT_PATH = ""

# Reference paths
# Relative sub-directory names (each one ends with a trailing slash)
PICKLE_DIRECTORY = "pickle/"
HDF5_DIRECTORY = "HDF5/"
PARQUET_DIRECTORY = "parquet/"
CSV_DIRECTORY = "csv/"
ORC_DIRECTORY = "orc/"
JSON_DIRECTORY = "json/"
TENSORFLOW_SAVED_MODEL_DIRECTORY = "saved_model/"

# Absolute local locations: the filestore under the SimpleML home directory
# and the temp directory reported by the standard library
FILESTORE_DIRECTORY = os.path.join(SIMPLEML_DIRECTORY, "filestore/")
SYSTEM_TEMP_DIRECTORY = tempfile.gettempdir()

# register paths for consistent reference
for _registry_key, _registered_path in (
    ("filestore", FILESTORE_DIRECTORY),
    ("system_temp", SYSTEM_TEMP_DIRECTORY),
    ("libcloud_root_path", LIBCLOUD_ROOT_PATH),
):
    FILEPATH_REGISTRY.register(_registry_key, _registered_path)

# Create Paths if they don't exist - use try/excepts to catch race conditions
def safe_makedirs(dir):
    """
    Create a directory (and any missing parents), tolerating one that already exists

    Safe against creation races: if something else creates the directory
    between a caller's existence check and this call, the call still succeeds

    :param dir: path of the directory to create
    :raises OSError: if creation fails for any reason other than the directory
        already being there. This includes `FileExistsError` when the path
        exists but is not a directory
    """
    # `exist_ok=True` suppresses the "already exists" error only when the
    # existing path really is a directory. Ignoring every EEXIST would report
    # success even when a regular file occupies the path.
    os.makedirs(dir, exist_ok=True)
# Make sure the home directory and the filestore directory exist on disk.
# The home directory is handled first, matching the original ordering.
for _required_directory in (SIMPLEML_DIRECTORY, FILESTORE_DIRECTORY):
    if not os.path.exists(_required_directory):
        safe_makedirs(_required_directory)