Source code for simpleml.datasets

'''
Import modules to register class names in global registry

Define convenience classes composed of different mixins
'''

__author__ = 'Elisha Yadgaran'


from .base_dataset import Dataset
from .pandas_mixin import PandasDatasetMixin
from .numpy_mixin import NumpyDatasetMixin

from simpleml.utils.errors import DatasetError

import pandas as pd


# Mixin implementations for convenience
[docs]class PandasDataset(Dataset, PandasDatasetMixin):
[docs] def build_dataframe(self): ''' Transform raw dataset via dataset pipeline for production ready dataset Overwrite this method to disable raw dataset requirement ''' if self.pipeline is None: raise DatasetError('Must set pipeline before building dataframe') X, y = self.pipeline.transform(X=None, return_y=True) if y is None: y = pd.DataFrame() self.config['label_columns'] = y.columns.tolist() self._external_file = pd.concat([X, y], axis=1)
[docs]class NumpyDataset(Dataset, NumpyDatasetMixin):
[docs] def build_dataframe(self): ''' Transform raw dataset via dataset pipeline for production ready dataset Overwrite this method to disable raw dataset requirement ''' if self.pipeline is None: raise DatasetError('Must set pipeline before building dataframe') X, y = self.pipeline.transform(X=None, return_y=True) if y is not None: self.config['label_columns'] = ['y'] self._external_file = {'X': X, 'y': y}