Source code for simpleml.datasets.numpy_mixin

'''
Numpy Module for external "dataframe"

Inherit and extend for particular patterns. It is a bit of a misnomer to use the
term "dataframe", since there are very few expected attributes and they are by no
means unique to pandas.
'''

# Module author metadata
__author__ = 'Elisha Yadgaran'
from simpleml.datasets.abstract_mixin import AbstractDatasetMixin
class NumpyDatasetMixin(AbstractDatasetMixin):
    '''
    Dataset mixin for data stored as numpy arrays

    Assumes _external_file is a dictionary of numpy ndarrays
    '''

    @property
    def X(self):
        '''
        Return the subset that isn't in the target labels
        (looked up without any split)
        '''
        return self.get(column='X', split=None)

    @property
    def y(self):
        '''
        Return the target label columns
        (looked up without any split)
        '''
        return self.get(column='y', split=None)

    def get(self, column, split):
        '''
        Fetch the features or the labels, optionally from within a named split

        Assumes self.dataframe has a get method and that each split is a
        dictionary shaped like {'X': X, 'y': y}. Labels are looked up under
        the first entry of self.label_columns (in case y is named something
        else); any further entries are ignored.

        :param column: 'X' for the features or 'y' for the labels
        :param split: key of the split to read from. None reads straight from
            self.dataframe (assumes there is no top level split)
        :return: the stored value, or None for any combination of
            column/split that isn't present
        :raises ValueError: if column is anything other than 'X' or 'y'
        '''
        if column not in ('X', 'y'):
            raise ValueError('Only support columns: X & y')

        # No split requested -> treat the dataframe itself as the X/y mapping
        container = self.dataframe if split is None else self.dataframe.get(split)

        # Swap a missing container for an empty dict so that the lookup
        # below returns None instead of failing
        if container is None:
            container = {}

        # NOTE(review): label_columns[0] presumably fails when no label
        # columns are configured -- confirm that is guaranteed upstream
        key = self.label_columns[0] if column == 'y' else 'X'
        return container.get(key, None)

    def get_feature_names(self):
        '''
        Should return a list of the features in the dataset

        The features are stored under a single key, so the only name is 'X'
        '''
        return ['X']