# Source code for simpleml.datasets.numpy_mixin

'''
Numpy Module for external "dataframe"

Inherit and extend for particular patterns. It is a bit of a misnomer to use the
term "dataframe", since there are very few expected attributes and they are by no
means unique to pandas.
'''

# Module author metadata
__author__ = 'Elisha Yadgaran'


from simpleml.datasets.abstract_mixin import AbstractDatasetMixin


class NumpyDatasetMixin(AbstractDatasetMixin):
    '''
    Dataset mixin for data stored as numpy ndarrays

    Assumes _external_file is a dictionary of numpy ndarrays. Values are read
    from self.dataframe, either directly from the top level (split=None) or
    from the nested mapping stored under a split key.
    '''
    @property
    def X(self):
        '''
        Return the subset that isn't in the target labels

        Shortcut for ``get(column='X', split=None)``; None if there is no
        'X' entry at the top level.
        '''
        return self.get(column='X', split=None)

    @property
    def y(self):
        '''
        Return the target label columns

        Shortcut for ``get(column='y', split=None)``; None if the label entry
        is not present at the top level.
        '''
        return self.get(column='y', split=None)

    def get(self, column, split):
        '''
        Explicitly split validation splits
        Assumes self.dataframe has a get method to return a dictionary of {'X': X, 'y': y}
        Uses self.label_columns if y is named something else -- only looks at first entry in list

        returns None for any combination of column/split that isn't present

        :param column: 'X' for the features or 'y' for the labels
        :param split: key of the split to read from self.dataframe; None reads
            from the top level (assumes there is no top level split)
        :return: the value stored for the requested column, or None if the
            split, the column, or the label column configuration is missing
        :raises ValueError: if column is anything other than 'X' or 'y'
        '''
        if column not in ('X', 'y'):
            raise ValueError('Only support columns: X & y')

        # Without a split, the top level mapping is expected to hold the arrays
        split_dict = self.dataframe if split is None else self.dataframe.get(split)

        if split_dict is None:
            split_dict = {}  # Make compatible with return syntax

        if column == 'X':
            return split_dict.get('X', None)

        # Labels are stored under the first configured label column. With no
        # label columns configured there is nothing to look up, so honor the
        # "returns None if not present" contract instead of raising IndexError
        label_columns = self.label_columns
        if not label_columns:
            return None

        return split_dict.get(label_columns[0], None)

    def get_feature_names(self):
        '''
        Should return a list of the features in the dataset

        This implementation always returns the single entry 'X', the key that
        get() reads the features from.
        '''
        return ['X']