Source code for simpleml.datasets.pandas.file_based

"""
Extension implementations for loading file based datasets
"""

[docs]__author__ = "Elisha Yadgaran"
from typing import Dict, Optional from simpleml.save_patterns.serializers.pandas import PandasPersistenceMethods from simpleml.utils.errors import DatasetError from .base import BasePandasDataset
# Lookup table from a file format name to the callable that reads a file of
# that format. Keys are the only values accepted as a dataset ``format``.
PANDAS_READER_MAP = {
    "csv": PandasPersistenceMethods.read_csv,
    "json": PandasPersistenceMethods.read_json,
    "parquet": PandasPersistenceMethods.read_parquet,
}
class PandasFileBasedDataset(BasePandasDataset):
    """
    Pandas dataset whose dataframe is produced by reading a file

    The file location, its format, and any reader keyword arguments are
    recorded in ``self.config`` at construction time and consumed later by
    ``build_dataframe``.
    """

    def __init__(
        self,
        filepath: str,
        format: str,
        reader_params: Optional[Dict] = None,
        **kwargs
    ):
        """
        :param filepath: location passed to the reader as its first argument
        :param format: key into ``PANDAS_READER_MAP`` selecting the reader
        :param reader_params: optional keyword arguments forwarded to the
            reader; a falsy value is stored as an empty dict
        :param kwargs: passed through unchanged to the parent constructor
        :raises DatasetError: if ``format`` has no entry in
            ``PANDAS_READER_MAP``
        """
        # The parent constructor runs before the format is validated.
        super().__init__(**kwargs)

        format_is_supported = format in PANDAS_READER_MAP
        if not format_is_supported:
            raise DatasetError(
                f"No reader configured for provided file format: {format}"
            )

        file_settings = {
            "filepath": filepath,
            "format": format,
            "reader_params": reader_params if reader_params else {},
        }
        self.config.update(file_settings)

    def build_dataframe(self) -> None:
        """
        Read the configured file and store the result on ``self.dataframe``

        The reader is chosen from ``PANDAS_READER_MAP`` using the ``format``
        config entry and is called with the ``filepath`` entry plus the
        ``reader_params`` entry expanded as keyword arguments.
        """
        source_path = self.config.get("filepath")
        file_format = self.config.get("format")
        reader_kwargs = self.config.get("reader_params")

        # Resolve the reader first so an unknown format fails before the call.
        reader = PANDAS_READER_MAP[file_format]
        self.dataframe = reader(source_path, **reader_kwargs)