"""
External pipeline support for scikit-learn pipeline
"""
__author__ = "Elisha Yadgaran"
from typing import Any, Dict, List, Optional, Tuple
from sklearn.pipeline import Pipeline
from simpleml.pipelines.external_pipelines import ExternalPipelineMixin
class SklearnExternalPipeline(Pipeline, ExternalPipelineMixin):
    """
    Wrap sklearn pipeline with standardized methods
    """

    def get_params(
        self, params_only: Optional[bool] = False, **kwargs
    ) -> Dict[str, Any]:
        """
        Wrapper around sklearn implementation to drop non parameter returns

        :param params_only: boolean to filter down to actual transformer
            parameters. When truthy, the ``steps`` entry and every entry keyed
            by a bare step name are removed; all other keys returned by the
            parent implementation are kept
        :param kwargs: forwarded unchanged to the parent ``get_params``
        :return: dictionary of parameter names to values
        """
        params = super().get_params(**kwargs)
        if not params_only:
            return params

        # Entries keyed by a bare step name hold the step object itself rather
        # than a parameter value (actual params have k__v format), so drop
        # them together with the ``steps`` list.
        steps = params.pop("steps", [])
        step_names = {step[0] for step in steps}
        return {k: v for k, v in params.items() if k not in step_names}

    def get_feature_names(
        self, feature_names: Optional[List[str]] = None
    ) -> List[str]:
        """
        Iterate through each transformer and return list of resulting features
        starts with empty list by default but can pass in dataset as starting
        point to guide transformations

        :param feature_names: list of initial feature names before
            transformations. Defaults to an empty list when omitted
        :return: feature names after every step's ``get_feature_names`` has
            been applied in pipeline order
        """
        if feature_names is None:
            feature_names = []

        for _, transformer in self.steps:
            # sklearn allows a step to be None or "passthrough"; such a step
            # has no ``get_feature_names`` and leaves the features unchanged.
            if transformer is None or transformer == "passthrough":
                continue
            feature_names = transformer.get_feature_names(feature_names)
        return feature_names