# Source code for normalize_scaling

import pandas as pd
import numpy as np
from sklearn.preprocessing import (
    MinMaxScaler,
    StandardScaler,
    RobustScaler,
    MaxAbsScaler,
    Normalizer,
    QuantileTransformer,
    PowerTransformer
)
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from typing import Optional, Any, Dict, List, Union


class ScalingNormalizer(BaseEstimator, TransformerMixin):
    """Scale or normalize selected DataFrame columns, one scaler per column.

    Each selected column gets its own scikit-learn transformer, fitted on
    that column alone. Columns that are not selected are copied through
    unchanged.

    Fitted state:
        scalers: Mapping of column name to the transformer fitted on it.
        columns_: The columns resolved during the last call to ``fit``.
            The ``columns`` constructor parameter itself is never modified.

    NOTE: 'l1', 'l2' and 'max' map to scikit-learn's ``Normalizer``, which
    rescales every *row* to unit norm. Because each transformer here sees a
    single column, each row has exactly one value, so every non-zero value
    becomes +1 or -1. ``Normalizer`` also has no ``inverse_transform``.
    """

    # Parameters stored as plain attributes; every other parameter is
    # forwarded to the underlying scikit-learn transformer via ``kwargs``.
    _OWN_PARAMS = ('method', 'columns')

    # Norm names handled by ``Normalizer`` (passed through as ``norm=``).
    _ROW_NORMS = ('l1', 'l2', 'max')

    # Method name -> scikit-learn transformer class.
    _SCALER_CLASSES: Dict[str, Any] = {
        'minmax': MinMaxScaler,
        'standard': StandardScaler,
        'robust': RobustScaler,
        'maxabs': MaxAbsScaler,
        'quantile': QuantileTransformer,
        'power': PowerTransformer,
    }

    def __init__(
        self,
        method: Union[str, Dict[str, str]] = 'standard',
        columns: Optional[List[str]] = None,
        **kwargs: Any
    ):
        """Store the scaling configuration; nothing is fitted here.

        Args:
            method (Union[str, Dict[str, str]]): Either one method name used
                for every selected column, or a dictionary mapping column
                names to method names. Selected columns missing from the
                dictionary fall back to 'standard'. Supported methods:
                'minmax', 'standard', 'robust', 'maxabs', 'l1', 'l2', 'max',
                'quantile', 'power'.
            columns (List[str], optional): Columns to scale or normalize.
                If None, all numeric columns of the frame passed to ``fit``
                are used.
            **kwargs (Any): Extra keyword arguments passed unchanged to every
                underlying transformer's constructor.
        """
        self.method = method
        self.columns = columns
        self.kwargs = kwargs
        self.scalers: Dict[str, Any] = {}

    def _get_scaler(self, method: str) -> Any:
        """Build a new, unfitted transformer for ``method``.

        Args:
            method (str): One of the supported method names.

        Returns:
            Any: A fresh transformer constructed with ``self.kwargs``.

        Raises:
            ValueError: If ``method`` is not a supported method name.
        """
        if method in self._ROW_NORMS:
            return Normalizer(norm=method, **self.kwargs)
        try:
            scaler_cls = self._SCALER_CLASSES[method]
        except (KeyError, TypeError):
            # TypeError covers unhashable values; report them the same way.
            raise ValueError(f"Unsupported scaling method: {method}") from None
        return scaler_cls(**self.kwargs)

    def _method_for(self, column: str) -> str:
        """Return the method name configured for ``column``."""
        if isinstance(self.method, dict):
            return self.method.get(column, 'standard')
        return self.method

    def _fitted_columns(self) -> List[str]:
        """Return the columns resolved by ``fit``.

        Raises:
            NotFittedError: If ``fit`` has not been called. This exception
                subclasses both ValueError and AttributeError.
        """
        # Imported here so this class does not depend on an extra
        # module-level import.
        from sklearn.exceptions import NotFittedError

        fitted = getattr(self, 'columns_', None)
        if fitted is None:
            raise NotFittedError(
                "This ScalingNormalizer instance is not fitted yet. "
                "Call 'fit' before using this method."
            )
        return fitted

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'ScalingNormalizer':
        """Fit one transformer per selected column.

        Args:
            X (pd.DataFrame): The input data to be scaled or normalized.
            y (pd.Series, optional): Ignored; accepted for API compatibility.

        Returns:
            ScalingNormalizer: This instance, fitted.

        Raises:
            ValueError: If there are no columns to process, if ``method`` is
                neither a string nor a dictionary, or if a method name is
                unsupported.
        """
        # Resolve into a local list so the ``columns`` parameter stays as
        # the caller set it and a refit on another frame re-infers columns.
        if self.columns is None:
            selected = X.select_dtypes(include=[np.number]).columns.tolist()
        else:
            selected = list(self.columns)

        if not selected:
            raise ValueError("No columns to scale or normalize.")

        if not isinstance(self.method, (str, dict)):
            raise ValueError("Method must be a string or a dictionary mapping columns to methods.")

        # Build into a fresh dict and publish only on success, so a failed
        # or repeated fit never leaves stale or half-updated scalers behind.
        fitted: Dict[str, Any] = {}
        for col in selected:
            scaler = self._get_scaler(self._method_for(col))
            scaler.fit(X[[col]])
            fitted[col] = scaler

        self.scalers = fitted
        self.columns_ = selected
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Apply the fitted transformers to their columns.

        Args:
            X (pd.DataFrame): The input data to transform.

        Returns:
            pd.DataFrame: A copy of ``X`` with the fitted columns replaced by
            their transformed values.

        Raises:
            NotFittedError: If ``fit`` has not been called.
            ValueError: If a fitted column is missing from ``X``.
        """
        fitted_columns = self._fitted_columns()
        X_transformed = X.copy()
        for col in fitted_columns:
            if col not in X.columns:
                raise ValueError(f"Column '{col}' not found in the input data.")
            scaled = self.scalers[col].transform(X[[col]])
            # The result has one column; flatten it for the assignment.
            X_transformed[col] = np.asarray(scaled).ravel()
        return X_transformed

    def fit_transform(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> pd.DataFrame:
        """Fit on ``X`` and return the transformed copy of ``X``.

        Args:
            X (pd.DataFrame): The input data to scale or normalize.
            y (pd.Series, optional): Ignored; accepted for API compatibility.

        Returns:
            pd.DataFrame: The transformed data.
        """
        return self.fit(X, y).transform(X)

    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Undo the transformation on the fitted columns.

        Args:
            X (pd.DataFrame): Previously transformed data.

        Returns:
            pd.DataFrame: A copy of ``X`` with the fitted columns mapped back
            through each transformer's ``inverse_transform``.

        Raises:
            NotFittedError: If ``fit`` has not been called.
            ValueError: If a fitted column is missing from ``X``, or if its
                transformer has no ``inverse_transform`` method.
        """
        fitted_columns = self._fitted_columns()
        X_inv_transformed = X.copy()
        for col in fitted_columns:
            if col not in X.columns:
                raise ValueError(f"Column '{col}' not found in the input data.")
            scaler = self.scalers[col]
            if not hasattr(scaler, 'inverse_transform'):
                raise ValueError(f"Scaler for column '{col}' does not support inverse_transform.")
            restored = scaler.inverse_transform(X[[col]])
            X_inv_transformed[col] = np.asarray(restored).ravel()
        return X_inv_transformed

    @staticmethod
    def create_column_transformer(
        column_methods: Dict[str, str],
        remainder: str = 'passthrough',
        **kwargs: Any
    ) -> ColumnTransformer:
        """Build a ColumnTransformer with one ScalingNormalizer per column.

        Args:
            column_methods (Dict[str, str]): Mapping of column name to method
                name, e.g. {'column1': 'minmax', 'column2': 'standard'}.
            remainder (str): Passed to ColumnTransformer as ``remainder``.
                Defaults to 'passthrough'.
            **kwargs (Any): Extra keyword arguments given to every
                ScalingNormalizer, and through it to the transformers.

        Returns:
            ColumnTransformer: An unfitted ColumnTransformer.
        """
        transformers = [
            (
                f"{method}_scaler_{column}",
                ScalingNormalizer(method=method, columns=[column], **kwargs),
                [column],
            )
            for column, method in column_methods.items()
        ]
        return ColumnTransformer(transformers=transformers, remainder=remainder)

    def get_params(self, deep: bool = True) -> Dict[str, Any]:
        """Return the constructor parameters, including the extra kwargs.

        Args:
            deep (bool): Accepted for API compatibility; not used.

        Returns:
            Dict[str, Any]: 'method' and 'columns' plus every entry of
            ``kwargs``, in one flat dictionary.
        """
        return {'method': self.method, 'columns': self.columns, **self.kwargs}

    def set_params(self, **params: Any) -> 'ScalingNormalizer':
        """Update parameters using the same flat names as ``get_params``.

        'method' and 'columns' are set as attributes. Any other name is
        stored in ``kwargs`` so that it reaches the transformers built by
        the next ``fit``.

        Args:
            **params: Parameter names mapped to their new values.

        Returns:
            ScalingNormalizer: This instance.
        """
        forwarded = dict(self.kwargs)
        for key, value in params.items():
            if key in self._OWN_PARAMS:
                setattr(self, key, value)
            else:
                forwarded[key] = value
        self.kwargs = forwarded
        return self