# Source code for interact_features

import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.base import BaseEstimator, TransformerMixin
from typing import List, Tuple, Optional


class PolynomialFeaturesTransformer(BaseEstimator, TransformerMixin):
    """
    Generates polynomial features for specified features in the input DataFrame.

    The columns that are actually expanded are resolved during ``fit`` and
    stored in the fitted attribute ``features_``. The ``features`` constructor
    argument is never modified, so refitting on a different DataFrame (or
    cloning the estimator) re-detects the numeric columns instead of reusing
    the column list of an earlier fit.
    """

    def __init__(
        self,
        degree: int = 2,
        include_bias: bool = False,
        interaction_only: bool = False,
        features: Optional[List[str]] = None
    ):
        """
        Initializes the PolynomialFeaturesTransformer.

        Args:
            degree (int): Degree of polynomial features to generate. Default is 2.
            include_bias (bool): Whether to include a bias column. Default is False.
            interaction_only (bool): If True, only interaction features are produced
                (no powers of single features). Default is False.
            features (List[str], optional): List of feature names to generate
                polynomial interactions. If None, all numeric features are used.
        """
        self.degree = degree
        self.include_bias = include_bias
        self.interaction_only = interaction_only
        self.features = features
        # Set by fit(); transform() uses None as the "not fitted yet" marker.
        self.poly = None

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'PolynomialFeaturesTransformer':
        """
        Fits the transformer to the data.

        Args:
            X (pd.DataFrame): Input DataFrame.
            y (pd.Series, optional): Target variable (not used).

        Returns:
            self
        """
        # Resolve the working column list into a fitted attribute rather than
        # overwriting the `features` hyperparameter: with features=None every
        # fit must re-detect the numeric columns of the frame it is given.
        if self.features is None:
            self.features_ = X.select_dtypes(include=[np.number]).columns.tolist()
        else:
            self.features_ = list(self.features)

        self.poly = PolynomialFeatures(
            degree=self.degree,
            include_bias=self.include_bias,
            interaction_only=self.interaction_only
        )
        self.poly.fit(X[self.features_])  # type: ignore
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms the input DataFrame by adding polynomial features.

        Args:
            X (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: DataFrame with polynomial features added.

        Raises:
            RuntimeError: If the transformer has not been fitted.
        """
        if self.poly is None:
            raise RuntimeError("You must fit the transformer before transforming the data.")

        # Use the columns the inner PolynomialFeatures was fitted on.
        features = self.features_

        X_transformed = X.copy()
        poly_features = self.poly.transform(X[features])
        feature_names = self.poly.get_feature_names_out(features)
        df_poly = pd.DataFrame(poly_features, columns=feature_names, index=X.index)

        # Remove columns that already exist in X to avoid duplicates
        # (the degree-1 terms carry the names of the input features).
        df_poly = df_poly.drop(columns=features, errors='ignore')

        X_transformed = pd.concat([X_transformed, df_poly], axis=1)
        return X_transformed
class ProductFeaturesTransformer(BaseEstimator, TransformerMixin):
    """
    Creates product interaction features between specified pairs of features.

    When ``feature_pairs`` is None the pairs are derived from the numeric
    columns seen in ``fit`` and stored in the fitted attribute
    ``feature_pairs_``. The ``feature_pairs`` constructor argument is never
    modified, so refitting on a different DataFrame re-detects the pairs.
    """

    def __init__(self, feature_pairs: Optional[List[Tuple[str, str]]] = None):
        """
        Initializes the ProductFeaturesTransformer.

        Args:
            feature_pairs (List[Tuple[str, str]], optional): List of tuples
                representing feature pairs to create product features.
                If None, all possible pairs of numeric features are used.
        """
        self.feature_pairs = feature_pairs

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'ProductFeaturesTransformer':
        """
        Fits the transformer to the data.

        Args:
            X (pd.DataFrame): Input DataFrame.
            y (pd.Series, optional): Target variable (not used).

        Returns:
            self
        """
        if self.feature_pairs is None:
            numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
            # Every unordered pair (i < j) of numeric columns, in column order.
            self.feature_pairs_ = [
                (f1, f2)
                for i, f1 in enumerate(numeric_features)
                for f2 in numeric_features[i + 1:]
            ]
        else:
            self.feature_pairs_ = list(self.feature_pairs)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms the input DataFrame by adding product features.

        Each pair ``(f1, f2)`` adds a column named ``'{f1}_x_{f2}'``.

        Args:
            X (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: DataFrame with product features added.

        Raises:
            ValueError: If no feature pairs are available (none were provided
                and the transformer has not been fitted), or if a feature of
                a pair is missing from X.
        """
        # Explicit pairs win, so transform keeps working without a prior fit;
        # otherwise fall back to the pairs detected by fit().
        feature_pairs = self.feature_pairs
        if feature_pairs is None:
            feature_pairs = getattr(self, 'feature_pairs_', None)
        if feature_pairs is None:
            raise ValueError("feature_pairs is None. Fit the transformer or provide feature pairs before calling transform.")

        X_transformed = X.copy()
        for (f1, f2) in feature_pairs:
            if f1 not in X.columns or f2 not in X.columns:
                raise ValueError(f"Features '{f1}' and/or '{f2}' not found in DataFrame.")
            X_transformed[f'{f1}_x_{f2}'] = X[f1] * X[f2]
        return X_transformed
class ArithmeticCombinationsTransformer(BaseEstimator, TransformerMixin):
    """
    Generates arithmetic combination features for specified feature pairs.

    When ``feature_pairs`` is None the pairs are derived from the numeric
    columns seen in ``fit`` and stored in the fitted attribute
    ``feature_pairs_``. The ``feature_pairs`` constructor argument is never
    modified, so refitting on a different DataFrame re-detects the pairs.
    """

    def __init__(
        self,
        feature_pairs: Optional[List[Tuple[str, str]]] = None,
        operations: Optional[List[str]] = None
    ):
        """
        Initializes the ArithmeticCombinationsTransformer.

        Args:
            feature_pairs (List[Tuple[str, str]], optional): List of tuples
                representing feature pairs for arithmetic combinations.
                If None, all possible pairs of numeric features are used.
            operations (List[str], optional): List of arithmetic operations to
                apply. Options are 'add', 'subtract', 'multiply', 'divide'.
                Default is ['add', 'subtract'].
        """
        self.feature_pairs = feature_pairs
        # A falsy value (None or an empty list) selects the default operations.
        self.operations = operations or ['add', 'subtract']

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'ArithmeticCombinationsTransformer':
        """
        Fits the transformer to the data.

        Args:
            X (pd.DataFrame): Input DataFrame.
            y (pd.Series, optional): Target variable (not used).

        Returns:
            self
        """
        if self.feature_pairs is None:
            numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
            # Every unordered pair (i < j) of numeric columns, in column order.
            self.feature_pairs_ = [
                (f1, f2)
                for i, f1 in enumerate(numeric_features)
                for f2 in numeric_features[i + 1:]
            ]
        else:
            self.feature_pairs_ = list(self.feature_pairs)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms the input DataFrame by adding arithmetic combination features.

        For each pair ``(f1, f2)`` the enabled operations add, in this order,
        the columns ``'{f1}_plus_{f2}'``, ``'{f1}_minus_{f2}'``,
        ``'{f1}_times_{f2}'`` and ``'{f1}_div_{f2}'``. Infinite division
        results are replaced with NaN. Operation names other than the four
        listed are ignored.

        Args:
            X (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: DataFrame with arithmetic combination features added.

        Raises:
            ValueError: If no feature pairs are available (none were provided
                and the transformer has not been fitted), or if a feature of
                a pair is missing from X.
        """
        # Explicit pairs win, so transform keeps working without a prior fit;
        # otherwise fall back to the pairs detected by fit().
        feature_pairs = self.feature_pairs
        if feature_pairs is None:
            feature_pairs = getattr(self, 'feature_pairs_', None)
        if feature_pairs is None:
            raise ValueError("feature_pairs is None. Fit the transformer or provide feature pairs before calling transform.")

        # The selected operations do not change per pair; test them once.
        do_add = 'add' in self.operations
        do_subtract = 'subtract' in self.operations
        do_multiply = 'multiply' in self.operations
        do_divide = 'divide' in self.operations

        X_transformed = X.copy()
        for (f1, f2) in feature_pairs:
            if f1 not in X.columns or f2 not in X.columns:
                raise ValueError(f"Features '{f1}' and/or '{f2}' not found in DataFrame.")

            if do_add:
                X_transformed[f'{f1}_plus_{f2}'] = X[f1] + X[f2]
            if do_subtract:
                X_transformed[f'{f1}_minus_{f2}'] = X[f1] - X[f2]
            if do_multiply:
                X_transformed[f'{f1}_times_{f2}'] = X[f1] * X[f2]
            if do_divide:
                with np.errstate(divide='ignore', invalid='ignore'):
                    division_result = X[f1] / X[f2]
                # Division by zero yields +/-inf; expose it as missing instead.
                X_transformed[f'{f1}_div_{f2}'] = division_result.replace(
                    [np.inf, -np.inf], np.nan
                )
        return X_transformed
class CrossedFeaturesTransformer(BaseEstimator, TransformerMixin):
    """
    Creates crossed interaction features for specified categorical variable pairs.

    When ``feature_pairs`` is None the pairs are derived from the
    object/category columns seen in ``fit`` and stored in the fitted attribute
    ``feature_pairs_``. The ``feature_pairs`` constructor argument is never
    modified, so refitting on a different DataFrame re-detects the pairs.
    """

    def __init__(self, feature_pairs: Optional[List[Tuple[str, str]]] = None):
        """
        Initializes the CrossedFeaturesTransformer.

        Args:
            feature_pairs (List[Tuple[str, str]], optional): List of tuples
                representing pairs of categorical features to create crossed
                features. If None, all possible pairs of categorical features
                are used.
        """
        self.feature_pairs = feature_pairs

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'CrossedFeaturesTransformer':
        """
        Fits the transformer to the data.

        Args:
            X (pd.DataFrame): Input DataFrame.
            y (pd.Series, optional): Target variable (not used).

        Returns:
            self
        """
        if self.feature_pairs is None:
            categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
            # Every unordered pair (i < j) of categorical columns, in column order.
            self.feature_pairs_ = [
                (f1, f2)
                for i, f1 in enumerate(categorical_features)
                for f2 in categorical_features[i + 1:]
            ]
        else:
            self.feature_pairs_ = list(self.feature_pairs)
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms the input DataFrame by adding crossed features.

        Each pair ``(f1, f2)`` adds a column named ``'{f1}_{f2}_crossed'``
        holding the two values cast to ``str`` and joined with ``'_'``.

        Args:
            X (pd.DataFrame): Input DataFrame.

        Returns:
            pd.DataFrame: DataFrame with crossed features added.

        Raises:
            ValueError: If no feature pairs are available (none were provided
                and the transformer has not been fitted), or if a feature of
                a pair is missing from X.
        """
        # Explicit pairs win, so transform keeps working without a prior fit;
        # otherwise fall back to the pairs detected by fit().
        feature_pairs = self.feature_pairs
        if feature_pairs is None:
            feature_pairs = getattr(self, 'feature_pairs_', None)
        if feature_pairs is None:
            raise ValueError("feature_pairs is None. Fit the transformer or provide feature pairs before calling transform.")

        X_transformed = X.copy()
        for (f1, f2) in feature_pairs:
            if f1 not in X.columns or f2 not in X.columns:
                raise ValueError(f"Features '{f1}' and/or '{f2}' not found in DataFrame.")
            X_transformed[f'{f1}_{f2}_crossed'] = X[f1].astype(str) + '_' + X[f2].astype(str)
        return X_transformed