Types of Feature Engineering: Categorical Encoding Techniques

1. One-Hot Encoding
import pandas as pd

# Toy data: a single categorical column holding three distinct animals
df = pd.DataFrame(
    {'Animal': ['Cat', 'Dog', 'Fish']}
)

# One-Hot Encoding: expand 'Animal' into one 0/1 indicator column per category.
# dtype=int makes the indicators print as 0/1 instead of False/True.
df_one_hot = pd.get_dummies(
    data=df,
    columns=['Animal'],
    dtype=int,
)
print(df_one_hot)

Output:

   Animal_Cat  Animal_Dog  Animal_Fish
0           1           0            0
1           0           1            0
2           0           0            1

2. Label Encoding

from sklearn.preprocessing import LabelEncoder

# Toy data: a single categorical column holding three distinct animals
df = pd.DataFrame(
    {'Animal': ['Cat', 'Dog', 'Fish']}
)

# Label Encoding: first learn the label -> integer mapping from the column,
# then apply that mapping to produce one integer code per row.
label_encoder = LabelEncoder()
label_encoder.fit(df['Animal'])
df['Animal_Label'] = label_encoder.transform(df['Animal'])
print(df)

Output:

  Animal  Animal_Label
0   Cat             0
1   Dog             1
2  Fish             2


3. Binary Encoding

!pip install category_encoders

import pandas as pd
from category_encoders import BinaryEncoder

# Sample DataFrame: a single categorical column 'Color' with three distinct values
df = pd.DataFrame({'Color': ['Red', 'Blue', 'Green']})

# Binary Encoding
# Only the 'Color' column is encoded (cols=['Color']). fit_transform learns the
# encoding from df and returns the encoded frame in a single call; in the
# output shown below the three colors are spread over two 0/1 columns
# (Color_0, Color_1) rather than one column per category.
encoder = BinaryEncoder(cols=['Color'])
df_binary = encoder.fit_transform(df)
print(df_binary)

Output:

   Color_0  Color_1
0        0        1
1        1        0
2        1        1


4. Frequency Encoding

# Toy data: 'Apple' and 'Banana' each appear twice, 'Orange' once
df = pd.DataFrame(
    {'Fruit': ['Apple', 'Banana', 'Apple', 'Orange', 'Banana']}
)

# Frequency Encoding: replace each category by the number of rows containing it
fruit_counts = df['Fruit'].value_counts()
frequency_map = fruit_counts.to_dict()  # {category: occurrence count}
df = df.assign(Fruit_Frequency=df['Fruit'].map(frequency_map))
print(df)

Output:


    Fruit  Fruit_Frequency
0   Apple                2
1  Banana                2
2   Apple                2
3  Orange                1
4  Banana                2


5. Target Encoding

# Toy data: 'New York' appears twice, so its encoding averages two sales values
df = pd.DataFrame(
    {
        'City': ['New York', 'London', 'New York', 'Tokyo'],
        'Sales': [100, 200, 150, 300],
    }
)

# Target Encoding: replace each city by the mean of 'Sales' over its rows.
# NOTE: the means here are computed on the full frame for illustration; when
# training a model, compute them on the training split only so the target
# does not leak into the features.
sales_by_city = df.groupby('City')['Sales']
target_mean = sales_by_city.mean()
df['City_Target_Encoding'] = df['City'].map(target_mean)
print(df)

Output:

       City  Sales  City_Target_Encoding
0  New York    100                 125.0
1    London    200                 200.0
2  New York    150                 125.0
3     Tokyo    300                 300.0


6. Ordinal Encoding

from sklearn.preprocessing import OrdinalEncoder

# Toy data: sizes that have a natural order
df = pd.DataFrame(
    {'Size': ['Small', 'Medium', 'Large']}
)

# Define Custom Order: one inner list per encoded column, lowest rank first
size_order = [['Small', 'Medium', 'Large']]

# Ordinal Encoding: learn the mapping from the explicit order above, then
# apply it; the encoder expects 2-D input, hence df[['Size']] (a one-column
# frame) rather than df['Size'].
ordinal_encoder = OrdinalEncoder(categories=size_order)
ordinal_encoder.fit(df[['Size']])
df['Size_Ordinal'] = ordinal_encoder.transform(df[['Size']])
print(df)

Output:

     Size  Size_Ordinal
0   Small           0.0
1  Medium           1.0
2   Large           2.0


7. Hash Encoding

!pip install category_encoders

import pandas as pd
from category_encoders import HashingEncoder

# Sample DataFrame: a single categorical column 'Fruit' with three distinct values
df = pd.DataFrame({'Fruit': ['Apple', 'Banana', 'Orange']})

# Hash Encoding
# Only the 'Fruit' column is encoded (cols=['Fruit']); n_components fixes the
# number of output columns up front instead of deriving it from the categories.
# NOTE(review): with 2 output columns for 3 categories, at least two fruits
# must share an encoding — fine for a demo, confirm before using on real data.
hash_encoder = HashingEncoder(cols=['Fruit'], n_components=2)  # 2 hash columns
df_hashed = hash_encoder.fit_transform(df)
print(df_hashed)
Output (example; values are integer counts per hash bucket, and which column receives the 1 depends on the hash of each value):


   col_0  col_1
0      0      1
1      1      0
2      0      1