sklearnでカテゴリ特徴量を数値型に変換する

2760 ワード

-------------------------           -------------------------------------------------------------------------------
import numpy as np
import pandas as pd

# Toy dataset used throughout: two categorical features (a nominal 'color'
# and an ordinal 'size'), one numeric feature ('price') and a class label.
df = pd.DataFrame({
    'color': ['green', 'red', 'blue'],
    'size': ['M', 'L', 'XL'],
    'price': [10.1, 13.5, 15.3],
    'classlabel': ['class1', 'class2', 'class1'],
})
print(df)
   color size  price classlabel
0  green    M   10.1     class1
1    red    L   13.5     class2
2   blue   XL   15.3     class1

# Ordinal encoding of 'size': each size string is replaced by a
# hand-assigned integer (M=1, L=2, XL=3).
size_mapping = {'M': 1, 'L': 2, 'XL': 3}
# Series.map looks every value up in the dict; a value that is not a key
# of the dict would become NaN.
df['size'] = df['size'].map(size_mapping)
print(df)
   color  size  price classlabel
0  green     1   10.1     class1
1    red     2   13.5     class2
2   blue     3   15.3     class1

Series の走査(各要素を順に取り出す)

# Iterate over the 'classlabel' Series: enumerate() pairs every label with
# its positional index (0, 1, 2, ...) and both are printed.
for idx, label in enumerate(df['classlabel']):
    print(idx, label)
0 class1
1 class2
2 class1

1. LabelEncoder
from sklearn.preprocessing import LabelEncoder

# Encode the string columns as integer codes, using a separate LabelEncoder
# per column so each encoder keeps the label set it was fitted on.
class_le = LabelEncoder()
df['classlabel'] = class_le.fit_transform(df['classlabel'].values)

# NOTE(review): the integer codes given to 'color' imply an ordering between
# colors; for a nominal feature one-hot encoding is usually the safer choice.
color_le = LabelEncoder()
df['color'] = color_le.fit_transform(df['color'].values)

print(df)
   color  size  price  classlabel
0      1     1   10.1           0
1      2     2   13.5           1
2      0     3   15.3           0

2. dict + map

# Build a label -> integer lookup by numbering the distinct class labels in
# the order np.unique returns them, then apply it to the column.
class_mapping = {
    label: code
    for code, label in enumerate(np.unique(df['classlabel']))
}
df['classlabel'] = df['classlabel'].map(class_mapping)
print('2,', df)
2,    color size  price  classlabel
0  green    M   10.1           0
1    red    L   13.5           1
2   blue   XL   15.3           0

3. one-hot (pd.get_dummies)

# One-hot encode 'color': get_dummies produces one indicator column per
# distinct color (named color_<value>), which is appended to df before the
# original 'color' column is removed.
pf = pd.get_dummies(df[['color']])
df = pd.concat([df, pf], axis=1).drop(columns=['color'])
print(df)
   size  price classlabel  color_blue  color_green  color_red
0    M   10.1     class1           0            1          0
1    L   13.5     class2           0            0          1
2   XL   15.3     class1           1            0          0