Udacity 機械学習:特徴量スケーリング
4039 ワード
練習:最小/最大リスケーラーのコーディング
import numpy as np
def featureScaling(arr):
    """Rescale values to the [0, 1] range with min/max scaling.

    Each value ``x`` is mapped to ``(x - min) / (max - min)``, where ``min``
    and ``max`` are taken over the whole input.

    Args:
        arr: A one-dimensional sequence of numbers.

    Returns:
        A list of floats, one per input value, in input order.  An empty
        input yields an empty list.  When every value is identical the range
        is zero and the formula is undefined, so each value maps to 0.0.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        return []

    # Local names deliberately avoid shadowing the builtins ``min``/``max``.
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant input: dividing would give 0/0 (NaN), so return zeros.
        return [0.0] * values.size

    return ((values - lowest) / span).tolist()
# Quick check of the feature scaler; the line below is the input data.
data = [115, 140, 175]
# Parenthesised call form prints the same in Python 2 and also runs in Python 3.
print(featureScaling(data))
練習:リスケーリングが必要なアルゴリズム
練習:スケールタイプ
練習:リスケールした特徴量の計算
import numpy as np
# Gather every known (non-'NaN') exercised_stock_options value from data_dict.
stocklist = np.array([
    person['exercised_stock_options']
    for person in data_dict.values()
    if person['exercised_stock_options'] != 'NaN'
])
stock_min = np.min(stocklist)
stock_max = np.max(stocklist)
# Min/max rescaling of a 1,000,000 stock-option value.  The whole expression
# sits inside print(...): the statement form `print (a) / (b)` would, under
# Python 3, print only the numerator and then fail dividing None.
print((1000000.0 - stock_min) / (stock_max - stock_min))

# Same procedure for the salary feature; converted to an array as well so
# both features are handled consistently.
salarylist = np.array([
    person['salary']
    for person in data_dict.values()
    if person['salary'] != 'NaN'
])
salary_min = np.min(salarylist)
salary_max = np.max(salarylist)
# Min/max rescaling of a 200,000 salary.
print((200000.0 - salary_min) / (salary_max - salary_min))
不思議なことに、sklearn の MinMaxScaler の scale_ を使って計算すると、上の結果と一致せず誤差が生じます(scale_ を掛けるだけでは最小値のオフセット min_ が反映されないためです)。コードは以下の通りです。
import numpy as np
from sklearn import preprocessing

# Gather every known (non-'NaN') exercised_stock_options value from data_dict.
stocklist = np.array([
    person['exercised_stock_options']
    for person in data_dict.values()
    if person['exercised_stock_options'] != 'NaN'
])

min_max_scaler = preprocessing.MinMaxScaler()
# MinMaxScaler works on 2-D (n_samples, n_features) input, so the 1-D values
# are reshaped into a single column before fitting.
min_max_scaler.fit(stocklist.reshape(-1, 1).astype(float))
# The scaler maps x to x * scale_ + min_.  Multiplying by scale_ alone drops
# the min_ offset and gives x / (max - min) instead of (x - min) / (max - min),
# so transform() is used to apply the complete mapping.
print(min_max_scaler.transform(np.array([[1000000.0]]))[0][0])

# Same procedure for the salary feature; the scaler is refitted on it.
salarylist = np.array([
    person['salary']
    for person in data_dict.values()
    if person['salary'] != 'NaN'
])
min_max_scaler.fit(salarylist.reshape(-1, 1).astype(float))
print(min_max_scaler.transform(np.array([[200000.0]]))[0][0])

# Show the observed salary range for reference.
print(np.max(salarylist))
print(np.min(salarylist))