学習を監視するpython実装
4413 ワード
『Python機械学習の実例』(【米】Prateek Joshi 著、陶俊傑・陳小莉 訳)による学習ノート。1. データ前処理関連の操作とラベル符号化、線形回帰モデル構築コード【テンプレート】。
4.(6) 回帰精度の算出 ---- 回帰器のフィット効果の評価。最小二乗法ですべてのデータ点を考慮してモデリングすると、異常値の存在によりモデルが最適にならない場合がある。この問題を回避するため、正則化項の係数をしきい値として導入して異常値の影響を抑える。この方法がリッジ回帰(嶺回帰)である。
# (1) Sample data: three samples, four features each.
import numpy as np
from sklearn import preprocessing

data = np.array([[3, -1.5, 2, -5.4],
                 [0, 4, -0.3, 2.1],
                 [1, 3.3, -1.9, -4.3]])

# (2) Standardization: per-feature zero mean and unit variance.
data_standardized = preprocessing.scale(data)
print('\nMean=', data_standardized.mean(axis=0))
print('Std deviation=', data_standardized.std(axis=0))

# (3) Min-max scaling: map each feature into the [0, 1] range.
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scaler.fit_transform(data)
print('\nMin max scaler data=', data_scaled)

# (4) L1 normalization: each row's absolute values sum to 1.
data_normalized = preprocessing.normalize(data, norm='l1')
print('\nL1 normalized data=', data_normalized)

# (5) Binarization: values above the threshold become 1, the rest 0.
data_binarized = preprocessing.Binarizer(threshold=1.4).transform(data)
print('\nBinarized data=', data_binarized)

# (6) One-hot encoding of categorical integer features.
encoder = preprocessing.OneHotEncoder()
encoder.fit([[0, 2, 1, 12], [1, 3, 5, 3], [2, 3, 2, 12], [1, 2, 4, 3]])
encoder_vector = encoder.transform([[2, 3, 5, 3]]).toarray()
print('\nEncoded vector=', encoder_vector)
#from sklearn import preprocessing
label_encoder=preprocessing.LabelEncoder() #
input_classes=['audi','ford','audi','toyota','ford','bmw']#
#
label_encoder.fit(input_classes)
print('
Class mapping:')
for i,item in enumerate(label_encoder.classes_):
print(item,'-->',i)
#
labels=['toyota','ford','audi']
encoded_labels=label_encoder.transform(labels)
print('
Labels=',labels)
print('Encoded labels=',list(encoded_labels))
#
encoded_labels=[2,1,1,3]
decoded_labels=label_encoder.inverse_transform(encoded_labels)
print('
Encoded labels=',encoded_labels)
print('Decoded labels',list(decoded_labels))
# 3. Linear regression.
# (1) Read "x,y" pairs from the data file given on the command line.
import sys
import numpy as np

filename = sys.argv[1]
x = []
y = []
with open(filename, 'r') as f:
    for line in f:  # iterate the file lazily instead of readlines()
        xt, yt = [float(i) for i in line.split(',')]
        x.append(xt)
        y.append(yt)

# (2) Split into training (first 80%) and test (remaining 20%) sets.
num_training = int(0.8 * len(x))
num_test = len(x) - num_training

# Training data: sklearn expects X as an (n_samples, 1) column vector.
x_train = np.array(x[:num_training]).reshape((num_training, 1))
y_train = np.array(y[:num_training])

# Test data.
x_test = np.array(x[num_training:]).reshape((num_test, 1))
y_test = np.array(y[num_training:])
# (3) Create and train the linear regression model.
from sklearn import linear_model

linear_regressor = linear_model.LinearRegression()
linear_regressor.fit(x_train, y_train)

# (4) Visualize the fit on the training data.
import matplotlib.pyplot as plt  # fixed: 'matplot.pyplot' is not a real module

y_train_pred = linear_regressor.predict(x_train)
plt.figure()
plt.scatter(x_train, y_train, color='green')
plt.plot(x_train, y_train_pred, color='black', linewidth=4)
plt.title('Train data')
plt.show()

# Visualize predictions on the held-out test data
# (own figure + scatter, consistent with the training plot above).
y_test_pred = linear_regressor.predict(x_test)
plt.figure()
plt.scatter(x_test, y_test, color='green')
plt.plot(x_test, y_test_pred, color='black', linewidth=4)
plt.title('Test data')
plt.show()
# (5) Persist the trained model, then reload it and re-evaluate.
import pickle  # fixed: cPickle exists only in Python 2
import sklearn.metrics as sm  # fixed: sm was used here before its later import

output_model_file = 'save_model.pkl'  # model is saved to save_model.pkl
# Pickle is a binary format: the file must be opened in 'wb'/'rb' mode.
with open(output_model_file, 'wb') as f:
    pickle.dump(linear_regressor, f)

# Reload the model and predict on the test set again.
with open(output_model_file, 'rb') as f:
    model_linregr = pickle.load(f)

y_test_pred_new = model_linregr.predict(x_test)
# Mean absolute error of the reloaded model, rounded to 2 decimals.
print("\nNew mean absolute error=",
      round(sm.mean_absolute_error(y_test, y_test_pred_new), 2))
# 4.(6) 回帰精度の算出 ---- 回帰器のフィット効果の評価
# 3.(6) Regression accuracy: evaluate how well the regressor fits the
# test data. sklearn.metrics supplies the standard error/score functions;
# every value is rounded to two decimal places for display.
import sklearn.metrics as sm

for _label, _metric in (
    ("Mean absolute error( ):", sm.mean_absolute_error),
    ("Mean squared error( ):", sm.mean_squared_error),
    ("Median absolute error( ):", sm.median_absolute_error),
    ("Explained variance score( ):", sm.explained_variance_score),
    ("R2 score(R2 )", sm.r2_score),
):
    print(_label, round(_metric(y_test, y_test_pred), 2))