教師あり学習のPython実装

4413 ワード

『Python Machine Learning Cookbook』(【米】Prateek Joshi 著、陶俊傑・陳小莉 訳)で学んだ内容のメモ

1. データ前処理関連の操作
# Data preprocessing walkthrough using sklearn.preprocessing.
# Each numbered step transforms the same sample array and prints the result.

# (1) Build the sample data: three rows, four columns.
import numpy as np
from sklearn import preprocessing

data = np.array([[3, -1.5, 2, -5.4], [0, 4, -0.3, 2.1], [1, 3.3, -1.9, -4.3]])

# (2) Standardization: preprocessing.scale centers each column to zero mean
# and scales it to unit variance.
data_standardized = preprocessing.scale(data)
print('\nMean=', data_standardized.mean(axis=0))
# Expect a mean of ~0 and a standard deviation of ~1 for every column.
print('Std deviation=', data_standardized.std(axis=0))

# (3) Range scaling: map each column into the interval [0, 1].
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scaler.fit_transform(data)
print('\nMin max scaler data=', data_scaled)

# (4) Normalization: rescale each row using the L1 norm.
data_normalized = preprocessing.normalize(data, norm='l1')
print('\nL1 normalized data=', data_normalized)

# (5) Binarization: values above the 1.4 threshold become 1, the rest 0.
data_binarized = preprocessing.Binarizer(threshold=1.4).transform(data)
print('\nBinarized data=', data_binarized)

# (6) One-hot encoding: fit on four categorical samples, then encode one
# sample; transform() returns a sparse matrix, so convert it to a dense array.
encoder = preprocessing.OneHotEncoder()
encoder.fit([[0, 2, 1, 12], [1, 3, 5, 3], [2, 3, 2, 12], [1, 2, 4, 3]])
encoder_vector = encoder.transform([[2, 3, 5, 3]]).toarray()
print('\nEncoded vector=', encoder_vector)
  • ラベルエンコーディング
  • #from sklearn import preprocessing
    # Label encoding demo: map string class labels to integers and back.
    # Imported here so the snippet runs on its own.
    from sklearn import preprocessing

    label_encoder = preprocessing.LabelEncoder()  # create the label encoder
    input_classes = ['audi', 'ford', 'audi', 'toyota', 'ford', 'bmw']  # sample labels

    # Fit the encoder, then list each learned class with its integer code
    # (the index of the class inside classes_).
    label_encoder.fit(input_classes)
    print('\nClass mapping:')
    for i, item in enumerate(label_encoder.classes_):
        print(item, '-->', i)

    # Encode a list of labels into their integer codes.
    labels = ['toyota', 'ford', 'audi']
    encoded_labels = label_encoder.transform(labels)
    print('\nLabels=', labels)
    print('Encoded labels=', list(encoded_labels))

    # Decode integer codes back into the original labels.
    encoded_labels = [2, 1, 1, 3]
    decoded_labels = label_encoder.inverse_transform(encoded_labels)
    print('\nEncoded labels=', encoded_labels)
    print('Decoded labels', list(decoded_labels))
  • 線形回帰モデル構築コード【テンプレート】
  • #3.        
    # Linear regression template: load "x,y" pairs from the file named by the
    # first command-line argument, train on the first 80% of the rows, plot the
    # fit, then save/reload the model with pickle and score the reloaded model.

    # (1) Load the data: one comma-separated "x,y" pair per line.
    import sys
    import numpy as np

    filename = sys.argv[1]
    x = []
    y = []
    with open(filename, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            xt, yt = [float(i) for i in line.split(',')]
            x.append(xt)
            y.append(yt)

    # (2) Split into a training set (first 80% of rows) and a test set (rest).
    num_training = int(0.8 * len(x))
    num_test = len(x) - num_training

    # Training data; x is reshaped into a single-column 2-D array for fit().
    x_train = np.array(x[:num_training]).reshape((num_training, 1))
    y_train = np.array(y[:num_training])

    # Test data
    x_test = np.array(x[num_training:]).reshape((num_test, 1))
    y_test = np.array(y[num_training:])

    # (3) Create the linear regressor and train it on the training data.
    from sklearn import linear_model

    linear_regressor = linear_model.LinearRegression()
    linear_regressor.fit(x_train, y_train)

    # (4) Plot the fitted line against the training points.
    import matplotlib.pyplot as plt  # fixed: the module is matplotlib, not matplot

    y_train_pred = linear_regressor.predict(x_train)
    plt.figure()
    plt.scatter(x_train, y_train, color='green')
    plt.plot(x_train, y_train_pred, color='black', linewidth=4)
    plt.title('Train data')
    plt.show()

    # Predict on the test set and plot it the same way as the training data,
    # so the figure shows the test points as well as the fitted line.
    y_test_pred = linear_regressor.predict(x_test)
    plt.figure()
    plt.scatter(x_test, y_test, color='green')
    plt.plot(x_test, y_test_pred, color='black', linewidth=4)
    plt.title('Test data')
    plt.show()

    # (5) Persist the model. pickle writes bytes, so the file must be opened
    # in binary mode; the stdlib pickle module replaces Python 2's cPickle.
    import pickle

    output_model_file = 'save_model.pkl'  # the model is stored in save_model.pkl
    with open(output_model_file, 'wb') as f:
        pickle.dump(linear_regressor, f)

    # Reload the model. NOTE: only unpickle files you trust; pickle.load can
    # execute arbitrary code from a tampered file.
    with open(output_model_file, 'rb') as f:
        model_linregr = pickle.load(f)

    y_test_pred_new = model_linregr.predict(x_test)
    # Score the reloaded model with mean absolute error; sklearn.metrics is
    # imported here because `sm` is used before any other import of it.
    import sklearn.metrics as sm

    print("\nNew mean absolute error=", round(sm.mean_absolute_error(y_test, y_test_pred_new), 2))

    4.(6)回帰精度の算出 ―― 回帰器の当てはまり具合の評価
    # Regression accuracy report. Continues the linear-regression script above:
    # it expects y_test (true targets) and y_test_pred (test-set predictions)
    # to be defined already. Each metric comes from sklearn.metrics and is
    # printed rounded to two decimal places.

    import sklearn.metrics as sm

    # (caption, metric function) pairs, in the order they are reported.
    metric_report = (
        ("Mean absolute error(      ):", sm.mean_absolute_error),
        ("Mean squared error(    ):", sm.mean_squared_error),
        ("Median absolute error(       ):", sm.median_absolute_error),
        ("Explained variance score(     ):", sm.explained_variance_score),
        ("R2 score(R2   )", sm.r2_score),
    )
    for caption, metric in metric_report:
        print(caption, round(metric(y_test, y_test_pred), 2))
    
  • リッジ回帰器の作成:最小二乗法でモデリングする際にはすべてのデータ点が考慮されますが、外れ値が存在するとモデルが最適にならない場合があります。この問題を回避するために、正則化項の係数をしきい値として導入し、外れ値の影響を取り除く必要があります。この方法がリッジ回帰です。