機械学習 - 適合率(Precision)と再現率(Recall)


適合率(Precision)と再現率(Recall)のトレードオフ

  • 適合率と再現率の調整
  • 決定閾値(threshold)を変更することで調整できるが、この二つは相補的な評価指標であるため、一方が上がるともう一方は下がる。
  • # Load the data
    import pandas as pd

    # Combined wine dataset; the first CSV column becomes the row index.
    wine = pd.read_csv('wine.csv', sep=',', index_col=0)

    # Binary target: 1.0 when quality is above 5, otherwise 0.0.
    # to_numpy() assigns by position, so no index alignment is involved.
    wine['taste'] = (wine['quality'] > 5).astype(float).to_numpy()

    # Features are every column except the target and the score it is derived from.
    y = wine['taste']
    X = wine.drop(columns=['taste', 'quality'])
    # Split the data: hold out 20% of the rows for testing, with a fixed seed
    # so the split is reproducible.
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=13,
    )
    # Logistic regression: fit on the training split, then report accuracy on both splits.
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score

    # fit() returns the estimator itself, so construction and training can be chained.
    lr = LogisticRegression(solver='liblinear', random_state=13).fit(X_train, y_train)

    y_pred_tr = lr.predict(X_train)
    y_pred_test = lr.predict(X_test)

    for label, truth, guess in (
        ("Train Acc :", y_train, y_pred_tr),
        ("Test Acc :", y_test, y_pred_test),
    ):
        print(label, accuracy_score(truth, guess))

  • classification_report:評価指標が一目瞭然.
  • # classification_report
    from sklearn.metrics import classification_report

    # Text summary of the per-class metrics on the held-out split.
    test_predictions = lr.predict(X_test)
    report_text = classification_report(y_test, test_predictions)
    print(report_text)

  • confusion matrix:予測値と実際の値の比較表(行 = 実際のクラス、列 = 予測クラス)
                    | 予測 positive | 予測 negative
      実際 positive | TP            | FN
      実際 negative | FP            | TN
  • # confusion matrix
    from sklearn.metrics import confusion_matrix

    # Rows are the actual classes and columns the predicted classes, in sorted
    # label order; for labels 0.0/1.0 the printed layout is [[TN, FP], [FN, TP]].
    test_predictions = lr.predict(X_test)
    matrix = confusion_matrix(y_test, test_predictions)
    print(matrix)

  • precision_recall curve:閾値ごとのPrecisionとRecallの曲線を描画
  • # precision_recall curve
    import matplotlib.pyplot as plt
    # NOTE(review): imported only for its side effects; presumably configures
    # Korean fonts for matplotlib — confirm against that module.
    import set_matplotlib_korean
    from sklearn.metrics import precision_recall_curve

    # Predicted probability of the positive class (second column of predict_proba).
    pred = lr.predict_proba(X_test)[:, 1]
    precisions, recalls, thresholds = precision_recall_curve(y_test, pred)

    # precision/recall are trimmed to the number of thresholds so that both
    # curves share the same x-axis.
    n_points = len(thresholds)

    plt.figure(figsize=(10, 8))
    for curve_name, curve in (('precision', precisions), ('recall', recalls)):
        plt.plot(thresholds, curve[:n_points], label=curve_name)
    plt.grid()
    plt.legend()
    plt.show()

  • デフォルトでは threshold = 0.5 である
  • # Join the predicted probabilities with the predicted labels
    import numpy as np

    # Class probabilities for each test row, one column per class.
    pred_proba = lr.predict_proba(X_test)

    # Append the predicted label as an extra column next to the probabilities.
    # Bare expression: its value is only shown in an interactive/notebook session.
    np.column_stack([pred_proba, y_pred_test])

    # Change the decision threshold with Binarizer
    from sklearn.preprocessing import Binarizer

    # Values strictly greater than the threshold become 1, all others 0.
    binarizer = Binarizer(threshold=0.6)
    binarizer.fit(pred_proba)
    thresholded = binarizer.transform(pred_proba)
    pred_bin = thresholded[:, 1]  # keep only the positive-class column

    # Bare expression: its value is only shown in an interactive/notebook session.
    (binarizer.threshold, pred_bin)

    # Re-check classification_report using the labels produced at the new threshold
    from sklearn.metrics import classification_report

    thresholded_report = classification_report(y_test, pred_bin)
    print(thresholded_report)