機械学習/決定木


import graphviz
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

# Fit a depth-2 decision tree classifier on the iris data set, using only
# the last two feature columns (petal length and petal width).
iris = load_iris()
X = iris.data[:, 2:]  # petal length, width
y = iris.target
tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)
tree_clf.fit(X, y)
# Notebook echo of the fitted estimator (cell output, not a statement to run):
#   DecisionTreeClassifier(max_depth=2, random_state=42)

# Dump the fitted tree as a Graphviz DOT file, labelling nodes with the
# petal feature names and the iris class names.
export_graphviz(
    tree_clf,
    out_file="iris_tree.dot",
    class_names=iris.target_names,
    feature_names=iris.feature_names[2:],
    rounded=True,
    filled=True,
)

# Read the DOT source back. The encoding is given explicitly so the result
# does not depend on the platform's default text encoding.
with open("iris_tree.dot", encoding="utf-8") as f:
    dot_graph = f.read()

# Bare expression kept on purpose: in a notebook, a Source object as the
# last expression of a cell is what renders the tree diagram.
graphviz.Source(dot_graph)

訓練(トレーニング)とは、コスト関数を最小化することである。モデルごとのコスト関数:
  • 線形回帰(予測):MSE
  • ロジスティック回帰(分類):対数損失(log loss)
  • ソフトマックス回帰(多クラス分類):クロスエントロピー
  • 決定木:CARTコスト関数[特徴量、閾値]
  • SVM:ヒンジ損失(hinge loss)
  • tree_clf.predict_proba([[5, 1.5]])
    array([[0.        , 0.90740741, 0.09259259]])
    tree_clf.predict([[5, 1.5]])
    array([1])
    tree_clf.feature_importances_
    array([0.56199095, 0.43800905])
    iris.feature_names[2:]
    ['petal length (cm)', 'petal width (cm)']
  • 回帰木
  • np.random.seed(42)
    m = 200
    X = np.random.rand(m, 1)
    y = 4 * (X-0.5) ** 2
    y = y  + np.random.rand(m,1) / 10
    from sklearn.tree import DecisionTreeRegressor
    tree_reg = DecisionTreeRegressor(max_depth=2, random_state=42)
    tree_reg.fit(X, y)
    DecisionTreeRegressor(max_depth=2, random_state=42)
    export_graphviz(
        tree_reg,
        out_file="regression_tree.dot",
        feature_names = ["x1"], 
        rounded=True, 
        filled=True)
    import graphviz
    with open("regression_tree.dot") as f:
        dot_graph = f.read()
        
    graphviz.Source(dot_graph)    
    tree_reg.predict([[0.6]])
    array([0.22015219])