【1週間アルゴリズム実践合宿】【モデル構築】baseline
3163 ワード
データの読み込み
import pandas as pd

# Load the complete dataset; the CSV file is decoded as GBK text.
data_all = pd.read_csv("data_all.csv", encoding="gbk")
データセットの分割
# Separate the inputs from the label and hold out 30% of the rows for testing.
from sklearn.model_selection import train_test_split

# Every column except the 'status' label is used as an input feature.
features = [col for col in data_all.columns if col != 'status']
X, y = data_all[features], data_all['status']

# A fixed random_state keeps the train/test partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2018,
)
モデルの構築
# Baseline classifier: logistic regression with library defaults and a fixed seed.
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=2018)
# Train on the training split only; the test split stays untouched for scoring.
lr.fit(X=X_train, y=y_train)
モデルスコア
# Evaluate the fitted model on the held-out test split
# (for a classifier, score() reports mean accuracy).
lr.score(X_test,y_test)
# Recorded output from the author's run: 0.7484232655921513