【1週間アルゴリズム実践合宿】【モデル構築】baseline

3163 ワード

データの読み込み

import pandas as pd
# Load the whole dataset into a DataFrame. The file is decoded as GBK
# (passed explicitly via encoding=) instead of pandas' default UTF-8.
data_all = pd.read_csv('data_all.csv',encoding='gbk')

データセットの分割

# Split the data: every column except 'status' is a feature, 'status' is the
# label, and 30% of the rows are held out for testing with a fixed seed.
from sklearn.model_selection import train_test_split

features = [col for col in data_all.columns if col != 'status']
X, y = data_all[features], data_all['status']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2018,
)

モデルの構築

# Baseline model: logistic regression with library defaults, fitted on the
# training split. The seed is fixed at 2018, the same value used for the split.
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=2018)
lr.fit(X=X_train, y=y_train)

モデルスコア

# Evaluate the fitted model on the held-out test split. For scikit-learn
# classifiers, score() returns the mean accuracy of the predictions.
lr.score(X_test,y_test)
# Recorded result (presumably the output of the call above): 0.7484232655921513