sklearnラーニングコード

5696 ワード

 
  
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
from numpy import *
import types

# Load the training and test tables. The first two columns of `train` are
# skipped when selecting features (presumably "Id" and the "Hazard" target),
# and the first column of `test` is skipped (presumably "Id").
train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

# Integer-encode the text-valued columns so the estimators receive numeric
# input. A column is treated as text when its first value is a `str`
# (`types.StringType` was Python 2 only and no longer exists in Python 3).
train_text = [c for c in train.columns[2:] if isinstance(train[c].iloc[0], str)]
test_text = [c for c in test.columns[1:] if isinstance(test[c].iloc[0], str)]
shared_text = [c for c in train_text if c in test_text]

# Columns that are text in BOTH frames must share one code table; factorizing
# each frame on its own numbers categories by order of first appearance, so
# the same string could map to different integers in train and in test.
# Train rows come first in the concatenation, so train receives exactly the
# codes it would get from being factorized alone.
n_train = len(train)
for col in shared_text:
    codes, _ = pd.factorize(pd.concat([train[col], test[col]], ignore_index=True))
    train[col] = codes[:n_train]
    test[col] = codes[n_train:]

# Text columns found in only one of the frames are encoded on their own.
for col in train_text:
    if col not in shared_text:
        train[col] = pd.factorize(train[col])[0]
for col in test_text:
    if col not in shared_text:
        test[col] = pd.factorize(test[col])[0]
# Alternative estimators tried on the same features and target. They are
# disabled; each one assigns `clf`, `features` and `result` exactly like the
# active model, so enable at most one of them in its place.
#
# --- RandomForest ---
# clf = RandomForestClassifier(n_jobs=2)
# features = train.columns[2:]
# clf.fit(train[features],train["Hazard"])
# result = clf.predict(test[features])
#
# --- SVM (linear kernel) ---
# features = train.columns[2:]
# clf =svm.SVC(kernel='linear').fit(train[features],train["Hazard"])
# result = clf.predict(test[features])
#
# --- LogisticRegression ---
# features = train.columns[2:]
# clf = LogisticRegression()
# clf.fit(train[features],train["Hazard"])
# result = clf.predict(test[features])
#
# --- AdaBoost ---
# clf = AdaBoostClassifier(n_estimators=40,learning_rate=0.001)
# features = train.columns[2:]
# clf.fit(train[features],train["Hazard"])
# result = clf.predict(test[features])
# Active model: GradientBoosting with n_estimators=100.
# Every column of `train` from the third one onward is a predictor and the
# "Hazard" column is the target; the same column names are read from `test`.
features = train.columns[2:]
clf = GradientBoostingClassifier(n_estimators=100).fit(
    train[features], train["Hazard"]
)
result = clf.predict(test[features])

# Attach the predictions to the test frame, then export just the "Id" and
# "Hazard" columns (without the index) and echo the same table to stdout.
test["Hazard"] = result
final = test[["Id", "Hazard"]]
final.to_csv("data/out.csv", index=False)
print(final)