sklearnラーニングコード
5696 ワード
"""Fit a scikit-learn classifier on the hazard data and write predictions.

Reads ``data/train.csv`` and ``data/test.csv``, integer-encodes every
non-numeric feature column, fits the selected classifier on the training
features against the ``Hazard`` column, predicts ``Hazard`` for the test rows
and writes the ``Id``/``Hazard`` pairs to ``data/out.csv``.
"""

import pandas as pd

TRAIN_PATH = "data/train.csv"
TEST_PATH = "data/test.csv"
OUTPUT_PATH = "data/out.csv"
ID_COLUMN = "Id"
TARGET_COLUMN = "Hazard"

# The original script treated every training column from position 2 onward as
# a feature. NOTE(review): presumably columns 0 and 1 are Id and Hazard --
# confirm against the actual CSV header.
FIRST_FEATURE_INDEX = 2

DEFAULT_MODEL = "gradient_boosting"


def encode_categoricals(train, test, columns):
    """Replace non-numeric columns with integer codes shared by both frames.

    The codes are computed over the concatenation of the train and test
    values, so a given category maps to the same integer in both frames.
    (Factorizing each frame on its own, as the script used to do, numbers the
    categories by first appearance *per frame*, which silently scrambles the
    meaning of the test features.)  Because the training values come first in
    the concatenation, the training codes are the same ones a train-only
    ``pd.factorize`` would produce.

    Args:
        train: Training DataFrame; modified in place.
        test: Test DataFrame; modified in place.
        columns: Names of the feature columns to inspect. Each must exist in
            both frames.

    Returns:
        The ``(train, test)`` pair, for call-chaining convenience.
    """
    n_train = len(train)
    for column in columns:
        train_is_numeric = pd.api.types.is_numeric_dtype(train[column])
        test_is_numeric = pd.api.types.is_numeric_dtype(test[column])
        if train_is_numeric and test_is_numeric:
            continue
        combined = pd.concat([train[column], test[column]], ignore_index=True)
        # Missing values are encoded as -1 by pd.factorize.
        codes, _ = pd.factorize(combined)
        train[column] = codes[:n_train]
        test[column] = codes[n_train:]
    return train, test


def build_classifier(name=DEFAULT_MODEL):
    """Return an unfitted classifier for one of the configurations tried here.

    Args:
        name: One of ``"random_forest"``, ``"svm"``, ``"logistic_regression"``,
            ``"ada_boost"`` or ``"gradient_boosting"`` (the default, which is
            the model the script actually ran).

    Raises:
        ValueError: If ``name`` is not a known model name.
    """
    # Imported here so the pandas-only preprocessing above stays importable
    # (and testable) on machines without scikit-learn.
    from sklearn import svm
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression

    factories = {
        "random_forest": lambda: RandomForestClassifier(n_jobs=2),
        "svm": lambda: svm.SVC(kernel="linear"),
        "logistic_regression": lambda: LogisticRegression(),
        "ada_boost": lambda: AdaBoostClassifier(
            n_estimators=40, learning_rate=0.001
        ),
        "gradient_boosting": lambda: GradientBoostingClassifier(
            n_estimators=100
        ),
    }
    try:
        factory = factories[name]
    except KeyError:
        known = ", ".join(sorted(factories))
        raise ValueError(
            f"unknown model {name!r}; expected one of: {known}"
        ) from None
    return factory()


def main(model_name=DEFAULT_MODEL):
    """Run the full train / predict / export pipeline."""
    train = pd.read_csv(TRAIN_PATH)
    test = pd.read_csv(TEST_PATH)

    features = train.columns[FIRST_FEATURE_INDEX:]
    encode_categoricals(train, test, features)

    clf = build_classifier(model_name)
    clf.fit(train[features], train[TARGET_COLUMN])
    test[TARGET_COLUMN] = clf.predict(test[features])

    final = test.loc[:, [ID_COLUMN, TARGET_COLUMN]]
    final.to_csv(OUTPUT_PATH, index=False)
    print(final)


if __name__ == "__main__":
    main()