Pythonモデリング共通コード

6879 ワード

レコードの整理、不定期更新
評価指標
import numpy as np
from scipy.stats import ks_2samp


def ks_value(y_pred, y_true):
    """Return the two-sample Kolmogorov-Smirnov statistic of the scores.

    The scores in ``y_pred`` are split into two groups according to whether
    the matching entry of ``y_true`` equals 1, and the two groups are
    compared with ``scipy.stats.ks_2samp``.

    Args:
        y_pred: Array-like of predicted scores.
        y_true: Array-like of labels, same length as ``y_pred``; entries
            equal to 1 form the first group, everything else the second.

    Returns:
        The ``statistic`` attribute of the ``ks_2samp`` result.
    """
    # Coerce to arrays so plain lists/tuples support boolean-mask indexing;
    # the two inputs are matched up by position.
    scores = np.asarray(y_pred)
    is_positive = np.asarray(y_true) == 1
    return ks_2samp(scores[is_positive], scores[~is_positive]).statistic
from sklearn import metrics

# Print the standard binary-classification metrics for a test set.
# `test_y`, `y_pred` and `y_pred_binary` are not defined in this snippet and
# must be supplied by the surrounding code. Presumably `y_pred` holds scores
# (it is only fed to roc_auc_score) and `y_pred_binary` holds thresholded
# class labels -- confirm against the caller.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('AUC: %.4f' % metrics.roc_auc_score(test_y, y_pred))
print('ACCURACY: %.4f' % metrics.accuracy_score(test_y, y_pred_binary))
print('Recall: %.4f' % metrics.recall_score(test_y, y_pred_binary))
print('F1-score: %.4f' % metrics.f1_score(test_y, y_pred_binary))
# Label corrected from the misspelled "Precesion".
print('Precision: %.4f' % metrics.precision_score(test_y, y_pred_binary))
print(metrics.confusion_matrix(test_y, y_pred_binary))

モデルの保存と読み込み
import pickle

# Serialize the `model` object to disk. The context manager guarantees the
# file is flushed and closed before it is read back below.
with open("model.txt", "wb") as model_file:
    pickle.dump(model, model_file)

# Restore the object from the same file.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open("model.txt", "rb") as model_file:
    model = pickle.load(model_file)

Pythonからシェルを呼び出してSQLを実行する
#!/usr/bin/python
#-*-coding:utf-8 -*-

import subprocess
import traceback

# SQL text passed to the Hive CLI; replace the placeholder with the real query.
sql = """
sql  
"""
# Double quotes inside the query are swapped for single quotes so they cannot
# terminate the double-quoted shell argument built here.
cmd = 'hive -e """' + sql.replace('"', "'") + '"""'
print(cmd)
try:
    # NOTE: the command runs through the shell (shell=True), so shell
    # metacharacters in `sql` such as `$` or backticks are interpreted by the
    # shell. Only use this with trusted, hand-written SQL.
    # universal_newlines=True makes readline() return text, so the EOF
    # sentinel '' below is valid on both Python 2 and Python 3.
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         universal_newlines=True)
    try:
        # Stream the output line by line until the pipe reports EOF.
        for buff in iter(p.stdout.readline, ''):
            # Each line already ends with a newline; strip it so print()
            # does not double-space the output.
            print(buff.rstrip('\n'))
    finally:
        # Release the pipe and reap the child so no zombie process is left.
        p.stdout.close()
        p.wait()
except Exception as err:
    # Top-level boundary of the script: report the failure with a traceback.
    print("message is:%s" % (str(err)))
    traceback.print_exc()