PythonのK-means詳細ケース


#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
# from __future__ import division                                                                                                
import requests
import json
import numpy as np
import pandas as pd
import sys
from sklearn.cluster import KMeans

reload(sys)
sys.setdefaultencoding('utf-8')


# Min-max normalization
def normalization(one_list):
    """Rescale a sequence of numbers linearly onto the interval [0, 1].

    Args:
        one_list: Sequence (list or array) of numeric values.

    Returns:
        numpy.ndarray of floats, same length as ``one_list``, in which the
        smallest input maps to 0.0 and the largest to 1.0. An empty input
        yields an empty array; an input whose values are all equal yields
        all zeros.
    """
    # Work in floating point: the ``from __future__ import division`` line
    # in this file is commented out, so under Python 2 integer samples
    # would otherwise be floor-divided.
    values = np.asarray(one_list, dtype=float)
    if values.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale.
        return values
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # All samples identical: avoid 0/0, which would fill the result
        # with NaN and propagate into whatever consumes it.
        return np.zeros_like(values)
    return (values - low) / span


# Fetch the monitoring data from the API and return it as a DataFrame
def _last_values(dps):
    """Return the last element of every entry in ``dps``."""
    return [point[-1] for point in dps]


def get_dt(URL, timeout=30):
    """Fetch metric series from a JSON endpoint as a normalized DataFrame.

    The response body is expected to be a JSON object with a ``code`` field
    (0 meaning success) and a ``data`` field holding the ``dps`` series read
    below. The last element of each ``dps`` entry is taken as the sample
    value, and every series is min-max normalized independently.

    Args:
        URL: Address of the endpoint to query.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``. Without a timeout the request
            could block indefinitely.

    Returns:
        pandas.DataFrame with the columns ``load1``, ``ioutil``,
        ``memused`` and ``cpuused``, one row per sample.

    Raises:
        ValueError: If the response reports a non-zero ``code``.
        Exception: Whatever ``Response.json()`` raises when the body is not
            valid JSON (re-raised after being printed).
    """
    api = requests.get(url=URL, timeout=timeout)
    try:
        json_dt = api.json()
    except Exception as e:
        # Report the problem, then propagate it: continuing would only fail
        # later with a misleading NameError on ``json_dt``.
        print(str(e))
        raise

    if json_dt['code'] != 0:
        # Fail loudly instead of falling through to an unbound ``dt``.
        raise ValueError('API returned non-zero code: %r' % (json_dt['code'],))

    data = json_dt['data']
    load1 = _last_values(data['sysload']['load1']['dps'])
    ioutil = _last_values(data['sysiops']['sda']['ioutil']['dps'])
    memused = _last_values(data['sysmeminfo']['memused_percentage']['dps'])
    # NOTE(review): the 'cpuused' column is filled from the 'cpuwa' series
    # under 'syscpuidle' -- confirm that this is the intended metric.
    cpuused = _last_values(data['syscpuidle']['cpu']['cpuwa']['dps'])

    # Each normalized series is one row here; transpose so that every
    # metric becomes a column and every sample a row.
    dt = pd.DataFrame([
        normalization(load1),
        normalization(ioutil),
        normalization(memused),
        normalization(cpuused),
    ]).T
    dt.columns = ['load1', 'ioutil', 'memused', 'cpuused']
    return dt



# Script entry point
if __name__ == '__main__':
    # Placeholder: set this to the metrics API endpoint before running.
    URL = '       '
    dt = get_dt(URL)
    print(dt.head(100))

    # Cluster the normalized samples into three groups; the fixed
    # random_state keeps the assignment reproducible between runs.
    clf_kmeans = KMeans(n_clusters=3, random_state=10).fit(dt)
    dt['tag_col'] = clf_kmeans.labels_

    # Number of samples (rows) per cluster. ``apply(np.size)`` would count
    # every cell of each group instead, inflating the figure by the number
    # of columns.
    df_count_type = dt.groupby('tag_col').size()

    # Samples per cluster.
    print(df_count_type)
    # Cluster centres.
    print(clf_kmeans.cluster_centers_)

    # Dump the members of every cluster, labelled type0, type1, ...
    # A single pre-formatted string keeps the output the same whether
    # ``print`` is the Python 2 statement or a function.
    for label in range(clf_kmeans.n_clusters):
        members = dt[clf_kmeans.labels_ == label]
        print('type%d %s' % (label, members))