K-meansクラスタリングアルゴリズム擬似コードpython 3コード

29252 ワード

K-meansアルゴリズムとそのコード
  • K-meansアルゴリズム紹介
  • K-means疑似コード
  • K-means pythonコード
  • K-meansアルゴリズムの紹介
    リンク:パターン認識-クラスタリング分析
    K-means擬似コード
  • 2点間のユークリッド距離の計算
  • 	def calcluate_distance(core: tuple, dot: tuple):
        """
                    
        :param core:      (x,y)    tuple
        :param dot:         (m,n)    tuple
        :return:    dist    float
        """
        # dist: core と dot の間のユークリッド距離を計算する
        return dist
    
  • ポイントに割り当てるべきコアを計算
  • def calculate_cluster(dot: tuple, cores: list):
        """
                       
        :param dot:      
        :param cores:     
        :return:            
        """
        distance_list = []
        for core in cores:
            # core と dot の距離 dist を計算する
            # dist を distance_list に追加する
        min_dist = min(distance_list)
        # 最小の距離を求める
        put_to_index = distance_list.index(min_dist)
        # 最も近いコアの index を返す
        return put_to_index
    
  • 最も近いコアのクラスタに点を割り当てる
  • def put_dot_into_clusters(row_data: list, k: int, cores: list):
        """
                   
        :param cores:
        :param row_data:
        :param k:
        :return:       
        """
        clusters = []
        for each in range(k):
            # 空のクラスタ(set)を clusters に追加する
        for every_data in row_data:
            # every_data に最も近いコアの index を求める
            # その index のクラスタに every_data を追加する
        return clusters
    
  • 現在のクラスタの次のコアを計算
  • def re_calculate_core(cluster: set):
        """
                   
        :param cluster:
        :return: new_core
        """
        all_x = []	
    	all_y = []
        for each_dot in cluster:
            # 各点の x, y をそれぞれ all_x, all_y に追加する
        # all_x, all_y の平均 avg_x, avg_y を求める
        new_core = (round(avg_x, 2), round(avg_y, 2))
        # 新しいコアの座標は小数点以下2桁に丸める
        return new_core
    
  • 初期化データ点
  • 	for num in range(10):
        # adot を random で生成し、round で小数点以下2桁に丸める
        data_list.append(adot)
        # adot を data_list に追加する
    

    K-means pythonコード
    IDE: PyCharm Version: Python 3.7.3
    from random import random, sample
    from math import pow
    #Made by       
    #Made by       
    #Made by       
    
    def calcluate_distance(core: tuple, dot: tuple):
        """
        Compute the Euclidean distance between a cluster core and a data point.

        Only the first two coordinates of each argument are used.

        :param core: cluster centre as an (x, y) tuple
        :param dot: data point as an (m, n) tuple
        :return: straight-line distance between the two points, as a float
        """
        delta_x = dot[0] - core[0]
        delta_y = dot[1] - core[1]
        squared_sum = delta_x ** 2 + delta_y ** 2
        # Raising to the power 0.5 takes the square root.
        return pow(squared_sum, 0.5)
    
    def calculate_cluster(dot: tuple, cores: list):
        """
        Find which core a data point should be assigned to.

        :param dot: the data point
        :param cores: list of current cluster cores
        :return: index into ``cores`` of the nearest core; on a tie the
                 lowest index wins
        :raises ValueError: if ``cores`` is empty (raised by ``min``)
        """
        # Distance from the point to every core, in the same order as ``cores``.
        distances = [calcluate_distance(centre, dot) for centre in cores]
        # The position of the smallest distance identifies the nearest core;
        # list.index returns the first match, which settles ties.
        return distances.index(min(distances))
    
    def initiation_cores(row_data: list, k: int):
        """
        Pick k initial cores at random from the data points.

        Cores are drawn from the *distinct* points only. If the same
        coordinates were picked twice, the later of the two cores could never
        win a point (calculate_cluster resolves ties in favour of the first
        core), so its cluster would stay empty and averaging it would divide
        by zero.

        :param row_data: list of data points (hashable tuples)
        :param k: number of cores to pick
        :return: list of k mutually different points taken from ``row_data``
        :raises ValueError: if k is larger than the number of distinct points
                            (raised by ``sample``)
        """
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # data without duplicates is sampled exactly as before.
        distinct_points = list(dict.fromkeys(row_data))
        return sample(distinct_points, k)
    
    def put_dot_into_clusters(row_data: list, k: int, cores: list):
        """
        Assign every data point to the cluster of its nearest core.

        :param cores: list of current cluster cores
        :param row_data: list of data points
        :param k: number of clusters to create
        :return: list of k sets; set i holds the points whose nearest core
                 is cores[i]
        """
        # One empty set per cluster. A set is unordered and holds each
        # distinct point at most once.
        clusters = [set() for _ in range(k)]
        for point in row_data:
            clusters[calculate_cluster(point, cores)].add(point)
        return clusters
    
    def re_calculate_core(cluster: set):
        """
        Compute the next core of a cluster as the mean of its points.

        :param cluster: collection of (x, y) points belonging to one cluster
        :return: new core as an (x, y) tuple, each coordinate rounded to
                 2 decimal places
        :raises ZeroDivisionError: if ``cluster`` is empty
        """
        # Materialise once so both coordinate lists come from a single pass.
        points = list(cluster)
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        mean_x = sum(xs) / len(xs)
        mean_y = sum(ys) / len(ys)
        return (round(mean_x, 2), round(mean_y, 2))
    
    if __name__ == '__main__':
        # Demo script. It runs only when the file is executed directly, not
        # when it is imported: build a synthetic 2-D data set, cluster it with
        # k-means and plot the result.

        # Each entry describes one rectangular blob of uniformly random points:
        # (number of points, x width, x offset, y width, y offset).
        # random() yields a float in [0, 1), so a coordinate falls in
        # [offset, offset + width) before being rounded to 2 decimal places,
        # e.g. round(80.23456, 2) -> 80.23.
        blobs = [
            (10, 20, -100, 20, -100),
            (100, 100, 100, 50, 150),
            (50, 20, 0, 20, 0),
            (50, 100, 100, 20, 0),
            (100, 200, 0, 200, 0),
        ]
        data_list = []
        for count, x_width, x_offset, y_width, y_offset in blobs:
            for num in range(count):
                adot = (round(random() * x_width + x_offset, 2),
                        round(random() * y_width + y_offset, 2))
                data_list.append(adot)

        # Number of clusters.
        k = 4
        # Upper bound on iterations: cores are rounded to 2 decimals, so an
        # exact fixed point is not guaranteed and the loop must not spin forever.
        max_rounds = 1000
        # Initial cores are randomly chosen data points.
        my_cores = initiation_cores(data_list, k)
        roundx = 0
        while roundx < max_rounds:
            roundx += 1
            # Assignment step: group the points by their nearest core.
            cl = put_dot_into_clusters(data_list, k, my_cores)
            # Update step: move each core to the mean of its cluster. A cluster
            # that received no points keeps its previous core, because
            # re_calculate_core would divide by zero on an empty cluster.
            new_cores = [
                re_calculate_core(cluster) if cluster else old_core
                for cluster, old_core in zip(cl, my_cores)
            ]
            # Converged once no core moved during this round.
            if new_cores == my_cores:
                break
            my_cores = new_cores

        import matplotlib.pyplot as plt
        colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
        for index in range(k):
            # Cycle through the palette if there are more clusters than colors.
            color = colors[index % len(colors)]
            if cl[index]:
                # Draw the whole cluster with one scatter call instead of one
                # call per point.
                xs, ys = zip(*cl[index])
                plt.scatter(xs, ys, c=color, alpha=0.53)
            # Mark the cluster core with a large black plus sign.
            plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
        plt.show()
    
    #Made by