K-meansクラスタリングアルゴリズムの擬似コードとPython 3コード

29252 ワード

パターン認識

K-meansアルゴリズムとそのコード

K-meansアルゴリズム紹介

K-means疑似コード

K-means pythonコード

K-meansアルゴリズムの紹介
リンク:パターン認識-クラスタリング分析
K-means擬似コード

2点間のユークリッド距離の計算

	def calcluate_distance(core: tuple, dot: tuple):
    """
                
    :param core:      (x,y)    tuple
    :param dot:         (m,n)    tuple
    :return:    dist    float
    """
    #dist         
    return dist

ポイントに割り当てるべきコアを計算

def calculate_cluster(dot: tuple, cores: list):
    """
                   
    :param dot:      
    :param cores:     
    :return:            
    """
    distance_list = []
    for core in cores:
        #      dist
        #   distance   
    min_dist = min(distance_list)
    #      
    put_to_index = distance_list.index(min_dist)
    #         index
    return put_to_index

最も近いコアのクラスタに点を割り当てる

def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
               
    :param cores:
    :param row_data:
    :param k:
    :return:       
    """
    clusters = []
    for each in range(k):
		#        
    for every_data in row_data:
		#  every_data index
		#     every_data     
    return clusters

現在のクラスタの次のコアを計算

def re_calculate_core(cluster: set):
    """
               
    :param cluster:
    :return: new_core
    """
    all_x = []	
	all_y = []
    for each_dot in cluster:
        #       X y
	#     X Y
    new_core = (round(avg_x, 2), round(avg_y, 2))
	#      ，       
    return new_core

データ点の初期化

	for num in range(10):
		#adot random  ， round      
    data_list.append(adot)
	#  adot data_list

K-means pythonコード
IDE: PyCharm  Version: Python 3.7.3

from random import random, sample
from math import pow
#Made by       
#Made by       
#Made by       

def calcluate_distance(core: tuple, dot: tuple):
   """
   Return the Euclidean distance between a cluster core and a data point.

   Only the first two components of each tuple are used.

   :param core: core coordinates, indexable as (x, y)
   :param dot: point coordinates, indexable as (m, n)
   :return: the straight-line distance between the two, as a float
   """
   delta_x = dot[0] - core[0]
   delta_y = dot[1] - core[1]
   # Square root of the summed squared offsets.
   return pow(delta_x ** 2 + delta_y ** 2, 0.5)

def calculate_cluster(dot: tuple, cores: list):
   """
   Find which core a point is closest to.

   :param dot: the data point to classify
   :param cores: the current list of cluster cores
   :return: index into ``cores`` of the nearest core; on a tie the
            lowest index wins
   """
   # One distance per core, in the same order as ``cores``.
   distances = [calcluate_distance(candidate, dot) for candidate in cores]
   # list.index returns the first occurrence, so ties go to the earlier core.
   return distances.index(min(distances))

def initiation_cores(row_data: list, k: int):
   """
   Choose the initial cluster cores by drawing k points from the data.

   :param row_data: the full list of data points
   :param k: number of cores (clusters) to pick
   :return: a new list of k points sampled from ``row_data`` without
            replacement (distinct positions, via ``random.sample``)
   """
   return sample(row_data, k)

def put_dot_into_clusters(row_data: list, k: int, cores: list):
   """
   Assign every data point to the cluster of its nearest core.

   :param row_data: the full list of data points
   :param k: number of clusters to create
   :param cores: the current cluster cores; ``cores[i]`` owns cluster ``i``
   :return: a list of k sets, where set ``i`` holds the points whose
            nearest core is ``cores[i]``
   """
   # Clusters are sets, so identical points collapse into one member.
   clusters = [set() for _ in range(k)]
   for point in row_data:
       nearest = calculate_cluster(point, cores)
       clusters[nearest].add(point)
   return clusters

def re_calculate_core(cluster: set):
   """
   Compute the next core of a cluster as the mean of its points.

   :param cluster: the points currently assigned to the cluster
   :return: (mean_x, mean_y), each rounded to two decimal places
   :raises ZeroDivisionError: if ``cluster`` contains no points
   """
   xs = [point[0] for point in cluster]
   ys = [point[1] for point in cluster]
   mean_x = sum(xs) / len(xs)
   mean_y = sum(ys) / len(ys)
   return (round(mean_x, 2), round(mean_y, 2))

if __name__ == '__main__':
   # Demo script: runs only when this file is executed directly, not when
   # it is imported. It generates random 2-D points, clusters them with
   # K-means and plots the result.

   # Each entry describes one rectangular region of random points as
   # (count, x_scale, x_offset, y_scale, y_offset); a coordinate is
   # round(random() * scale + offset, 2), with random() in [0, 1).
   regions = [
       (10, 20, -100, 20, -100),
       (100, 100, 100, 50, 150),
       (50, 20, 0, 20, 0),
       (50, 100, 100, 20, 0),
       (100, 200, 0, 200, 0),
   ]
   data_list = []
   for count, x_scale, x_offset, y_scale, y_offset in regions:
       for num in range(count):
           adot = (round(random() * x_scale + x_offset, 2),
                   round(random() * y_scale + y_offset, 2))
           data_list.append(adot)

   # Number of clusters.
   k = 4
   # Safety cap: the cores are rounded to two decimals, so in principle
   # they could oscillate between states instead of settling.
   max_rounds = 1000

   # Pick the initial cores from the data itself.
   my_cores = initiation_cores(data_list, k)
   roundx = 0
   while True:
       roundx += 1
       # Assign every point to its nearest core.
       cl = put_dot_into_clusters(data_list, k, my_cores)
       # An empty cluster (possible when two initial cores coincide) has
       # no mean; keep its previous core instead of dividing by zero.
       new_cores = [
           re_calculate_core(cluster) if cluster else old_core
           for cluster, old_core in zip(cl, my_cores)
       ]
       # Converged when no core moved; also stop at the iteration cap.
       if new_cores == my_cores or roundx >= max_rounds:
           break
       my_cores = new_cores

   import matplotlib.pyplot as plt
   colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
   for index in range(k):
       color = colors[index % len(colors)]
       if cl[index]:
           # One scatter call per cluster instead of one per point.
           xs, ys = zip(*cl[index])
           plt.scatter(xs, ys, c=color, alpha=0.53)
       # Mark the cluster's core with a large black '+'.
       plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
   plt.show()

#Made by

hdu 5366 The mook jong動的計画(BC 50 C題)

最大ストリーム問題プリフロー推進アルゴリズム(BFS最適化)