K-meansクラスタリングアルゴリズム:擬似コードとPython 3コード
29252 ワード
K-meansアルゴリズムとそのコードK-meansアルゴリズム紹介 K-means疑似コード K-means pythonコード K-meansアルゴリズムの紹介
リンク:パターン認識-クラスタリング分析
K-means擬似コード:2点間のユークリッド距離の計算/点を割り当てるべきコアの計算/最も近いコアのクラスタへの点の割り当て/現在のクラスタの次のコアの計算/データ点の初期化
K-means pythonコード
IDE: PyCharm Version: Python 3.7.3
リンク:パターン認識-クラスタリング分析
K-means擬似コード
def calcluate_distance(core: tuple, dot: tuple):
    """
    Pseudocode sketch: Euclidean distance between a cluster core and a data point.

    :param core: (x,y) tuple
    :param dot: (m,n) tuple
    :return: dist float
    """
    # Compute dist, the Euclidean distance between core and dot
    # (the formula is deliberately left out of this sketch).
    return dist
def calculate_cluster(dot: tuple, cores: list):
    """
    Pseudocode sketch: decide which core a data point should be assigned to.

    :param dot: the data point to classify
    :param cores: list of the current cluster cores
    :return: index in cores of the core closest to dot
    """
    distance_list = []
    for core in cores:
        # Compute dist, the distance between this core and dot.
        # Append dist to distance_list.
    min_dist = min(distance_list)
    # min_dist is the smallest distance found.
    put_to_index = distance_list.index(min_dist)
    # put_to_index is the index of the core that produced min_dist.
    return put_to_index
def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
    Pseudocode sketch: assign every data point to the cluster of its nearest core.

    :param cores: list of the current cluster cores
    :param row_data: list of data points
    :param k: number of clusters
    :return: clusters, a list with one group of points per core
    """
    clusters = []
    for each in range(k):
        # Append one empty cluster container to clusters.
    for every_data in row_data:
        # Work out the index of the core nearest to every_data.
        # Put every_data into the cluster at that index.
    return clusters
def re_calculate_core(cluster: set):
    """
    Pseudocode sketch: compute the next core of a cluster from its points.

    :param cluster: the points currently assigned to one cluster
    :return: new_core
    """
    all_x = []
    all_y = []
    for each_dot in cluster:
        # Collect the X and Y coordinate of each_dot into all_x and all_y.
    # Compute avg_x and avg_y, the averages of the collected X and Y values.
    new_core = (round(avg_x, 2), round(avg_y, 2))
    # The new core is the pair of averages, each rounded to 2 decimal places.
    return new_core
# Pseudocode sketch: initialise the data points.
for num in range(10):
    # Build adot, a point whose coordinates come from random() and are rounded with round().
    data_list.append(adot)
    # adot is now stored in data_list.
K-means pythonコード
IDE: PyCharm Version: Python 3.7.3
from random import random, sample
from math import pow
#Made by
#Made by
#Made by
def calcluate_distance(core: tuple, dot: tuple):
    """
    Return the Euclidean distance between a cluster core and a data point.

    :param core: (x,y) tuple
    :param dot: (m,n) tuple
    :return: dist float
    """
    delta_x = dot[0] - core[0]
    delta_y = dot[1] - core[1]
    squared_sum = delta_x ** 2 + delta_y ** 2
    # Raising to the power 0.5 takes the square root of the summed squares.
    return pow(squared_sum, 0.5)
def calculate_cluster(dot: tuple, cores: list):
    """
    Find which core is closest to a data point.

    :param dot: the data point to classify
    :param cores: list of the current cluster cores
    :return: index in cores of the nearest core (the first one wins on a tie)
    """
    distances = [calcluate_distance(core, dot) for core in cores]
    # list.index returns the first position holding the minimum distance.
    return distances.index(min(distances))
def initiation_cores(row_data: list, k: int):
    """
    Pick k distinct data points from row_data to serve as the initial cores.

    Duplicate points are removed before sampling. Without this, two
    identical points could both be chosen as cores; every point would then
    go to the first of the two, leaving the other cluster empty.

    :param row_data: list of data points (hashable, e.g. (x, y) tuples)
    :param k: number of cores to pick
    :return: list of k distinct points drawn at random from row_data
    :raises ValueError: if row_data holds fewer than k distinct points
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # sampled result is unchanged whenever row_data has no duplicates.
    unique_points = list(dict.fromkeys(row_data))
    # random.sample() draws k items from the population without replacement.
    return sample(unique_points, k)
def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
    Assign every data point to the cluster of its nearest core.

    :param cores: list of the current cluster cores
    :param row_data: list of data points
    :param k: number of clusters to create
    :return: list of k sets; set i holds the points whose nearest core is cores[i]
    """
    # One independent, initially empty set per cluster.
    clusters = [set() for _ in range(k)]
    for point in row_data:
        clusters[calculate_cluster(point, cores)].add(point)
    return clusters
def re_calculate_core(cluster: set):
    """
    Compute the next core of a cluster as the mean of its points.

    :param cluster: the points (x, y) currently assigned to one cluster
    :return: (mean x, mean y), each rounded to 2 decimal places
    :raises ValueError: if cluster is empty, since it has no mean
    """
    if not cluster:
        # Fail with a clear message rather than an opaque ZeroDivisionError.
        raise ValueError("cannot compute the core of an empty cluster")
    size = len(cluster)
    avg_x = sum(each_dot[0] for each_dot in cluster) / size
    avg_y = sum(each_dot[1] for each_dot in cluster) / size
    return (round(avg_x, 2), round(avg_y, 2))
if __name__ == '__main__':
    # Runs only when this file is executed as a script, not when it is imported.
    import matplotlib.pyplot as plt

    # Synthetic data set, one row per group of points:
    # (number of points, x scale, x offset, y scale, y offset).
    # random() returns a float in [0, 1), so each coordinate falls in
    # [offset, offset + scale) and is then rounded to 2 decimal places,
    # e.g. round(80.23456, 2) gives 80.23.
    data_spec = [
        (10, 20, -100, 20, -100),
        (100, 100, 100, 50, 150),
        (50, 20, 0, 20, 0),
        (50, 100, 100, 20, 0),
        (100, 200, 0, 200, 0),
    ]
    data_list = []
    for count, x_scale, x_offset, y_scale, y_offset in data_spec:
        for num in range(count):
            adot = (round(random() * x_scale + x_offset, 2),
                    round(random() * y_scale + y_offset, 2))
            data_list.append(adot)

    # Number of clusters.
    k = 4
    # Upper bound on refinement rounds, so the loop always terminates.
    max_rounds = 100

    # Initial cores are drawn from the data itself.
    my_cores = initiation_cores(data_list, k)
    for roundx in range(1, max_rounds + 1):
        # Assign every point to its nearest core, then move each core to the
        # mean of its cluster.
        cl = put_dot_into_clusters(data_list, k, my_cores)
        new_cores = [
            # An empty cluster has no mean: keep its previous core.
            re_calculate_core(cl[index]) if cl[index] else my_cores[index]
            for index in range(k)
        ]
        if new_cores == my_cores:
            # No core moved: the clustering has converged.
            break
        my_cores = new_cores
    else:
        # Cap reached without convergence: refresh the assignment so the
        # plotted clusters match the final cores.
        cl = put_dot_into_clusters(data_list, k, my_cores)

    colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
    for index in range(k):
        color = colors[index % len(colors)]
        # Draw a whole cluster with one scatter call instead of one per point.
        xs = [every_dot[0] for every_dot in cl[index]]
        ys = [every_dot[1] for every_dot in cl[index]]
        plt.scatter(xs, ys, c=color, alpha=0.53)
        # Mark the cluster's core with a large black '+'.
        plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
    plt.show()
#Made by