簡易K-Meansクラスタリングアルゴリズム(Python)
5613 ワード
# -*- coding: utf-8 -*-
__author__ = 'Zhao'
import math
from decimal import getcontext
getcontext().prec = 4
def parse_numbers(text):
    """Parse a whitespace-separated string of numbers into a list of floats.

    Splitting on arbitrary whitespace tolerates repeated or trailing spaces,
    which a plain ``split(" ")`` would turn into empty tokens.

    Raises:
        ValueError: if a token is not a number, or is NaN / infinite (NaN
            never compares equal to itself, so the convergence test in
            ``k_means_1d`` could never succeed).
    """
    values = [float(token) for token in text.split()]
    for value in values:
        if not math.isfinite(value):
            raise ValueError("non-finite value is not allowed: %r" % value)
    return values


def initial_centroids(points, k):
    """Pick ``k`` seed centroids spread evenly over ``points`` (input order).

    Indices are computed with exact integer arithmetic (``i * n // k``), so
    exactly ``k`` seeds are returned; accumulating a float step can overshoot
    or undershoot because of rounding.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % k)
    count = len(points)
    if count == 0:
        return []
    return [points[i * count // k] for i in range(k)]


def assign_to_clusters(points, centroids):
    """Group every point under the index of its nearest centroid.

    Ties go to the centroid with the lowest index. Returns one list per
    centroid; a centroid that attracts no point gets an empty list.
    """
    clusters = [[] for _ in centroids]
    for point in points:
        nearest = min(range(len(centroids)),
                      key=lambda j: abs(point - centroids[j]))
        clusters[nearest].append(point)
    return clusters


def format_cluster(members):
    """Return ``members`` with integral floats shown as ints (2.0 -> 2).

    Non-integral values are kept unchanged instead of being truncated.
    """
    return [int(value) if value.is_integer() else value for value in members]


def k_means_1d(points, k, verbose=True):
    """Run one-dimensional k-means on ``points`` until no centroid moves.

    Args:
        points: list of finite floats to cluster.
        k: number of clusters requested (must be >= 1).
        verbose: when true, print the per-iteration trace.

    Returns:
        A ``(centroids, clusters)`` tuple where ``clusters[i]`` holds the
        points assigned to ``centroids[i]``.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    centroids = initial_centroids(points, k)
    if verbose:
        print("original centroids: ", centroids, "\n")

    iteration = 0
    changed = True
    clusters = [[] for _ in centroids]
    while changed:
        iteration += 1
        changed = False

        # Assignment step: attach each point to its nearest centroid.
        clusters = assign_to_clusters(points, centroids)
        if verbose:
            print("after %sth cluster: " % iteration, "\n")
            print("centroid class")
            for center, members in zip(centroids, clusters):
                print(center, ' ', members)
            print("---------")

        # Update step: move each centroid to the mean of its members.
        for i, members in enumerate(clusters):
            # Skip only genuinely empty clusters; a non-empty cluster whose
            # members sum to zero must still be updated.
            if not members:
                continue
            new_centroid = round(sum(members) / len(members), 3)
            if new_centroid != centroids[i]:
                if verbose:
                    print("change centroid ", centroids[i], "as ", end="")
                    print(new_centroid)
                centroids[i] = new_centroid
                changed = True
        if verbose:
            print("---------")
    return centroids, clusters


def main():
    """Read the data set and k from stdin, cluster, and print the result."""
    origin_set = parse_numbers(
        input("please input a set which is consist of numbers:\n"))
    k = int(input("please input k:\n"))
    centroid, class_i = k_means_1d(origin_set, k)
    print("THE CONCLUSION IS:")
    print("centroid class")
    for center, members in zip(centroid, class_i):
        print(center, ' ', format_cluster(members))


if __name__ == "__main__":
    main()