Supervised learner--k-Nearest Neighbor
2354 ワード
K最近接(k-Nearest Neighbor,KNN)分類アルゴリズム
導入背景
原理
ステップ
長所と短所
Pythonによる実装
# === ( KNN )====
def classify0(x, dataset, labels, k):
    """Classify ``x`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``x`` and every row of
    ``dataset``.

    Args:
        x: Feature vector to classify; it must broadcast against one row
            of ``dataset``.
        dataset: 2-D array-like of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the label of
            ``dataset[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one met first while walking the neighbours from nearest to
        farthest is returned.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number
            of samples (or of labels).
    """
    samples = np.asarray(dataset)

    # Step 1: Euclidean distance from x to every sample. Broadcasting
    # subtracts x from each row, so no explicit tiled copy of x is needed.
    diff_mat = np.asarray(x) - samples
    distances = (diff_mat ** 2).sum(axis=1) ** 0.5

    # Step 2: indices of the samples ordered from nearest to farthest.
    sorted_dist_indices = distances.argsort()

    # Steps 3-4: tally the labels of the k nearest samples.
    class_count = {}
    for i in range(k):
        voted_label = labels[sorted_dist_indices[i]]
        class_count[voted_label] = class_count.get(voted_label, 0) + 1

    # Step 5: return the most frequent label. ``dict.items()`` works on
    # Python 3 (``iteritems()`` was Python 2 only); ``sorted`` is stable,
    # so tied labels keep their nearest-first insertion order.
    sorted_class_count = sorted(class_count.items(),
                                key=lambda item: item[1],
                                reverse=True)
    return sorted_class_count[0][0]