机器学习--kmeans的基本实现
作者:互联网
# _*_ coding:utf-8 _*_
import numpy as np
def loadDataset():
    """Return the fixed 2-D demo dataset used by the k-means example.

    Returns:
        A newly created NumPy array of shape (10, 2) with float64 dtype;
        each row is one (x, y) point.
    """
    points = [
        (3, 4), (3, 6), (7, 3), (4, 7), (3, 8),
        (8, 5), (4, 5), (4, 1), (7, 4), (5, 5),
    ]
    # The builtin ``float`` is used because the ``np.float`` alias was
    # removed from NumPy (1.24+); both mean float64.
    return np.array(points, dtype=float)
def initCenter(dataset, k):
    """Pick k distinct rows of the dataset at random as initial centers.

    Args:
        dataset: Array-like of points, one point per row.
        k: Number of centers to pick; must satisfy 1 <= k <= len(dataset).

    Returns:
        A new array holding the k selected rows (a copy, so modifying it
        does not modify ``dataset``).

    Raises:
        ValueError: If k is smaller than 1 or larger than the number of
            points.
    """
    n_points = len(dataset)
    if not 1 <= k <= n_points:
        raise ValueError(
            "k must be between 1 and {} (the number of points), got {}".format(
                n_points, k
            )
        )
    # replace=False guarantees that no point is chosen twice.
    chosen = np.random.choice(n_points, k, replace=False)
    # Indexing with an integer array copies the selected rows.
    return np.asarray(dataset)[chosen]
def cal_dis(a, b):
    """Return the Euclidean distance between two points.

    Args:
        a: First point, any array-like of numbers.
        b: Second point, with the same shape as ``a`` (or broadcastable
            to it).

    Returns:
        The square root of the sum of squared coordinate differences,
        as a NumPy float.
    """
    # Converting to float arrays lets plain tuples/lists be passed and
    # keeps unsigned-integer inputs from wrapping around on subtraction.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
def kmeans(dataset, k):
    """Cluster the rows of ``dataset`` into k groups with k-means.

    Starting from k randomly chosen points (via ``initCenter``), the
    function alternates between assigning every point to its nearest
    center (distance computed by ``cal_dis``) and moving each center to
    the mean of the points assigned to it. It stops once a full pass
    changes no assignment.

    Args:
        dataset: Array-like of points, one point per row.
        k: Number of clusters.

    Returns:
        A tuple ``(clusters, centers)``: ``clusters`` is a 1-D float array
        holding the center index assigned to each point, and ``centers``
        is the array of final center coordinates, one row per cluster.
    """
    dataset = np.asarray(dataset, dtype=float)
    # Work on a float copy so that fractional means are not truncated.
    centers = initCenter(dataset, k).astype(float)
    m = dataset.shape[0]
    # NaN marks "not assigned yet"; it never compares equal to an index,
    # so the first pass always records a change.
    clusters = np.full(m, np.nan)
    changed = True
    while changed:
        changed = False
        # Assignment step: attach every point to its nearest center
        # (the lowest index wins on ties).
        for i, point in enumerate(dataset):
            distances = [cal_dis(point, center) for center in centers]
            nearest = int(np.argmin(distances))
            # Check whether this point's assignment changed.
            if clusters[i] != nearest:
                clusters[i] = nearest
                changed = True
        # Update step: move each center to the mean of its members.
        for j in range(len(centers)):
            members = dataset[clusters == j]
            # A cluster that lost all its points keeps its previous
            # center rather than becoming NaN.
            if len(members):
                # axis=0 averages per coordinate; without it the mean
                # collapses to one scalar over all coordinates.
                centers[j] = members.mean(axis=0)
    return clusters, centers
if __name__ == '__main__':
    # Demo run: cluster the built-in sample points into three groups and
    # print the resulting centers followed by the per-point labels.
    points = loadDataset()
    labels, centroids = kmeans(points, 3)
    print(centroids, labels)
标签:index,机器,kmeans,dataset,学习,np,return,centers,def 来源: https://blog.csdn.net/Haiqiang1995/article/details/91353598