编程语言
首页 > 编程语言 > K-Means聚类算法k值选取——轮廓系数

K-Means聚类算法k值选取——轮廓系数

作者:互联网

 1 import matplotlib.pyplot as plt
 2 from sklearn.datasets import make_blobs
 3 from sklearn.cluster import KMeans
 4 from sklearn.metrics import calinski_harabasz_score, silhouette_score
 5 from matplotlib.font_manager import FontProperties
 6 
 7 font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15)
 8 with open('result.csv', 'r', encoding='GBK') as f:
 9     results = f.readlines()[1:24963]
10     # res = []
11     # for it in results:
12     #     tmp = it.split(',')[1:]
13     #     tmp = list(map(lambda x: float(x), tmp))
14     #     res.append(it)
15     X = [list(map(lambda x: float(x), it.split(',')[1:])) for it in results]
16 
17 # # 1、模型训练
18 # y_pre = KMeans(n_clusters=clu_num).fit_predict(X)
19 
20 
21 # 1、创建空列表
22 sc_list = []
23 
24 # 2、设置中心点个数,查看SC的变化范围
25 for clu_num in range(2, 10):
26     # 初始化迭代器一次的Kmeans
27     my_kmeans = KMeans(n_clusters=clu_num, max_iter=100, random_state=0)
28     # 模型训练
29     my_kmeans.fit(X)
30     y_pre = my_kmeans.fit_predict(X)
31 
32     # 将SC的每一次迭代结果添加到空列表内
33     sc_list.append(silhouette_score(X, y_pre))
34 
35 # 3、图像可视化
36 plt.figure(figsize=(20, 8), dpi=100)
37 plt.scatter(range(2, 10), sc_list)
38 plt.plot(range(2, 10), sc_list)
39 x_ticks = range(1, 10, 1)
40 plt.xticks(x_ticks)
41 plt.xlabel('中心点个数', fontproperties=font_set)
42 plt.ylabel('轮廓系数', fontproperties=font_set)
43 plt.grid()
44 plt.show()

 

标签:10,plt,Means,list,range,算法,聚类,import,font
来源: https://www.cnblogs.com/dabaiX/p/16274118.html