K-Means聚类算法k值选取——轮廓系数
作者:互联网
# Choose k for K-Means by sweeping the cluster count and plotting the
# silhouette coefficient obtained for each value.
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score, silhouette_score
from matplotlib.font_manager import FontProperties

# Font used to render the Chinese axis labels (SimSun at its Windows path).
font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15)

# Read the data file, skipping the first line (presumably a header row --
# confirm against the actual file) and keeping at most the next 24962 lines.
with open('result.csv', 'r', encoding='GBK') as f:
    results = f.readlines()[1:24963]

# Each sample is the row's comma-separated fields minus the first field,
# parsed as floats.
X = [[float(field) for field in it.split(',')[1:]] for it in results]

# Cluster counts to evaluate; shared by the training loop and the plot so the
# two cannot drift apart.
cluster_counts = range(2, 10)

# 1. Silhouette coefficient for each candidate cluster count.
sc_list = []

# 2. Train one model per cluster count and record its silhouette coefficient.
for clu_num in cluster_counts:
    my_kmeans = KMeans(n_clusters=clu_num, max_iter=100, random_state=0)
    # fit_predict() fits the model and returns the labels in a single call.
    # A separate fit() beforehand would only train the same model twice.
    y_pre = my_kmeans.fit_predict(X)
    sc_list.append(silhouette_score(X, y_pre))

# 3. Plot the silhouette coefficient against the number of clusters.
plt.figure(figsize=(20, 8), dpi=100)
plt.scatter(cluster_counts, sc_list)
plt.plot(cluster_counts, sc_list)
x_ticks = range(1, 10, 1)
plt.xticks(x_ticks)
plt.xlabel('中心点个数', fontproperties=font_set)
plt.ylabel('轮廓系数', fontproperties=font_set)
plt.grid()
plt.show()
标签:10,plt,Means,list,range,算法,聚类,import,font 来源: https://www.cnblogs.com/dabaiX/p/16274118.html