其他分享
首页 > 其他分享 > K-means图像聚类

K-means图像聚类

作者:互联网

图像聚类,是指从一堆各种各样的原始图像文件中,通过算法模型提取图片特征,然后采用聚类算法对特征进行聚类,将相似的图片分组归为一类。这里介绍使用K-means算法对特征进行聚类,可应用于测试数据的清洗和数据的搜索。

特征提取

# Dependencies of the feature-extraction snippet.
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
import numpy as np

from numpy import linalg as LA
import io
from PIL import Image

# VGG16 with ImageNet weights, without the classification head and with max
# pooling applied to the convolutional output.
model = VGG16(weights='imagenet', pooling='max', include_top=False)

img_path = 'test.jpg'
# There are two ways to load the image, depending on whether it comes from a
# local file or arrives as raw bytes (e.g. over the network).
# Option 1: load from a local path; the result is a PIL Image instance.
img = image.load_img(img_path, target_size=(224, 224))
# Option 2: decode from in-memory bytes. Both options run here for
# demonstration, so this result replaces the one produced by option 1.
with open(img_path, 'rb') as img_file:    # close the handle deterministically
    img_bytes = img_file.read()
img = Image.open(io.BytesIO(img_bytes))
img = img.convert('RGB')
img = img.resize((224, 224), Image.NEAREST)

def RemoveBlackEdge(img):
    """Drop fully black pixel rows so useless black borders do not interfere.

    Only whole rows are examined: a row is removed when every channel of
    every pixel in it is 0, wherever that row sits in the image. Black
    columns are left untouched.

    Args:
        img: PIL image instance.

    Returns:
        PIL image instance without the black rows. If every row is black,
        the input image is returned unchanged, because an image with zero
        rows cannot be built.
    """
    pixels = image.img_to_array(img)
    # pixels is indexed (row, column, channel); reduce over columns/channels.
    is_black_row = np.all(pixels == 0, axis=(1, 2))
    if is_black_row.all():
        # Nothing would be left to convert back into an image.
        return img
    # NOTE(review): array_to_img is called with its defaults, which in Keras
    # rescale intensities to span 0-255 -- confirm this is intended.
    return image.array_to_img(pixels[~is_black_row])
img = RemoveBlackEdge(img)

x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)    # add a leading batch axis
x = preprocess_input(x)
features = model.predict(x)    # the extracted features are the feature vectors
# Normalise the first (only) feature vector to simplify later processing.
norm_feat = features[0] / LA.norm(features[0])

图片聚类

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans 
import pandas as pd 
import numpy as np 

# Feature vectors, one inner list per image (feature length 512).
features = [
    [],
    [],
]
# Image names, positionally aligned with `features`.
names = ['', '']

df = pd.DataFrame(data=features)
name_df = pd.DataFrame(data=names)
samples = df.to_numpy()

# n_clusters is the number of groups to form.
kmeans = KMeans(n_clusters=3)
# Train the model, then predict the cluster of every sample.
labels = kmeans.fit(samples).predict(samples)
name_df.loc[labels == 1]

测试脚本

import io
import os

import numpy as np
import pandas as pd
import pylab
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from numpy import linalg as LA
from PIL import Image
from sklearn.cluster import KMeans


# Directory containing the images to cluster.
test_path = '/content/img'
# A model trained on your own dataset could be used here instead.
model = VGG16(
    include_top=False,
    pooling='max',
    weights='imagenet',
)

def RemoveBlackEdge(img):
    """Drop fully black pixel rows so useless black borders do not interfere.

    Only whole rows are examined: a row is removed when every channel of
    every pixel in it is 0, wherever that row sits in the image. Black
    columns are left untouched.

    Args:
        img: PIL image instance.

    Returns:
        PIL image instance without the black rows. If every row is black,
        the input image is returned unchanged, because an image with zero
        rows cannot be built.
    """
    pixels = image.img_to_array(img)
    # pixels is indexed (row, column, channel); reduce over columns/channels.
    is_black_row = np.all(pixels == 0, axis=(1, 2))
    if is_black_row.all():
        # Nothing would be left to convert back into an image.
        return img
    # NOTE(review): array_to_img is called with its defaults, which in Keras
    # rescale intensities to span 0-255 -- confirm this is intended.
    return image.array_to_img(pixels[~is_black_row])

def get_image_feature(path):
    """Extract the feature vector of one image file with the global ``model``.

    The image is converted to RGB, resized to 224x224 with nearest-neighbour
    sampling, stripped of black rows, preprocessed with ``preprocess_input``
    and passed to ``model.predict``.

    Args:
        path: Location of the image file to read.

    Returns:
        The first (and only) entry of the prediction for this image.
    """
    # Image.open reads lazily; the context manager releases the file handle
    # as soon as convert() has produced an independent in-memory copy.
    with Image.open(path) as source:
        img = source.convert('RGB')
    img = img.resize((224, 224), Image.NEAREST)
    img = RemoveBlackEdge(img)    # remove black edges
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)    # batch containing this single image
    x = preprocess_input(x)
    features = model.predict(x)
    return features[0]


feature_ls = []
names = []
# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the printed output) reproducible between runs.
for name in sorted(os.listdir(test_path)):
    image_path = os.path.join(test_path, name)
    if not os.path.isfile(image_path):
        # Skip sub-directories (e.g. notebook checkpoint folders), which
        # cannot be opened as images.
        continue
    features = get_image_feature(image_path)
    # Normalise the feature vector.
    vec = features / LA.norm(features)
    names.append(name)
    feature_ls.append(vec.tolist())

df = pd.DataFrame(feature_ls)
names_df = pd.DataFrame(names)
samples = df.values
n_clusters = 3
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(samples)    # train the model
labels = kmeans.predict(samples)    # predict the cluster of every image
# Print the label array together with the image names of each cluster in turn.
for cluster_id in range(n_clusters):
    print(labels, names_df[labels == cluster_id])

案例运行

google colab代码
https://colab.research.google.com/drive/1XkdM5Qvioysdn3zUPr2N2a_aQGX3Ifhw?usp=sharing

标签:labels,img,means,df,image,聚类,图像,np,import
来源: https://www.cnblogs.com/mcboy/p/16249480.html