Python图像识别（聚类）

 1 # -*- coding: utf-8 -*-
 2 """
 3 Created on Fri Sep 21 15:37:26 2018
 4 
 5 @author: zhen
 6 """
 7 from PIL import Image
 8 import numpy as np
 9 from sklearn.cluster import KMeans
10 import matplotlib
11 import matplotlib.pyplot as plt
12 
13 def restore_image(cb, cluster, shape):
14     row, col, dummy = shape
15     image = np.empty((row, col, dummy))
16     for r in range(row):
17         for c in range(col):
18             image[r, c] = cb[cluster[r * col + c]]
19     return image
20 
21 def show_scatter(a):
22     N = 10
23     density, edges = np.histogramdd(a, bins=[N, N, N], range=[(0, 1), (0, 1), (0, 1)])
24     density /= density.max()
25     x = y = z = np.arange(N)
26     d = np.meshgrid(x, y, z)
27     
28     fig = plt.figure(1, facecolor=\'w\')
29     ax = fig.add_subplot(111, projection=\'3d\')
30 
31     cm = matplotlib.colors.ListedColormap(list(\'rgbm\'))
32     ax.scatter(d[0], d[1], d[2], s=100 * density, cmap=cm, marker=\'o\', depthshade=True)
33     ax.set_xlabel(u\'红\')
34     ax.set_ylabel(u\'绿\')
35     ax.set_zlabel(u\'蓝\')
36     plt.title(u\'图像颜色三维频数分布\', fontsize=20)
37     
38     plt.figure(2, facecolor=\'w\')
39     den = density[density > 0]
40     den = np.sort(den)[::-1]
41     t = np.arange(len(den))
42     plt.plot(t, den, \'r-\', t, den, \'go\', lw=2)
43     plt.title(u\'图像颜色频数分布\', fontsize=18)
44     plt.grid(True)
45     
46     plt.show()
47       
48 if __name__ == \'__main__\':
49     matplotlib.rcParams[\'font.sans-serif\'] = [u\'SimHei\']
50     matplotlib.rcParams[\'axes.unicode_minus\'] = False
51     # 聚类数2,6,30
52     num_vq = 2
53     im = Image.open(\'C:/Users/zhen/.spyder-py3/images/Lena.png\')
54     image = np.array(im).astype(np.float) / 255
55     image = image[:, :, :3]
56     image_v = image.reshape((-1, 3))
57     kmeans = KMeans(n_clusters=num_vq, init=\'k-means++\')
58     show_scatter(image_v)
59     
60     N = image_v.shape[0]  # 图像像素总数
61     # 选择样本，计算聚类中心
62     idx = np.random.randint(0, N, size=int(N * 0.7))
63     image_sample = image_v[idx]
64     kmeans.fit(image_sample)
65     result = kmeans.predict(image_v)  # 聚类结果
66     print(\'聚类结果:\n\', result)
67     print(\'聚类中心:\n\', kmeans.cluster_centers_)
68     
69     plt.figure(figsize=(15, 8), facecolor=\'w\')
70     plt.subplot(211)
71     plt.axis(\'off\')
72     plt.title(u\'原始图片\', fontsize=18)
73     plt.imshow(image)
74     # plt.savefig(\'原始图片.png\')
75     
76     plt.subplot(212)
77     vq_image = restore_image(kmeans.cluster_centers_, result, image.shape)
78     plt.axis(\'off\')
79     plt.title(u\'聚类个数:%d\' % num_vq, fontsize=20)
80     plt.imshow(vq_image)
81     # plt.savefig(\'矢量化图片.png\')
82     
83     plt.tight_layout(1.2)
84     plt.show()
结果：
　　1.当k=2时：
　　2.当k=6时：
　　3.当k=30时：
总结：当聚类个数较少时，算法运算速度快但效果较差，当聚类个数较多时，运算速度慢效果好但容易过拟合，所以恰当的k值对于聚类来说影响极其明显！！