确定 K 的一个技巧是:先对数据集运行 DBSCAN,由 DBSCAN 的结果确定聚类的数量 K,然后再用 K-means 求出各个聚类的中心。
下面是一段 Python 示例代码:
from sklearn.cluster import DBSCAN #python -m pip install scikit-learn
import cv2 as cv #python -m pip install opencv-python
import numpy as np #python -m pip install numpy
# Demo: estimate the number of clusters K with DBSCAN, then run k-means with
# that K to obtain the cluster centers.

# Sample 1-D data.
Z = np.array([0.0, 1.0, 0.25, 0.11, 0.12, 0.27, 0.99, 1.1, 0.05, 0.06])
# Speed up the DBSCAN by considering only unique points. np.unique also
# returns the values sorted, so the labels printed below follow ascending
# value order rather than the order of the literal above.
Z = np.unique(Z)
# One sample per row, one feature per column; cv.kmeans expects float32 data.
Z = Z.reshape((-1, 1)).astype(np.float32)

# Step 1: let DBSCAN discover how many clusters exist. DBSCAN numbers its
# clusters 0..n-1 and labels noise points -1, so max(label) + 1 is the
# cluster count.
dbscan_labels = DBSCAN(eps=0.05, min_samples=2).fit(Z).labels_
K = int(np.max(dbscan_labels)) + 1
if K < 1:
    # Every point was classified as noise; cv.kmeans would otherwise fail
    # with an opaque OpenCV assertion because it needs K > 0.
    raise ValueError(
        "DBSCAN found no clusters (all points labelled as noise); "
        "increase eps or lower min_samples"
    )

# Step 2: run k-means with the K found above.
# Termination criteria: stop after 10 iterations or once the requested
# accuracy (epsilon = 1.0) is reached, whichever comes first.
# NOTE(review): epsilon = 1.0 is large compared with this data's range
# (about 1.1); a smaller value may be more appropriate -- confirm.
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
# The 5th argument (10) is the number of attempts with different random
# initial centers; cv.kmeans returns the labelling with the best compactness.
_, label, center = cv.kmeans(Z, K, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)

print(f"\nK={K}")
print("\nlabel=")
print(label)
print("\ncenter=")
print(center)
代码输出:
K=4
label=
[[0]
[0]
[0]
[3]
[3]
[1]
[1]
[2]
[2]
[2]]
center=
[[0.03666667]
[0.26 ]
[1.0300001 ]
[0.11499999]]