from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series
import random
from sklearn.metrics import silhouette_samples, silhouette_score
import time
def pressure_cluster(n_clusters, pressure_array):
    """Quantize a 1-D pressure series into ``n_clusters`` K-Means levels.

    The samples are clustered on their value alone. Every sample is then
    replaced by a representative level for its cluster, defined as
    ``mean + 2 * std`` of that cluster's members, so the output is a
    step-like version of the input.

    No ``random_state`` is passed to ``KMeans``, so the clustering (and
    therefore the result) may vary between calls.

    Args:
        n_clusters: Number of clusters to fit.
        pressure_array: 1-D sequence of pressure readings.

    Returns:
        A tuple ``(pressure_out, silhouette_avg)`` where ``pressure_out`` is
        a list with one level per input sample (same order as the input) and
        ``silhouette_avg`` is the value returned by ``silhouette_score`` for
        this clustering.
    """
    values = np.asarray(pressure_array, dtype=float).reshape(-1)
    # scikit-learn expects a 2-D (n_samples, n_features) array.
    data_set = values.reshape(-1, 1)

    model = KMeans(n_clusters=n_clusters)
    labels = model.fit_predict(data_set)
    silhouette_avg = silhouette_score(data_set, labels)

    # Representative level per cluster: mean + 2 * sample standard deviation.
    levels = np.full(n_clusters, np.nan)
    for cluster_id in range(n_clusters):
        members = values[labels == cluster_id]
        if members.size == 0:
            # No sample carries this label, so its level is never looked up.
            continue
        # A single-member cluster has no sample std (ddof=1); use 0 spread.
        spread = members.std(ddof=1) if members.size > 1 else 0.0
        levels[cluster_id] = members.mean() + 2 * spread

    # Replace every raw reading with the level of the cluster it belongs to.
    pressure_out = list(levels[labels])
    return pressure_out, silhouette_avg
def kmeans_auto(dataset, n):
    """Cluster ``dataset`` with the k in ``[2, n)`` that scores best.

    Runs ``pressure_cluster`` once for every ``k`` from 2 up to, but not
    including, ``n`` and keeps the output of the run with the highest
    silhouette score. On ties the smallest ``k`` wins. The elapsed
    wall-clock time in seconds is printed before returning.

    Args:
        dataset: 1-D sequence of pressure readings, passed through to
            ``pressure_cluster``.
        n: Exclusive upper bound for the number of clusters tried; must be
            greater than 2 so that at least ``k = 2`` is evaluated.

    Returns:
        The ``pressure_out`` list produced by the best-scoring run.

    Raises:
        ValueError: If ``n`` is not greater than 2.
    """
    if n <= 2:
        raise ValueError(
            "n must be greater than 2 so that at least k=2 is tried, got %r" % (n,)
        )

    start = time.time()
    best_score = None
    best_out = None
    for k in range(2, n):
        pressure_out, score = pressure_cluster(k, dataset)
        # Strict '>' keeps the first (smallest) k when scores tie.
        if best_score is None or score > best_score:
            best_score = score
            # Keep this run's output instead of re-fitting later: a second
            # fit is not guaranteed to reproduce the labelling that was scored.
            best_out = pressure_out
    print(time.time() - start)
    return best_out
if __name__ == "__main__":
    # Demo: build a noisy, piecewise-constant pressure trace, cluster it
    # and plot the raw signal together with the clustered levels.
    total_points = 72 * 2

    # Each entry is (exclusive end index, base level, noise amplitude).
    plateaus = (
        (30, 15, 1.5),
        (100, 20, 2),
        (120, 25, 2),
        (total_points, 20, 3),
    )

    signal1 = []
    segment_start = 0
    for segment_end, base, amplitude in plateaus:
        # One uniform random draw per sample, in sample order.
        for _ in range(segment_start, segment_end):
            signal1.append(base + amplitude * random.random())
        segment_start = segment_end

    pressure_cluster_res = kmeans_auto(signal1, 10)

    plt.plot(signal1)
    plt.plot(pressure_cluster_res)
    plt.show()
# Result figure: the raw signal overlaid with the clustered pressure levels.