源码:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 13 23:58:25 2019
@author: dell
"""
import numpy as np
from sklearn.cluster import KMeans #sklearn库中的cluster类中的KMeans算法
def loadData(filePath):
    """Load a comma-separated data file and split it into features and labels.

    Every non-blank line is split on commas. The label is taken from the
    fifth field (index 4), and every field except the last one is converted
    to ``float`` to form the feature row. Blank lines are skipped.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A tuple ``(irisData, irisName)`` where ``irisData`` is a list of
        feature rows (lists of floats) and ``irisName`` is the list of label
        strings, both in file order.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a feature field cannot be converted to ``float``.
    """
    irisData = []
    irisName = []
    # Read-only mode is sufficient; 'r+' would needlessly require write access.
    with open(filePath, 'r') as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A blank line would split into [''] and make items[4] fail.
                continue
            items = stripped.split(",")
            irisName.append(items[4])  # label column
            # All fields except the last one are numeric features.
            irisData.append([float(field) for field in items[:-1]])
    return irisData, irisName
if __name__ == '__main__':
    # Parse the data file into feature rows and their label strings.
    irisData, irisName = loadData('iris.data')

    # Fit a 3-cluster k-means model and get the cluster index of each sample.
    km = KMeans(n_clusters=3)
    label = km.fit_predict(irisData)

    # One number per cluster: the sum of that cluster centre's coordinates.
    character = np.sum(km.cluster_centers_, axis=1)

    # Collect the label strings of the samples assigned to each cluster.
    irisCluster = [[], [], []]
    for clusterIndex, name in zip(label, irisName):
        irisCluster[clusterIndex].append(name)

    # Report each cluster's characteristic value followed by its members.
    for value, members in zip(character, irisCluster):
        print("Iris-Character:%.2f" % value)
        print(members)
测试结果:
