【发布时间】:2016-06-07 16:55:13
【问题描述】:
我正在编写一个代码,这给了我一个错误提示 'int' object has no attribute 'flatten'。我正在使用 k=2 的 kmean 算法对我的数据进行聚类(作为数据点列表传递)。
import csv
import numpy as np
k=2
data=[23,43,67,89,1,10,101,243,34]
clusters = [[] for i in range(k)]
def kmeans(data, k):
centroids = []
centroids = randomize_centroids(data, centroids, k)
old_centroids = [[] for i in range(k)]
iterations = 0
while not (has_converged(centroids, old_centroids, iterations)):
iterations += 1
clusters = euclidean_dist(data, centroids, clusters) # assign data points to clusters
index =0 # recalculate centroids
for cluster in clusters:
old_centroids[index] = centroids[index]
centroids[index] = np.mean(cluster, axis=0).tolist()
index += 1
print("The total number of data instances is: " + str(len(data)))
print("The total number of iterations necessary is: " + str(iterations))
print("The means of each cluster are: " + str(centroids))
print("The clusters are as follows:")
for cluster in clusters:
print("Cluster with a size of " + str(len(cluster)) + " starts here:")
print(np.array(cluster).tolist())
print("Cluster ends here.")
# Calculates euclidean distance between
# a data point and all the available cluster
# centroids.
def euclidean_dist(data, centroids, clusters):
for instance in data: # Find which centroid is the closestto the given data point.
print("hello")
mu_index = min([(i[0], np.linalg.norm(instance-centroids[i[0]])) \
for i in enumerate(centroids)], key=lambda t:t[1])[0]
try:
clusters[mu_index].append(instance)
except KeyError:
clusters[mu_index] = [instance]
# If any cluster is empty then assign one point
# from data set randomly so as to not have empty
# clusters and 0 means.
for cluster in clusters:
if not cluster:
cluster.append(data[np.random.randint(0, len(data), size=1)].flatten().tolist())
return clusters
# randomize initial centroids
def randomize_centroids(data, centroids, k):
for cluster in range(0, k):
print("hello")
centroids.append(data[np.random.randint(0, len(data), size=1)].flatten().tolist())
return centroids
# check if clusters have converged
def has_converged(centroids, old_centroids, iterations):
MAX_ITERATIONS = 1000
if iterations > MAX_ITERATIONS:
return True
return old_centroids == centroids
if __name__=='__main__' :
kmeans([23,43,67,89,1,10,101,243,34],2)
print("hello")
Traceback(最近一次调用最后一次):
文件“C:\Users\shri ram\Desktop\summer_code_files\ddpcr_analysis.py”,第 66 行,在 kmeans([23,43,7,89,1,10,101,243,34,2],2) 文件“C:\Users\shri ram\Desktop\summer_code_files\ddpcr_analysis.py”,第 13 行,kmeans
clusters = euclidean_dist(data, centroids, clusters) # 将数据点分配给集群
UnboundLocalError: 赋值前引用了局部变量“clusters”
【问题讨论】:
-
data[np.random.randint(0, len(data), size=1)].flatten()所以很明显data是一个整数数组。你没有展平阵列。您正在尝试从该数组中展平一个元素,该数组恰好是一个 int。正如错误所说,你不能展平一个 int。 -
嗨,马克,感谢您的帮助。我是 python 新手,所以我不太了解编码,所以我应该做什么编辑才能让它工作?
标签: python python-3.x