【发布时间】:2014-10-15 05:42:47
【问题描述】:
我正在尝试对 Apache Mahout Cookbook 示例中的合成控制数据(synthetic control data)进行 Canopy 聚类。但是,我得到的结果不是 6 个集群,而是 600 个——数据集中的每个样本各成一个集群。
C-0{n=1 c=[0:28.781, 1:34.463, 2:31.338, 3:31.283, 4:28.921, 5:33.760, 6:25.397, 7:27.785, 8:35.248, 9 :27.116, 10:32.872, 11:29.217, 12:36.025, 13:32.337, 15:34.525, 16:32.872, 17:34.117, 18:26.524, 19:27.662, 20:26.34, 21:29.25:20:26.74, 21:25:20 , 25:30.733, 26:29.505, 27:33.029, 28:25.040, 31:28.917, 32:24.344, 33:26.120, 34:34.942, 35:25.029, 36:26.631, 37:35.654, 37:28.454, 39.38: :29.150, 40:28.158, 41:26.193, 42:33.318, 43:30.977, 44:27.044, 45:35.534, 46:26.235, 47:28.996, 48:32.004, 49:31.056, 50:31.056, 50:31.056, 50:3.7451 , 52:28.940, 53:35.497, 54:29.747, 56:31.433, 57:24.556, 58:33.743, 59:25.047, 60:34.932] r=[]}
C-1{n=1 c=[0:24.892, 1:25.741, 3:27.553, 4:32.822, 5:27.879, 6:31.593, 7:31.486, 8:35.547, 9:27.952, 10 :31.660, 11:27.542, 12:31.189, 13:27.487, 14:31.391, 16:27.811, 18:24.488, 20:27.592, 21:35.627, 22:35.410, 23:31.45, 24:31.45, 24:4.17, 24:4.175 , 26:35.142, 27:30.472, 28:31.987, 29:33.662, 30:25.551, 31:30.469, 32:33.647, 33:25.070, 34:34.077, 35:32.598, 36:28.304, 38.47 :26.941, 39:31.520, 40:33.109, 41:24.149, 42:28.516, 43:25.791, 44:35.952, 45:26.530, 46:24.858, 47:25.956, 48:32.836, 49:2.3650 , 51:30.621, 52:28.986, 53:29.405, 54:32.558, 55:31.021, 56:26.642, 57:28.433, 58:33.656, 59:26.424, 60:28.466] r=[]}
C-2{n=1 c=[0:31.399, 1:30.632, 2:26.398, 3:24.291, 4:27.861, 5:28.549, 6:24.972, 7:32.436, 8:25.224, 9 :27.307, 10:31.839, 11:27.259, 12:28.257, 13:26.582, 14:24.046, 15:35.063, 16:31.572, 17:32.561, 18:31.031, 19:34.120, 20:1.4,6:218 , 22:35.017, 23:32.385, 24:24.332, 25:30.200, 26:31.245, 27:26.681, 28:31.514, 29:28.878, 30:27.309, 31:24.246, 33:26.963, 35.32: :31.611, 36:24.713, 37:27.481, 38:24.208, 39:26.806, 40:35.125, 41:32.629, 42:31.056, 43:26.358, 44:28.086, 45:31.40, 46:26.7:46:26.7:8 , 48:35.973, 49:34.144, 50:27.172, 51:33.632, 52:26.597, 53:25.539, 54:32.543, 55:25.577, 56:29.990, 57:31.351, 59:33.900, 60: =[]}
C-3{n=1 c=[0:25.774, 2:30.526, 3:35.421, 4:25.603, 5:27.970, 8:25.270, 9:28.132, 11:29.427, 12:31.455, 13 :27.320, 16:28.956, 17:28.992, 18:29.958, 19:30.277, 20:30.445, 21:24.304, 22:24.314, 24:35.097, 25:25.368, 26:32.097, 27:30.13:38. , 29:35.316, 30:31.626, 31:29.281, 32:34.202, 33:26.508, 34:32.228, 35:25.527, 36:24.824, 38:27.559, 39:28.371, 40:32.367, 4.21: :35.935, 43:35.115, 44:24.375, 45:27.608, 46:27.843, 47:29.856, 48:32.419, 49:26.891, 50:31.321, 51:29.385, 52:34.38, 53:29.74:4:4 , 56:31.873, 57:34.205, 58:31.156, 60:34.629] r=[]}
以此类推,直到 C-600。
有人知道可能的原因吗?
我使用的命令如下:
mahout canopy -i $WORK_DIR/sequencefile/synthetic_control.seq -o
$WORK_DIR/output/canopy.output -t1 80 -t2 55
我使用的是运行在 Hadoop 1.2.1 上的 Mahout 0.9。书中的示例针对的是 0.9 版本的 Mahout,调用该命令的方式是否发生了变化?
我什至尝试使用不同的 t1 和 t2 值,但结果是一样的。
谢谢
【问题讨论】:
-
你找到答案了吗?我也遇到了类似的情况:集合中的每个样本各自成为一个集群。
标签: mahout