mstk

KMeans算法比较简单,不详细介绍了,直接上代码。

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering._

/**
  * Created by Administrator on 2017/7/11.
  *
  * Minimal Spark MLlib KMeans example: reads whitespace-separated numeric
  * vectors from a text file, trains a KMeans model on them, then prints the
  * cluster centers and the clustering cost.
  */
object Kmenas {

  /**
    * Program entry point.
    *
    * @param args command-line arguments; not used by this example
    */
  def main(args: Array[String]): Unit = {
    // Set up the runtime environment.
    val conf = new SparkConf().setAppName("KMeans Test")
      .setMaster("spark://master:7077")
      .setJars(Seq("E:\\Intellij\\Projects\\MachineLearning\\MachineLearning.jar"))
    val sc = new SparkContext(conf)

    // Everything after context creation runs inside try/finally so the
    // context is always stopped, even when parsing or training fails.
    try {
      Logger.getRootLogger.setLevel(Level.WARN)

      // Load the sample data and parse each line into a dense vector.
      // Lines are trimmed and empty ones skipped so that blank lines or
      // repeated separators do not cause a NumberFormatException.
      val data = sc.textFile("hdfs://master:9000/ml/data/kmeans_data.txt")
      val parsedData = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .cache() // reused by both training and cost computation

      // Build and train the KMeans clustering model.
      val initMode = "k-means||"
      val numClusters = 2
      val numIterations = 500
      val model = new KMeans()
        .setInitializationMode(initMode)
        .setK(numClusters)
        .setMaxIterations(numIterations)
        .run(parsedData)

      // Print every coordinate of each center, tab-separated, rather than
      // assuming the data is exactly two-dimensional.
      val centers = model.clusterCenters
      println("Centers:")
      centers.foreach(center => println(center.toArray.mkString("\t")))

      // Compute and report the clustering cost returned by the model.
      val cost = model.computeCost(parsedData)
      println("Errors = " + cost)
    } finally {
      sc.stop()
    }
  }

}

运行结果:

分类:

技术点:

相关文章:

  • 2021-10-19
  • 2021-10-19
  • 2021-10-19
  • 2021-05-05
  • 2021-06-20
  • 2021-11-11
  • 2021-10-06
猜你喜欢
  • 2021-07-28
  • 2021-10-19
  • 2021-08-12
  • 2021-06-09
  • 2021-09-13
  • 2021-09-14
  • 2021-10-19
相关资源
相似解决方案