基于算法进行预测时出错答案

【问题标题】：Error in making predictions based on an algorithm基于算法进行预测时出错
【发布时间】：2018-03-27 10:48:16
【问题描述】：

我有一个这样组织的数据集：

library(lubridate)
library(e1071)
library(rpart)
library(pROC)
library(rpart.plot)
library(RColorBrewer)
library(ada)
library(maboost)
library(adabag)
library(ROCR)
library(data.table)

> head(crypto_data)
                  time btc_price eth_price block_size difficulty estimated_btc_sent estimated_transaction_volume_usd  hash_rate
1: 2017-09-02 21:54:00  4537.834   330.727  142521291   8.88e+11           2.04e+13                        923315360 7417412092
2: 2017-09-02 22:29:00  4577.605   337.804  136524566   8.88e+11           2.03e+13                        918188067 7152504517
3: 2017-09-02 23:04:00  4566.360   336.938  134845546   8.88e+11           2.01e+13                        910440916 7240807042
4: 2017-09-02 23:39:00  4590.031   342.929  133910638   8.88e+11           1.99e+13                        901565930 7284958305
5: 2017-09-03 00:14:00  4676.193   354.171  130678099   8.88e+11           2.01e+13                        922422228 7152504517
6: 2017-09-03 00:49:00  4699.936   352.299  127557140   8.88e+11           1.99e+13                        910457430 7064201992
   miners_revenue_btc miners_revenue_usd minutes_between_blocks n_blocks_mined n_blocks_total n_btc_mined   n_tx nextretarget total_btc_sent
1:               2395           10839520                   8.00            168         483207    2.10e+11 241558       483839       1.62e+14
2:               2317           10482320                   8.33            162         483208    2.03e+11 236661       483839       1.60e+14
3:               2342           10596900                   8.22            164         483216    2.05e+11 238682       483839       1.60e+14
4:               2352           10642439                   8.14            165         483220    2.06e+11 237159       483839       1.58e+14
5:               2316           10611798                   8.38            162         483223    2.03e+11 237464       483839       1.58e+14
6:               2288           10481960                   8.41            160         483226    2.00e+11 234472       483839       1.57e+14
   total_fees_btc totalbtc trade_volume_btc trade_volume_usd targetVar
1:    29597881711 1.65e+15        102451.92        463497285       buy
2:    29202300823 1.65e+15        102451.92        463497285       buy
3:    29234981721 1.65e+15        102451.92        463497285       buy
4:    28991577368 1.65e+15        102451.92        463497285       buy
5:    29179041967 1.65e+15         96216.78        440710136      hold
6:    28844391629 1.65e+15         96216.78        440710136      hold

然后我创建了一个函数：

# Summarise a confusion matrix as an overall misclassification rate.
#
# conf_mat: a table or matrix of counts whose diagonal holds the correctly
#           classified cases.
# Returns a list with `conf_mat` (the input after as.matrix()) and `error`
# (one minus the share of all counts that lie on the diagonal).
classification_error <- function(conf_mat) {
  cm <- as.matrix(conf_mat)
  n_correct <- sum(diag(cm))
  n_total <- sum(cm)
  list(conf_mat = cm, error = 1 - n_correct / n_total)
}
    # Fit a linear-kernel SVM on a random subset of inData and report the
    # classification error on train/validation/test subsets.
    # This is the code as posted in the question; per the reported traceback it
    # stops inside table(). See the NOTE(review) comments below.
    #
    # inData:   table with a `targetVar` column; every other column enters the
    #           model through the `targetVar ~ .` formula.
    # trainPct: overall fraction of rows to use, divided 60/20/20 into
    #           train/validation/test fractions.
    # Intended return: list(trainError, valError, testError), each the result
    # of classification_error().
    predFunc <- function(inData,trainPct){



  # Fractions of all rows assigned to each subset.
  trainP <- trainPct * .6
  valP <- trainPct * .2
  testP <- trainPct * .2

  # Split data
  # NOTE(review): the three index sets are drawn independently of each other,
  # so the same row can appear in more than one of train/val/test.
  trainObs <- sample(nrow(inData), trainP * nrow(inData), replace = FALSE)
  valObs <- sample(nrow(inData), valP * nrow(inData), replace = FALSE)
  testObs <- sample(nrow(inData), testP * nrow(inData), replace = FALSE)

  # Create the training/validation/test datasets
  trainDS <- inData[trainObs,]
  valDS <- inData[valObs,]
  testDS <- inData[testObs,]

  # SVM - linear kernel
  # NOTE(review): e1071::svm() documents `type`, not `method`, for choosing
  # C-classification; confirm whether `method` has any effect here.
  linearSVM <- svm(targetVar ~ ., data = trainDS, method = "C-classification", kernel = "linear")


  # Linear SVM predictions on each subset, with the response column removed.
  # NOTE(review): `[, -c("targetVar")]` presumably relies on inData being a
  # data.table (column exclusion by name); verify against the caller.
  predSVMlin <- predict(linearSVM, trainDS[,-c("targetVar")])
  valSVMlin <- predict(linearSVM, valDS[,-c("targetVar")])
  testSVMlin <- predict(linearSVM, testDS[,-c("targetVar")])

  # SVM confusion matrices
  # NOTE(review): `trainDS[, c("targetVar")]` appears to yield a one-column
  # table rather than a plain vector, which matches the reported sort.list()
  # error raised from table(). All three calls also use the trainDS labels,
  # including the ones paired with validation and test predictions.
  trainConfusion <- table(true = trainDS[,c("targetVar")], pred = predSVMlin)
  valConfusion <- table(true = trainDS[,c("targetVar")], pred = valSVMlin)
  testConfusion <- table(true = trainDS[,c("targetVar")], pred = testSVMlin)

  # Linear SVM classification error for each subset
  trainClassificationError <- classification_error(trainConfusion)
  valClassificationError <- classification_error(valConfusion)
  testClassificationError <- classification_error(testConfusion)

  # NOTE(review): the trailing comma after testError leaves an empty final
  # argument in the list() call.
  return(list(trainError = trainClassificationError,
              valError = valClassificationError,
              testError = testClassificationError,
              ))
}

然后我调用了函数：crypt <- predFunc(crypto_data, .7)，但是当我运行这段代码时，我得到了以下错误：

Error in sort.list(y) : 'x' must be atomic for 'sort.list'
Have you called 'sort' on a list?

回溯（traceback）：
5. stop("'x' must be atomic for 'sort.list'\nHave you called 'sort' on a list?")
4. sort.list(y)
3. factor(a, exclude = exclude)
2. table(true = trainDs[, c("targetVar")], pred = predSVM)
1. predFunc(crypto_data, 0.7)

基本上，我认为该函数在创建混淆矩阵时遇到了问题，但我不明白为什么或如何解决它。建议？

【问题讨论】：

请写出你使用什么库。
@Alex 我包含了库

标签： r function machine-learning error-handling

【解决方案1】：

在做线性 SVM 预测时，应该使用类似下面的写法：

# Predict on the training data with the response column dropped.
# NOTE(review): `%>%` and select() are not provided by any package in the
# question's library() list; presumably library(dplyr) is required - confirm.
predSVMlin <- predict(linearSVM, trainDS %>% select(-targetVar))
在 # SVM Confusion matrix 部分，可以用 trainDS$targetVar 取出一个普通向量，而不是单列的数据框（table() 需要的是向量）。
这取决于你计算的指标，但最后两个步骤在我看来不清楚，你能解释一下吗？
我找不到 classification_error 函数，请说明它来自哪个包，或者贴出它的源代码。

【讨论】：

请看我的编辑，我添加了classification_error函数

【解决方案2】：

看了你的评论之后，我可以给出一个解决方案：

# Summarise a confusion matrix as an overall misclassification rate.
#
# conf_mat: a table or matrix of counts whose diagonal holds the correctly
#           classified cases.
# Returns a list with `conf_mat` (the input after as.matrix()) and `error`
# (one minus the share of all counts that lie on the diagonal).
classification_error <- function(conf_mat) {
  cm <- as.matrix(conf_mat)
  n_correct <- sum(diag(cm))
  n_total <- sum(cm)
  list(conf_mat = cm, error = 1 - n_correct / n_total)
}

# Fit a linear-kernel C-classification SVM and report the misclassification
# rate on training, validation and test subsets.
#
# inData:   data.frame or data.table holding the class label in `targetVar`;
#           every other column is used as a predictor (`targetVar ~ .`).
# trainPct: single number in (0, 1], the share of rows to use overall. The
#           used rows are divided 60/20/20 into disjoint train/validation/test
#           sets.
# Returns list(trainError, valError, testError); each element is the list
# produced by classification_error() (confusion matrix plus error rate).
predFunc <- function(inData, trainPct) {
  stopifnot(
    is.numeric(trainPct), length(trainPct) == 1L,
    trainPct > 0, trainPct <= 1,
    "targetVar" %in% names(inData)
  )

  # Work on a plain data.frame so column handling is identical for data.table
  # and data.frame input, and no dplyr/magrittr dependency is needed.
  dat <- as.data.frame(inData)
  # A factor response keeps the class levels fixed, so svm() classifies and
  # every confusion matrix is square even if a subset lacks some class.
  dat$targetVar <- factor(dat$targetVar)

  n_rows <- nrow(dat)
  n_train <- floor(trainPct * 0.6 * n_rows)
  n_val <- floor(trainPct * 0.2 * n_rows)
  n_test <- floor(trainPct * 0.2 * n_rows)
  stopifnot(n_train > 0, n_val > 0, n_test > 0)

  # Shuffle once and take consecutive slices: the three sets cannot overlap,
  # so validation/test errors are not contaminated by training rows.
  shuffled <- sample.int(n_rows)
  trainDS <- dat[shuffled[seq_len(n_train)], , drop = FALSE]
  valDS <- dat[shuffled[n_train + seq_len(n_val)], , drop = FALSE]
  testDS <- dat[shuffled[n_train + n_val + seq_len(n_test)], , drop = FALSE]

  # `type` (not `method`) is the svm() argument that selects the SVM flavour.
  linearSVM <- svm(targetVar ~ ., data = trainDS,
                   type = "C-classification", kernel = "linear")

  # Predict on one subset and score it against that subset's own labels.
  evaluate_subset <- function(ds) {
    predictors <- ds[, setdiff(names(ds), "targetVar"), drop = FALSE]
    predicted <- predict(linearSVM, predictors)
    classification_error(table(true = ds$targetVar, pred = predicted))
  }

  list(
    trainError = evaluate_subset(trainDS),
    valError = evaluate_subset(valDS),
    testError = evaluate_subset(testDS)
  )
}

【讨论】：