【问题标题】:Confusion Matrix and caret package - rpart algorithm(混淆矩阵与 caret 包 - rpart 算法)
【发布时间】:2018-09-07 10:29:49
【问题描述】:

我正在使用 caret 包开发一个简单的 rpart 模型。

library(caret)

# Read the data set. `fread()` belongs to data.table, which this snippet never
# attaches, so it is called through its namespace. String columns are read as
# factors so that `Audit` is a factor outcome.
data <- data.table::fread(
  file.path(dataDir, dataFile),
  header = TRUE,
  stringsAsFactors = TRUE
)

# Hold out 30% of the rows for testing (p = 0.7 goes to training).
# With list = FALSE, createDataPartition() returns the selected row indices.
split <- createDataPartition(y = data$Audit, p = 0.7, list = FALSE)
trainSet <- data[split, ]
test <- data[-split, ]

# Resampling scheme: 10-fold cross-validation, repeated 10 times.
# (The original defined this identical object twice; once is enough.)
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10)

# Fit an rpart classification tree for Audit on the eight activity columns.
fit <- train(
  Audit ~ Activity1 + Activity2 + Activity3 + Activity4 +
    Activity5 + Activity6 + Activity7 + Activity8,
  data = trainSet,
  method = "rpart",
  trControl = fitControl
)

现在我正在尝试创建一个混淆矩阵:

# Predict the held-out rows with the trained model `fit`. The original line
# referenced `fit2`, which is never defined in this snippet.
preds <- predict(fit, newdata = test)

# Call caret's confusionMatrix() through its namespace. ModelMetrics exports a
# function with the same name; when that package is attached after caret it
# masks caret's version and a bare 2x2 numeric matrix is returned instead of
# the per-class table.
cm <- caret::confusionMatrix(preds, test$Audit)
print(cm)

当我看到输出时,我看到了这个:

     [,1] [,2]
[1,]    0    0
[2,]    4    0

这真的让我很困惑,而且没有任何意义。我期待这样的东西成为我的混淆矩阵

preds A B U
    A 0 0 0
    B 0 4 0
    U 2 0 9

这是我的数据快照:

snapshot of the data

这是我的数据输入:

dput(as.data.frame(data))
structure(list(Activity1 = structure(c(2L, 1L, 2L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 2L), .Label = c("a", "b"), class = "factor"), Activity2 = 
structure(c(2L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"), 
Activity3 = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("a", "b"), class = "factor"), Activity4 = structure(c(2L, 
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("b", 
"e"), class = "factor"), Activity5 = structure(c(2L, 1L, 
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("b", "d"), class = "factor"), 
Activity6 = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("d", "e"), class = "factor"), Activity7 = structure(c(1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("a", 
"c"), class = "factor"), Activity8 = structure(c(2L, 2L, 
4L, 2L, 2L, 2L, 2L, 4L, 2L, 1L, 3L, 1L, 2L, 4L, 1L, 2L, 2L, 
2L, 2L, 2L, 4L, 2L, 3L, 2L, 4L, 2L, 1L, 2L, 3L, 4L, 2L, 2L, 
1L, 2L, 2L, 2L, 3L, 4L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 
3L, 2L, 2L, 2L, 3L, 2L, 2L, 4L, 3L), .Label = c("a", "c", 
"d", "e"), class = "factor"), Audit = structure(c(3L, 1L, 
3L, 2L, 1L, 1L, 1L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 1L, 3L, 
3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 
2L, 1L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L), .Label = c("A", "B", 
"U"), class = "factor")), .Names = c("Activity1", "Activity2", 
"Activity3", "Activity4", "Activity5", "Activity6", "Activity7", 
"Activity8", "Audit"), row.names = c(NA, -56L), class = "data.frame")

这是我的测试数据的输出:

> dput(as.data.frame(test))
structure(list(Activity1 = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("a", "b"), class = 
"factor"), 
Activity2 = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 
1L, 2L, 2L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"), 
Activity3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("a", "b"), class = "factor"), 
Activity4 = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("b", "e"), class = "factor"), 
Activity5 = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("b", "d"), class = "factor"), 
Activity6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("d", "e"), class = "factor"), 
Activity7 = structure(c(2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L), .Label = c("a", "c"), class = "factor"), 
Activity8 = structure(c(2L, 2L, 2L, 4L, 2L, 2L, 2L, 3L, 2L, 
2L, 3L, 2L, 2L, 4L, 3L), .Label = c("a", "c", "d", "e"), class = "factor"), 
Audit = structure(c(2L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 1L, 3L, 
3L, 3L, 2L, 3L, 3L), .Label = c("A", "B", "U"), class = "factor")), .Names = c("Activity1", 
"Activity2", "Activity3", "Activity4", "Activity5", "Activity6", 
"Activity7", "Activity8", "Audit"), class = "data.frame", row.names = c(NA, 
-15L))

谁能帮我解决这个问题?谢谢

【问题讨论】:

  • 为了回答,我们需要查看训练和测试数据。你能给他们提供dput吗?
  • 我添加了数据快照。这行得通吗?
  • read this。为了解决您的问题,我们必须首先重新创建它。要重新创建它,我们需要产生与您显示的相同结果的数据和代码。没有人想从图片中输入数据。更不用说不会重现问题的数据了。
  • 感谢@missuse 提供的信息。对不起,我是这个网站的新手,但我已经用我的数据输入更新了这个问题。希望没问题?
  • 没问题,我们有时都是新手。您能否为测试数据提供dput?到目前为止我可以看到的一个问题是选定的cp 为0。这不好。这意味着这棵树根本没有被修剪。

标签: r r-caret confusion-matrix rpart


【解决方案1】:

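我刚刚发现了我的 confusionMatrix 不能正常工作的原因。这是因为我在代码开头同时加载了 library(caret) 和 library(ModelMetrics)。ModelMetrics 包也导出了一个名为 confusionMatrix 的函数,后加载时会屏蔽(mask)caret 的同名函数,因此实际调用的是 ModelMetrics 的版本。一旦我注释掉 library(ModelMetrics) 并重新启动 R,混淆矩阵就能完美地工作了。(也可以显式写成 caret::confusionMatrix(...) 来避免这种冲突。)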

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 2017-03-20
    • 2013-11-21
    • 2014-02-11
    • 1970-01-01
    • 2021-04-16
    • 2014-07-16
    • 1970-01-01
    • 2020-10-28
    相关资源
    最近更新 更多