【问题标题】:How to create a confusion matrix for xgboost in R(如何在 R 中为 xgboost 创建混淆矩阵)
【发布时间】:2019-11-13 13:32:12
【问题描述】:

我已经在 R 中创建了我的 XGBoost 分类器,如下代码所示

# Import the dataset from CSV
XGBoostDataSet_Hr_Admin_8 <- read.csv("CompletedDataImputed_HR_Admin.csv")

# Recode the categorical columns as numbers.
# NOTE: as.numeric() applied to a factor returns the integer level codes
# (1, 2, ...) in the order given by `levels`; here the `labels` argument only
# renames the levels and does not change those codes.
XGBoostDataSet_Hr_Admin_8$Salary = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Salary, levels =c('L','M', 'H', 'V'), labels =c(1,2,3,4)))
XGBoostDataSet_Hr_Admin_8$Rude_Behavior = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Rude_Behavior, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Feeling_undervalued =as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Feeling_undervalued, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Overall_satisfaction = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Overall_satisfaction, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Raises_frozen = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Raises_frozen, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Poor_Conditions = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Poor_Conditions, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Growth_not_available = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Growth_not_available, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
XGBoostDataSet_Hr_Admin_8$Workplace_Conflict = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Workplace_Conflict, levels=c('Y', 'M', 'N'), labels =c(1,2,3)))
# NOTE(review): for the reason given above, this produces Y = 1 and N = 2,
# not the 1/0 coding that `labels` suggests.
XGBoostDataSet_Hr_Admin_8$Employee_Turnover = as.numeric(factor(XGBoostDataSet_Hr_Admin_8$Employee_Turnover, levels=c('Y', 'N'), labels =c(1,0)))

# Split the data into a training set and a test set: rows flagged TRUE by
# sample.split() go to training_set8, the remaining rows to test_set8.
# No seed is set, so the split differs from run to run.
library(caTools)
split = sample.split(XGBoostDataSet_Hr_Admin_8$Employee_Turnover,SplitRatio = 0.7)
training_set8 = subset(XGBoostDataSet_Hr_Admin_8, split==TRUE)
test_set8 = subset(XGBoostDataSet_Hr_Admin_8, split==FALSE)

# Fit XGBoost to the training set. Every column except the 10th is used as a
# feature (presumably column 10 is Employee_Turnover -- confirm against the
# data) and Employee_Turnover is the label. No `objective` is passed, so
# xgboost falls back to its default objective.
library(xgboost)
classifier9 = xgboost(data = as.matrix(training_set8[-10]), label = training_set8$Employee_Turnover, nrounds = 10)

现在,我需要为 XGBoost 创建一个混淆矩阵。

我在网上搜索过,可惜找不到解决办法。

谁能帮帮我。

提前致谢

【问题讨论】:

    标签: r xgboost xgbclassifier


    【解决方案1】:

    您可以使用caret::confusionMatrix() 函数,但您需要对输出进行一些处理。显然,您需要一个真实结果(测试数据集)的向量,来比较计算结果和真实结果:

    # Attach the packages used below: xgboost (model), caTools (split),
    # caret (confusion matrix). Same attach order as before.
    library(xgboost)
    library(caTools)
    library(caret)
    
    # Recode the categorical columns numerically. as.numeric() on a factor
    # returns the level codes, so the order of `levels` decides the numbers.
    XGBoostDataSet_Hr_Admin_8$Salary <- as.numeric(
      factor(XGBoostDataSet_Hr_Admin_8$Salary,
             levels = c("L", "M", "H", "V"), labels = c(1, 2, 3, 4))
    )
    
    # The seven survey columns all share the Y / M / N coding (codes 1 / 2 / 3).
    survey_cols <- c(
      "Rude_Behavior", "Feeling_undervalued", "Overall_satisfaction",
      "Raises_frozen", "Poor_Conditions", "Growth_not_available",
      "Workplace_Conflict"
    )
    XGBoostDataSet_Hr_Admin_8[survey_cols] <- lapply(
      XGBoostDataSet_Hr_Admin_8[survey_cols],
      function(answer) {
        as.numeric(factor(answer, levels = c("Y", "M", "N"), labels = c(1, 2, 3)))
      }
    )
    
    # Target: the factor codes are Y = 1 and N = 2; ifelse() then maps them to
    # the 0/1 labels used for training (Y -> 0, N -> 1).
    turnover_code <- as.numeric(
      factor(XGBoostDataSet_Hr_Admin_8$Employee_Turnover,
             levels = c("Y", "N"), labels = c(1, 0))
    )
    XGBoostDataSet_Hr_Admin_8$Employee_Turnover <- ifelse(turnover_code == 1, 0, 1)
    
    # Train/test split: rows flagged TRUE train the model, the rest test it.
    split <- sample.split(XGBoostDataSet_Hr_Admin_8$Employee_Turnover,
                          SplitRatio = 0.7)
    training_set8 <- XGBoostDataSet_Hr_Admin_8[which(split), , drop = FALSE]
    test_set8 <- XGBoostDataSet_Hr_Admin_8[which(!split), , drop = FALSE]
    
    # Fit a binary classifier on every column except the 10th.
    bst <- xgboost(
      data = as.matrix(training_set8[, -10]),
      label = training_set8$Employee_Turnover,
      max_depth = 2,
      eta = 0.5,
      nthread = 2,
      nrounds = 5,
      objective = "binary:logistic"
    )
    
    # Predict on the held-out rows and turn the predictions into 0/1 classes;
    # the 0.5 cutoff can be changed to trade off the two kinds of error.
    pred <- as.numeric(predict(bst, as.matrix(test_set8[, -10])) > 0.5)
    
    # Compare predicted classes against the observed ones.
    confusionMatrix(
      data = factor(pred),
      reference = factor(test_set8$Employee_Turnover)
    )
    
    Confusion Matrix and Statistics
    
              Reference
    Prediction  0  1
             0 67  2
             1  0 16
    
                   Accuracy : 0.9765          
                     95% CI : (0.9176, 0.9971)
        No Information Rate : 0.7882          
        P-Value [Acc > NIR] : 4.626e-07       
    
                      Kappa : 0.9265          
    
     Mcnemar's Test P-Value : 0.4795          
    
                Sensitivity : 1.0000          
                Specificity : 0.8889          
             Pos Pred Value : 0.9710          
             Neg Pred Value : 1.0000          
                 Prevalence : 0.7882          
             Detection Rate : 0.7882          
       Detection Prevalence : 0.8118          
          Balanced Accuracy : 0.9444          
    
           'Positive' Class : 0   
    

    【讨论】:

    • 您好 s_t,在尝试此部分 pred
    • 嘿 s_t 和 ibocus,我刚刚看到你的回答,我认为 @ibocus 没有像示例那样使用 xgb.DMatrix,而是给 xgboost 提供了一个矩阵和标签(我不确定)
    • 我已经更新了我的问题中的完整脚本,你能看看我如何从那里创建混淆矩阵
    • 不幸的是,没有你的数据就无法复现(请尝试用 dput() 函数贴出你的训练集和测试集,或者贴一些与之类似的模拟数据)。不过你有没有试过:pred <- predict(classifier8, test_set8 ,type = "response")
    • @s_t,你说得对,我只是搞糊涂了..非常感谢你让我明白了..谢谢:)
    【解决方案2】:

    首先请注意,您需要将 training_set8$Employee_Turnover 转换为 0 和 1。希望您已经这样做了;如果还没有,请参见下面的例子。

    第二,在做xgboost的时候需要指定objective="binary:logistic",这个是做分类的。

    所以从你所拥有的开始:

    # Packages: caTools (train/test split), xgboost (model),
    # caret (confusion matrix)
    library(caTools)
    library(xgboost)
    library(caret)
    # Fix the RNG seed for reproducible results
    set.seed(12345)
    
    XGBoostDataSet_Hr_Admin_8 <- read.csv("CompletedDataImputed_HR_Admin.csv")
    
    # Recode the categorical predictors numerically. as.numeric() on a factor
    # returns the level codes, so the order of `levels` decides the numbers.
    XGBoostDataSet_Hr_Admin_8$Salary <- as.numeric(
      factor(XGBoostDataSet_Hr_Admin_8$Salary,
             levels = c("L", "M", "H", "V"), labels = c(1, 2, 3, 4))
    )
    # The remaining survey columns all use the same Y / M / N coding.
    for (survey_col in c("Rude_Behavior", "Feeling_undervalued",
                         "Overall_satisfaction", "Raises_frozen",
                         "Poor_Conditions", "Growth_not_available",
                         "Workplace_Conflict")) {
      XGBoostDataSet_Hr_Admin_8[[survey_col]] <- as.numeric(
        factor(XGBoostDataSet_Hr_Admin_8[[survey_col]],
               levels = c("Y", "M", "N"), labels = c(1, 2, 3))
      )
    }
    

    对于这部分,我们将标签正确设置为 0 和 1

    # Class names of the target; a value's position in this vector minus one
    # is its numeric label (N -> 0, Y -> 1).
    lvl <- c("N", "Y")
    # match() returns each value's position in lvl (NA when absent);
    # subtracting 1 gives the 0/1 coding.
    lb <- match(as.character(XGBoostDataSet_Hr_Admin_8$Employee_Turnover), lvl) - 1
    XGBoostDataSet_Hr_Admin_8$Employee_Turnover <- lb
    

    我们按照您的方式拆分为训练 + 测试:

    # Split into training and test sets: sample.split() returns a logical flag
    # per row (SplitRatio = 0.7); TRUE rows train the model, the rest test it.
    split <- sample.split(XGBoostDataSet_Hr_Admin_8$Employee_Turnover,
                          SplitRatio = 0.7)
    training_set8 <- XGBoostDataSet_Hr_Admin_8[which(split), , drop = FALSE]
    test_set8 <- XGBoostDataSet_Hr_Admin_8[which(!split), , drop = FALSE]
    

    拟合模型:

    # Fit XGBoost to the training set. Every column except the 10th is a
    # feature; Employee_Turnover (coded 0/1) is the label.
    # objective = "binary:logistic" makes this a binary classifier whose
    # predictions are probabilities, which is what the 0.5 cutoff applied to
    # the predictions afterwards assumes.
    classifier9 <- xgboost(
      data = as.matrix(training_set8[-10]),
      label = training_set8$Employee_Turnover,
      nrounds = 10,
      objective = "binary:logistic"
    )
    

    现在我们得到以概率表示的预测值,并将其转换为类别标签:

    # Predict on the same rows the model was fitted on.
    # NOTE(review): this scores the training set, so the resulting confusion
    # matrix measures fit rather than generalisation; pass
    # as.matrix(test_set8[-10]) (and test_set8$Employee_Turnover below) for a
    # held-out evaluation.
    pred <- predict(classifier9, as.matrix(training_set8[-10]))
    # Threshold at 0.5 and map FALSE/TRUE to lvl[1]/lvl[2] to get the
    # predicted class names
    pred_label <- lvl[as.numeric(pred>0.5)+1]
    # Map the observed 0/1 labels back to their class names in the same way
    actual_label <- lvl[as.numeric(training_set8$Employee_Turnover)+1]
    

    最后得到混淆矩阵:

    # Confusion matrix: cross-tabulate predicted (rows) against observed
    # (columns) class names
    table(pred_label = pred_label, actual_label = actual_label)
              actual_label
    pred_label   N   Y
             N  41   0
             Y   0 158
    

    或者使用 caret 包:

    # Build both factors with the same explicit levels so the class order is
    # identical on the prediction and reference sides.
    predicted <- factor(pred_label, levels = lvl)
    observed <- factor(actual_label, levels = lvl)
    confusionMatrix(data = predicted, reference = observed)
        Confusion Matrix and Statistics
    
                  Reference
        Prediction   N   Y
                 N  41   0
                 Y   0 158
    

    这是实际数据(由 OP 友情提供):

    structure(list(Salary = structure(c(2L, 3L, 2L, 3L, 2L, 3L, 2L, 
    2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 
    3L, 2L, 3L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 
    3L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 
    2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 
    2L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 
    2L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 
    2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
    2L, 2L, 3L, 2L, 3L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 
    3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 3L, 3L, 
    3L, 3L, 1L, 3L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 
    3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
    3L, 3L, 4L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 
    3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    2L, 3L, 3L, 2L, 3L), .Label = c("H", "L", "M", "V"), class = "factor"), 
        Percentage_Increment = c(5, 10, 7, 7, 5, 7, 5, 5, 10, 5, 
        5, 5, 5, 5, 5, 10, 5, 5, 10, 10, 5, 5, 5, 5, 5, 5, 5, 5, 
        5, 10, 5, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 
        5, 5, 10, 7, 5, 5, 5, 5, 10, 10, 10, 5, 5, 5, 7, 10, 5, 5, 
        5, 7, 10, 5, 7, 5, 5, 10, 10, 10, 5, 5, 10, 5, 5, 5, 5, 5, 
        5, 5, 5, 10, 5, 5, 7, 7, 5, 10, 5, 5, 5, 5, 5, 7, 5, 10, 
        5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 10, 5, 5, 
        5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 10, 5, 10, 5, 5, 
        5, 7, 5, 7, 10, 7, 10, 5, 10, 10, 5, 7, 5, 5, 10, 5, 5, 5, 
        10, 5, 7, 5, 5, 5, 5, 10, 3, 5, 5, 10, 10, 5, 5, 7, 10, 5, 
        5, 5, 5, 5, 5, 5, 10, 5, 7, 5, 5, 5, 5, 5, 7, 5, 7, 5, 5, 
        5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 7, 5, 5, 5, 10, 10, 5, 5, 5, 
        10, 5, 10, 10, 10, 10, 7, 5, 7, 5, 5, 10, 1, 10, 30, 1, 0.02, 
        5, 1, 11, 1, 3, 10, 1, 11, 1, 5, 10, 2.2, 18, 4, 10, 8, 1, 
        5, 9, 5, 4, 15, 15, 4, 10, 12, 1, 9, 3, 2.5, 5, 20, 30, 10, 
        5, 100, 10, 1, 1, 8, 1, 1, 2, 1, 5, 10, 1, 50, 50, 2, 3, 
        25, 1, 1), Rude_Behavior = structure(c(3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 
        3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 
        3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 
        3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 2L, 3L, 
        3L, 1L, 2L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 1L, 
        2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 
        1L, 2L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 
        3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 
        2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 1L), .Label = c("M", "N", 
        "Y"), class = "factor"), Feeling_undervalued = structure(c(1L, 
        2L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 
        3L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 
        3L, 1L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 
        3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 
        2L, 3L, 1L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 
        3L, 2L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 2L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 
        3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 
        2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 
        3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 
        3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 2L, 1L, 
        3L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 
        2L, 2L, 1L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 2L, 3L, 3L, 1L, 
        2L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 
        2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 2L), .Label = c("M", 
        "N", "Y"), class = "factor"), Overall_satisfaction = structure(c(2L, 
        3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 
        3L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 
        3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 
        2L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 3L, 
        3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
        3L, 3L, 1L, 2L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 
        1L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
        3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
        2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 
        1L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
        3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 2L, 
        2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 3L, 2L, 1L, 1L, 
        2L, 3L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 1L), .Label = c("M", 
        "N", "Y"), class = "factor"), Poor_Conditions = structure(c(3L, 
        1L, 3L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 1L, 3L, 3L, 1L, 2L, 3L, 
        3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 
        3L, 1L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 1L, 
        3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 
        3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 
        3L, 2L, 3L, 1L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
        1L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 1L, 
        3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 
        3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 1L, 3L, 
        3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 1L, 3L, 
        2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 1L, 3L, 
        3L, 1L, 3L, 1L, 2L, 3L, 3L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 
        2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 3L, 
        1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 3L, 
        3L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L), .Label = c("M", 
        "N", "Y"), class = "factor"), Raises_frozen = structure(c(2L, 
        3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 
        3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 
        3L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 
        2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 
        2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 
        2L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 
        3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 
        2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 
        3L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
        2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        2L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 
        3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 1L, 3L, 
        1L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 3L, 2L, 1L, 
        3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 2L, 1L), .Label = c("M", 
        "N", "Y"), class = "factor"), Growth_not_available = structure(c(1L, 
        3L, 1L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 
        2L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 
        2L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 2L, 1L, 
        3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 3L, 
        1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 
        3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 
        3L, 3L, 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 1L, 3L, 2L, 3L, 
        3L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 
        3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 
        3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 2L, 
        2L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
        1L, 3L, 2L, 2L, 1L, 1L, 2L, 3L, 3L, 1L, 3L, 2L, 1L, 2L, 2L, 
        1L, 2L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 
        3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 2L, 3L), .Label = c("M", 
        "N", "Y"), class = "factor"), Workplace_Conflict = structure(c(3L, 
        3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 
        3L, 2L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 3L, 3L, 1L, 3L, 2L, 3L, 
        3L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 
        2L, 3L, 1L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 1L, 
        3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 
        2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 3L, 
        3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 
        3L, 3L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 
        3L, 1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
        3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 
        3L, 2L, 3L, 3L, 3L, 1L, 3L, 2L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 
        3L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 
        2L, 2L, 1L, 1L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
        3L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 
        1L, 3L, 3L, 3L, 1L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 2L), .Label = c("M", 
        "N", "Y"), class = "factor"), Employee_Turnover = structure(c(2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("N", 
        "Y"), class = "factor")), class = "data.frame", row.names = c(NA, 
    -284L))
    

    【讨论】:

    • 我做了什么 XGBoostDataSet_Hr_Admin_8
    • 你能把这个包括在问题里吗?在评论中我很难阅读它
    • @ibocus,问题出在标签上,它是 1 和 2,而不是 0 和 1。我将编辑我的答案,并尝试一些代码并希望能解决它
    • 嘿@ibocus,你能试试吗?
    • 非常感谢,等待您的更新代码。之间是否有任何解决方案将分类标签转换为数字标签以适应 xgboost 分类器而不会遇到我的 1s 和 2s 问题
    【解决方案3】:

    在以后提问时提供一些示例数据。

    下面的代码使用来自predict.xgb.Booster 的示例创建了一个混淆矩阵

    # Load xgboost and the agaricus train/test example data shipped with it
    library("xgboost")
    data(list = c("agaricus.train", "agaricus.test"), package = "xgboost")
    train <- agaricus.train
    test <- agaricus.test
    
    # Fit a small binary classifier on the training data
    bst <- xgboost(
      data = train$data,
      label = train$label,
      max_depth = 2,
      eta = 0.5,
      nthread = 2,
      nrounds = 5,
      objective = "binary:logistic"
    )
    
    ## Predicted class probability for each row of the new data
    pred <- predict(bst, test$data)
    
    ## Classify with an arbitrary cutoff of 0.5;
    ## rows = observed label, columns = predicted class
    cutoff <- 0.5
    table(test$label, as.numeric(pred > cutoff))
    #        0    1
    #  0   825   10
    #  1     9  767
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2021-03-02
      • 2017-02-01
      • 1970-01-01
      • 2018-04-27
      • 1970-01-01
      • 1970-01-01
      • 2015-05-19
      • 2021-08-01
      相关资源
      最近更新 更多