据我所知,它实际上并不是这样工作的。举个例子:某个二元变量的取值为 0 或 1;取值为 0 时,SHAP 值介于 0.2 和 0.5 之间,而取值为 1 时,SHAP 值介于 1.2 和 1.5 之间。图表所展示的正是这一点,即该变量取 0 与取 1 时 SHAP 值的差异。如果只选择“第一个观察值”,它既可能是取值为 0 的观察值,也可能是取值为 1 的观察值,因此所显示的单个 SHAP 值并不能真正告诉您太多关于该变量的信息。这就是为什么 SHAP 图需要一个包含多个观察值的矩阵(也是您的方法行不通的原因)。
尽管如此,如果您愿意,可以先提取前 n 个观察值的 SHAP 值,然后用 ggplot 或 base R 自行绘制第一个观察值,例如:
library(tidyverse)
library(xgboost)
# Load the mushroom (agaricus) train/test sets bundled with xgboost.
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

# Fit a small boosted-tree binary classifier.
# NOTE: an earlier version of this call also passed `method = "hist"`.
# That is not an XGBoost parameter name (the tree-construction parameter is
# `tree_method`), so it did not select the histogram algorithm. It is dropped
# here; add `tree_method = "hist"` if that algorithm is actually wanted.
bst <- xgboost(
  agaricus.train$data, agaricus.train$label,
  nrounds = 50, eta = 0.1, max_depth = 3, subsample = 0.5,
  objective = "binary:logistic", nthread = 2, verbose = 0
)

# SHAP dependence plot for a single feature over the whole test set.
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")

# Per-feature SHAP contributions for every test observation.
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)

## Use "plot = FALSE" to return the data to "mat", instead of the rendered plot
mat <- xgb.plot.shap(
  agaricus.test$data[1:2, ], contr[1:2, ],
  model = bst, top_n = 12, n_col = 3, plot = FALSE
)

## Format the data: transpose so each row is a variable and each column an
## observation; "SHAP" holds observation 1, "second_observation" observation 2.
mat$shap_contrib %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  set_names(c("Variable", "SHAP", "second_observation")) %>%
  ## Then plot however you want: one panel per variable, showing only the
  ## first observation's SHAP value.
  ggplot(aes(y = SHAP, x = "")) +
  geom_point(shape = 3) +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank()
  ) +
  facet_wrap(facets = vars(Variable))
根据评论更新:
library(tidyverse)
library(xgboost)
# Load the mushroom (agaricus) train/test sets bundled with xgboost.
data(agaricus.train, agaricus.test, package = "xgboost")

# Fit a small boosted-tree binary classifier.
bst <- xgboost(
  agaricus.train$data,
  agaricus.train$label,
  nrounds = 50,
  eta = 0.1,
  max_depth = 3,
  subsample = .5,
  objective = "binary:logistic",
  nthread = 2,
  verbose = 0
)

# SHAP dependence plot for a single feature over the whole test set.
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")

# Exact (non-approximate) SHAP contributions and the predicted probabilities.
contr <- predict(
  bst, agaricus.test$data,
  predcontrib = TRUE, approxcontrib = FALSE
)
pred <- predict(bst, agaricus.test$data)

# With plot = FALSE the call hands back its data (feature values and SHAP
# contributions) rather than drawing anything.
mat <- xgb.plot.shap(
  agaricus.test$data[1:2, ], contr[1:2, ],
  model = bst, top_n = 12, n_col = 3, plot = FALSE
)

# Turn the first row of a matrix into a two-column data frame:
# "Variable" (the former column names) and a value column named `value_name`.
first_row_long <- function(m, value_name) {
  long <- as.data.frame(as.matrix(m[1, ]))
  long <- rownames_to_column(long)
  set_names(long, c("Variable", value_name))
}

# First observation only: SHAP value and raw feature value per variable.
SHAP <- first_row_long(mat$shap_contrib, "SHAP")
Score <- first_row_long(mat$data, "Score")

# Predicted class for the first observation, using a 0.5 probability cut-off.
Pred <- ifelse(pred[1] <= 0.5, 0, 1)

# One row per variable with its SHAP value, feature value and the prediction.
SHAP_Score <- left_join(SHAP, Score, by = "Variable")
SHAP_Score_Pred <- cbind(SHAP_Score, Pred)

# One panel per variable: SHAP value against the feature value (0 or 1),
# with a dashed reference line at SHAP = 0.
ggplot(SHAP_Score_Pred, aes(x = Score, y = SHAP)) +
  geom_hline(yintercept = 0, linetype = 2, colour = "grey75") +
  geom_point(shape = 3, size = 3, colour = "red") +
  ggtitle(label = paste("Prediction for this observation =", Pred)) +
  theme_bw(base_size = 12) +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16)
  ) +
  scale_x_continuous(breaks = c(0, 1)) +
  facet_wrap(facets = vars(Variable))