【发布时间】:2015-03-02 08:48:44
【问题描述】:
在使用 R 进行分类树分析时,是否可以获得各个节点的 p 值?我正在使用 rpart,但找不到每个节点的 p 值。也许这只适用于回归树,而不适用于分类树。
# Sample of the data as dput() output: a data.frame with 24 rows
# (row names 7-30) and 20 columns.
# NOTE(review): the value is not assigned to a name, and it does not contain
# the esq_*_bin predictors used in the rpart() formula further down, which
# reads `alldata` from a CSV file instead.
structure(list(subj = c(702L, 702L, 702L, 702L, 702L, 702L, 702L,
702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L,
702L, 702L, 702L, 702L, 702L, 702L), visit = c(4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), run = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "B", "C", "D", "E", "xdur", "xend60", "xpre"
), class = "factor"), ho = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), hph = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), longexer = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("10min", "60min"), class = "factor"),
esq_sick = c(NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA), esq_sick2 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), ll_sick = c(NA, NA, 0L,
NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA,
0L, NA, NA, NA, NA, NA), ll_sick2 = c(NA, NA, 0L, NA, NA,
NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA,
NA, NA, NA, NA), esq_01 = c(NA, NA, 2L, NA, NA, NA, NA, NA,
NA, NA, 2L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA,
NA), esq_02 = c(NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, 2L,
NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA), esq_03 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), esq_04 = c(NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA), esq_05 = c(NA, NA, 0L, NA, NA, NA, NA,
NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA,
NA, NA), esq_06 = c(NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA,
1L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA),
esq_07 = c(NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA), esq_08 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), esq_09 = c(NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA), esq_10 = c(NA, NA, 0L, NA, NA, NA, NA,
NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA,
NA, NA)), .Names = c("subj", "visit", "run", "ho", "hph",
"longexer", "esq_sick", "esq_sick2", "ll_sick", "ll_sick2", "esq_01",
"esq_02", "esq_03", "esq_04", "esq_05", "esq_06", "esq_07", "esq_08",
"esq_09", "esq_10"), row.names = 7:30, class = "data.frame")
# Read the symptom data: comma-separated file with a header row.
alldata <- read.table("symptomology CSV2.csv", header = TRUE, sep = ",")
library(rpart)

# Classification tree (method = "class") for esq_sick2 using the twenty
# esq_*_bin predictors.
fit <- rpart(
  esq_sick2 ~ esq_01_bin + esq_02_bin + esq_03_bin + esq_04_bin +
    esq_05_bin + esq_06_bin + esq_07_bin + esq_08_bin + esq_09_bin +
    esq_10_bin + esq_11_bin + esq_12_bin + esq_13_bin + esq_14_bin +
    esq_15_bin + esq_16_bin + esq_17_bin + esq_18_bin + esq_19_bin +
    esq_20_bin,
  method = "class",
  data = alldata
)

# Draw the tree. The original call passed a bare `nspace` symbol (copied from
# the plot.rpart usage signature) that refers to no object; it is dropped here
# because, per the rpart documentation, `nspace` applies to compressed trees
# only and this plot uses compress = FALSE.
plot(
  fit,
  uniform = FALSE,
  branch = 1,
  compress = FALSE,
  margin = 0.1,
  minbranch = 0.3
)

# Label every node (all = TRUE) and show observation counts (use.n = TRUE).
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)
【问题讨论】:
-
您可以通过添加一个简单的示例并指定一个感兴趣的特定“节点”来改进这个问题,这样也许可以避免更多的关闭投票(close votes)。
-
我不确定你的意思。你想要一张树的照片吗?我对所有节点都感兴趣
-
一个例子是指能够生成感兴趣对象的数据和代码。我不是指“图片”。如果你只是想要一个关于树结构判别(分类)推断的教程,那么你应该删除这个问题并在 CrossValidated.com 上重新发布。SO 关注的是代码和数据。
-
必须尝试编辑 OP,因为我不能在这里放多行
-
为了将来参考,当 SO 上的人向您索要数据和代码时,他们通常指的是可重现的示例(reproducible example)。也就是说,一个最小示例,其中包含足以重现您的问题或演示您希望实现的目标的数据和代码,并展示您迄今为止所做的尝试。
标签: r binary-tree nodes