【发布时间】:2021-01-25 12:20:10
【问题描述】:
我正在运行多个 GAM 模型,需要查看和比较这些模型的摘要输出。我想要一种快速有效的方法来从模型中提取和编译汇总统计信息,但还没有找到这样做的方法。
下面提供了一个示例数据集:
example.data <- structure(list(response = c(1.47, 0.84, 1.99, 2.29, 4.14, 4.47,
2.71, 1.67, 4.12, 1.67, 2.03, 1.74, 0.98, 0.96, 0.56, 2.45, 1.31,
3.06, 2.35, 3.2, 1.16, 2.07, 0.99, 1.35, 1.02, 2.92, 1.8, 2.17,
2.56, 1.56, 2.33, 3.19, 1.53, 2.94, 3.28, 1.53, 2.8, 5.53, 1.26,
2.43, 3.5, 2.22, 3.73, 2.46, 2.16, 1.99, 3.34, 2.63, 2.51, 1.78
), predictor1 = c(17, 14.4, 99.45, 10.8, 54.25, 55.1, 40, 9,
54.25, 14.4, 14.4, 17, 14.4, 17, 10.8, 54.25, 54.25, 15.3, 55.1,
54.25, 14.4, 58, 17, 53.425, 58, 40.45, 14.4, 12.75, 91.05, 6.24,
100.25, 77.25, 43.4, 183.6, 91.05, 9.84, 100.25, 64, 10, 10,
91.05, 8.25, 100.25, 54.25, 89.4, 9.84, 10.8, 54.25, 10.8, 54.25
), predictor2 = c(165.7, 177.3, 594.2, 192.5, 426.2, 270.8, 244,
236.1, 416, 175.8, 258.6, 233.5, 115.8, 141, 153.5, 414.2, 438.9,
203, 261.4, 357.8, 148, 205.5, 137.4, 214.7, 167.8, 371.4, 179.9,
273.7, 567.2, 231.5, 355.3, 270, 319.5, 301.9, 301.9, 215.5,
256.5, 417, 231.8, 284.6, 396.3, 323, 458.4, 290, 203, 198, 350.8,
338, 323.5, 264.7), predictor3 = c(829.8, 841, 903.6, 870.3,
794, 745, 845.2, 906.5, 890.3, 874.2, 805.4, 828.8, 872, 854.7,
912.2, 790.8, 759.2, 855.1, 741.6, 961.8, 839.9, 805.1, 885.2,
887.8, 833.9, 1050.9, 787.5, 837, 731.9, 774.4, 820.8, 995.8,
916.3, 1032.1, 1014.3, 773.7, 846.4, 723.7, 764.2, 708.3, 1009.3,
1053.7, 751.7, 901.1, 848.7, 796.5, 697.1, 733.6, 725.6, 856.6
)), row.names = c(50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L,
99L, 100L), class = "data.frame")
现在,我这样做的简单而低效的方式是这样的:
library(mgcv)
mod1 = gam(response ~ s(predictor1), data=example.data)
mod2 = gam(response ~ s(predictor2), data=example.data)
mod3 = gam(response ~ s(predictor3), data=example.data)
mod.names <- c("mod1", "mod2", "mod3")
mod.predictors <- c("predictor1", "predictor2", "predictor3")
mod.rsq <- c(summary(mod1)$r.sq, summary(mod2)$r.sq, summary(mod3)$r.sq)
mod.AIC <- c(AIC(mod1), AIC(mod2), AIC(mod3))
summary.data <- data.frame(mod.names,
mod.rsq,
mod.AIC,
mod.predictors)
summary.data
然后我可以从汇总表中相应地选择模型。
我在实际数据中有一百多个潜在的预测变量,手动指定所有模型及其输出显然很费力,因此需要更自动化的替代方案。
【问题讨论】:
-
您是否一次只对一个预测变量感兴趣,就像在您的示例中一样?那将是最容易自动化的。或者你想看更多的模型?