【问题标题】:How to plot/extract the BIC values from the step function如何从阶跃函数中绘制/提取 BIC 值
【发布时间】:2020-02-01 22:07:18
【问题描述】:

我需要使用 ggplot 绘制阶跃函数中每个回归步骤的 BIC 值。我不知道如何使用 ggplot 绘制每个步骤的 BIC 值。

form_model <- formula(lm(price~sqft_living+sqft_lot+waterfront+sqft_above+sqft_basement+years_since_renovations+age_of_house+grade_int+bed_int+bath_int+floors_dummy+view_dummy+condition_dummy+basement_dummy+renovated_dummy+weekend_dummy))
mod <- lm(price~1)
n <- (nrow(House_Regr))
forwardBIC <- step(mod,form_model,direction = "forward", k=log(n) )

这是我正在使用的模型。

Start:  AIC=181611.1
price ~ 1

                          Df  Sum of Sq        RSS    AIC
+ sqft_living              1 5.5908e+16 6.9104e+16 178111
+ grade_int                1 4.2600e+16 8.2413e+16 179154
+ sqft_above               1 3.8988e+16 8.6024e+16 179407
+ view_dummy               1 1.5755e+16 1.0926e+17 180822
+ sqft_basement            1 1.1560e+16 1.1345e+17 181045
+ bed_int                  1 1.0586e+16 1.1443e+17 181096
+ floors_dummy             1 8.6756e+15 1.1634e+17 181194
+ waterfront               1 8.1097e+15 1.1690e+17 181223
+ basement_dummy           1 3.8336e+15 1.2118e+17 181435
+ bath_int                 1 2.1104e+15 1.2290e+17 181519
+ renovated_dummy          1 1.3665e+15 1.2365e+17 181555
+ years_since_renovations  1 8.6785e+14 1.2414e+17 181579
+ sqft_lot                 1 8.2901e+14 1.2418e+17 181580
+ condition_dummy          1 6.4654e+14 1.2437e+17 181589
<none>                                  1.2501e+17 181611
+ age_of_house             1 1.7600e+14 1.2484e+17 181611
+ weekend_dummy            1 9.3267e+11 1.2501e+17 181620

Step:  AIC=178111
price ~ sqft_living

                          Df  Sum of Sq        RSS    AIC
+ view_dummy               1 4.7046e+15 6.4399e+16 177702
+ age_of_house             1 4.5059e+15 6.4598e+16 177721
+ waterfront               1 4.3957e+15 6.4708e+16 177731
+ grade_int                1 3.1890e+15 6.5915e+16 177840
+ years_since_renovations  1 3.0576e+15 6.6046e+16 177852
+ bed_int                  1 1.7778e+15 6.7326e+16 177965
+ bath_int                 1 1.7527e+15 6.7351e+16 177968
+ renovated_dummy          1 7.2312e+14 6.8381e+16 178057
+ basement_dummy           1 3.1144e+14 6.8793e+16 178093
+ sqft_above               1 1.6922e+14 6.8935e+16 178105
+ sqft_basement            1 1.6922e+14 6.8935e+16 178105
+ sqft_lot                 1 1.2746e+14 6.8977e+16 178109
<none>                                  6.9104e+16 178111
+ condition_dummy          1 3.6244e+13 6.9068e+16 178117
+ floors_dummy             1 1.0259e+13 6.9094e+16 178119
+ weekend_dummy            1 5.9534e+12 6.9098e+16 178119

这是回归的一个小输出。我需要使用 ggplot 绘制每个步骤的 BIC 值。我的想法是只提取每个步骤的 BIC 值,然后使用 ggplot 绘制它们,但正如我所说,我不知道如何实现这一点,或者提取 BIC 是否对于 ggplot 是必要的。

我将如何在 ggplot 上为回归中的每个步骤绘制 BIC?

【问题讨论】:

    标签: r ggplot2 regression


    【解决方案1】:

    我不建议通常这样做,所以如果有使用真实函数的答案,那就去吧。这里调用了一个函数:extractAIC,它存储结果,然后打印这些表。您可以通过在控制台中键入 step 函数来获取它。快速扫描告诉我,在这个函数内部的变量aod 中,它存储了它为每次迭代打印的表。

    一个 hacky 方法是在此函数中创建一个列表,每次更改时使用表更新列表,然后将其添加到响应中(通常的方式)或将其分配给全局环境(不好的方式) .由于我对阶跃函数的响应类一无所知,所以我选择了不好的方法。完整的功能在这里。您可以搜索 # (!) addition 标志以查看我将其添加到的位置。

    AIC 列包含 BIC 值。当您更改 step 调用中的 k 值时,您可以看到它发生了变化

    希望这对你有用,我正在使用 step 函数中的示例

    step2 <- function (object, scope, scale = 0, direction = c("both", "backward", 
                                                      "forward"), trace = 1, keep = NULL, steps = 1000, k = 2, 
              ...) 
    {
    
      # (!) addition
      aod.all <- list()
    
      mydeviance <- function(x, ...) {
        dev <- deviance(x)
        if (!is.null(dev)) 
          dev
        else extractAIC(x, k = 0)[2L]
      }
      cut.string <- function(string) {
        if (length(string) > 1L) 
          string[-1L] <- paste0("\n", string[-1L])
        string
      }
      re.arrange <- function(keep) {
        namr <- names(k1 <- keep[[1L]])
        namc <- names(keep)
        nc <- length(keep)
        nr <- length(k1)
        array(unlist(keep, recursive = FALSE), c(nr, nc), list(namr, 
                                                               namc))
      }
      step.results <- function(models, fit, object, usingCp = FALSE) {
        change <- sapply(models, "[[", "change")
        rd <- sapply(models, "[[", "deviance")
        dd <- c(NA, abs(diff(rd)))
        rdf <- sapply(models, "[[", "df.resid")
        ddf <- c(NA, diff(rdf))
        AIC <- sapply(models, "[[", "AIC")
        heading <- c("Stepwise Model Path \nAnalysis of Deviance Table", 
                     "\nInitial Model:", deparse(formula(object)), "\nFinal Model:", 
                     deparse(formula(fit)), "\n")
        aod <- data.frame(Step = I(change), Df = ddf, Deviance = dd, 
                          `Resid. Df` = rdf, `Resid. Dev` = rd, AIC = AIC, 
                          check.names = FALSE)
        if (usingCp) {
          cn <- colnames(aod)
          cn[cn == "AIC"] <- "Cp"
          colnames(aod) <- cn
        }
        attr(aod, "heading") <- heading
        fit$anova <- aod
        fit
      }
      Terms <- terms(object)
      object$call$formula <- object$formula <- Terms
      md <- missing(direction)
      direction <- match.arg(direction)
      backward <- direction == "both" | direction == "backward"
      forward <- direction == "both" | direction == "forward"
      if (missing(scope)) {
        fdrop <- numeric()
        fadd <- attr(Terms, "factors")
        if (md) 
          forward <- FALSE
      }
      else {
        if (is.list(scope)) {
          fdrop <- if (!is.null(fdrop <- scope$lower)) 
            attr(terms(update.formula(object, fdrop)), "factors")
          else numeric()
          fadd <- if (!is.null(fadd <- scope$upper)) 
            attr(terms(update.formula(object, fadd)), "factors")
        }
        else {
          fadd <- if (!is.null(fadd <- scope)) 
            attr(terms(update.formula(object, scope)), "factors")
          fdrop <- numeric()
        }
      }
      models <- vector("list", steps)
      if (!is.null(keep)) 
        keep.list <- vector("list", steps)
      n <- nobs(object, use.fallback = TRUE)
      fit <- object
      bAIC <- extractAIC(fit, scale, k = k, ...)
      edf <- bAIC[1L]
      bAIC <- bAIC[2L]
      if (is.na(bAIC)) 
        stop("AIC is not defined for this model, so 'step' cannot proceed")
      if (bAIC == -Inf) 
        stop("AIC is -infinity for this model, so 'step' cannot proceed")
      nm <- 1
      if (trace) {
        cat("Start:  AIC=", format(round(bAIC, 2)), "\n", cut.string(deparse(formula(fit))), 
            "\n\n", sep = "")
        flush.console()
      }
      models[[nm]] <- list(deviance = mydeviance(fit), df.resid = n - 
                             edf, change = "", AIC = bAIC)
      if (!is.null(keep)) 
        keep.list[[nm]] <- keep(fit, bAIC)
      usingCp <- FALSE
      while (steps > 0) {
        steps <- steps - 1
        AIC <- bAIC
        ffac <- attr(Terms, "factors")
        scope <- factor.scope(ffac, list(add = fadd, drop = fdrop))
        aod <- NULL
        change <- NULL
        if (backward && length(scope$drop)) {
          aod <- drop1(fit, scope$drop, scale = scale, trace = trace, 
                       k = k, ...)
          rn <- row.names(aod)
          row.names(aod) <- c(rn[1L], paste("-", rn[-1L]))
          if (any(aod$Df == 0, na.rm = TRUE)) {
            zdf <- aod$Df == 0 & !is.na(aod$Df)
            change <- rev(rownames(aod)[zdf])[1L]
          }
        }
        if (is.null(change)) {
          if (forward && length(scope$add)) {
            aodf <- add1(fit, scope$add, scale = scale, trace = trace, 
                         k = k, ...)
            rn <- row.names(aodf)
            row.names(aodf) <- c(rn[1L], paste("+", rn[-1L]))
            aod <- if (is.null(aod)) 
              aodf
            else rbind(aod, aodf[-1, , drop = FALSE])
          }
          attr(aod, "heading") <- NULL
          nzdf <- if (!is.null(aod$Df)) 
            aod$Df != 0 | is.na(aod$Df)
          aod <- aod[nzdf, ]
          if (is.null(aod) || ncol(aod) == 0) 
            break
          nc <- match(c("Cp", "AIC"), names(aod))
          nc <- nc[!is.na(nc)][1L]
          o <- order(aod[, nc])
    
          # (!) addition
          aod.all <- c(aod.all, list(aod))
    
          if (trace) 
            print(aod[o, ])
          if (o[1L] == 1) 
            break
          change <- rownames(aod)[o[1L]]
        }
        usingCp <- match("Cp", names(aod), 0L) > 0L
        fit <- update(fit, paste("~ .", change), evaluate = FALSE)
        fit <- eval.parent(fit)
        nnew <- nobs(fit, use.fallback = TRUE)
        if (all(is.finite(c(n, nnew))) && nnew != n) 
          stop("number of rows in use has changed: remove missing values?")
        Terms <- terms(fit)
        bAIC <- extractAIC(fit, scale, k = k, ...)
        edf <- bAIC[1L]
        bAIC <- bAIC[2L]
        if (trace) {
          cat("\nStep:  AIC=", format(round(bAIC, 2)), "\n", 
              cut.string(deparse(formula(fit))), "\n\n", sep = "")
          flush.console()
        }
        if (bAIC >= AIC + 1e-07) 
          break
        nm <- nm + 1
        models[[nm]] <- list(deviance = mydeviance(fit), df.resid = n - 
                               edf, change = change, AIC = bAIC)
        if (!is.null(keep)) 
          keep.list[[nm]] <- keep(fit, bAIC)
      }
      if (!is.null(keep)) 
        fit$keep <- re.arrange(keep.list[seq(nm)])
    
      # (!) addition
      assign("aod.all", aod.all, envir = .GlobalEnv)
      step.results(models = models[seq(nm)], fit, object, usingCp)
    }
    
    lm1 <- lm(Fertility ~ ., data = swiss)
    slm1 <- step2(lm1)
    aod.all
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2019-11-30
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-12-16
      • 2015-10-27
      • 1970-01-01
      相关资源
      最近更新 更多