【问题标题】:How to run multiple linear regressions with different independent variables and dependent variables adding standardized coefficients in R?如何使用不同的自变量和因变量在 R 中添加标准化系数来运行多个线性回归?
【发布时间】:2019-10-04 20:55:19
【问题描述】:

我目前正在尝试运行一个循环,对具有多个因变量 (n=1000) 的多个自变量 (n = 6) 执行线性回归。

这是一些示例数据,年龄、性别和教育程度代表我感兴趣的自变量,而 testcore_* 是我的因变量。

df = data.frame(ID = c(1001, 1002, 1003, 1004, 1005, 1006,1007, 1008, 1009,   1010, 1011),
                    age = as.numeric(c('56', '43','59','74','61','62','69','80','40','55','58')),
                    sex = as.numeric(c('0','1','0','0','1','1','0','1','0','1','0')),
                    testscore_1 = as.numeric(c('23','28','30','15','7','18','29','27','14','22','24')),
                    testscore_2 = as.numeric(c('1','3','2','5','8','2','5','6','7','8','2')),
                    testscore_3 = as.numeric(c('18','20','19','15','20','23','19','25','10','14','12')),
                    education =  as.numeric(c('5','4','3','5','2', '1','4','4','3','5','2')))

我的工作代码允许我为多个 DV 运行回归模型(我敢肯定,更有经验的 R 用户会不喜欢它,因为它缺乏效率):

y <- as.matrix(df[4:6])
#model for age
lm_results <- lm(y ~ age, data = df)

write.csv((broom::tidy(lm_results)), "lm_results_age.csv")

regression_results <-broom::tidy(lm_results)
standardized_coefficients <- lm.beta(lm_results)
age_standardize_results <- coef(standardized_coefficients)

write.csv(age_standardize_results, "lm_results_age_standardized_coefficients.csv")

然后,我将通过手动将 age 替换为 sexeducation 来重复这一切

有没有人有更优雅的方式来运行这个 - 例如,通过所有感兴趣的 IV(即年龄、性别和教育)的循环?

如果有人建议将broom::tidy(lm_results)lm.beta::lm.beta 中的标准化系数结合起来,即将标准化回归系数与主模型输出相结合,我们将不胜感激。

【问题讨论】:

    标签: r loops linear-regression lapply purrr


    【解决方案1】:

    这是对我过去必须使用的类似工作流程的改编。记住要真正惩罚自己运行大量模型。我在您的数据框中添加了几个预测器列。祝你好运!!

    解决方案

    # Creating pedictor and outcome vectors
    ivs_vec <- names(df)[c(2:6, 10)]
    dvs_vec <- names(df)[7:9]
    
    # Creating formulas and running the models
    ivs <- paste0(" ~ ", ivs_vec)
    dvs_ivs <- unlist(lapply(ivs, function(x) paste0(dvs_vec, x)))
    formulas <- lapply(dvs_ivs, formula)
    
    lm_results <- lapply(formulas, function(x) {
      lm(x, data = df)
    })
    
    # Creating / combining results
    tidy_results <- lapply(lm_results, broom::tidy)
    dv_list <- lapply(as.list(stringi::stri_extract_first_words(dvs_ivs)), rep, 2)
    tidy_results <- Map(cbind, dv_list, tidy_results)
    
    standardized_results <- lapply(lm_results, function(x) coef(lm.beta::lm.beta(x)))
    combined_results <- Map(cbind, tidy_results, standardized_results)
    
    # Cleaning up final results
    names(combined_results) <- dvs_ivs
    combined_results <- lapply(combined_results, function(x) {row.names(x) <- c(NULL); x})
    
    new_names <- c("Outcome", "Term", "Estimate", "Std. Error", "Statistic", "P-value", "Standardized Estimate")
    combined_results <- lapply(combined_results, setNames, new_names)
    

    结果

    combined_results[1:5]
    
    $`testscore_1 ~ age`
      Outcome        Term    Estimate Std. Error Statistic   P-value 
    Standardized Estimate
    1 testscore_1 (Intercept) 18.06027731 12.3493569 1.4624468 0.1776424            0.00000000
    2 testscore_1         age  0.05835152  0.2031295 0.2872627 0.7804155            0.09531823
    
    $`testscore_2 ~ age`
          Outcome        Term   Estimate Std. Error Statistic   P-value Standardized Estimate
    1 testscore_2 (Intercept) 3.63788676 4.39014570 0.8286483 0.4287311             0.0000000
    2 testscore_2         age 0.01367313 0.07221171 0.1893478 0.8540216             0.0629906
    
    $`testscore_3 ~ age`
          Outcome        Term  Estimate Std. Error Statistic   P-value Standardized Estimate
    1 testscore_3 (Intercept) 6.1215175   6.698083 0.9139208 0.3845886             0.0000000
    2 testscore_3         age 0.1943125   0.110174 1.7636870 0.1116119             0.5068026
    
    $`testscore_1 ~ sex`
          Outcome        Term Estimate Std. Error  Statistic      P-value Standardized Estimate
    1 testscore_1 (Intercept)     22.5   3.099283  7.2597435 4.766069e-05             0.0000000
    2 testscore_1         sex     -2.1   4.596980 -0.4568217 6.586248e-01            -0.1505386
    
    $`testscore_2 ~ sex`
          Outcome        Term Estimate Std. Error Statistic     P-value Standardized Estimate
    1 testscore_2 (Intercept) 3.666667   1.041129  3.521816 0.006496884             0.0000000
    2 testscore_2         sex 1.733333   1.544245  1.122447 0.290723029             0.3504247
    

    数据

    df <- data.frame(ID = c(1001, 1002, 1003, 1004, 1005, 1006,1007, 1008, 1009,   1010, 1011),
                         age = as.numeric(c('56', '43','59','74','61','62','69','80','40','55','58')),
                         sex = as.numeric(c('0','1','0','0','1','1','0','1','0','1','0')),
                         pred1 = sample(1:11, 11),
                         pred2 = sample(1:11, 11),
                         pred3 = sample(1:11, 11),
                         testscore_1 = as.numeric(c('23','28','30','15','7','18','29','27','14','22','24')),
                         testscore_2 = as.numeric(c('1','3','2','5','8','2','5','6','7','8','2')),
                         testscore_3 = as.numeric(c('18','20','19','15','20','23','19','25','10','14','12')),
                         education =  as.numeric(c('5','4','3','5','2', '1','4','4','3','5','2')))
    

    【讨论】:

    • 这太有用了!!谢谢@Andrew!
    • 这工作绝对漂亮!!!!非常感谢您的所有帮助,安德鲁!
    • 如果没有您的帮助,我们无法做到!再次感谢@Andrew。祝你有美好的一天!
    • 嗨@Andrew,再次感谢您在今年早些时候提供的所有帮助。我想在我的代码中感谢您,您希望我怎么做?
    • 你好@Andrew!我想知道您是否对我在此处发布的其他问题有建议:stackoverflow.com/questions/60668630/… 它基于您在此答案中的代码。如果您有时间,将非常感谢您的建议! :)
    【解决方案2】:

    一年后偶然发现了这一点,并记录了tidyverse 解决方案与@Andrew 相同的数据。

    library(dplyr)
    library(purrr)
    library(tidyr)
    library(stringi)
    
    # Creating pedictor and outcome vectors
    ivs_vec <- names(df)[c(2:6, 10)]
    dvs_vec <- names(df)[7:9]
    
    # Creating formulas and running the models
    ivs <- paste0(" ~ ", ivs_vec)
    dvs_ivs <- unlist(map(ivs, ~paste0(dvs_vec, .x)))
    
    models <- map(setNames(dvs_ivs, dvs_ivs),
                  ~ lm(formula = as.formula(.x),
                       data = df))
    
    basics <-
       map(models, ~ broom::tidy(.)) %>%
       map2_df(.,
               names(.),
               ~ mutate(.x, which_dependent = .y)) %>%
       select(which_dependent, everything()) %>%
       mutate(term = gsub("\\(Intercept\\)", "Intercept", term),
              which_dependent = stringi::stri_extract_first_words(which_dependent))
    
    basics$std_estimate <-
       map_dfr(models, ~ coef(lm.beta::lm.beta(.)), .id = "which_dependent") %>%
       pivot_longer(.,
                    cols = -which_dependent,
                    names_to = "term",
                    values_to = "std_estimate",
                    values_drop_na = TRUE) %>%
       pull(std_estimate)
    
    basics
    #> # A tibble: 36 x 7
    #>    which_dependent term      estimate std.error statistic   p.value std_estimate
    #>    <chr>           <chr>        <dbl>     <dbl>     <dbl>     <dbl>        <dbl>
    #>  1 testscore_1     Intercept  18.1      12.3        1.46  0.178           0     
    #>  2 testscore_1     age         0.0584    0.203      0.287 0.780           0.0953
    #>  3 testscore_2     Intercept   3.64      4.39       0.829 0.429           0     
    #>  4 testscore_2     age         0.0137    0.0722     0.189 0.854           0.0630
    #>  5 testscore_3     Intercept   6.12      6.70       0.914 0.385           0     
    #>  6 testscore_3     age         0.194     0.110      1.76  0.112           0.507 
    #>  7 testscore_1     Intercept  22.5       3.10       7.26  0.0000477       0     
    #>  8 testscore_1     sex        -2.10      4.60      -0.457 0.659          -0.151 
    #>  9 testscore_2     Intercept   3.67      1.04       3.52  0.00650         0     
    #> 10 testscore_2     sex         1.73      1.54       1.12  0.291           0.350 
    #> # … with 26 more rows
    

    【讨论】:

      猜你喜欢
      • 2020-05-20
      • 1970-01-01
      • 1970-01-01
      • 2020-03-06
      • 1970-01-01
      • 2013-11-28
      • 1970-01-01
      • 2019-04-16
      • 2021-08-21
      相关资源
      最近更新 更多