【问题标题】:Obtaining summary estimates across elements of a nested list (or at different levels) [duplicate] 跨嵌套列表(或不同级别)的元素获取汇总估计[重复]
【发布时间】:2020-10-28 04:06:03
【问题描述】:

我在获取嵌套列表元素的汇总估计时遇到了一些困难。请参阅下面的一个非常简化的示例。本质上,我想访问列表中的元素并轻松地对它们进行操作(例如,计算列表中各元素的均值等)。有时这些元素处于相同的位置,有时它们处于不同的位置。我很确定在 tidyverse(或 base R)中有更简单直接的方法,但我似乎找不到。如果您能帮我解决这个问题,我将不胜感激。谢谢!


library(reprex)
library(tidyverse)

#1) Creating a certain function that does some simulations
# Run one simulation replicate.
#
# Draws a single simulated mean age for females (normal, mean 27, sd 1) and
# then one for males (normal, mean 30, sd 1), in that order.
#
# Returns a list with one element, `sim`: a 1 x 2 numeric matrix whose
# columns are named `mean_age_female` and `mean_age_male`.
to_estimate <- function() {
  female_draw <- rnorm(n = 1, mean = 27, sd = 1)
  male_draw <- rnorm(n = 1, mean = 30, sd = 1)

  sim_matrix <- cbind(
    mean_age_female = female_draw,
    mean_age_male = male_draw
  )

  list(sim = sim_matrix)
}

# Example call. No seed has been set at this point in the script, so the
# values printed below will differ from run to run.
to_estimate()
#> $sim
#>      mean_age_female mean_age_male
#> [1,]        26.35325      31.04636


# 2) Replicate the simulation and collect one result per replicate
#    (mean, sd and percentiles are computed from `res` further below).
rep <- 4

# Seed the RNG before the first draw so the replicates are reproducible.
set.seed(123)

# seq_len() yields an empty sequence when `rep` is 0, whereas 1:rep would
# iterate over c(1, 0) and fail on res[[0]]. Replicates are still generated
# in order, so the random draws are consumed exactly as in a for loop.
res <- lapply(seq_len(rep), function(i) to_estimate())
res
#> [[1]]
#> [[1]]$sim
#>      mean_age_female mean_age_male
#> [1,]        26.43952      29.76982
#> 
#> 
#> [[2]]
#> [[2]]$sim
#>      mean_age_female mean_age_male
#> [1,]        28.55871      30.07051
#> 
#> 
#> [[3]]
#> [[3]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.12929      31.71506
#> 
#> 
#> [[4]]
#> [[4]]$sim
#>      mean_age_female mean_age_male
#> [1,]        27.46092      28.73494


# Summarizing the results

# Collect the simulated value of every replicate into one numeric vector per
# gender. The vectors are built once and reused by every summary below.
draws_female <- unlist(
  lapply(res, function(run) run$sim[, "mean_age_female"]),
  use.names = FALSE
)
draws_male <- unlist(
  lapply(res, function(run) run$sim[, "mean_age_male"]),
  use.names = FALSE
)

# Female summaries.
# NOTE: mean() only averages its first argument; further positional arguments
# are matched to `trim` and `na.rm`. Passing the replicates as separate
# arguments therefore returns the first replicate only, so the draws must be
# combined into a single vector first.
means_female <- mean(draws_female)
sd_female <- sd(draws_female)
upper_female <- quantile(draws_female, 0.975)
lower_female <- quantile(draws_female, 0.025)

res_female <- cbind(means_female, sd_female, lower_female, upper_female)
# Drop the percentile label that cbind() picks up from quantile() as a row name.
rownames(res_female) <- NULL

res_female
#>      means_female sd_female lower_female upper_female
#> [1,]     27.39711 0.8835687     26.49126     28.47637


# Male summaries (same computation on the male draws).
means_male <- mean(draws_male)
sd_male <- sd(draws_male)
upper_male <- quantile(draws_male, 0.975)
lower_male <- quantile(draws_male, 0.025)

res_male <- cbind(means_male, sd_male, lower_male, upper_male)
rownames(res_male) <- NULL


# Reshape the 1 x 8 matrix into one row per gender. Column names have the form
# "<statistic>_<gender>"; the ".value" sentinel turns the first piece into the
# output column name, so no `values_to` is needed (it is ignored when ".value"
# is used).
cbind(res_female, res_male) %>%
  data.frame() %>%
  pivot_longer(
    cols = everything(),
    names_to = c(".value", "gender"),
    names_sep = "_"
  )
#> # A tibble: 2 x 5
#>   gender means    sd lower upper
#>   <chr>  <dbl> <dbl> <dbl> <dbl>
#> 1 female  27.4 0.884  26.5  28.5
#> 2 male    30.1 1.24   28.8  31.6

#is there a function in tidyverse that can do this automatically such as
# in purrr package with map or modify?


【问题讨论】:

    标签: r list nested tidyverse


    【解决方案1】:

    你可以试试这个:

    # For female: pull the female value out of every replicate first, then
    # average across replicates (mean() of a single value per replicate would
    # just return that value).
    mean(unlist(lapply(res, function(x) x$sim[, "mean_age_female"])))
    # For male: same, using the male column
    mean(unlist(lapply(res, function(x) x$sim[, "mean_age_male"])))
    # Entire df: turn every replicate into a one-row data frame and stack them,
    # giving one row per replicate and one column per gender
    res <- lapply(res, as.data.frame)
    df <- do.call(rbind, res)
    
      sim.mean_age_female sim.mean_age_male
    1            26.43952          29.76982
    2            28.55871          30.07051
    3            27.12929          31.71506
    4            27.46092          28.73494
    
    colMeans(df)
    
    sim.mean_age_female   sim.mean_age_male 
               27.39711            30.07258 
    

    【讨论】:

    • 谢谢@Duck。
    【解决方案2】:

    我会在 tidyverse 中这样做...

    library(tidyverse)
    # Run the simulation once per RunId, reshape to one row per run and gender,
    # then summarise by gender.
    tibble(RunId = 1:4) %>% # There are 4 runs of the sim...
      # Get results for each run as a one-row tibble
      mutate(Results = map(RunId, ~ to_estimate()$sim %>% as_tibble())) %>%
      # Unnest them into the columns mean_age_female / mean_age_male
      unnest(cols = c(Results)) %>%
      # Make the frame long; stripping the "mean_age_" prefix leaves the gender
      # directly, so no throw-away columns are needed
      pivot_longer(
        cols = -RunId,
        names_to = "gender",
        names_prefix = "mean_age_",
        values_to = "Value"
      ) %>%
      group_by(gender) %>% # group by gender
      # Calculate the summary statistics
      summarize(
        means = mean(Value),
        sd = sd(Value),
        lower = quantile(Value, .025),
        upper = quantile(Value, .975)
      )
    

    ...得到如下数据框:

    【讨论】:

    • 谢谢@Jason
    • @SimRock 不客气!
    猜你喜欢
    • 1970-01-01
    • 2017-10-19
    • 1970-01-01
    • 2021-12-03
    • 1970-01-01
    • 2016-06-21
    • 2011-08-27
    • 2015-03-01
    • 2021-06-10
    相关资源
    最近更新 更多