在循环中合并和输出表答案

【问题标题】：combine and output tables in loop在循环中合并和输出表
【发布时间】：2022-12-18 15:32:57
【问题描述】：

我正在使用一个循环来读取一系列文件并检查它们的统计信息。对于每个文件，我会：（1）检查每个状态各有多少条观测；（2）检查它总共有多少个 id，以及其中有多少个唯一 id；（3）检查某个特定状态下总共有多少个 id，以及其中有多少个唯一 id。

我目前只是在循环中打印出结果，但我想把它们输出到两个表中：第一个是每个状态的观测计数，第二个是 id 总数和唯一 id 数，以及特定状态下的 id 数和唯一 id 数。下面的虚拟数据（非常粗略地）模拟了我正在做的事情：

library(tidyverse)
library(dplyr)
library(purrr)


# Simulate 100 "files"; for each one, print the per-state tally followed by
# the overall and state-72 id counts.
for (x in 1:100) {
  print(x)

  # Dummy stand-in for the contents of one file.
  df <- data.frame(
    state = sample(0:72, 1000, replace = TRUE),
    id = sample(100:999, 1000, replace = TRUE)
  )

  # Number of observations per state.
  state_tally <- table(df$state)
  print(state_tally)

  # All ids: total, then distinct.
  ids <- df$id
  print(length(ids))
  print(length(unique(ids)))

  # Ids restricted to state 72: total, then distinct.
  ids_72 <- ids[df$state == 72]
  print(length(ids_72))
  print(length(unique(ids_72)))
}

我在数据框/表中想要的输出基本上是：

1. 对 1:100 中的每个 x，各个状态的计数，类似这样：

    x state01 state02 state03
1   1      43     772     455
2   2     509     759     619
3   3     269     930     313
4   4     702     983     120
5   5     455      68     735
6   6     708      12     812
7   7     221     334      25
8   8     746     155     134
9   9     150     831     468
10 10     415     867     261

2. 对 1:100 中的每个 x，id 总数和唯一 id 数，以及特定状态下的 id 数和唯一 id 数，类似这样：

    x   id uid id72 uid72
1   1 1000 395  423   150
2   2 1000 352  541   100
3   3 1000  86  180   32
4   4 1000 202  718   105
5   5 1000 839  135   135
6   6 1000  79  897    30
7   7 1000 437   91    91
8   8 1000 387  287   101
9   9 1000 102  225    85
10 10 1000 310  998   103

【问题讨论】：

标签： r

【解决方案1】：

不确定这是否正确，因为“表 2”与您的示例不符，但这种方法能解决您的问题吗？

library(tidyverse)

# Number of simulated "files"; in real use this is the number of files checked.
n_files <- 100

# Preallocate one slot per file instead of growing the lists inside the loop.
results_table_1 <- vector("list", n_files)
results_table_2 <- vector("list", n_files)

for (x in seq_len(n_files)) {
  # Dummy stand-in for the data read from file `x`.
  df <- data.frame(
    state = sample(0:72, 1000, replace = TRUE),
    id = sample(100:999, 1000, replace = TRUE)
  )

  # Table 1: one row per file, one `state_<k>` column per state seen in it.
  # count() yields exactly one row per state (sorted by state), so there is no
  # multi-row summarise() and no need for `values_fn` to de-duplicate.
  results_table_1[[x]] <- df %>%
    as_tibble() %>%
    count(state, name = "count") %>%
    mutate(rn = x) %>%
    pivot_wider(
      id_cols = rn,
      names_from = state,
      values_from = count,
      names_prefix = "state_"
    )

  # Table 2: computed directly from the columns, so a file with no state-72
  # rows still contributes a row (id72 = 0, uid72 = 0) instead of vanishing
  # behind a filter(). `%in%` keeps NA states from leaking into the subset.
  ids_72 <- df$id[df$state %in% 72]
  results_table_2[[x]] <- data.frame(
    rn = x,
    id = length(df$id),
    uid = n_distinct(df$id),
    id72 = length(ids_72),
    uid72 = n_distinct(ids_72)
  )
}

# A state that never occurs in a file has no column in that file's row, so
# bind_rows() fills it with NA; the true count for such a state is 0.
results_table_1_df <- bind_rows(results_table_1) %>%
  mutate(across(starts_with("state_"), function(col) replace_na(col, 0L)))
results_table_2_df <- bind_rows(results_table_2)

results_table_1_df
#> # A tibble: 100 × 74
#>       rn state_0 state_1 state_2 state_3 state_4 state_5 state_6 state_7 state_8
#>    <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>   <int>
#>  1     1      16       9      12      12      16      16      12      14       8
#>  2     2      10      17      18      16      16      11      18      20      17
#>  3     3      10      12       9      11      14      12      10      10      15
#>  4     4      10      12      15      17      12      11      13       5      15
#>  5     5      16      15       9      17      19      17      14      14      13
#>  6     6      19      17      11      13      14      15      16      14      13
#>  7     7      11      16      11      13      18      13       6      22      16
#>  8     8      17      12      15       9      13      16      19      14      11
#>  9     9      17      15      15       8      11      13      15      21      14
#> 10    10      18      12      17      12      15      12      13      17      17
#> # … with 90 more rows, and 64 more variables: state_9 <int>, state_10 <int>,
#> #   state_11 <int>, state_12 <int>, state_13 <int>, state_14 <int>,
#> #   state_15 <int>, state_16 <int>, state_17 <int>, state_18 <int>,
#> #   state_19 <int>, state_20 <int>, state_21 <int>, state_22 <int>,
#> #   state_23 <int>, state_24 <int>, state_25 <int>, state_26 <int>,
#> #   state_27 <int>, state_28 <int>, state_29 <int>, state_30 <int>,
#> #   state_31 <int>, state_32 <int>, state_33 <int>, state_34 <int>, …

head(results_table_2_df)
#>      rn   id uid id72 uid72
#> 1     1 1000 597    9     9
#> 2     2 1000 618   12    12
#> 3     3 1000 611   17    17
#> 4     4 1000 596   11    11
#> 5     5 1000 588   12    12
#> 6     6 1000 614   26    25

（创建于 2022-12-07，使用 reprex v2.0.2）

【讨论】：