给定不同数据框的列表，映射和绑定不同的数据框答案

【问题标题】：Map and bind different dataframes given a list of different dataframes（给定不同数据框的列表，映射和绑定不同的数据框）
【发布时间】：2019-04-08 17:25:51
【问题描述】：

给定一个函数foo，它输出一个包含 3 个数据帧的列表：

structure(list(isSameUser = structure(1:2, .Label = c("0", "1"
), class = "factor"), n = c(212L, 72L), cum_n = c(212L, 284L), 
    user_id = c(1, 1)), .Names = c("isSameUser", "n", "cum_n", 
"user_id"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L))

structure(list(isSameUser = structure(1:2, .Label = c("0", "1"
), class = "factor"), n = c(54L, 18L), cum_n = c(54L, 72L), user_id = c(1, 
1)), .Names = c("isSameUser", "n", "cum_n", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -2L))

structure(list(error_abs_perc = 0.0694444444444444, user_id = 1), .Names = c("error_abs_perc", 
"user_id"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-1L))

我有 1000 个用户，也就是说我有 1000*3 个数据框：第一种数据框 1000 个，第二种 1000 个，第三种 1000 个。请问如何使用 map_dfr 得到 3 个按行绑定后的数据框？

我正在尝试使用 imap_dfr 和 map_dfr 来做到这一点。

试过这个： Use Dplyr::Bind_Rows and Purrr to Selectively Bind Different Dataframes In a List of Dataframes

但这并不能解决我的问题。

应 @markus 的要求，补充 5 个用户的示例列表：

list(structure(list(train_stats = structure(list(isSameUser = structure(1:2, .Label = c("0", 
"1"), class = "factor"), n = c(212L, 72L), cum_n = c(212L, 284L
), user_id = c(1L, 1L)), .Names = c("isSameUser", "n", "cum_n", 
"user_id"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L)), test_stats = structure(list(isSameUser = structure(1:2, .Label = c("0", 
"1"), class = "factor"), n = c(54L, 18L), cum_n = c(54L, 72L), 
    user_id = c(1L, 1L)), .Names = c("isSameUser", "n", "cum_n", 
"user_id"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L)), test_set_error = structure(list(error_abs_perc = 0.0694444444444444, 
    user_id = 1L), .Names = c("error_abs_perc", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L))), .Names = c("train_stats", 
"test_stats", "test_set_error")), structure(list(train_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(238L, 46L), cum_n = c(238L, 284L), user_id = c(2L, 
    2L)), .Names = c("isSameUser", "n", "cum_n", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -2L)), test_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(60L, 12L), cum_n = c(60L, 72L), user_id = c(2L, 2L)), .Names = c("isSameUser", 
"n", "cum_n", "user_id"), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L)), test_set_error = structure(list(error_abs_perc = 0.0555555555555556, 
    user_id = 2L), .Names = c("error_abs_perc", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L))), .Names = c("train_stats", 
"test_stats", "test_set_error")), structure(list(train_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(232L, 52L), cum_n = c(232L, 284L), user_id = c(3L, 
    3L)), .Names = c("isSameUser", "n", "cum_n", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -2L)), test_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(58L, 14L), cum_n = c(58L, 72L), user_id = c(3L, 3L)), .Names = c("isSameUser", 
"n", "cum_n", "user_id"), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L)), test_set_error = structure(list(error_abs_perc = 0.0138888888888889, 
    user_id = 3L), .Names = c("error_abs_perc", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L))), .Names = c("train_stats", 
"test_stats", "test_set_error")), structure(list(train_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(224L, 60L), cum_n = c(224L, 284L), user_id = c(4L, 
    4L)), .Names = c("isSameUser", "n", "cum_n", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -2L)), test_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(56L, 16L), cum_n = c(56L, 72L), user_id = c(4L, 4L)), .Names = c("isSameUser", 
"n", "cum_n", "user_id"), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L)), test_set_error = structure(list(error_abs_perc = 0.0694444444444444, 
    user_id = 4L), .Names = c("error_abs_perc", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L))), .Names = c("train_stats", 
"test_stats", "test_set_error")), structure(list(train_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(232L, 52L), cum_n = c(232L, 284L), user_id = c(5L, 
    5L)), .Names = c("isSameUser", "n", "cum_n", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -2L)), test_stats = structure(list(
    isSameUser = structure(1:2, .Label = c("0", "1"), class = "factor"), 
    n = c(58L, 14L), cum_n = c(58L, 72L), user_id = c(5L, 5L)), .Names = c("isSameUser", 
"n", "cum_n", "user_id"), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L)), test_set_error = structure(list(error_abs_perc = 0.0138888888888889, 
    user_id = 5L), .Names = c("error_abs_perc", "user_id"), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -1L))), .Names = c("train_stats", 
"test_stats", "test_set_error")))

【问题讨论】：

@markus，我试过了，但它只返回一个数据框，而我希望得到一个包含 3 个数据框的列表：对这 3000 个数据框，把每一类的 1000 个数据框按行绑定，再存入一个列表。我可以写函数手动完成，但我想应该有一种 purrr 的方式。
获取所有 train_stats 数据帧并绑定它们，获取所有 test_stats 并将它们绑定在一起，最后绑定所有 test_set_error 数据帧
我想我现在明白了。您能否再更新一下问题？示例中应至少包含两个列表元素，且每个元素都包含全部三个数据框。
@markus，你可以直接复制同一个，没关系。不过我会用 lapply(1:5, function(i) foo(data, i)) 给您提供包含 5 个用户的完整列表。

标签： r functional-programming dplyr purrr

【解决方案1】：

我们可以先用 transpose() 转置你的列表，然后用 map() 对转置后的每个元素调用 bind_rows()（bind_rows() 来自 dplyr 包）：

library(purrr)
library(dplyr)  # bind_rows() is exported by dplyr, not purrr

# `lst` is the per-user list from the question, e.g.
#   lst <- lapply(1:5, function(i) foo(data, i))
# where every element holds train_stats, test_stats and test_set_error.
#
# transpose() regroups the list by data-frame name (one sub-list per name),
# and bind_rows() then stacks each sub-list into a single tibble, so the
# result is a named list of three row-bound data frames.
map(transpose(lst), bind_rows)
#$train_stats
# A tibble: 10 x 4
#   isSameUser     n cum_n user_id
#   <fct>      <int> <int>   <int>
# 1 0            212   212       1
# 2 1             72   284       1
# 3 0            238   238       2
# 4 1             46   284       2
# 5 0            232   232       3
# 6 1             52   284       3
# 7 0            224   224       4
# 8 1             60   284       4
# 9 0            232   232       5
#10 1             52   284       5

#$test_stats
# A tibble: 10 x 4
#   isSameUser     n cum_n user_id
#   <fct>      <int> <int>   <int>
# 1 0             54    54       1
# 2 1             18    72       1
# 3 0             60    60       2
# 4 1             12    72       2
# 5 0             58    58       3
# 6 1             14    72       3
# 7 0             56    56       4
# 8 1             16    72       4
# 9 0             58    58       5
#10 1             14    72       5

#$test_set_error
# A tibble: 5 x 2
#  error_abs_perc user_id
#           <dbl>   <int>
#1         0.0694       1
#2         0.0556       2
#3         0.0139       3
#4         0.0694       4
#5         0.0139       5

【讨论】：