解决方案
这是与dplyr::bind_rows() 混合的基本 R 方法:
tmp <- lapply(1:length(myList), function(i) {
tapply(setNames(myList[[i]],
sapply(strsplit(names(myList[[i]]), ":"),
function(x) paste0(sort(x), collapse = ":"))),
sapply(strsplit(names(myList[[i]]), ":"),
function(x) paste0(sort(x), collapse = ":")), sum)
})
bind_rows(tmp, .id = "listNo") |>
pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)
# A tibble: 9 x 3
listNo var count
<chr> <chr> <dbl>
1 1 x1:x2 3
2 1 x3:x4 1
3 2 x1:x2 3
4 2 x3:x4 1
5 2 x1:x1 1
6 2 x1:x6 2
7 3 x1:x2 1
8 3 x3:x4 6
9 4 x2:x5 2
微基准
出于好奇,我在现有答案上运行了microbenchmark,似乎@ThomasIsCoding 的解决方案在时间上已经击败了@AllanCameron:
microbenchmark::microbenchmark(
Allan = {
`row.names<-`(do.call(rbind, Map(function(vec, name) {
x <- names(vec)
l <- sapply(strsplit(x, ":"), function(y) {
paste0("x", sort(as.numeric(sub("\\D", "", y))), collapse = ":")
})
df <- setNames(as.data.frame(table(rep(l, vec))), c("var", "count"))
df$listNo <- name
df
}, vec = myList, name = names(myList))), NULL)
},
benson23 = {
tmp <- lapply(1:length(myList), function(i) {
tapply(setNames(myList[[i]],
sapply(strsplit(names(myList[[i]]), ":"),
function(x) paste0(sort(x), collapse = ":"))),
sapply(strsplit(names(myList[[i]]), ":"),
function(x) paste0(sort(x), collapse = ":")), sum)
})
bind_rows(tmp, .id = "listNo") |>
pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)
},
tmfmnk = {
map_dfr(myList, enframe, .id = "listNo") %>%
mutate(var = map_chr(str_split(name, ":"), ~ str_c(sort(.), collapse = ":"))) %>%
group_by(listNo, var) %>%
summarise(count = sum(value))
},
zephryl = {
tibble(count = myList, listNo = names(myList)) %>%
unnest_longer(count, indices_to = "var") %>%
mutate(
var = str_extract_all(var, "\\d+"),
var = map_chr(var, ~ str_glue("x{sort(.x)[[1]]}:x{sort(.x)[[2]]}"))
) %>%
group_by(listNo, var) %>%
summarize(count = sum(count), .groups = "drop")
},
PaulS = {
map_dfr(myList, identity, .id = "listNo") %>%
pivot_longer(cols = -listNo, values_drop_na = T) %>%
rowwise %>%
mutate(name = str_split(name, ":", simplify = T) %>% sort %>%
str_c(collapse = ":")) %>%
group_by(name, listNo) %>%
summarise(count = sum(value), .groups = "drop")
},
TIC1 = {
aggregate(
count ~ .,
transform(
cbind(
setNames(do.call(rbind, Map(stack, myList)), c("count", "var")),
listNo = rep(seq_along(myList), lengths(myList))
),
var = sapply(
strsplit(as.character(var), ":"),
function(x) paste0(sort(x), collapse = ":")
)
),
sum
)
},
TIC2 = {
aggregate(
count ~ .,
cbind(
var = unlist(sapply(
myList,
function(x) {
sapply(
strsplit(names(x), ":"),
function(v) paste0(sort(v), collapse = ":")
)
}
)),
setNames(stack(myList), c("count", "listNo"))
),
sum
)
},
Maël = {
myList %>%
imap(~ .x %>%
enframe() %>%
separate(name, into = c("c1", "c2")) %>%
graph.data.frame(., directed = F) %>%
get.data.frame() %>%
group_by(from, to) %>%
summarise(count = sum(value)) %>%
unite(c("from","to"), col = "var", sep = ":") %>%
mutate(listNo = .y)) %>%
bind_rows()
})
Unit: milliseconds
expr min lq mean median uq max neval cld
Allan 2.1327 2.25920 2.475978 2.33445 2.45270 12.3697 100 a
benson23 3.5083 3.80855 4.150929 4.03700 4.27685 13.3313 100 a
tmfmnk 5.4928 5.88520 6.324940 6.24190 6.66975 8.1777 100 ab
zephryl 10.1629 10.89110 14.813878 11.58475 12.14085 221.0931 100 c
PaulS 7.7565 8.44360 11.402325 9.10860 9.47480 124.1965 100 bc
TIC1 3.5233 3.88805 8.240207 4.06640 4.26765 202.9082 100 a c
TIC2 1.8722 2.03240 2.247993 2.13230 2.24045 10.7320 100 a
Maël 35.3066 39.52920 44.456091 40.96870 42.39480 170.8322 100 d