tidyverse 中的另一个选项是:先获取不重复(distinct)的行,按 species 分组,用 summarise 将 occurrence 计算为每组的行数(n()),并使用 str_c(来自 stringr —— tidyverse 的一部分;当存在 NA 元素时,它的行为与 paste 不同)把各元素折叠(collapse)成单个字符串
library(dplyr)
library(stringr)
# Drop exact duplicate rows, then produce one row per species holding
# the number of remaining rows and the comma-joined unique BIN values.
df %>%
  distinct() %>%
  group_by(species) %>%
  summarise(
    occurrence = n(),
    BIN = str_c(unique(BIN), collapse = ",")
  )
# A tibble: 7 x 3
# species occurrence BIN
# <chr> <int> <chr>
#1 Clupea_harengus 1 BOLD:AAB7944
#2 Eutrigla_gurnardus 2 BOLD:AAC0262,BOLD:AAE9187
#3 Fundulus_rubrifrons 1 BOLD:AAI7245
#4 Gadus_morhua 1 BOLD:ACF1143
#5 Sprattus_sprattus 1 BOLD:AAE9187
#6 Tilapia_guineensis 1 BOLD:AAL5979
#7 Tilapia_zillii 1 BOLD:AAB9042
如果存在 NA 元素,paste 与 str_c 的行为会有所不同(除非我们事先处理 NA)
# paste() turns the missing value into the literal text "NA" when joining.
paste(c(NA, "a", "b"), collapse = ",")
#[1] "NA,a,b"
# str_c() lets the missing value propagate, so the joined result is NA.
str_c(c(NA, "a", "b"), collapse = ",")
#[1] NA
数据
# Example input: one row per (species, BIN) record. Row names are the
# character labels "2" through "9", not the default 1..n sequence.
df <- data.frame(
  species = c(
    "Tilapia_guineensis", "Tilapia_zillii", "Fundulus_rubrifrons",
    "Eutrigla_gurnardus", "Eutrigla_gurnardus", "Sprattus_sprattus",
    "Gadus_morhua", "Clupea_harengus"
  ),
  BIN = c(
    "BOLD:AAL5979", "BOLD:AAB9042", "BOLD:AAI7245", "BOLD:AAC0262",
    "BOLD:AAE9187", "BOLD:AAE9187", "BOLD:ACF1143", "BOLD:AAB7944"
  ),
  row.names = c("2", "3", "4", "5", "6", "7", "8", "9"),
  # Keep both columns as character vectors on R versions before 4.0 too.
  stringsAsFactors = FALSE
)