我们可以使用dplyr按'v1'分组,然后筛选出'v3'的极差(最大值与最小值之差)大于或等于6的组
library(dplyr)
# Keep every row whose v1 group has a v3 spread (max minus min) of at least 6.
df1 %>%
  group_by(v1) %>%
  filter(max(v3) - min(v3) >= 6)
# A tibble: 7 x 3
# Groups: v1 [3]
# v1 v2 v3
# <chr> <int> <int>
#1 a 2 13
#2 c 2 1
#3 e 1 2
#4 a 2 4
#5 a 8 1
#6 e 1 9
#7 c 2 8
或者我们可以先按'v3'列排序(arrange),然后根据每组最后一个值(last)与第一个值(first)之差进行筛选(filter)
# Sort by v3 within v1 so that, per group, the first row holds the smallest
# v3 and the last row the largest; keep groups whose difference is >= 6.
df1 %>%
  arrange(v1, v3) %>%
  group_by(v1) %>%
  filter(v3[n()] - v3[1L] >= 6)
或者data.table
library(data.table)
# Convert df1 to a data.table by reference, then return each v1 group's rows
# (.SD) only when that group's v3 spread is at least 6.
setDT(df1)
df1[, .SD[max(v3) - min(v3) >= 6], by = v1]
或者另一个选项是.I
# Collect the row indices (.I) of every v1 group whose v3 spread is at
# least 6, then subset df1 by those indices.
setDT(df1)
df1[df1[, .I[max(v3) - min(v3) >= 6], by = v1][["V1"]]]
或者另一个选项是使用base R中的ave
# ave() broadcasts each v1 group's v3 spread back onto every row of that
# group, giving a row-wise logical mask of the groups to keep.
i1 <- ave(df1$v3, df1$v1, FUN = function(x) max(x) - min(x)) >= 6
df1[i1, ]
或者使用subset和tapply
# tapply() yields one v3 spread per v1 level; keep the rows whose v1 is
# among the levels with a spread of at least 6.
subset(
  df1,
  v1 %in% names(which(tapply(v3, v1, function(x) max(x) - min(x)) >= 6))
)
数据
# Example data: 11 rows, a character grouping column (v1) and two integer
# columns (v2, v3).
df1 <- data.frame(
  v1 = c("a", "b", "c", "d", "e", "a", "a", "e", "b", "c", "d"),
  v2 = c(2L, 5L, 2L, 2L, 1L, 2L, 8L, 1L, 0L, 2L, 1L),
  v3 = c(13L, 3L, 1L, 1L, 2L, 4L, 1L, 9L, 1L, 8L, 5L),
  stringsAsFactors = FALSE
)