使用 base R 的 `split` 可以提高当前解决方案的效率:
# Baseline: for every distinct value, scan `vals` for all of its positions and
# keep the first and the last one. Each row of `a` is c(first index, last
# index); rows follow the order of unique(vals).
# vapply() pins the per-value result to an integer pair, so the result is
# always a two-column matrix instead of depending on sapply()'s simplification.
system.time({
  a <- t(vapply(unique(vals), function(uv) {
    w <- which(uv == vals)
    c(w[1], w[length(w)])
  }, integer(2)))
})
# Timing reported for the original sapply() form:
# user system elapsed
# 4.75 1.60 6.39
# Faster alternative: split the positions by value in one pass, then take the
# first and last position of each group. The rows are re-indexed by
# as.character(unique(vals)) so they line up with the row order of `a`;
# drop = FALSE keeps the result a matrix even when only one distinct value
# is present.
system.time({
  a1 <- do.call(
    rbind,
    lapply(
      split(seq_along(vals), vals),
      function(x) x[c(1, length(x))]
    )
  )[as.character(unique(vals)), , drop = FALSE]
})
# user system elapsed
# 0.09 0.00 0.09
all.equal(a, a1, check.attributes = FALSE)
#[1] TRUE
另一个选项是使用 `match`/`fmatch`,但实测它比 `split` 方案慢:
library(fastmatch)
# match-based alternative: fmatch() against `vals` gives the first position of
# each distinct value; fmatch() against rev(vals) gives its position counted
# from the end, which is converted back to a forward index for the last
# position. unique(vals) is computed once and kept inside local() so that no
# extra variable is left behind in the calling environment.
system.time({
  a2 <- local({
    uniq_vals <- unique(vals)
    cbind(
      fmatch(uniq_vals, vals),
      length(vals) - fmatch(uniq_vals, rev(vals)) + 1
    )
  })
})
# Timing reported for the original form, which evaluated unique(vals) twice:
# user system elapsed
# 0.45 0.25 0.70
all.equal(a, a2, check.attributes = FALSE)
#[1] TRUE
上述基准测试所用的数据:
# Reproducible benchmark input: 1e7 draws, with replacement, from the
# integers 1 to 100.
set.seed(24)
vals <- sample(seq_len(100), size = 1e7, replace = TRUE)