或避免循环的 tidyverse 解决方案:
x <- tibble(Value=c(1,1,1,0,0,1,0,0,1,1,2,2,0,0,1,0,0,1,1,1,
0,0,2,2,1,1,2,0,0,1,0,2,0,2,1)) %>%
mutate(ModValue=ifelse(Value == 0, NA, Value)) %>%
fill(ModValue, .direction="down")
runLengths <- rle(x$ModValue)
groupIndex <- unlist(lapply(1:length(runLengths$lengths),
function(x) rep(x, runLengths$lengths[x]))
)
x <- x %>% add_column(Group=groupIndex) %>% select(-ModValue)
您的输入数据与预期输出的长度不同。我花了一些时间来解决这个问题... :)
** 编辑 **
还有一个不优雅的解决方案来解释不断变化的日子(或其他超级分组......
x <- tibble(
RowNumber=1:35,
Date=lubridate::ymd(c(rep("2020-05-31", 20), rep("2020-06-01", 15))),
Value=c(1,1,1,0,0,1,0,0,1,1,2,2,0,0,1,0,0,1,1,1,0,0,2,2,1,1,2,0,0,1,0,2,0,2,1))
# Check we have a change of date mid-sequence
x %>% filter(row_number() > 15 & row_number() < 25)
x <- x %>%
mutate(ModValue=ifelse(Value == 0, NA, Value)) %>%
fill(ModValue, .direction="down")
# Inelegantly compute the groups
make_groups <- function(x) {
runs <- rle(x)
return(tibble(GroupWithinDay=unlist(
lapply(1:length(runs$lengths),
function(x) rep(x, runs$lengths[x])))))
}
y <- x %>% group_by(Date) %>% do(make_groups(.$ModValue))
x <- x %>% add_column(GroupWithinDay=y$GroupWithinDay) %>% select(-ModValue)
# Check the change of date is handled correctly
x %>% filter(row_number() > 15 & row_number() < 25)
给予
# A tibble: 9 x 4
RowNumber Date Value GroupWithinDay
<int> <date> <dbl> <int>
1 16 2020-05-31 0 3
2 17 2020-05-31 0 3
3 18 2020-05-31 1 3
4 19 2020-05-31 1 3
5 20 2020-05-31 1 3
6 21 2020-06-01 0 1
7 22 2020-06-01 0 1
8 23 2020-06-01 2 2
9 24 2020-06-01 2 2