这可能不是最优雅的解决方案，但它似乎可以完成工作。
library(tidyverse)
# Sample data: inline CSV text is passed straight to read_csv() instead of a
# file path. Columns are ID, Timestamp, Enable, Status, Deviation, Threshold;
# there are two IDs ("a" and "b"), and rows for each ID are listed in
# chronological order.
# NOTE(review): Timestamp is written as m/d/Y H:M rather than ISO 8601, so it
# is presumably read in as a character column -- confirm before doing any
# date arithmetic or sorting on it.
d <- read_csv(
"ID, Timestamp, Enable, Status, Deviation, Threshold
a, 6/10/2015 10:10, 0, 0, 0.5, 0.65
a, 6/10/2015 10:15, 0, 0, 0.6, 0.65
a, 6/10/2015 10:20, 0, 0, 0.75, 0.65
a, 6/10/2015 10:25, 1, 0, 0.8, 0.65
a, 6/10/2015 10:30, 1, 0, 0.9, 0.65
a, 6/10/2015 10:35, 1, 0, 0.8, 0.65
a, 6/10/2015 10:40, 1, 1, 0.7, 0.65
a, 6/10/2015 10:45, 1, 1, 0.5, 0.65
a, 6/10/2015 10:50, 0, 0, 0.6, 0.65
a, 6/10/2015 10:55, 0, 0, 0.7, 0.65
a, 6/10/2015 11:00, 1, 0, 0.8, 0.65
a, 6/10/2015 11:05, 1, 0, 0.9, 0.65
a, 6/10/2015 11:10, 1, 1, 1, 0.65
a, 6/10/2015 11:15, 1, 1, 0.8, 0.65
a, 6/10/2015 11:20, 1, 1, 0.7, 0.65
b, 7/10/2015 11:20, 0, 0, 0.4, 0.5
b, 7/11/2015 11:25, 0, 0, 0.6, 0.5
b, 7/12/2015 11:30, 1, 0, 0.7, 0.5
b, 7/13/2015 11:35, 1, 1, 0.8, 0.5")
# For every ID, find each "episode" and report three timestamps:
#   start : a row with Enable == 0, Status == 0 and Deviation > Threshold
#   end_x : the first later row with Enable == 1 and Deviation > Threshold
#   end_z : the first later row that also has Status == 1
#
# Events are matched per episode: an episode opens at a start row and runs
# until the next start row of the same ID. Matching events by their ordinal
# position within each event type instead (1st start with 1st end_x, 2nd start
# with 2nd end_x, ...) mispairs them whenever an episode has more than one
# qualifying end row -- for ID "a" it pairs the 10:55 start with the 10:30
# end_x, i.e. an end that precedes its start.
#
# Rows are assumed to be in chronological order within each ID; Timestamp is
# not parsed or sorted here.
d %>%
  mutate(
    # coalesce() turns an NA comparison into FALSE so that a missing value can
    # neither open an episode nor be picked as an end event.
    is_start = coalesce(Enable == 0 & Status == 0 & Deviation > Threshold, FALSE),
    is_end_x = coalesce(Enable == 1 & Deviation > Threshold, FALSE),
    is_end_z = is_end_x & coalesce(Status == 1, FALSE)
  ) %>%
  group_by(ID) %>%
  mutate(episode = cumsum(is_start)) %>% # running episode number within each ID
  filter(episode > 0) %>% # rows before the first start belong to no episode
  group_by(ID, episode) %>%
  summarise(
    # x[cond][1] yields NA when nothing matches, so an episode without an end
    # event ends up with NA and is removed by drop_na() below.
    start = Timestamp[is_start][1],
    end_x = Timestamp[is_end_x][1],
    end_z = Timestamp[is_end_z][1],
    .groups = "drop"
  ) %>%
  select(ID, start, end_x, end_z) %>%
  drop_na() # keep only episodes that have all three timestamps
#>   ID    start           end_x           end_z
#>   <chr> <chr>           <chr>           <chr>
#> 1 a     6/10/2015 10:20 6/10/2015 10:25 6/10/2015 10:40
#> 2 a     6/10/2015 10:55 6/10/2015 11:00 6/10/2015 11:10
#> 3 b     7/11/2015 11:25 7/12/2015 11:30 7/13/2015 11:35