【发布时间】:2022-01-17 14:58:17
【问题描述】:
这是我的数据集的一个子集:
> dput(df)
structure(list(ID = c(238L, 238L, 238L, 238L, 238L, 238L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 238L), X = c(54.6775637888,
54.9684018941, 54.9684018941, 55.2592399993, 55.2592399993, 55.8409162098,
55.8409162098, 56.1317543151, 55.5500781046, 55.5500781046, 55.2592399993,
59.6218115782, 56.1317543151, 56.4225924204, 56.4225924204, 56.4225924204,
59.6218115782, 56.7134305256, 56.4225924204, 59.6218115782, 59.6218115782,
56.7134305256, 59.6218115782, 57.5859448414, 57.8767829466, 59.6218115782,
59.3309734729, 59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782,
59.6218115782, 59.6218115782, 60.2034877887, 59.6218115782, 59.6218115782,
59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782,
59.9126496835, 59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782,
59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782, 59.9126496835,
59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782, 59.6218115782,
59.6218115782, 59.6218115782, 59.6218115782), Y = c(177.411244208,
179.447110945, 180.319625261, 180.901301471, 181.773815787, 182.355491998,
182.937168208, 182.937168208, 183.809682524, 184.973034945, 184.391358735,
170.721967787, 185.26387305, 185.845549261, 185.845549261, 186.427225471,
170.721967787, 186.718063577, 186.718063577, 171.012805893, 171.012805893,
188.463092208, 171.012805893, 189.335606524, 189.626444629, 171.012805893,
190.78979705, 170.721967787, 191.662311366, 170.721967787, 192.825663787,
170.721967787, 170.721967787, 193.698178103, 170.721967787, 170.721967787,
170.721967787, 170.721967787, 170.721967787, 170.721967787, 170.721967787,
170.721967787, 170.721967787, 170.721967787, 170.721967787, 170.721967787,
170.721967787, 170.721967787, 170.721967787, 170.721967787, 170.721967787,
170.721967787, 170.721967787, 170.721967787, 170.721967787, 170.721967787,
170.721967787, 170.721967787, 170.721967787), T = c(553, 554,
555, 556, 557, 558, 559, 560, 561, 562, 562, 563, 563, 564, 565,
566, 567, 567, 568, 568, 569, 569, 570, 570, 571, 571, 572, 572,
573, 573, 574, 574, 575, 575, 576, 577, 578, 579, 580, 581, 582,
583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595,
596, 597, 598, 599, 600), compID = c("Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238",
"Day8-Series004-238", "Day8-Series004-238", "Day8-Series004-238"
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA,
-59L), groups = structure(list(compID = "Day8-Series004-238",
.rows = structure(list(1:59), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
我想做的是:
1. 删除所有重复的 T(时间)。-> 成功完成
2. 当连续 T 之间的差值大于 1 时,创建一个新的 compID。-> 成功完成
3. 这个新的 compID 应该沿用到后面连续的 T 上,直到遇到下一个由 #2 生成的 compID 为止。-> 这就是我需要帮助的地方!
这是我目前得到的结果:
ID X Y T compID T2 compID2
1 238 54.67756 177.4112 553 Day8-Series004-238 NA Day8-Series004-238
2 238 54.96840 179.4471 554 Day8-Series004-238 553 Day8-Series004-238
3 238 54.96840 180.3196 555 Day8-Series004-238 554 Day8-Series004-238
4 238 55.25924 180.9013 556 Day8-Series004-238 555 Day8-Series004-238
5 238 55.25924 181.7738 557 Day8-Series004-238 556 Day8-Series004-238
6 238 55.84092 182.3555 558 Day8-Series004-238 557 Day8-Series004-238
7 238 55.84092 182.9372 559 Day8-Series004-238 558 Day8-Series004-238
8 238 56.13175 182.9372 560 Day8-Series004-238 559 Day8-Series004-238
9 238 55.55008 183.8097 561 Day8-Series004-238 560 Day8-Series004-238
10 238 56.42259 185.8455 564 Day8-Series004-238 561 Day8-Series004-238.10
11 238 56.42259 185.8455 565 Day8-Series004-238 564 Day8-Series004-238
12 238 56.42259 186.4272 566 Day8-Series004-238 565 Day8-Series004-238
13 238 59.62181 170.7220 576 Day8-Series004-238 566 Day8-Series004-238.13
14 238 59.62181 170.7220 577 Day8-Series004-238 576 Day8-Series004-238
15 238 59.62181 170.7220 578 Day8-Series004-238 577 Day8-Series004-238
16 238 59.62181 170.7220 579 Day8-Series004-238 578 Day8-Series004-238
17 238 59.62181 170.7220 580 Day8-Series004-238 579 Day8-Series004-238
使用此代码:
# Keep only the time points that occur exactly once, then label each remaining
# row according to whether it directly follows the previous remaining row.
df2 <- df %>%
  arrange(T) %>%
  # Drop every row whose T occurs more than once (all copies are removed,
  # not just the later ones).
  filter(!duplicated(T), !duplicated(T, fromLast = TRUE)) %>%
  mutate(
    # T of the preceding row; NA for the first row of each group.
    T2 = lag(T),
    # Step of exactly 1: keep compID as is. Any other step: append the row
    # index to compID to mark the gap. Where T2 is NA the result is NA.
    compID2 = ifelse(T - T2 == 1, compID, paste(compID, 1:n(), sep = "."))
  ) %>%
  # Replace the remaining NA values with the value from the next row below.
  fill(compID2, .direction = "up")
结果应该是这样的:
ID X Y T compID T2 compID2
1 238 54.67756 177.4112 553 Day8-Series004-238 NA Day8-Series004-238
2 238 54.96840 179.4471 554 Day8-Series004-238 553 Day8-Series004-238
3 238 54.96840 180.3196 555 Day8-Series004-238 554 Day8-Series004-238
4 238 55.25924 180.9013 556 Day8-Series004-238 555 Day8-Series004-238
5 238 55.25924 181.7738 557 Day8-Series004-238 556 Day8-Series004-238
6 238 55.84092 182.3555 558 Day8-Series004-238 557 Day8-Series004-238
7 238 55.84092 182.9372 559 Day8-Series004-238 558 Day8-Series004-238
8 238 56.13175 182.9372 560 Day8-Series004-238 559 Day8-Series004-238
9 238 55.55008 183.8097 561 Day8-Series004-238 560 Day8-Series004-238
10 238 56.42259 185.8455 564 Day8-Series004-238 561 Day8-Series004-238.10
11 238 56.42259 185.8455 565 Day8-Series004-238 564 Day8-Series004-238.10
12 238 56.42259 186.4272 566 Day8-Series004-238 565 Day8-Series004-238.10
13 238 59.62181 170.7220 576 Day8-Series004-238 566 Day8-Series004-238.13
14 238 59.62181 170.7220 577 Day8-Series004-238 576 Day8-Series004-238.13
15 238 59.62181 170.7220 578 Day8-Series004-238 577 Day8-Series004-238.13
16 238 59.62181 170.7220 579 Day8-Series004-238 578 Day8-Series004-238.13
17 238 59.62181 170.7220 580 Day8-Series004-238 579 Day8-Series004-238.13
任何帮助我都将不胜感激!如果你觉得第 1 步和第 2 步有更高效的做法,也请推荐!
谢谢!
【问题讨论】:
-
嗨 @Kaye11,你解决这个问题了吗?
标签: r dplyr data-wrangling