一种方法是先将数据转换为长格式,仅保留值为 "YES" 的行,为每个 Name 创建一个序号列,再转换回宽格式,并用 "NO" 填充空值。
library(dplyr)
library(tidyr)
# Emit one output row per "YES" cell: reshape the indicator columns to long
# form, keep only the "YES" entries, then spread them back out so that each
# kept entry sits on its own row and every other indicator reads "NO".
df %>%
  pivot_longer(cols = student:Married) %>%
  filter(value == "YES") %>%
  group_by(Name) %>%
  # A per-Name counter keeps several "YES" entries of the same Name on
  # separate rows when widening instead of merging them into one row.
  mutate(row = row_number()) %>%
  # Grouping is only needed for the counter; drop it so the result is a
  # plain (ungrouped) tibble.
  ungroup() %>%
  pivot_wider(names_from = name, values_from = value, values_fill = "NO") %>%
  select(-row)
# Name Positionnumber student Married
# <chr> <int> <chr> <chr>
#1 Bob 1 YES NO
#2 Susan 2 YES NO
#3 Susan 2 NO YES
#4 Becky 4 NO YES
#5 Billy 5 YES NO
#6 Billy 5 NO YES
然而,这会丢弃两列均为 "NO" 的 Name(此处为 Mark)。下面是按 @Walker Harrison 的建议稍作修改的版本,可保留这些行。
# Emit one output row per "YES" cell, and additionally keep a single all-"NO"
# row for every Name that has no "YES" in any indicator column.
df %>%
  pivot_longer(cols = student:Married) %>%
  # Sorting fixes the row order of the result and, through first appearance,
  # the order of the widened columns.
  arrange(Name, name) %>%
  group_by(Name) %>%
  # Keep every "YES" entry. For a Name without any "YES", keep just its first
  # entry as a placeholder; its value is "NO" and values_fill supplies "NO"
  # for the remaining columns. This does not depend on which indicator column
  # sorts last or on how many indicator columns there are.
  filter(value == "YES" | (row_number() == 1L & !("YES" %in% value))) %>%
  # A per-Name counter keeps several entries of the same Name on separate
  # rows when widening.
  mutate(row = row_number()) %>%
  # Grouping is no longer needed; return a plain (ungrouped) tibble.
  ungroup() %>%
  pivot_wider(names_from = name, values_from = value, values_fill = "NO") %>%
  select(-row)
# Name Positionnumber Married student
# <chr> <int> <chr> <chr>
#1 Becky 4 YES NO
#2 Billy 5 YES NO
#3 Billy 5 NO YES
#4 Bob 1 NO YES
#5 Mark 3 NO NO
#6 Susan 2 YES NO
#7 Susan 2 NO YES
数据
# Sample input: one row per person with two "YES"/"NO" indicator columns.
df <- data.frame(
  Name = c("Bob", "Susan", "Mark", "Becky", "Billy"),
  Positionnumber = 1:5,
  student = c("YES", "YES", "NO", "NO", "YES"),
  Married = c("NO", "YES", "NO", "YES", "YES"),
  # Keep the text columns as character vectors on every R version.
  stringsAsFactors = FALSE
)