使用 ifelse:
library(tidyverse)
# Example data
df <- data.frame(
stringsAsFactors = FALSE,
ROW = c(2L, 7L, 8L, 9L, 10L),
ID = c("REC1000023","REC1000032",
"REC1000066","REC1000078","REC1000099"),
SEX = c("F", "M", "M", "M", "M"),
RACE = c("1.Black","6.White","4.Asian",
"6.White","5.Multiracial")
)
# Create new columns
df2 <- df %>%
mutate(Black = ifelse(RACE == "1.Black", 1, 0),
White = ifelse(RACE == "6.White", 1, 0),
Other = ifelse(RACE != "1.Black" & RACE != "6.White", 1, 0))
df2
# ROW ID SEX RACE Black White Other
#1 2 REC1000023 F 1.Black 1 0 0
#2 7 REC1000032 M 6.White 0 1 0
#3 8 REC1000066 M 4.Asian 0 0 1
#4 9 REC1000078 M 6.White 0 1 0
#5 10 REC1000099 M 5.Multiracial 0 0 1
--
不确定速度是否是您的应用程序的一个因素,但这里是使用示例数据集的基准:
ronak_func <- function(df){
df %>%
mutate(col = sub('\\d+\\.', '', RACE),
col = replace(col, !col %in% c('Black', 'White'), 'Other')) %>%
pivot_wider(names_from = col, values_from = col,
values_fn = length, values_fill = 0)
}
jared_func <- function(df){
df %>%
mutate(Black = ifelse(RACE == "1.Black", 1, 0),
White = ifelse(RACE == "6.White", 1, 0),
Other = ifelse(RACE != "1.Black" & RACE != "6.White", 1, 0))
}
karthik_func <- function(df){
df %>% mutate(Black = +str_detect(RACE,'Black'),
White = +str_detect(RACE,'White'),
Other = +(!str_detect(RACE,'Black|White')))
}
jpdugo17_func <- function(df){
map_dfc(list('1.Black', '6.White'), ~ transmute(df, '{str_sub(.x, 3, -1)}' := if_else(RACE == .x, 1, 0))) %>%
mutate(other = if_else(Black + White == 1, 0, 1)) %>% cbind(df, .)
}
GKi1_func <- function(df) {
df$Black <- +(df$RACE == "1.Black")
df$White <- +(df$RACE == "6.White")
df$Other <- 1 - (df$Black | df$White)
df
}
GKi2_func <- function(df) {
df$Black <- +grepl("Black", df$RACE, fixed = TRUE)
df$White <- +grepl("White", df$RACE, fixed = TRUE)
df$Other <- 1 - (df$Black | df$White)
df
}
jared_func_dt <- function(df){
setDT(df)
df[, Black := +(df$RACE == "1.Black")][, White := +(df$RACE == "6.White")][, Other := 1 - (df$Black | df$White)]
}
res <- microbenchmark::microbenchmark(ronak_func(df),
jared_func(df),
karthik_func(df),
jpdugo17_func(df),
GKi1_func(df),
GKi2_func(df),
jared_func_dt(df))
autoplot(res)
以及使用具有 10k 行的示例数据集的基准测试:
df2 <- data.frame(stringsAsFactors = FALSE,
ROW = 1:10000,
ID = rep(c("REC1000023","REC1000032",
"REC1000066","REC1000078",
"REC1000099"), times = 2000),
SEX = sample(c("F", "M"),
replace = TRUE,
size = 10000),
RACE = sample(c("1.Black","6.White","4.Asian",
"6.White","5.Multiracial"),
replace = TRUE,
size = 10000))
res <- microbenchmark::microbenchmark(ronak_func(df2),
jared_func(df2),
karthik_func(df2),
jpdugo17_func(df2),
GKi1_func(df2),
GKi2_func(df2),
jared_func_dt(df2))
autoplot(res)