概述
我先把错位的值分别存入两个向量，然后在 `dplyr::mutate()` 内部调用三次 `dplyr::if_else()`，按需清理各个变量。
# load necessary packages -----
library(tidyverse)
# load necessary data --------
# Parse the inline CSV literal into a tibble. Both columns are read as
# character on purpose: the data contains a row whose Rainfall and Treatment
# values are swapped, so numeric conversion is deferred until after cleaning,
# and the string-based cleaning steps below do not depend on type guessing.
# I() marks the string as literal data rather than a file path.
cloud <-
  read_csv(
    I("Rainfall, Treatment
274.7, Seeded
274.7, Seeded
Seeded, 255
242.5, Seeded
200.7, Seeded
198.6, Seeded
129.6, Seeded
119, Seeded
118.3, Seeded
115.3, Seeded
92.4, Seeded
40.6, Seeded
32.7, Seeded
31.4, Seded
17.5, Seeded"),
    col_types = cols(.default = col_character())
  )
# Rainfall entries that begin with a non-digit character, i.e. text that
# landed in the numeric column.
misplaced.text <- str_subset(cloud$Rainfall, "^\\D.*$")
# Treatment entries that begin with a digit, i.e. numbers that landed in the
# text column.
misplaced.numeric <- str_subset(cloud$Treatment, "^\\d.*$")
# update cloud so that misplaced values are swapped -----
# and clean Treatment for misspellings
#
# A row counts as swapped when its Rainfall holds one of the stray text values
# and its Treatment holds one of the stray numeric values. Each swapped row
# takes its replacement from its own partner column, so the step works for any
# number of swapped rows (including none) instead of relying on a single
# stray value being recycled across all rows.
cloud.clean <-
  cloud %>%
  mutate(
    is_swapped = Rainfall %in% misplaced.text &
      Treatment %in% misplaced.numeric,
    # Keep the repaired Rainfall text in a helper column so the original
    # Rainfall is still available for the Treatment swap on the next line.
    rainfall_fixed = if_else(is_swapped, Treatment, Rainfall),
    Treatment = if_else(is_swapped, Rainfall, Treatment),
    # Correct the misspelt treatment label.
    Treatment = if_else(Treatment %in% "Seded", "Seeded", Treatment),
    # Convert to numeric only after the stray text has been moved out.
    Rainfall = as.double(rainfall_fixed)
  ) %>%
  select(-is_swapped, -rainfall_fixed)
# view results ----
# The tibble printout further down shows rounded numbers; the stored values
# are not rounded, as extracting the first Rainfall value demonstrates.
cloud.clean[["Rainfall"]][1] # [1] 274.7
cloud.clean
# Recorded console output:
# A tibble: 15 x 2
#    Rainfall Treatment
#       <dbl> <chr>
#  1    275.  Seeded
#  2    275.  Seeded
#  3    255   Seeded
#  4    242.  Seeded
#  5    201.  Seeded
#  6    199.  Seeded
#  7    130.  Seeded
#  8    119   Seeded
#  9    118.  Seeded
# 10    115.  Seeded
# 11     92.4 Seeded
# 12     40.6 Seeded
# 13     32.7 Seeded
# 14     31.4 Seeded
# 15     17.5 Seeded
# end of script #