处理 NA 的一种做法是临时用随机生成的数字来填充它们:
# Example data: columns D and E contain missing values.
df <- data.frame(
  A = c(5, 9, 33, 6, 8),
  B = c(23, 33, 7, 18, 44),
  C = c(6, 7, 14, 23, 33),
  D = c(NA, 8, NA, 48, 7),
  E = c(NA, 12, NA, NA, 9)
)

# Replace the NAs with random placeholder values (seeded for reproducibility).
# A single normal draw is made per column that has missing values, so every
# NA within the same column receives the same placeholder; columns without
# NAs consume no random numbers.
set.seed(1)
df2 <- df
df2[] <- lapply(df, function(column) {
  gaps <- is.na(column)
  if (any(gaps)) {
    column[gaps] <- rnorm(1)
  }
  column
})
> df2
A B C D E
1 5 23 6 -0.6264538 0.1836433
2 9 33 7 8.0000000 12.0000000
3 33 7 14 -0.6264538 0.1836433
4 6 18 23 48.0000000 0.1836433
5 8 44 33 7.0000000 9.0000000
# Split the data frame into a list of one-row data frames, one per row.
# The grouping vector needs one entry per ROW: seq_along() on a data frame
# counts its columns, which only coincides with the row count when the data
# happens to be square, so seq_len(nrow()) is used instead.
df2 <- split(df2, seq_len(nrow(df2)))
# Compare every row with every other row. For each row x, build a logical
# matrix with one line per row y; each entry tells whether the corresponding
# value of x occurs anywhere in y (a membership test via %in%, not a
# position-by-position comparison).
temp <- lapply(df2, function(x) {
  do.call(rbind, lapply(df2, function(y) x %in% y))
})
# Delete self comparisons: drop line i from the i-th matrix. Iterating over
# seq_along(temp) avoids hard-coding the number of rows, and drop = FALSE
# keeps the result a matrix even when only one other row remains.
output <- lapply(seq_along(temp), function(i) temp[[i]][-i, , drop = FALSE])
结果:
[[1]]
[,1] [,2] [,3] [,4] [,5]
2 FALSE FALSE FALSE FALSE FALSE
3 FALSE FALSE FALSE TRUE TRUE
4 FALSE TRUE TRUE FALSE TRUE
5 FALSE FALSE FALSE FALSE FALSE
[[2]]
[,1] [,2] [,3] [,4] [,5]
1 FALSE FALSE FALSE FALSE FALSE
3 FALSE TRUE TRUE FALSE FALSE
4 FALSE FALSE FALSE FALSE FALSE
5 TRUE TRUE TRUE TRUE FALSE
[[3]]
[,1] [,2] [,3] [,4] [,5]
1 FALSE FALSE FALSE TRUE TRUE
2 TRUE TRUE FALSE FALSE FALSE
4 FALSE FALSE FALSE FALSE TRUE
5 TRUE TRUE FALSE FALSE FALSE
[[4]]
[,1] [,2] [,3] [,4] [,5]
1 TRUE FALSE TRUE FALSE TRUE
2 FALSE FALSE FALSE FALSE FALSE
3 FALSE FALSE FALSE FALSE TRUE
5 FALSE FALSE FALSE FALSE FALSE
[[5]]
[,1] [,2] [,3] [,4] [,5]
1 FALSE FALSE FALSE FALSE FALSE
2 TRUE FALSE TRUE TRUE TRUE
3 FALSE FALSE TRUE TRUE FALSE
4 FALSE FALSE FALSE FALSE FALSE