我不使用apply 或for-loop,而是使用dplyr 和tidyr 中的函数(因为您已经在使用这些包)。
library(dplyr)
library(tidyr)
首先我得到所有审阅者对的所有组合:
reviewer_combos <- tibble(reviewer1 = letters[1:6], reviewer2 = letters[1:6]) %>%
complete(reviewer1, reviewer2) %>%
filter(reviewer1 != reviewer2)
reviewer_combos
# A tibble: 30 x 2
# reviewer1 reviewer2
# <chr> <chr>
# 1 a b
# 2 a c
# 3 a d
# 4 a e
# 5 a f
# 6 b a
# 7 b c
# 8 b d
# 9 b e
# 10 b f
# ... with 20 more rows
然后用两个left_joins将reviwer1和reviwer2的答案加入到数据中,判断是否一致:
reviewer_combos <- reviewer_combos %>%
left_join(dta, by = c("reviewer1" = "reviewer")) %>%
left_join(dta, by = c("reviewer2" = "reviewer"), suffix = c("", "2")) %>%
mutate(agree = as.integer(outcome == outcome2))
reviewer_combos
# A tibble: 66 x 7
# reviewer1 reviewer2 ptid outcome ptid2 outcome2 agree
# <chr> <chr> <dbl> <chr> <dbl> <chr> <int>
# 1 a b 4.00 No 3.00 Yes 0
# 2 a b 4.00 No 5.00 Yes 0
# 3 a b 5.00 Yes 3.00 Yes 1
# 4 a b 5.00 Yes 5.00 Yes 1
# 5 a c 4.00 No 4.00 No 1
# 6 a c 5.00 Yes 4.00 No 0
# 7 a d 4.00 No 3.00 Yes 0
# 8 a d 5.00 Yes 3.00 Yes 1
# 9 a e 4.00 No 4.00 No 1
# 10 a e 5.00 Yes 4.00 No 0
# ... with 56 more rows
最后用group_by 和summarize 确定每个reviwer 组中的一致性百分比,并以spread 的格式显示它们:
reviewer_percentage <- reviewer_combos %>%
group_by(reviewer1, reviewer2) %>%
summarize(percentage_agree = sum(agree)/n()) %>%
spread(reviewer2, percentage_agree)
reviewer_percentage
# A tibble: 6 x 7
# Groups: reviewer1 [6]
# reviewer1 a b c d e f
# * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 a NA 0.500 0.500 0.500 0.500 0.500
# 2 b 0.500 NA 0 1.00 0 0.500
# 3 c 0.500 0 NA 0 1.00 0.500
# 4 d 0.500 1.00 0 NA 0 0.500
# 5 e 0.500 0 1.00 0 NA 0.500
# 6 f 0.500 0.500 0.500 0.500 0.500 NA
如果您需要将其作为matrix 并且上三角部分为NA,您可以这样做:
reviewer_percentage_mat <- reviewer_percentage %>%
ungroup() %>%
select(-reviewer1) %>%
as.matrix()
rownames(reviewer_percentage_mat) <- reviewer_percentage$reviewer1
reviewer_percentage_mat[upper.tri(reviewer_percentage_mat)] <- NA
reviewer_percentage_mat
# a b c d e f
# a NA NA NA NA NA NA
# b 0.5 NA NA NA NA NA
# c 0.5 0.0 NA NA NA NA
# d 0.5 1.0 0.0 NA NA NA
# e 0.5 0.0 1.0 0.0 NA NA
# f 0.5 0.5 0.5 0.5 0.5 NA
数据
dta <- structure(list(ptid = c(3, 3, 3, 4, 4, 4, 5, 5, 5),
reviewer = c("d", "b", "f", "a", "e", "c", "a", "f", "b"),
outcome = c("Yes", "Yes", "No", "No", "No", "No", "Yes", "Yes", "Yes")),
.Names = c("ptid", "reviewer", "outcome"),
row.names = c(NA, -9L),
class = "data.frame")