【发布时间】:2020-03-20 04:10:42
【问题描述】:
显示我想要实现的目标的图表:
你好社区。我试图根据一个特定组(过滤和分组)的平均函数来改变一个新变量。当我尝试创建新变量时,我将集合取消分组以在所有组中操作。我尝试运行 此 R 代码。 但是,mutate 函数仅适用于过滤后的组,无法找到 un-filter 函数。我在 (df01) 下输入()我的数据框样本。非常感谢您的所有 cmets 和建议。问候。 M.
R 代码:
df01 %>% #summary table of the means to be used.
filter(GFPimg == "WT") %>%
group_by(Demineralization, Cond, Temp) %>%
summarise(Mean2 = mean(Mean)) %>%
arrange(desc(Demineralization))
print()
df01 %>%
filter(GFPimg == "WT") %>%
group_by(Demineralization, Cond, Temp) %>%
mutate(mean2 = mean(Mean)) %>%
arrange(desc(Demineralization, Cond)) %>%
ungroup() %>%
group_by(Demineralization, Cond) %>%
mutate(submean = Mean - mean2) %>%
print(n=200)
数据框示例df01:
df01 <- structure(list(ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54),
GFPimg = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("HT", "MT", "WT"), class = "factor"),
Cond = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("EC", "EI"), class = "factor"),
Temp = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("37c", "RT"), class = "factor"),
Side = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("L", "R"), class = "factor"),
Mean = c(62.435, 64.537, 102.447, 92.608, 103.277, 104.711, 67.017, 61.748, 68.921, 59.962, 63.368, 60.435, 69.54, 67.886, 51.71, 50.291, 50.881, 54.865, 80.538, 84.05, 92.223, 87.337, 90.444, 90.728, 29.951, 28.574, 30.896, 30.399, 29.773, 30.715, 31.498, 30.385, 99.004, 83.644, 95.962, 83.451, 22.649, 22.5, 53.066, 51.368, 55.459, 57.203, 54.444, 58.504, 76.518, 95.81, 23.43, 24.736, 28.86, 28.347, 28.386, 29.319, 58.017, 63.064, 80.293, 89.194, 70.52, 63.989, 71.436, 59.379, 75.986, 80.22, 71.583, 76.589, 77.138, 95.998, 77.193, 71.384, 75.614, 83.061, 73.062, 71.833, 71.83, 55.783, 77.376, 64, 96.14, 99.876, 40.972, 53.465, 36.25, 47.626, 40.619, 39.697, 34.34, 36.855, 77.131, 75.35, 67.014, 58.811, 39.237, 49.357, 74.333, 79.921, 62.631, 63.119, 60.207, 65.171, 77.563, 82.078, 39.115, 45.988, 42.65, 55.806, 33.534, 41.271, 62.359, 67.092),
Demineralization = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("After", "Before"), class = "factor")), class = "data.frame", row.names = c(NA, -108L))
【问题讨论】:
-
R 不像 Excel 中单击过滤器然后取消单击它。管道运算符 (%>%) 所做的是将一个修改后的数据帧从前一个操作传递到下一个操作。当您执行子集()时,您实际上是在生成一个新的数据框,该数据框仅保留满足条件的那些行,并删除所有其他行。如果必须保留以前的行,则必须将中间结果保存在其他变量中。此外,最好使用 ifelse() 仅选择那些与您的条件匹配的值来计算 mean(),而不是在这种情况下使用 subset()。
-
@user2332849,
subset()在哪里出现问题?我没看到... -
@user2332849。非常感谢您的评论。所以我应该使用 ifelse() 而不是 filter() 或者是 subset() 和 ifelse() 的组合...?
-
Sorry subset() 像函数 filter() 一样进行过滤。
-
如果使用subset() 或filter(),所有不符合条件的记录将被永久删除。如果想保留它们并仅对另一个特定子集执行计算,则可以使用 ifelse() 应用条件。例如:mean(ifelse(GFPimg == 'WT', Mean, NA), na.rm=T) 这将用 NA 替换 GFPimg 不是 'WT' 的所有值,然后在计算平均值之前将其删除。