【发布时间】:2021-06-10 00:43:40
【问题描述】:
我对 R 还很陌生,我一直在尝试使用以下代码制作 barplot:
表格
这张表是我要绘制的。
library(tidyverse)
library(parallel)
library(devtools)
library(scales)
peak_intersect_rep_elements <-
tribble(
~Sample, ~CellLine, ~Rep, ~Total_peaks, ~Alu_intersect, ~L1_intersect, ~Antibody,
"ADAR062", "HEK293xT", "rep1", 4407, 3329, 201, "p110/p150\nAb1",
"ADAR062", "HEK293xT", "rep1", 19103, 3481, 8737, "p150\nAb3",
"ADAR062", "HEK293xT", "rep1", 1782, 836, 109, "p110/p150\nAb4",
"ADAR112", "HEK293xT", "rep1", 2269, 1852, 61, "p110/p150\nAb1",
"ADAR112", "HEK293xT", "rep1", 28573, 5725, 17037, "p150\nAb3",
"ADAR112", "HEK293xT", "rep1", 5115, 4448, 213, "p110/p150\nAb4",
"ADAR112", "K562", "rep1", 1367, 770, 49, "p110/p150\nAb1",
"ADAR112", "K562", "rep1", 12195, 2889, 5323, "p150\nAb3",
"ADAR112", "K562", "rep1", 1178, 656, 58, "p110/p150\nAb4",
"ADAR004", "HEK293xT", "rep1", 4130, 3289, 136, "p110/p150\nAb1",
"ADAR004", "HEK293xT", "rep2", 3447, 2816, 135, "p110/p150\nAb1",
"ADAR004", "HEK293xT", "rep3", 4607, 3697, 176, "p110/p150\nAb1",
"ADAR004", "HEK293xT", "rep1", 9711, 8450, 373, "p110/p150\nAb4",
"ADAR004", "HEK293xT", "rep2", 7275, 6163, 294, "p110/p150\nAb4",
"ADAR004", "HEK293xT", "rep3", 6789, 5704, 256, "p110/p150\nAb4"
)
分组方式
在这里,我将这些值平均在一起并计算上表的标准偏差。
total_peak_intersect_rep_elements_mean <- peak_intersect_rep_elements %>%
group_by(Sample, CellLine, Antibody, Rep) %>%
summarise(Mean_total_peaks = mean(Total_peaks),
Mean_alu_intersect = mean(Alu_intersect),
Mean_l1_intersect = mean(L1_intersect),
SD_total_peaks = sd(Total_peaks),
SD_alu_intersect = sd(Alu_intersect),
SD_l1_intersect = sd(L1_intersect)) %>%
ungroup() %>%
replace_na(list(SD_total_peaks = 0, SD_alu_intersect = 0,
SD_l1_intersect = 0))
total_peak_intersect_rep_elements_mean
绘制条形图
在这里,我将 Rep 列分解为级别并为它们命名。
temp_plot <- total_peak_intersect_rep_elements_mean %>%
mutate(Rep = factor(Rep, levels = c("rep1", "rep2", "rep3"),
labels = c("s1 (n = 1)", "s2 (n = 1)",
"s3 (n = 1)")))
我正在尝试填写此处的 Rep 列。
temp_plot %>%
ggplot(aes(Antibody, Mean_total_peaks, fill=Rep)) +
geom_bar(stat = "identity", position = "dodge", size = 0.3, color = "black") +
geom_errorbar(aes(ymin=Mean_total_peaks - SD_total_peaks,
ymax=Mean_total_peaks + SD_total_peaks),
width=0.6, size = 0.8,
position=position_dodge(.9)) +
facet_wrap(~CellLine, scales = 'free_x') +
scale_fill_manual(values = c("s1 (n = 1)" = "#125863",
"s2 (n = 1)" = "#2BA8B3",
"s3 (n = 1)" = "#D5E9EB")) +
scale_y_continuous(trans = "log10", labels=comma_format(accuracy = 1)) +
labs(y = "ADAR1 IP peaks", x = '', fill = "Replicate") +
guides(color = F) +
theme_minimal() +
NULL
在这之后,我最终得到了一个看起来像这样的图表。
但是,如上所述,误差线没有正确对齐。谁能帮帮我?
【问题讨论】:
-
您检查过 temp_plot 数据框是否正常?
-
嗨,乔治,我的数据框看起来不错。
-
我一直运行代码,所有的 sd 都为零。也许看看为什么会这样......(编辑)你在“分组依据”中的变量似乎唯一地定义了父集中的观察结果,所以没有剩下的变化。
-
我猜你不想在 group_by 函数中使用 'sample'
-
谢谢,乔治!我的 groupby 中的“样本”似乎是罪魁祸首。它现在正在工作!