【发布时间】:2017-12-16 21:43:52
【问题描述】:
我正在使用 lmer 模型(来自 lmerTest)来了解大小是否与基因表达显着相关,如果是,则哪些特定基因与大小相关(还考虑了“女性”和“笼子”作为随机效应) :
lmer(Expression ~ size*genes + (1|female) + (1|cage), data = df)
在摘要输出中,我的一个基因被用作截距(因为它在字母表中最高,'ctsk')。在阅读之后,建议我选择最高(或最低)表达的基因作为我的截距来比较其他一切。在这种情况下,基因“星”是最高表达的。在重新调整我的数据并以“星”作为截距重新运行模型后,所有其他斜率现在在 summary() 输出中都很重要,尽管 anova() 输出是相同的。
我的问题是:
- 是否可以不使用我的一个基因作为截距?如果不可能,那我怎么知道我应该选择哪个基因作为截距?
- 我可以测试斜率是否不为零吗?也许这就是我在模型中不指定截距的地方(即“0+size*genes”)?
- 是否可以将截距作为所有斜率的平均值?
然后我将使用 lsmeans 来确定斜率是否彼此显着不同。
这是一些可重现的代码:
df <- structure(list(size = c(13.458, 13.916, 13.356, 13.84, 14.15,
16.4, 15.528, 13.916, 13.458, 13.285, 15.415, 14.181, 13.367,
13.356, 13.947, 14.615, 15.804, 15.528, 16.811, 14.677, 13.2,
17.57, 13.947, 14.15, 16.833, 13.2, 17.254, 16.4, 14.181, 13.367,
14.294, 13.84, 16.833, 17.083, 15.847, 13.399, 14.15, 15.47,
13.356, 14.615, 15.415, 15.596, 15.847, 16.833, 13.285, 15.47,
15.596, 14.181, 13.356, 14.294, 15.415, 15.363, 15.4, 12.851,
17.254, 13.285, 17.57, 14.7, 17.57, 13.947, 16.811, 15.4, 13.399,
14.22, 13.285, 14.344, 17.083, 15.363, 14.677, 15.945), female = structure(c(7L,
12L, 7L, 11L, 12L, 9L, 6L, 12L, 7L, 7L, 6L, 12L, 8L, 7L, 7L,
11L, 9L, 6L, 10L, 11L, 8L, 10L, 7L, 12L, 10L, 8L, 10L, 9L, 12L,
8L, 12L, 11L, 10L, 10L, 9L, 8L, 12L, 6L, 7L, 11L, 6L, 9L, 9L,
10L, 7L, 6L, 9L, 12L, 7L, 12L, 6L, 6L, 6L, 8L, 10L, 7L, 10L,
11L, 10L, 7L, 10L, 6L, 8L, 11L, 7L, 6L, 10L, 6L, 11L, 9L), .Label = c("2",
"3", "6", "10", "11", "16", "18", "24", "25", "28", "30", "31",
"116", "119", "128", "135", "150", "180", "182", "184", "191",
"194", "308", "311", "313", "315", "320", "321", "322", "324",
"325", "329", "339", "342"), class = "factor"), Expression = c(1.10620339407889,
1.06152707257767, 2.03000185674761, 1.92971750056866, 1.30833983462599,
1.02760836165184, 0.960969703469363, 1.54706275342441, 0.314774666283256,
2.63330873720495, 0.895123048920455, 0.917716470037954, 1.3178821021651,
1.57879156856332, 0.633429011784367, 1.12641940390116, 1.0117475796626,
0.687813581350802, 0.923485880847423, 2.98926377892241, 0.547685277701021,
0.967691178046748, 2.04562285257417, 1.09072264997544, 1.57682235413366,
0.967061529758701, 0.941995966023426, 0.299517719292817, 1.8654758451133,
0.651369936708288, 1, 1.04407979584122, 0.799275069735012, 1.007255409328,
0.428129727802404, 0.93927930755046, 0.987394257033815, 0.965050972503591,
2.06719308587322, 1.63846508102874, 0.997380526962644, 0.60270197593643,
2.78682867333149, 0.552922632281237, 3.06702198884562, 0.890708510580522,
1.15168812515828, 0.929205084743164, 2.27254101826041, 1, 0.958147442333527,
1.05924173014089, 0.984356852670054, 0.623630720815415, 0.796864961771971,
2.4679841984147, 1.07248904053777, 1.79630829771291, 0.929642913565982,
0.296954006040077, 2.25741254504115, 1.17188536743493, 0.849778293699644,
2.32679163466857, 0.598119006609413, 0.975660099975423, 1.01494421228949,
1.14007557533352, 2.03638316428189, 0.777347547080068), cage = structure(c(64L,
49L, 56L, 66L, 68L, 48L, 53L, 49L, 64L, 56L, 55L, 68L, 80L, 56L,
64L, 75L, 69L, 53L, 59L, 66L, 63L, 59L, 64L, 68L, 59L, 63L, 50L,
48L, 68L, 80L, 49L, 66L, 59L, 50L, 48L, 63L, 68L, 62L, 56L, 75L,
55L, 81L, 48L, 59L, 56L, 62L, 81L, 68L, 56L, 49L, 55L, 62L, 55L,
63L, 50L, 56L, 59L, 75L, 59L, 64L, 59L, 55L, 63L, 66L, 56L, 53L,
50L, 62L, 66L, 81L), .Label = c("023", "024", "041", "042", "043",
"044", "044 bis", "045", "046", "047", "049", "051", "053", "058",
"060", "061", "068", "070", "071", "111", "112", "113", "123",
"126", "128", "14", "15", "23 bis", "24", "39", "41", "42", "44",
"46 bis", "47", "49", "51", "53", "58", "60", "61", "67", "68",
"70", "75", "76", "9", "D520", "D521", "D522", "D526", "D526bis",
"D533", "D535", "D539", "D544", "D545", "D545bis", "D546", "D561",
"D561bis", "D564", "D570", "D581", "D584", "D586", "L611", "L616",
"L633", "L634", "L635", "L635bis", "L637", "L659", "L673", "L676",
"L686", "L717", "L718", "L720", "L725", "L727", "L727bis"), class = "factor"),
genes = c("igf1", "gr", "ctsk", "ets2", "ctsk", "mtor", "igf1",
"sgk1", "sgk1", "ghr1", "ghr1", "gr", "ctsk", "ets2", "timp2",
"timp2", "ets2", "rictor", "sparc", "mmp9", "gr", "sparc",
"mmp2", "ghr1", "mmp9", "sparc", "mmp2", "timp2", "star",
"sgk1", "mmp2", "gr", "mmp2", "rictor", "timp2", "mmp2",
"mmp2", "mmp2", "mmp2", "rictor", "mtor", "ghr1", "star",
"igf1", "mmp9", "igf1", "igf2", "rictor", "rictor", "mmp9",
"ets2", "ctsk", "mtor", "ghr1", "mtor", "ets2", "ets2", "igf2",
"igf1", "sgk1", "sgk1", "ghr1", "sgk1", "igf2", "star", "mtor",
"igf2", "ghr1", "mmp2", "rictor")), .Names = c("size", "female",
"Expression", "cage", "genes"), row.names = c(1684L, 2674L, 10350L,
11338L, 10379L, 4586L, 1679L, 3637L, 3610L, 5537L, 5530L, 2676L,
10355L, 11313L, 8422L, 8450L, 11322L, 6494L, 9406L, 13262L, 2653L,
9407L, 12274L, 5564L, 13256L, 9394L, 12294L, 8438L, 750L, 3614L,
12303L, 2671L, 12293L, 6513L, 8437L, 12284L, 12305L, 12267L,
12276L, 6524L, 4567L, 5545L, 733L, 1700L, 13241L, 1674L, 7471L,
6528L, 6498L, 13266L, 11308L, 10347L, 4566L, 5541L, 4590L, 11315L,
11333L, 7482L, 1703L, 3607L, 3628L, 5529L, 3617L, 7483L, 722L,
4565L, 7476L, 5532L, 12299L, 6510L), class = "data.frame")
genes <- as.factor(df$genes)
library(lmerTest)
fit1 <- lmer(Expression ~ size * genes +(1|female) + (1|cage), data = df)
anova(fit1)
summary(fit1) # uses the gene 'ctsk' for intercept, so re-level to see what happens if I re-order based on highest value (sgk1):
df$genes <- relevel(genes, "star")
# re-fit the model with 'star' as the intercept:
fit1 <- lmer(Expression ~ size * genes +(1|female) + (1|cage), data = df)
anova(fit1) # no difference here
summary(fit1) # lots of difference
我的样本数据很长,因为否则模型将无法运行 - 希望这没问题!
【问题讨论】:
标签: lme4 mixed-models intercept coefficients