【发布时间】:2018-08-08 11:53:07
【问题描述】:
使用钻石数据集,我正在尝试运行回归模型,在每个切工级别内将颜色“D”与其他每种颜色进行比较(未在回归模型中指定交互作用)。
为此,我尝试创建按切工(cut)和颜色(color)分组的嵌套数据框,但希望每个组中同时包含对应切工下的参考颜色“D”。
下面的代码达不到我想要的效果,因为各个颜色分组中都不包含参考颜色“D”:
# Console transcript: nest `diamonds` by every (cut, color) combination and
# sort the result by cut, then color. Because color is a grouping key, each
# nested tibble holds a single color only, so the reference color "D" is never
# paired with another color inside one group.
library(tidyverse)
> diamonds %>%
+ group_by(cut, color) %>%
+ nest() %>% arrange(cut, color)
# A tibble: 35 x 3
cut color data
<ord> <ord> <list>
1 Fair D <tibble [163 x 8]>
2 Fair E <tibble [224 x 8]>
3 Fair F <tibble [312 x 8]>
4 Fair G <tibble [314 x 8]>
5 Fair H <tibble [303 x 8]>
6 Fair I <tibble [175 x 8]>
7 Fair J <tibble [119 x 8]>
8 Good D <tibble [662 x 8]>
9 Good E <tibble [933 x 8]>
10 Good F <tibble [909 x 8]>
# ... with 25 more rows
下面的代码可以完成这项工作,但我正在寻找一个 tidyverse 版本:
# Brute-force construction of the pairwise comparison groups: within every cut
# level, the reference color "D" is paired with each other color (E through J).
# Each subset is tagged with a label naming the pair, the subsets are stacked,
# and the stacked data is nested by that label (one row per cut/color pair).

# Subset `diamonds` to one cut level and the color pair ("D", other_color),
# tagging every row with `label` in a new `grouping_var` column.
#
# cut_level   - cut level to keep, e.g. "Very Good".
# other_color - the color compared against the reference color "D".
# label       - value stored in `grouping_var`; it becomes the nesting key, so
#               it must not carry stray whitespace.
pair_with_d <- function(cut_level, other_color, label) {
  diamonds %>%
    filter(cut == cut_level & color %in% c("D", other_color)) %>%
    mutate(grouping_var = label)
}

data_fair_de <- pair_with_d("Fair", "E", "data_fair_de")
data_fair_df <- pair_with_d("Fair", "F", "data_fair_df")
data_fair_dg <- pair_with_d("Fair", "G", "data_fair_dg")
data_fair_dh <- pair_with_d("Fair", "H", "data_fair_dh")
data_fair_di <- pair_with_d("Fair", "I", "data_fair_di")
data_fair_dj <- pair_with_d("Fair", "J", "data_fair_dj")

data_good_de <- pair_with_d("Good", "E", "data_good_de")
data_good_df <- pair_with_d("Good", "F", "data_good_df")
data_good_dg <- pair_with_d("Good", "G", "data_good_dg")
data_good_dh <- pair_with_d("Good", "H", "data_good_dh")
data_good_di <- pair_with_d("Good", "I", "data_good_di")
data_good_dj <- pair_with_d("Good", "J", "data_good_dj")

data_very_de <- pair_with_d("Very Good", "E", "data_very_de")
data_very_df <- pair_with_d("Very Good", "F", "data_very_df")
data_very_dg <- pair_with_d("Very Good", "G", "data_very_dg")
data_very_dh <- pair_with_d("Very Good", "H", "data_very_dh")
data_very_di <- pair_with_d("Very Good", "I", "data_very_di")
data_very_dj <- pair_with_d("Very Good", "J", "data_very_dj")

data_premium_de <- pair_with_d("Premium", "E", "data_premium_de")
data_premium_df <- pair_with_d("Premium", "F", "data_premium_df")
data_premium_dg <- pair_with_d("Premium", "G", "data_premium_dg")
data_premium_dh <- pair_with_d("Premium", "H", "data_premium_dh")
data_premium_di <- pair_with_d("Premium", "I", "data_premium_di")
data_premium_dj <- pair_with_d("Premium", "J", "data_premium_dj")

data_ideal_de <- pair_with_d("Ideal", "E", "data_ideal_de")
data_ideal_df <- pair_with_d("Ideal", "F", "data_ideal_df")
data_ideal_dg <- pair_with_d("Ideal", "G", "data_ideal_dg")
data_ideal_dh <- pair_with_d("Ideal", "H", "data_ideal_dh")
data_ideal_di <- pair_with_d("Ideal", "I", "data_ideal_di")
data_ideal_dj <- pair_with_d("Ideal", "J", "data_ideal_dj")

# Stack all pairwise subsets (rows with color "D" appear once per pair they
# belong to) and nest them so each row holds the data for one comparison.
bind_rows(
  data_fair_de, data_fair_df, data_fair_dg,
  data_fair_dh, data_fair_di, data_fair_dj,
  data_good_de, data_good_df, data_good_dg,
  data_good_dh, data_good_di, data_good_dj,
  data_very_de, data_very_df, data_very_dg,
  data_very_dh, data_very_di, data_very_dj,
  data_premium_de, data_premium_df, data_premium_dg,
  data_premium_dh, data_premium_di, data_premium_dj,
  data_ideal_de, data_ideal_df, data_ideal_dg,
  data_ideal_dh, data_ideal_di, data_ideal_dj
) %>%
  group_by(grouping_var) %>%
  nest() %>%
  # Drop the leftover grouping so later verbs see a plain tibble.
  ungroup()
【问题讨论】:
-
您是想在数据集中创建一个新变量吗?您可能需要使用
`expand`,把“D”与 `color` 的其他所有取值以及 `cut` 变量组合起来。 -
是的,我认为最好创建一个新变量,用它来对数据集进行分组。我会去了解一下
`expand`,谢谢。