【问题标题】:Multiple groups tests via permutation(通过置换检验进行多组比较)
【发布时间】:2018-01-04 21:26:48
【问题描述】:

我有一个 df,其中包含与两个实验相关的两组值(value_1 和 value_2)。

一个实验包含两组(0 和 1),另一个实验包含三个组(0,1,2)。

test    group   Value_1    Value_2
AA      0           15.1    11.2
AA      0           12.4    8.6
AA      1           9.6     22.5
AA      1           10.2    22
BB      0           12.11   11
BB      0           14      1.2
BB      1           11      13.2
BB      1           12.3    9
BB      2           14.2    12
BB      2           15      13

df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, 
-10L))

我想对 Value_1 和 Value_2 分别做置换检验(permutation test)——按 test、按 group 分组——并涵盖以下两种情况:

  • 只有两组的情况(0 对 1),以及
  • 超过两组的情况(0 对 1、0 对 2、1 对 2)。

到目前为止我所做的(分步骤说明)是:

  1. 当组只有两个时,我只需应用 oneway.test():

    # Keep only the rows belonging to test 'AA' and store them in df_test_aa.
    df %>%
      filter(test %in% 'AA') -> df_test_aa
    
    # Test Value_1 against group within test AA; the `[1] ...` line is the
    # console output of the p-value expression above it.
    # NOTE(review): oneway.test() with its default arguments is a Welch-type
    # test of equal means, not a permutation test -- confirm this is intended.
    df_test_aa_value_1 <- oneway.test(df_test_aa$Value_1~df_test_aa$group)
    df_test_aa_value_1$p.value
    [1] 0.2011234
    
    
    # Same comparison for Value_2.
    df_test_aa_value_2 <- oneway.test(df_test_aa$Value_2~df_test_aa$group)
    df_test_aa_value_2$p.value
    [1] 0.05854026
    
  2. 当组数超过 2 时,我会对所有可能的两两组合逐一进行检验:

    • 首先 0 对 1:

      # Restrict to test 'BB' and to the pair of groups being compared (0 and 1).
      df %>% filter(test %in% 'BB' & group %in% c(0,1)) -> df_test_bb_01
      
      # p-value for Value_1, group 0 vs group 1; `[1] ...` is the printed result.
      df_test_bb_01_value_1 <- oneway.test(df_test_bb_01$Value_1~df_test_bb_01$group)
      df_test_bb_01_value_1$p.value
      [1] 0.3585415
      
      # p-value for Value_2, group 0 vs group 1.
      df_test_bb_01_value_2 <-    oneway.test(df_test_bb_01$Value_2~df_test_bb_01$group)
      df_test_bb_01_value_2$p.value
      [1] 0.4848446
      
    • 然后 0 对 2:

       # Restrict to test 'BB' and to the pair of groups being compared (0 and 2).
       df %>%
       filter(test %in% 'BB' & group %in% c(0,2)) -> df_test_bb_02
      
       # p-value for Value_1, group 0 vs group 2; `[1] ...` is the printed result.
       df_test_bb_02_value_1 <-       oneway.test(df_test_bb_02$Value_1~df_test_bb_02$group)
       df_test_bb_02_value_1$p.value
       [1] 0.3246012
      
       # p-value for Value_2, group 0 vs group 2.
       df_test_bb_02_value_2 <- oneway.test(df_test_bb_02$Value_2~df_test_bb_02$group)
       df_test_bb_02_value_2$p.value
       [1] 0.4142838
      
    • 然后 1 对 2:

       # Restrict to test 'BB' and to the pair of groups being compared (1 and 2).
       df %>%
        filter(test %in% 'BB' & group %in% c(1,2)) -> df_test_bb_12
      
        # p-value for Value_1, group 1 vs group 2; `[1] ...` is the printed result.
        df_test_bb_12_value_1 <- oneway.test(df_test_bb_12$Value_1~df_test_bb_12$group)
        df_test_bb_12_value_1$p.value
        [1] 0.08105404
      
      
        # p-value for Value_2, group 1 vs group 2.
        df_test_bb_12_value_2 <- oneway.test(df_test_bb_12$Value_2~df_test_bb_12$group)
        df_test_bb_12_value_2$p.value
        [1] 0.6245713
      

因此,我希望获得一个看起来像这样的 df:

test value  p_value_2sided  hypothesis
AA  Value_1   0.201         0,1
AA  Value_2   0.059         0,1
BB  Value_1   0.359         0,1
BB  Value_1   0.325         0,2
BB  Value_1   0.081         1,2
BB  Value_2   0.485         0,1
BB  Value_2   0.414         0,2
BB  Value_2   0.625         1,2

感谢您的提示!

【问题讨论】:

    标签: r dplyr ab-testing


    【解决方案1】:
    df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
    0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
    9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
    8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
    "group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, -10L))
    
    library(tidyverse)
    
    # Reshape to long format: one row per observation, with the name of the
    # measured variable in `value` and the measurement itself in `v`.
    # pivot_longer() replaces the superseded gather().
    df2 <- df %>%
      pivot_longer(-c(test, group), names_to = "value", values_to = "v")
    
    # p-value of oneway.test() comparing groups `x1` and `x2` of variable `val`
    # within test `t`.
    #   t, val  test label and variable name selecting one subset of `data`
    #   x1, x2  the two group codes to compare
    #   data    long-format data with columns test, group, value and v;
    #           defaults to df2, so the four-argument call keeps working
    # Vectorize() lets t, val, x1 and x2 be whole columns and returns one
    # p-value per element; `data` is deliberately not vectorised over.
    f <- Vectorize(
      function(t, val, x1, x2, data = df2) {
        pair_data <- data %>%
          filter(test == t, value == val, group %in% c(x1, x2))
        oneway.test(v ~ group, data = pair_data)$p.value
      },
      vectorize.args = c("t", "val", "x1", "x2")
    )
    
    df2 %>%
      distinct(test, value, group) %>%       # groups present per test and variable
      group_by(test, value) %>%              # for each test and variable
      nest() %>%                             # `data` holds that subset's groups
      mutate(d = map(data, ~ data.frame(t(combn(.x$group, 2))))) %>%  # all pairs
      unnest(d) %>%                          # one row per pair (columns X1, X2)
      ungroup() %>%                          # do not leak the grouping downstream
      mutate(hypothesis = paste0(X1, ",", X2),
             pval = f(test, value, X1, X2)) %>%
      # Drop the nested helper column and fix column and row order so the
      # result does not depend on the installed tidyr version.
      select(test, value, hypothesis, X1, X2, pval) %>%
      arrange(value, test, X1, X2)
    
    # # A tibble: 8 x 6
    #   test   value   hypothesis    X1    X2   pval
    #   <fctr> <chr>   <chr>      <int> <int>  <dbl>
    # 1 AA     Value_1 0,1            0     1 0.201 
    # 2 BB     Value_1 0,1            0     1 0.359 
    # 3 BB     Value_1 0,2            0     2 0.325 
    # 4 BB     Value_1 1,2            1     2 0.0811
    # 5 AA     Value_2 0,1            0     1 0.0585
    # 6 BB     Value_2 0,1            0     1 0.485 
    # 7 BB     Value_2 0,2            0     2 0.414 
    # 8 BB     Value_2 1,2            1     2 0.625
    

    如果你确实不需要 X1 和 X2,可以把它们删除。不过保留它们的话,这两列就是独立的数值变量,便于在之后的分析中用于其他操作(例如,筛选特定的组)。

    【讨论】:

      【解决方案2】:

      嗯,这不漂亮,但是……

      df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 
      2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L, 
      0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4, 
      9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2, 
      8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test", 
      "group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, 
      -10L))
      
      # Measurement columns to test: everything except the two identifier
      # columns, so additional variables (e.g. Value_3) are picked up without
      # touching the code below.
      value_cols <- setdiff(names(df), c("test", "group"))
      
      # For every test, compare each pair of groups on each measurement column
      # with oneway.test() and collect the results as one data frame with columns
      # test, value, p_value (numeric) and hypothesis ("<group a>,<group b>").
      # Rows are ordered by test, then group pair, then measurement column.
      pairwise_p_values <- do.call(rbind, unname(lapply(
        split(df, factor(df$test)),
        function(x) {
          groups <- sort(unique(x$group))
          # Nothing to compare with fewer than two groups; this also avoids
          # combn() expanding a single number n into 1:n.
          if (length(groups) < 2L) {
            return(NULL)
          }
          pairs <- combn(groups, 2L, simplify = FALSE)
          do.call(rbind, lapply(pairs, function(pair) {
            sub <- x[x$group %in% pair, ]
            data.frame(
              test = as.character(x$test[1]),
              value = value_cols,
              p_value = vapply(value_cols, function(col) {
                pair_data <- data.frame(y = sub[[col]], g = sub$group)
                oneway.test(y ~ g, data = pair_data)$p.value
              }, numeric(1), USE.NAMES = FALSE),
              hypothesis = paste(pair, collapse = ","),
              stringsAsFactors = FALSE
            )
          }))
        }
      )))
      
      # p_value is numeric, so it prints rounded rather than as a long string.
      pairwise_p_values
      
      
        test   value            p_value hypothesis
      1   AA Value_1  0.201123366107666        0,1
      2   AA Value_2 0.0585402590546805        0,1
      3   BB Value_1  0.358541470571387        0,1
      4   BB Value_2  0.484844587956832        0,1
      5   BB Value_1  0.324601180998953        0,2
      6   BB Value_2  0.414283756097153        0,2
      7   BB Value_1 0.0810540380817137        1,2
      8   BB Value_2  0.624571310834221        1,2
      

      【讨论】:

      • 它有效,我理解代码。我的问题是:如果我添加一个额外的变量(比如说 Value_3),代码就不再起作用了,因为它没有添加正确的组合......让我编辑我的问题
      • 你确定吗?它对我来说很好用,只要确保也将 h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2) 中的 each = 2 改为 each = 3
      猜你喜欢
      • 1970-01-01
      • 2021-07-24
      • 1970-01-01
      • 1970-01-01
      • 2017-09-21
      • 2012-06-10
      • 2019-05-30
      • 1970-01-01
      • 2014-07-18
      相关资源
      最近更新 更多