【问题标题】:Apply function between rows, grouped by a variable, computing all possible combinations between variable in other column(在行之间应用函数,按变量分组,计算另一列中变量之间的所有可能组合)
【发布时间】:2017-02-24 17:17:38
【问题描述】:

我在 R 中有一个很大的 data.frame,其高度简化的版本如下所示(真实的 data.frame 在 “Color” 列中有 20 种颜色,在 “Number” 列中有 10 个不同的数字):

Color   Number  Y
blue    1       5
blue    2       3
blue    3       2
red     1       5
red     2       8
red     3       2
green   1       2
green   2       9
green   3       3

对于 “Color” 列中的每种颜色,我想对 “Number” 列中所有数字的两两组合应用一个函数,该函数比较它们在 “Y” 列中对应的值。以一个简单的函数为例:

if x >= y, print 1, else print 0 # where x and y represent the first and second values to be compared, respectively 

我希望得到如下的输出 data.frame:

Color   Comparison  Y
blue    1_vs_2      1
blue    1_vs_3      1
blue    2_vs_1      0
blue    2_vs_3      1
blue    3_vs_1      0
blue    3_vs_2      0
red     1_vs_2      0
red     1_vs_3      1
red     2_vs_1      1
red     2_vs_3      1
red     3_vs_1      0
red     3_vs_2      0
green   1_vs_2      0
green   1_vs_3      0
green   2_vs_1      1
green   2_vs_3      1
green   3_vs_1      1
green   3_vs_2      0

【问题讨论】:

    标签: r dataframe


    【解决方案1】:

    您考虑过 SQL 吗?您可以将数据与其自身做连接(自连接)。只要限定 Color 相同而 Number 不同,就能得到所需的每一组成对比较。这与 @Psidom 的回答在思路上相同——只不过他是用 data.table 的连接(join)来实现的。

    library(sqldf)
    # Self-join `df` on Color, keeping only pairs whose Number differs, so each
    # ordered pair (left Number, right Number) within a colour appears once.
    # The CASE expression yields 1 when the left row's Y is >= the right row's Y,
    # otherwise 0. The query has no ORDER BY clause.
    res <- sqldf("SELECT     l.Color, l.Number as l_number, r.Number as r_number,
                              case when l.Y >= r.Y then 1 else 0 end as Y
                  FROM       df as l
                  INNER JOIN df as r
                  ON         l.Color = r.Color AND
                             l.Number != r.Number
                 ")
    
    # Build the "<left>_vs_<right>" label from the two Number columns.
    res$comparison <- paste0(res$l_number,"_vs_",res$r_number)
    
    # Print the result.
    res
    
       Color l_number r_number Y comparison
    1   blue        1        2 1     1_vs_2
    2   blue        1        3 1     1_vs_3
    3   blue        2        1 0     2_vs_1
    4   blue        2        3 1     2_vs_3
    5   blue        3        1 0     3_vs_1
    6   blue        3        2 0     3_vs_2
    7    red        1        2 0     1_vs_2
    8    red        1        3 1     1_vs_3
    9    red        2        1 1     2_vs_1
    10   red        2        3 1     2_vs_3
    11   red        3        1 0     3_vs_1
    12   red        3        2 0     3_vs_2
    13 green        1        2 0     1_vs_2
    14 green        1        3 0     1_vs_3
    15 green        2        1 1     2_vs_1
    16 green        2        3 1     2_vs_3
    17 green        3        1 1     3_vs_1
    18 green        3        2 0     3_vs_2
    

    【讨论】:

      【解决方案2】:

      你可以试试这个data.table方法:

      library(data.table)
      # Convert the input (named `dt` here) to a data.table by reference.
      setDT(dt)
      # Within each Color, cross-join Number with itself to label every ordered
      # pair, and cross-join Y with itself (same row order) to compare the values.
      # Self-pairs ("1_vs_1") are then dropped. The pattern is anchored with ^ and $
      # so that labels such as "1_vs_10" or "11_vs_1" are NOT mistaken for
      # self-pairs once Number has more than one digit.
      (dt[, .(Comparison = do.call(paste, c(sep = "_vs_", CJ(Number, Number, sorted = FALSE))),
              Y = as.numeric(do.call(`>=`, CJ(Y, Y, sorted = FALSE)))),
          by = .(Color)]
         [!grepl("^(\\d+)_vs_\\1$", Comparison)])   # remove rows comparing a number with itself
      
      #    Color Comparison Y
      # 1:  blue     1_vs_2 1
      # 2:  blue     1_vs_3 1
      # 3:  blue     2_vs_1 0
      # 4:  blue     2_vs_3 1
      # 5:  blue     3_vs_1 0
      # 6:  blue     3_vs_2 0
      # 7:   red     1_vs_2 0
      # 8:   red     1_vs_3 1
      # 9:   red     2_vs_1 1
      #10:   red     2_vs_3 1
      #11:   red     3_vs_1 0
      #12:   red     3_vs_2 0
      #13: green     1_vs_2 0
      #14: green     1_vs_3 0
      #15: green     2_vs_1 1
      #16: green     2_vs_3 1
      #17: green     3_vs_1 1
      #18: green     3_vs_2 0
      

      【讨论】:

        【解决方案3】:

        使用dplyr

        # dplyr supplies %>%, left_join(), filter(), mutate() and select().
        library(dplyr)

        # Example data from the question.
        df <- data.frame(Color = c(rep("blue", 3), rep("red", 3), rep("green", 3)),
                         Number = rep(1:3, 3),
                         Y = c(5, 3, 2, 5, 8, 2, 2, 9, 3))

        # Join the table to itself on Color so every row meets every other row of
        # the same colour, drop self-pairs, then label and score each ordered pair.
        # Note: this is an intentional many-to-many join; dplyr >= 1.1.0 warns about
        # it unless relationship = "many-to-many" is passed to left_join().
        df %>%
          left_join(df, by = "Color") %>%
          filter(Number.x != Number.y) %>%
          mutate(
            Comparison = sprintf("%s_vs_%s", Number.x, Number.y),
            # Compare directly rather than testing the sign of a difference.
            Y = as.numeric(Y.x >= Y.y)
          ) %>%
          select(Color, Comparison, Y)
        
           Color Comparison Y
        1   blue     1_vs_2 1
        2   blue     1_vs_3 1
        3   blue     2_vs_1 0
        4   blue     2_vs_3 1
        5   blue     3_vs_1 0
        6   blue     3_vs_2 0
        7    red     1_vs_2 0
        8    red     1_vs_3 1
        9    red     2_vs_1 1
        10   red     2_vs_3 1
        11   red     3_vs_1 0
        12   red     3_vs_2 0
        13 green     1_vs_2 0
        14 green     1_vs_3 0
        15 green     2_vs_1 1
        16 green     2_vs_3 1
        17 green     3_vs_1 1
        18 green     3_vs_2 0
        

        【讨论】:

          【解决方案4】:
          # Base-R approach. Expects `df` with columns Color, Number and Y.

          # Obtain all unordered pairs of Numbers for each colour.
          # Resulting columns: X1 = Color, X2 = first Number, X3 = second Number.
          df2 <- data.frame(do.call(rbind, lapply(split(df, df$Color), function(x) {
            # A colour with a single row has no pairs (and combn() would misbehave).
            if (nrow(x) < 2) {
              return(NULL)
            }
            # Pair up row positions, not the Number values themselves, so combn()
            # never reinterprets a lone number n as the sequence 1:n.
            pairs <- combn(seq_len(nrow(x)), 2)
            # as.character() keeps the colour label even if Color is a factor.
            cbind(as.character(x$Color[1]), x$Number[pairs[1, ]], x$Number[pairs[2, ]])
          })))
          # Repeat the pairs in reverse order so both directions are present.
          df2 <- rbind(df2, setNames(df2[, c(1, 3, 2)], colnames(df2)))
          # Look up the Y value belonging to each side of the comparison.
          # NOTE(review): assumes each (Color, Number) pair occurs once in `df`.
          key <- paste(df$Color, df$Number)
          y_left <- df$Y[match(paste(df2$X1, df2$X2), key)]
          y_right <- df$Y[match(paste(df2$X1, df2$X3), key)]
          # 1 when the first value is >= the second, else 0 (as the question asks).
          df2$Y <- as.numeric(y_left >= y_right)
          # Sort if you want.
          df2 <- df2[order(df2$X1, df2$Y), ]
          # Create the comparison column if that is necessary.
          df2$comparison <- paste(df2$X2, df2$X3, sep = "_vs_")

          df2
          #      X1 X2 X3 Y comparison
          #10  blue  2  1 0     2_vs_1
          #11  blue  3  1 0     3_vs_1
          #12  blue  3  2 0     3_vs_2
          #1   blue  1  2 1     1_vs_2
          #2   blue  1  3 1     1_vs_3
          #3   blue  2  3 1     2_vs_3
          #4  green  1  2 0     1_vs_2
          #5  green  1  3 0     1_vs_3
          #15 green  3  2 0     3_vs_2
          #6  green  2  3 1     2_vs_3
          #13 green  2  1 1     2_vs_1
          #14 green  3  1 1     3_vs_1
          #7    red  1  2 0     1_vs_2
          #17   red  3  1 0     3_vs_1
          #18   red  3  2 0     3_vs_2
          #8    red  1  3 1     1_vs_3
          #9    red  2  3 1     2_vs_3
          #16   red  2  1 1     2_vs_1
          

          【讨论】:

            猜你喜欢
            • 1970-01-01
            • 1970-01-01
            • 2020-03-18
            • 2019-11-18
            • 1970-01-01
            • 1970-01-01
            • 1970-01-01
            • 1970-01-01
            • 2018-09-14
            相关资源
            最近更新 更多