【问题标题】:Get all possible combinations of two columns by a variable id通过变量 id 获取两列的所有可能组合
【发布时间】:2018-03-02 19:18:21
【问题描述】:

在给定“组 ID”值的情况下,我需要获取 2 个值的所有排列

我有这个:

group id    value
   1         a
   1         b
   1         c
   2         b
   2         c
   2         d

并且想要这个:

group id    value1   value2
   1         a        b
   1         a        c
   1         b        a
   1         b        c  
   1         c        a
   1         c        b
   2         b        c
   2         b        d
   2         c        b
   2         c        d
   2         d        b
   2         d        c

【问题讨论】:

  • 如果您不介意结果是因子(factor),可以使用 Base R 方法:data = data.frame(group.id = c(1,1,1,2,2,2), value = c('a', 'b', 'c', 'b', 'c', 'd')); combos = tapply(data$value, data$group.id, combn, 2)

标签: r combinations


【解决方案1】:

下面是一种快速且简单的方法:

library(gtools)
library(data.table)

# Example data: one row per (group id, value) pair.
indices <- c(1, 1, 1, 2, 2, 2)
variables <- c("a", "b", "c", "b", "c", "d")
dt <- data.table(indices, variables)

# Return every ordered pair of distinct values found in `df$variables`.
#
# df: a data.frame or data.table with a `variables` column (here the
#     per-group subset .SD supplied by data.table).
# Returns a data.table with columns V1 and V2, one row per ordered pair.
# A group with fewer than two distinct values has no pairs and yields
# zero rows instead of making permutations() stop.
get_permutations <- function(df) {
  # Count and values come from the same vector so they can never disagree.
  values <- unique(df$variables)
  if (length(values) < 2L) {
    # values[0L] keeps the column type consistent with non-empty groups.
    return(data.table(V1 = values[0L], V2 = values[0L]))
  }
  as.data.table(permutations(length(values), 2, values))
}

# Run the helper once per group; the grouping column is added back by `by`.
ds <- dt[, get_permutations(.SD), by = indices]

    indices V1 V2
 1:       1  a  b
 2:       1  a  c
 3:       1  b  a
 4:       1  b  c
 5:       1  c  a
 6:       1  c  b
 7:       2  b  c
 8:       2  b  d
 9:       2  c  b
10:       2  c  d
11:       2  d  b
12:       2  d  c

【讨论】:

    【解决方案2】:

    一种可行的解决方案是使用 data.table 的 split 配合 expand.grid。

    步骤如下:

      library(data.table)
    
      setDT(df)

      # One data frame per group: the group's id next to the full cross
      # product of its values with themselves (Var1 x Var2).
      ll <- lapply(split(df, by = "group_id"), function(x) {
        cbind(
          group_id = unique(x$group_id),
          expand.grid(x$value, x$value, stringsAsFactors = FALSE)
        )
      })

      # Stack the per-group frames and drop the pairs whose two values are
      # equal. Base subsetting is used so that no package beyond data.table
      # is needed; which() discards NA comparisons instead of producing
      # all-NA rows.
      pairs <- do.call("rbind", ll)
      pairs <- pairs[which(pairs$Var1 != pairs$Var2), ]
      rownames(pairs) <- NULL  # renumber rows 1..n for display
      pairs
    
    #Result
       group_id Var1 Var2
    1         1    b    a
    2         1    c    a
    3         1    a    b
    4         1    c    b
    5         1    a    c
    6         1    b    c
    7         2    c    b
    8         2    d    b
    9         2    b    c
    10        2    d    c
    11        2    b    d
    12        2    c    d
    

    数据

    # Example input parsed from inline text: the first line is the header
    # (group_id, value); strings are kept as character, not factors.
    df <- read.table(text = "group_id    value
    1         a
    1         b
    1         c
    2         b
    2         c
    2         d", header = TRUE, stringsAsFactors = FALSE)
    

    【讨论】:

      【解决方案3】:

      您正在寻找来自 gtools 的 permutations

      ## In general
      
      library(gtools)
      
      # Values to permute.
      char.var <- c("a", "b", "c")
      # Every ordered pair (r = 2) of distinct elements, one pair per row.
      df <- as.data.frame(permutations(n = length(char.var), r = 2, v = char.var))
      df
      
         V1 V2
      1  a  b
      2  a  c
      3  b  a
      4  b  c
      5  c  a
      6  c  b
      
      ## answer for question
      
      library(data.table)
      library(gtools)
      
      # Example data; stringsAsFactors = FALSE keeps `value` as character on
      # every R version, so no as.character() conversion is needed afterwards.
      df <- data.frame(
        groupid = c(1, 1, 1, 2, 2, 2),
        value = c("a", "b", "c", "b", "c", "d"),
        stringsAsFactors = FALSE
      )

      setDT(df)

      # Build the permutations of each group separately, then bind them once
      # at the end instead of growing `output` with rbind() on every iteration.
      group_ids <- unique(df$groupid)
      pieces <- lapply(group_ids, function(gid) {
        # Distinct values of this group; duplicated values would otherwise
        # make permutations() stop.
        values <- unique(df[groupid == gid, value])
        if (length(values) < 2L) {
          return(NULL)  # no ordered pairs exist for this group
        }
        perms <- as.data.table(permutations(length(values), r = 2, v = values))
        setnames(perms, c("value1", "value2"))
        perms[, groupid := gid]  # tag each pair with its group id
        perms
      })
      output <- rbindlist(pieces)

      print(output)
      
              value1 value2 groupid
       1:      a      b       1
       2:      a      c       1
       3:      b      a       1
       4:      b      c       1
       5:      c      a       1
       6:      c      b       1
       7:      b      c       2
       8:      b      d       2
       9:      c      b       2
      10:      c      d       2
      11:      d      b       2
      12:      d      c       2
      

      【讨论】:

      • 这不会按照要求保留和按 id 分组。
      【解决方案4】:

      与 @João Machado 的解法几乎相同:

      # Attach gtools once, up front, instead of once per group inside the
      # callback.
      library(gtools)

      df <- data.frame(
        group_id = rep(c(1, 2), each = 3),
        value = c(letters[1:3], letters[2:4])
      )

      # Return all ordered pairs of distinct values for one group's rows.
      #
      # group: the rows of `df` belonging to a single group_id.
      # Returns a data frame with columns group_id, value1, value2.
      permute_group <- function(group) {
        # Deduplicate first so the count passed as `n` matches the values.
        values <- unique(as.vector(group[, "value"]))
        pairs <- data.frame(
          gtools::permutations(n = length(values), r = 2, v = values)
        )
        colnames(pairs) <- c("value1", "value2")
        pairs$group_id <- unique(as.vector(group[, "group_id"]))
        pairs[, c("group_id", "value1", "value2")]
      }

      # Split by group, permute each piece, and stack the results.
      df <- do.call(rbind, lapply(split(x = df, f = df$group_id), permute_group))
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2013-10-06
        • 1970-01-01
        • 1970-01-01
        • 2017-09-18
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多