【问题标题】:multiply each row of a dataframe by it's vector R将数据帧的每一行乘以它的向量 R
【发布时间】:2021-06-12 03:32:28
【问题描述】:

将 X1:X10 各列按行与各自 vector 列中对应的数值相乘,最简单的方法是什么(如果可能的话)? 例如:新表的第一行将是: age = "one",x1 = x1 * 1,x2 = x2 * 2,x3 = x3 * 9,x4 = x4 * 4...等

# Example data: 10 rows made of an `age` label, ten integer columns X1..X10
# drawn from 0:5, a `time` label, and a "-"-separated string of multipliers.
# No seed is set, so the X values differ from run to run.
# The 5-row matrix and the 5 multiplier strings are shorter than the
# 10-element label columns; data.frame() recycles them, so rows 6-10 repeat
# rows 1-5.
x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                # `replace` is spelled out instead of relying on partial matching of `rep`.
                replicate(10, sample(0:5, 5, replace = TRUE)),
                time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                vector = c("1-2-9-4-5-1-5-6-1-2", 
                           "3-2-3-4-5-2-6-6-1-2", 
                           "1-2-4-4-2-4-5-4-2-1", 
                           "9-2-3-1-5-5-5-3-1-2", 
                           "1-1-3-4-5-1-5-6-3-2"))
   age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
1    one  4  0  3  5  0  5  0  3  5   4   one 1-2-9-4-5-1-5-6-1-2
2    two  1  5  3  5  1  2  5  0  4   4   two 3-2-3-4-5-2-6-6-1-2
3  three  0  4  5  0  3  0  0  5  2   2 three 1-2-4-4-2-4-5-4-2-1
4   four  0  0  5  5  4  3  2  5  5   1  four 9-2-3-1-5-5-5-3-1-2
5   five  2  1  2  1  4  5  5  2  1   1  five 1-1-3-4-5-1-5-6-3-2
6    one  4  0  3  5  0  5  0  3  5   4   one 1-2-9-4-5-1-5-6-1-2
7    two  1  5  3  5  1  2  5  0  4   4   two 3-2-3-4-5-2-6-6-1-2
8  three  0  4  5  0  3  0  0  5  2   2 three 1-2-4-4-2-4-5-4-2-1
9   four  0  0  5  5  4  3  2  5  5   1  four 9-2-3-1-5-5-5-3-1-2
10  five  2  1  2  1  4  5  5  2  1   1  five 1-1-3-4-5-1-5-6-3-2

我可以通过将最后一列分成多列然后将每一列相乘来做到这一点,但我正在寻找一种更快的方法

谢谢

【问题讨论】:

    标签: r dplyr tidyverse


    【解决方案1】:

    您可以在单个 mutate 语句中执行此操作,使用 dplyr 强大的 cur_data()

    # Fix the seed so the random X1..X10 values are reproducible.
    set.seed(2021)
    # The 5-row matrix and the 5 multiplier strings are recycled by data.frame()
    # to the 10 rows given by `age`/`time`.
    x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                    # `replace` is spelled out instead of relying on partial matching of `rep`.
                    replicate(10, sample(0:5, 5, replace = TRUE)),
                    time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                    vector = c("1-2-9-4-5-1-5-6-1-2", 
                               "3-2-3-4-5-2-6-6-1-2", 
                               "1-2-4-4-2-4-5-4-2-1", 
                               "9-2-3-1-5-5-5-3-1-2", 
                               "1-1-3-4-5-1-5-6-3-2"))
    
    library(tidyverse)
    
    # Multiply the X columns element-wise by a numeric matrix holding one row of
    # multipliers per data row. The matrix is built row by row with rbind(), so
    # no transposition and no column-name repair is involved. The unnamed data
    # frame returned inside mutate() overwrites the X columns in place.
    # NOTE: cur_data() is deprecated as of dplyr 1.1.0; on newer dplyr,
    # pick(starts_with("X")) is the documented replacement.
    x %>%
      mutate(
        select(cur_data(), starts_with("X")) *
          do.call(
            rbind,
            lapply(strsplit(vector, "-", fixed = TRUE), as.numeric)
          )
      )
    
    #>      age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
    #> 1    one  5 10 36  4 25  1 20 12  1   0   one 1-2-9-4-5-1-5-6-1-2
    #> 2    two 15 10  0  8  5  4  0  6  4   0   two 3-2-3-4-5-2-6-6-1-2
    #> 3  three  1  4 12 12  6 12 25 20 10   5 three 1-2-4-4-2-4-5-4-2-1
    #> 4   four 27 10  6  4 20 20  5 15  2   8  four 9-2-3-1-5-5-5-3-1-2
    #> 5   five  3  5  9  8 25  5 10 30  3   6  five 1-1-3-4-5-1-5-6-3-2
    #> 6    one  5 10 36  4 25  1 20 12  1   0   one 1-2-9-4-5-1-5-6-1-2
    #> 7    two 15 10  0  8  5  4  0  6  4   0   two 3-2-3-4-5-2-6-6-1-2
    #> 8  three  1  4 12 12  6 12 25 20 10   5 three 1-2-4-4-2-4-5-4-2-1
    #> 9   four 27 10  6  4 20 20  5 15  2   8  four 9-2-3-1-5-5-5-3-1-2
    #> 10  five  3  5  9  8 25  5 10 30  3   6  five 1-1-3-4-5-1-5-6-3-2
    

    或者,按照 G. Grothendieck 的建议改用 across()(这样就无需使用 cur_data()):

    # Multiply the X columns by the parsed multipliers. map_dfc() binds each
    # parsed string as one column, so t() is needed to get one row of
    # multipliers per data row.
    x %>%
      mutate(
        across(starts_with("X")) *
          t(map_dfc(strsplit(vector, "-"), as.numeric))
      )
    

    【讨论】:

      【解决方案2】:

      我们也可以使用以下解决方案:

      library(dplyr)
      library(tidyr)

      # `x` must already exist; nothing below draws random numbers, so the seed
      # presumably only mirrors the one used when `x` was generated.
      set.seed(123)

      # Split the "-"-separated multiplier string into integer columns Y1..Y10,
      # then multiply every X<i> column by its matching Y<i> column.
      x %>%
        separate(vector, into = paste0("Y", seq_len(10)), sep = "-", convert = TRUE) %>%
        # "^X" -> "Y" maps the current column name (e.g. "X3") to its partner
        # ("Y3"); a plain anchored pattern avoids backslash-escaping ordinary
        # letters, whose meaning is implementation-dependent.
        mutate(across(starts_with("X"), ~ .x * get(sub("^X", "Y", cur_column()))))
      
      
           age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time Y1 Y2 Y3 Y4 Y5 Y6 Y7 Y8 Y9 Y10
      1    one  2 10 45  8  0  0  0 24  3   0   one  1  2  9  4  5  1  5  6  1   2
      2    two 15  4  0  8 20 10 12  0  4   2   two  3  2  3  4  5  2  6  6  1   2
      3  three  2  8  4  0  4  8 20  0  8   4 three  1  2  4  4  2  4  5  4  2   1
      4   four  9  6  6  3  5 15 15  3  2   8  four  9  2  3  1  5  5  5  3  1   2
      5   five  1  5 12  0  5  5  5 12 15   6  five  1  1  3  4  5  1  5  6  3   2
      6    one  2 10 45  8  0  0  0 24  3   0   one  1  2  9  4  5  1  5  6  1   2
      7    two 15  4  0  8 20 10 12  0  4   2   two  3  2  3  4  5  2  6  6  1   2
      8  three  2  8  4  0  4  8 20  0  8   4 three  1  2  4  4  2  4  5  4  2   1
      9   four  9  6  6  3  5 15 15  3  2   8  four  9  2  3  1  5  5  5  3  1   2
      10  five  1  5 12  0  5  5  5 12 15   6  five  1  1  3  4  5  1  5  6  3   2
      

      【讨论】:

        【解决方案3】:

        这是一种 base R 方法:

        # Positions of the columns named exactly X<digits>; the pattern is
        # anchored so unrelated names that merely contain "X1" are not picked up.
        cols <- grep('^X\\d+$', names(x))
        # One row of multipliers per data row, parsed from the "-"-separated string.
        mat <- do.call(rbind, strsplit(x$vector, '-', fixed = TRUE))
        # Convert to numbers while keeping the matrix shape, so the product below
        # is an explicit cell-by-cell operation rather than vector recycling.
        storage.mode(mat) <- "double"
        # Fail loudly if the multipliers do not line up with the X columns.
        stopifnot(identical(dim(mat), c(nrow(x), length(cols))))
        x[cols] <- x[cols] * mat
        x
        
        #     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
        #1    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
        #2    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
        #3  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
        #4   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
        #5   five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
        #6    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
        #7    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
        #8  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
        #9   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
        #10  five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
        

        数据

        # Fix the seed so the random X1..X10 values are reproducible.
        set.seed(123)
        # The 5-row matrix and the 5 multiplier strings are recycled by
        # data.frame() to the 10 rows given by `age`/`time`.
        x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                        # `replace` is spelled out instead of relying on partial matching of `rep`.
                        replicate(10, sample(0:5, 5, replace = TRUE)),
                        time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                        vector = c("1-2-9-4-5-1-5-6-1-2", 
                                   "3-2-3-4-5-2-6-6-1-2", 
                                   "1-2-4-4-2-4-5-4-2-1", 
                                   "9-2-3-1-5-5-5-3-1-2", 
                                   "1-1-3-4-5-1-5-6-3-2"))
        x
        
        #     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
        #1    one  2  5  5  2  0  0  0  4  3   0   one 1-2-9-4-5-1-5-6-1-2
        #2    two  5  2  0  2  4  5  2  0  4   1   two 3-2-3-4-5-2-6-6-1-2
        #3  three  2  4  1  0  2  2  4  0  4   4 three 1-2-4-4-2-4-5-4-2-1
        #4   four  1  3  2  3  1  3  3  1  2   4  four 9-2-3-1-5-5-5-3-1-2
        #5   five  1  5  4  0  1  5  1  2  5   3  five 1-1-3-4-5-1-5-6-3-2
        #6    one  2  5  5  2  0  0  0  4  3   0   one 1-2-9-4-5-1-5-6-1-2
        #7    two  5  2  0  2  4  5  2  0  4   1   two 3-2-3-4-5-2-6-6-1-2
        #8  three  2  4  1  0  2  2  4  0  4   4 three 1-2-4-4-2-4-5-4-2-1
        #9   four  1  3  2  3  1  3  3  1  2   4  four 9-2-3-1-5-5-5-3-1-2
        #10  five  1  5  4  0  1  5  1  2  5   3  five 1-1-3-4-5-1-5-6-3-2
        

        【讨论】:

          【解决方案4】:

          在 base R 中使用 read.table:

          # Logical mask of the columns whose names start with "X".
          is_x_col <- startsWith(names(x), "X")
          # Parse the "-"-separated strings into a data frame, one column per multiplier.
          multipliers <- read.table(text = x$vector, sep = "-", header = FALSE)
          # Cell-by-cell product, written back into the X columns.
          x[is_x_col] <- multipliers * x[is_x_col]
          x
               age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
          1    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
          2    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
          3  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
          4   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
          5   five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
          6    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
          7    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
          8  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
          9   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
          10  five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
          

          数据

          # Example data: rows 6-10 repeat rows 1-5, so every column is written
          # as a 5-value pattern repeated twice.
          x <- data.frame(
            age    = rep(c("one", "two", "three", "four", "five"), times = 2L),
            X1     = rep(c(5L, 5L, 1L, 3L, 3L), times = 2L),
            X2     = rep(c(5L, 5L, 2L, 5L, 5L), times = 2L),
            X3     = rep(c(4L, 0L, 3L, 2L, 3L), times = 2L),
            X4     = rep(c(1L, 2L, 3L, 4L, 2L), times = 2L),
            X5     = rep(c(5L, 1L, 3L, 4L, 5L), times = 2L),
            X6     = rep(c(1L, 2L, 3L, 4L, 5L), times = 2L),
            X7     = rep(c(4L, 0L, 5L, 1L, 2L), times = 2L),
            X8     = rep(c(2L, 1L, 5L, 5L, 5L), times = 2L),
            X9     = rep(c(1L, 4L, 5L, 2L, 1L), times = 2L),
            X10    = rep(c(0L, 0L, 5L, 4L, 3L), times = 2L),
            time   = rep(c("one", "two", "three", "four", "five"), times = 2L),
            vector = rep(
              c("1-2-9-4-5-1-5-6-1-2", "3-2-3-4-5-2-6-6-1-2",
                "1-2-4-4-2-4-5-4-2-1", "9-2-3-1-5-5-5-3-1-2",
                "1-1-3-4-5-1-5-6-3-2"),
              times = 2L
            ),
            # Keep the label columns as character vectors.
            stringsAsFactors = FALSE
          )
          
          

          【讨论】:

            【解决方案5】:

            这是一种使用 c_across() 的 {tidyverse} 方法。结果是一个列表列,可以使用 unnest() 提取。

            # Work one row at a time: turn the "-"-separated string into an
            # integer vector (kept as a list-column), then store the product of
            # that row's X1..X10 values and the multipliers in list-column `res`.
            x1 <- x %>%
              rowwise() %>%
              mutate(
                vector = list(as.integer(str_split(vector, "-")[[1]])),
                res = list(c_across(X1:X10) * vector)
              ) %>%
              ungroup()

            x1
            

            【讨论】:

              猜你喜欢
              • 1970-01-01
              • 2012-11-29
              • 1970-01-01
              • 2020-05-30
              • 1970-01-01
              • 2021-06-28
              • 1970-01-01
              • 1970-01-01
              • 2016-04-02
              相关资源
              最近更新 更多