将数据帧的每一行乘以它的向量 R答案

【问题标题】：multiply each row of a dataframe by its vector R（在 R 中将数据框的每一行乘以它对应的向量）
【发布时间】：2021-06-12 03:32:28
【问题描述】：

将每列 x1:x10 与它们各自的向量相乘的最简单方法是什么（如果可能的话）。例如：新表的第一行将是：年龄 = “一”，x1 = x1 * 1，x2 = x2 * 2，x3 = x3 * 9，x4 = x4 * 4...等

# Example data: `age`/`time` have 10 entries, while the random 0..5 block
# (5 rows x 10 columns, named X1..X10 by data.frame()) and `vector` have 5, so
# data.frame() recycles them to 10 rows. Each `vector` string holds the ten
# "-"-separated multipliers for that row's X1..X10 values.
# No seed is set here, so the X values differ between runs.
x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                # `replace` is spelled out instead of relying on partial matching of `rep`.
                replicate(10, sample(0:5, 5, replace = TRUE)),
                time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                vector = c("1-2-9-4-5-1-5-6-1-2", 
                           "3-2-3-4-5-2-6-6-1-2", 
                           "1-2-4-4-2-4-5-4-2-1", 
                           "9-2-3-1-5-5-5-3-1-2", 
                           "1-1-3-4-5-1-5-6-3-2"))

   age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
1    one  4  0  3  5  0  5  0  3  5   4   one 1-2-9-4-5-1-5-6-1-2
2    two  1  5  3  5  1  2  5  0  4   4   two 3-2-3-4-5-2-6-6-1-2
3  three  0  4  5  0  3  0  0  5  2   2 three 1-2-4-4-2-4-5-4-2-1
4   four  0  0  5  5  4  3  2  5  5   1  four 9-2-3-1-5-5-5-3-1-2
5   five  2  1  2  1  4  5  5  2  1   1  five 1-1-3-4-5-1-5-6-3-2
6    one  4  0  3  5  0  5  0  3  5   4   one 1-2-3-4-5-1-5-6-1-2
7    two  1  5  3  5  1  2  5  0  4   4   two 3-2-3-4-5-2-6-6-1-2
8  three  0  4  5  0  3  0  0  5  2   2 three 1-2-4-4-2-4-5-4-2-1
9   four  0  0  5  5  4  3  2  5  5   1  four 9-2-3-1-5-5-5-3-1-2
10  five  2  1  2  1  4  5  5  2  1   1  five 1-1-3-4-5-1-5-6-3-9

我可以通过将最后一列分成多列然后将每一列相乘来做到这一点，但我正在寻找一种更快的方法

谢谢

【问题讨论】：

标签： r dplyr tidyverse

【解决方案1】：

您可以在单个 mutate 语句中执行此操作，使用 dplyr 强大的 cur_data()

# Fix the RNG seed so the random X1..X10 values (and the output shown below)
# are reproducible.
set.seed(2021)
# `age`/`time` have 10 entries; the 5-row random block and the 5 `vector`
# strings are recycled by data.frame() to 10 rows. Each `vector` string holds
# the ten "-"-separated multipliers for that row.
x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                # `replace` is spelled out instead of relying on partial matching of `rep`.
                replicate(10, sample(0:5, 5, replace = TRUE)),
                time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                vector = c("1-2-9-4-5-1-5-6-1-2", 
                           "3-2-3-4-5-2-6-6-1-2", 
                           "1-2-4-4-2-4-5-4-2-1", 
                           "9-2-3-1-5-5-5-3-1-2", 
                           "1-1-3-4-5-1-5-6-3-2"))

library(tidyverse)

# Multiply X1..X10 by the per-row multipliers parsed from `vector`.
# strsplit() + as.numeric turn each "a-b-..." string into one numeric column;
# t() flips that so each data row lines up with its own multipliers.
# The unnamed data-frame result overwrites the existing X columns.
# NOTE(review): newer dplyr releases deprecate cur_data() in favour of pick();
# confirm against the installed version.
x %>%
  mutate(
    select(cur_data(), starts_with("X")) *
      t(map_dfc(strsplit(vector, "-"), as.numeric))
  )

#>      age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
#> 1    one  5 10 36  4 25  1 20 12  1   0   one 1-2-9-4-5-1-5-6-1-2
#> 2    two 15 10  0  8  5  4  0  6  4   0   two 3-2-3-4-5-2-6-6-1-2
#> 3  three  1  4 12 12  6 12 25 20 10   5 three 1-2-4-4-2-4-5-4-2-1
#> 4   four 27 10  6  4 20 20  5 15  2   8  four 9-2-3-1-5-5-5-3-1-2
#> 5   five  3  5  9  8 25  5 10 30  3   6  five 1-1-3-4-5-1-5-6-3-2
#> 6    one  5 10 36  4 25  1 20 12  1   0   one 1-2-9-4-5-1-5-6-1-2
#> 7    two 15 10  0  8  5  4  0  6  4   0   two 3-2-3-4-5-2-6-6-1-2
#> 8  three  1  4 12 12  6 12 25 20 10   5 three 1-2-4-4-2-4-5-4-2-1
#> 9   four 27 10  6  4 20 20  5 15  2   8  four 9-2-3-1-5-5-5-3-1-2
#> 10  five  3  5  9  8 25  5 10 30  3   6  five 1-1-3-4-5-1-5-6-3-2

或者，按照 G. Grothendieck 的建议改用 across()（这样就不必再使用 cur_data() 了）：

# Same multiplication, with across() selecting the X columns.
# Each "a-b-..." string in `vector` becomes one numeric column; t() flips the
# result so every data row is multiplied by its own multipliers.
# NOTE(review): newer dplyr releases recommend pick() over across() without a
# function; confirm against the installed version.
x %>%
  mutate(
    across(starts_with("X")) *
      t(map_dfc(strsplit(vector, "-"), as.numeric))
  )

【讨论】：

【解决方案2】：

我们也可以使用以下解决方案：

library(dplyr)
library(tidyr)

# The pipeline below draws no random numbers; the seed only matters if `x` is
# (re)generated with the random data code after this call.
set.seed(123)

x %>%
  # Split each "a-b-..." string into ten columns Y1..Y10; convert = TRUE lets
  # tidyr parse the pieces as numbers instead of leaving them as text.
  separate(vector, paste0("Y", seq_len(10)), sep = "-", convert = TRUE) %>%
  # Multiply every Xn by its partner Yn. The partner name is derived from the
  # current column name with a literal (fixed) X -> Y substitution, avoiding
  # the non-standard regex escapes "\\X" / "\\Y".
  mutate(across(starts_with("X"), ~ .x * get(sub("X", "Y", cur_column(), fixed = TRUE))))


     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time Y1 Y2 Y3 Y4 Y5 Y6 Y7 Y8 Y9 Y10
1    one  2 10 45  8  0  0  0 24  3   0   one  1  2  9  4  5  1  5  6  1   2
2    two 15  4  0  8 20 10 12  0  4   2   two  3  2  3  4  5  2  6  6  1   2
3  three  2  8  4  0  4  8 20  0  8   4 three  1  2  4  4  2  4  5  4  2   1
4   four  9  6  6  3  5 15 15  3  2   8  four  9  2  3  1  5  5  5  3  1   2
5   five  1  5 12  0  5  5  5 12 15   6  five  1  1  3  4  5  1  5  6  3   2
6    one  2 10 45  8  0  0  0 24  3   0   one  1  2  9  4  5  1  5  6  1   2
7    two 15  4  0  8 20 10 12  0  4   2   two  3  2  3  4  5  2  6  6  1   2
8  three  2  8  4  0  4  8 20  0  8   4 three  1  2  4  4  2  4  5  4  2   1
9   four  9  6  6  3  5 15 15  3  2   8  four  9  2  3  1  5  5  5  3  1   2
10  five  1  5 12  0  5  5  5 12 15   6  five  1  1  3  4  5  1  5  6  3   2

【讨论】：

【解决方案3】：

这是一个 base R（不依赖额外包）的方法：

# Positions of the value columns. The pattern is anchored so that only names
# of the exact form X<digits> match, not any name that merely contains it.
cols <- grep("^X\\d+$", names(x))
# One row of numeric multipliers per data row, parsed from the "-"-separated
# strings. Converting before rbind() keeps `mat` a numeric matrix.
mat <- do.call(rbind, lapply(strsplit(x$vector, "-", fixed = TRUE), as.numeric))
# Fail loudly instead of silently recycling when the number of multipliers
# does not match the number of X columns.
stopifnot(identical(dim(mat), c(nrow(x), length(cols))))
# Element-wise product: cell (i, j) of the X block times multiplier (i, j).
x[cols] <- x[cols] * mat
x

#     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
#1    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
#2    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
#3  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
#4   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
#5   five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
#6    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
#7    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
#8  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
#9   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
#10  five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2

数据

# Fix the RNG seed so the random X1..X10 values printed below are reproducible.
set.seed(123)
# `age`/`time` have 10 entries; the 5-row random block and the 5 `vector`
# strings are recycled by data.frame() to 10 rows. Each `vector` string holds
# the ten "-"-separated multipliers for that row.
x <- data.frame(age = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                # `replace` is spelled out instead of relying on partial matching of `rep`.
                replicate(10, sample(0:5, 5, replace = TRUE)),
                time = c("one", "two", "three", "four", "five","one", "two", "three", "four", "five"),
                vector = c("1-2-9-4-5-1-5-6-1-2", 
                           "3-2-3-4-5-2-6-6-1-2", 
                           "1-2-4-4-2-4-5-4-2-1", 
                           "9-2-3-1-5-5-5-3-1-2", 
                           "1-1-3-4-5-1-5-6-3-2"))
x

#     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
#1    one  2  5  5  2  0  0  0  4  3   0   one 1-2-9-4-5-1-5-6-1-2
#2    two  5  2  0  2  4  5  2  0  4   1   two 3-2-3-4-5-2-6-6-1-2
#3  three  2  4  1  0  2  2  4  0  4   4 three 1-2-4-4-2-4-5-4-2-1
#4   four  1  3  2  3  1  3  3  1  2   4  four 9-2-3-1-5-5-5-3-1-2
#5   five  1  5  4  0  1  5  1  2  5   3  five 1-1-3-4-5-1-5-6-3-2
#6    one  2  5  5  2  0  0  0  4  3   0   one 1-2-9-4-5-1-5-6-1-2
#7    two  5  2  0  2  4  5  2  0  4   1   two 3-2-3-4-5-2-6-6-1-2
#8  three  2  4  1  0  2  2  4  0  4   4 three 1-2-4-4-2-4-5-4-2-1
#9   four  1  3  2  3  1  3  3  1  2   4  four 9-2-3-1-5-5-5-3-1-2
#10  five  1  5  4  0  1  5  1  2  5   3  five 1-1-3-4-5-1-5-6-3-2

【讨论】：

【解决方案4】：

将base R 与read.table 一起使用

# Logical mask of the columns whose names begin with "X".
is_x_col <- startsWith(names(x), "X")
# Parse the "-"-separated strings as a table: one row per data row, one
# column per multiplier.
multipliers <- read.table(text = x$vector, sep = "-", header = FALSE)
# Element-wise product written back into the X columns (their names are kept).
x[is_x_col] <- multipliers * x[is_x_col]
x
     age X1 X2 X3 X4 X5 X6 X7 X8 X9 X10  time              vector
1    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
2    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
3  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
4   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
5   five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2
6    one  2 10 45  8  0  0  0 24  3   0   one 1-2-9-4-5-1-5-6-1-2
7    two 15  4  0  8 20 10 12  0  4   2   two 3-2-3-4-5-2-6-6-1-2
8  three  2  8  4  0  4  8 20  0  8   4 three 1-2-4-4-2-4-5-4-2-1
9   four  9  6  6  3  5 15 15  3  2   8  four 9-2-3-1-5-5-5-3-1-2
10  five  1  5 12  0  5  5  5 12 15   6  five 1-1-3-4-5-1-5-6-3-2

数据

# Literal copy of the example data. Every column is a 5-element pattern that
# occurs twice, so each one is written once and repeated with rep().
x <- structure(
  list(
    age = rep(c("one", "two", "three", "four", "five"), times = 2L),
    X1 = rep(c(5L, 5L, 1L, 3L, 3L), times = 2L),
    X2 = rep(c(5L, 5L, 2L, 5L, 5L), times = 2L),
    X3 = rep(c(4L, 0L, 3L, 2L, 3L), times = 2L),
    X4 = rep(c(1L, 2L, 3L, 4L, 2L), times = 2L),
    X5 = rep(c(5L, 1L, 3L, 4L, 5L), times = 2L),
    X6 = rep(c(1L, 2L, 3L, 4L, 5L), times = 2L),
    X7 = rep(c(4L, 0L, 5L, 1L, 2L), times = 2L),
    X8 = rep(c(2L, 1L, 5L, 5L, 5L), times = 2L),
    X9 = rep(c(1L, 4L, 5L, 2L, 1L), times = 2L),
    X10 = rep(c(0L, 0L, 5L, 4L, 3L), times = 2L),
    time = rep(c("one", "two", "three", "four", "five"), times = 2L),
    vector = rep(
      c(
        "1-2-9-4-5-1-5-6-1-2",
        "3-2-3-4-5-2-6-6-1-2",
        "1-2-4-4-2-4-5-4-2-1",
        "9-2-3-1-5-5-5-3-1-2",
        "1-1-3-4-5-1-5-6-3-2"
      ),
      times = 2L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -10L)
)

【讨论】：

【解决方案5】：

这是使用c_across() 的{tidyverse} 方法。结果是一个列表列，可以使用unnest() 提取。

# Build x1: `x` plus a list-column `res` holding, for every row, the products
# of X1..X10 with that row's multipliers.
x1 <- x %>% 
  # Split every "a-b-..." string on "-"; `vector` becomes a list-column.
  mutate(vector = str_split(vector, "-")) %>% 
  # Evaluate the following mutate() one row at a time.
  rowwise() %>% 
  # Convert this row's pieces to integers (re-wrapped in list() so the column
  # stays a list-column), then multiply them element-wise with the row's
  # X1..X10 values and keep the products in `res`.
  mutate(vector = list(as.integer(vector)),
         res = list(c_across(X1:X10) * vector)) %>%
  # Remove the row-wise grouping again.
  ungroup()

# Print the result; the list-column `res` can be expanded with unnest().
x1

【讨论】：