【问题标题】:Make purrr::accumulate() run fast or alternatives?让 purrr::accumulate() 快速运行还是替代方案?
【发布时间】:2019-11-20 21:27:14
【问题描述】:

问题

我正在寻找 R 中 for 循环的更快替代方案。具体来说,可以提供由于加法而导致矢量减少的中间结果。 purrr::accumulate() 可以解决问题,但似乎很慢。下面显示了一个可重现的示例。

功能

带有for循环

accumulate_values <- function(time_vector, 
                              input_vector, 
                              list_of_parameters)
{

  number_samples <- length(time_vector)
  time_steps <- c(0, diff(time_vector))

  calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps

  accumulated_values <- rep(0, number_samples)
  for (i in 2:number_samples) {
    accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])

  }

  return(accumulated_values)
}

使用 purrr::accumulate()

功能

library(tidyverse)
accumulate_values_purrr <- function(time_vector, 
                              input_vector, 
                              list_of_parameters)
{
  number_samples <- length(time_vector)
  time_steps <- c(0, diff(time_vector))
    calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps

  # accumulated_values <- rep(0, number_samples)
  # for (i in 2:number_samples) {
  #   accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
  #   
  # }
  accumulated_values <- calculation %>% purrr::accumulate(function(x, y) max(0, x + y))

  return(accumulated_values)
}  

结果

# Data
Nums <- 1000000
my_time_vector <- seq(1, Nums, by = 1)
my_input_vector <- rnorm(Nums)
my_list_of_parameters <- list(K = 5, M = 0.01)

# Results
set.seed(1987)
library(tictoc)
# With for-loop
tic()
answer1 <- accumulate_values(my_time_vector, 
                  my_input_vector, 
                  my_list_of_parameters)
toc()
## 1.73 sec elapsed

# With purrr::accumulate
tic()
answer2 <- accumulate_values_purrr(my_time_vector, 
                  my_input_vector, 
                  my_list_of_parameters)
toc()
## 5.93 sec elapsed

identical(answer1, answer2)
## [1] TRUE

问题

如何让accumulate() 更快?有更快的替代方案吗?

【问题讨论】:

  • 你能用Reduce(function(x, y) max(0, x + y), calculation, accumulate = TRUE)试试还是用cummax(calculation)
  • @akrun 感谢您的评论。 Reduce() 在这些数据上比 purrr::accumulate() 快,但仍然比 for-loop 慢。 cummax() 不提供相同的结果。
  • 可能你需要cummax(pmax(0, calculation))
  • 不,cummax(pmax(0, calculation)) 提供的结果不同。
  • 使用 c++ std::accumulateRcpp 包:adv-r.had.co.nz/Rcpp.html

标签: r for-loop purrr


【解决方案1】:

这是一个尝试。我使用{bench}-package 来提供更准确的微基准。

library(purrr)

accumulate_values <- function(time_vector,
                              input_vector,
                              list_of_parameters)
{

  number_samples <- length(time_vector)
  time_steps <- c(0, diff(time_vector))

  calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps

  accumulated_values <- rep(0, number_samples)
  for (i in 2:number_samples) {
    accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])

  }

  return(accumulated_values)
}

accumulate_values_purrr <- function(time_vector,
                                    input_vector,
                                    list_of_parameters)
{
  number_samples <- length(time_vector)
  time_steps <- c(0, diff(time_vector))
  calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps

  # accumulated_values <- rep(0, number_samples)
  # for (i in 2:number_samples) {
  #   accumulated_values[i] <- max(0, accumulated_values[i-1] + calculation[i])
  #
  # }
  accumulated_values <- calculation %>% purrr::accumulate(function(x, y) max(0, x + y))

  return(accumulated_values)
}

# Data
set.seed(1987)
Nums <- 1000000
# Nums <- 1000
time_vector <- seq(1, Nums, by = 1)
input_vector <- rnorm(Nums)
list_of_parameters <- list(K = 5, M = 0.01)

number_samples <- length(time_vector)
time_steps <- c(0, diff(time_vector))
calculation <- (list_of_parameters$K * input_vector - list_of_parameters$M) * time_steps

answer <- accumulate_values_purrr(time_vector,
                                  input_vector,
                                  list_of_parameters)

Rcpp::cppFunction(
  plugins = "cpp11",
  "std::vector<double> process(NumericVector calculation) {
  // NumericVector result (calculation.length(), 0.0);
  std::vector<double> result;
  // result.capacity(calculation.length());

  std::accumulate(calculation.begin(),
                  calculation.end(),
                  0.0,
                  [&result](double x, double y){
                    auto new_entry = std::max(0.0, x + y);
                    result.push_back(new_entry);
                    return new_entry;
                  });

  return result;
}")

Rcpp::cppFunction(
  plugins = "cpp11",
  "std::vector<double> process_with_cap(NumericVector calculation) {
  std::vector<double> result;
  result.reserve(calculation.length());

  std::accumulate(calculation.cbegin(),
                  calculation.cend(),
                  0.0,
                  [&result](double x, double y){
                    auto new_entry = std::max(0.0, x + y);
                    result.push_back(new_entry);
                    return new_entry;
                  });

  return result;
}")



bench::mark(for_loopa = accumulate_values(time_vector,
                                          input_vector,
                                          list_of_parameters),
            rcpp_process = process(calculation),
            rcpp_process_with_cap = process_with_cap(calculation),
            purrr_accumulate = accumulate_values_purrr(time_vector,
                                                       input_vector,
                                                       list_of_parameters)) %>%
  bench:::summary.bench_mark(relative = TRUE)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.

#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 4 x 6
#>   expression               min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>             <dbl>  <dbl>     <dbl>     <dbl>    <dbl>
#> 1 for_loopa              44.0   47.4       8.36      8.01     1.38
#> 2 rcpp_process            1.37   1.38    279.        1        1   
#> 3 rcpp_process_with_cap   1      1       375.        1        1.60
#> 4 purrr_accumulate      412.   396.        1        14.0      1.22

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2012-07-14
    • 2021-09-11
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多