R中odesolver的速度问题答案

【问题标题】：speed problems with odesolver in RR中odesolver的速度问题
【发布时间】：2018-03-05 09:09:00
【问题描述】：

我在 R 中有一个微分方程模型，它使用 deSolve 包中的 odesolver。但是，目前该模型运行非常缓慢。我认为这可能与我提供给 odesolver 的函数写得不好有关，但无法弄清楚究竟是什么在减慢它的速度以及如何加快它的速度。有没有人有任何想法？

我做了一个与我的工作方式相似的例子：

library(data.table)
library(deSolve)

matrix_1 <- matrix(runif(100),10,10)
matrix_1[which(matrix_1 > 0.5)] <- 1
matrix_1[which(matrix_1 < 0.5)] <- 0

matrix_2 <- matrix(runif(100),10,10)
matrix_2[which(matrix_2 > 0.5)] <- 1
matrix_2[which(matrix_2 < 0.5)] <- 0

group_ID <- rep(c(1,2), 5)
N <- runif(10, 0, 100000)

Nchange <- function(t, N, parameters) {
  with(as.list(c(N, parameters)), {
    N_per_1 <- matrix_1 * N_per_connection  
    N_per_1[is.na(N_per_1)] <- 0
    total_N_2 <- as.vector(N_per_1)
    if (nrow(as.matrix(N_per_1)) > 1) {
      total_N_2 <- colSums(N_per_1[drop = FALSE])
    }
    N_per_1_cost <- N_per_1
    for (i in possible_competition) {
      column <- as.vector(N_per_1[, i])
      if (sum(column) > 0) {
        active_groups <- unique(group_ID[column > 0])
        if (length(active_groups) > 1){
          group_ID_dets <- data.table("group_ID" = group_ID, "column"= column, "n_IDS" = 1:length(group_ID))
          group_ID_dets$portions <- ave(group_ID_dets$column, group_ID_dets$group_ID, FUN = function(x) x / sum(x))
          group_ID_dets[is.na(group_ID_dets)] <- 0
          totals <- as.vector(unlist(tapply(group_ID_dets$column, group_ID_dets$group_ID, function(x) sum(x))))
          totals[is.na(totals)] <- 0
          totals <- totals*2 - sum(totals)
          totals[totals < 0] <- 0
          group_ID_totals <- data.table("group_ID" = unique(group_ID), "totals" = as.vector(totals))
          group_ID_dets$totals <- group_ID_totals$totals[match(group_ID_dets$group_ID, group_ID_totals$group_ID)]
          N_per_1[, i] <- group_ID_dets$totals * group_ID_dets$portions
        }
      }
    }

    res_per_1 <- N_per_1 * 0.1

    N_per_2 <- matrix_2 * N_per_connection
    N_per_2[is.na(N_per_2)] <- 0 
    res_per_2 <- N_per_2 * 0.1

    dN <- rowSums(res_per_1)  - rowSums(N_per_1_cost * 0.00003) + rowSums(res_per_2)  - 
      rowSums(N_per_2 * 0.00003) - N*0.03

    list(c(dN))
  })
}  # function describing differential equations
N_per_connection <- N/(rowSums(matrix_1) + rowSums(matrix_2))
possible_competition <- which(colSums(matrix_1 != 0)>1)
times <- seq(0, 100, by = 1) 
out <- ode(y = N, times = times, func = Nchange, parms = NULL)

【问题讨论】：

标签： r performance optimization data.table ode

【解决方案1】：

识别瓶颈的好方法是使用profiler，而profvis 包提供了深入研究结果的好方法。将您的代码封装在 p <- profvis({YourCodeInHere}) 中，然后使用 print(p) 查看结果，可以得出以下见解：

花费时间最多的行是（按时间降序排列）：

group_ID_totals <- data.table("group_ID" = unique(group_ID), "totals" = as.vector(totals))

group_ID_dets$portions <- ave(group_ID_dets$column, group_ID_dets$group_ID, FUN = function(x) x / sum(x))

group_ID_dets <- data.table("group_ID" = group_ID, "column"= column, "n_IDS" = 1:length(group_ID))

totals <- as.vector(unlist(tapply(group_ID_dets$column, group_ID_dets$group_ID, function(x) sum(x))))

group_ID_dets$totals <- group_ID_totals$totals[match(group_ID_dets$group_ID, group_ID_totals$group_ID)]

我不熟悉您的 ODE 的详细信息，但您应该专注于优化这些任务。我认为更大的问题是您正在循环运行这些命令。通常，您会听到 R 中的循环很慢，但在答案 here 中可以找到对此问题的更细致的讨论。那里的一些提示可能会帮助您重组代码/循环。祝你好运！

【讨论】：