【发布时间】:2020-10-26 12:58:31
【问题描述】:
我使用foreach 和doParallel 运行模拟,并与随机数(在代码中命名为random)作斗争。
简而言之:我模拟一个足球联赛,随机生成所有比赛的获胜者和相应的结果。在dt_base 中没有比赛,在dt_ex1 和dt_ex2 4 场比赛的结果是已知的。应该模拟所有未知的结果。
在这篇文章底部的联赛模拟代码中,我设置了 1000 个模拟,分成 100 个块(forloop 用于将数据发送到 PostgreSQL,并在我使用的完整代码中减少 RAM 使用)。 我希望所有的随机数都不同(甚至不坚持可重复的结果)。
1.当运行给定的代码时,应该实现所有不同的随机数的目标。
> # ====== Distinct Random Numbers ======
> length(unique(out$random)) # expectation: 22000
[1] 22000
> length(unique(out$random[out$part == "base"])) # expectation: 10000
[1] 10000
> length(unique(out$random[out$part == "dt_ex1"])) # expectation: 6000
[1] 6000
> length(unique(out$random[out$part == "dt_ex2"])) # expectation: 6000
[1] 6000
2。现在请取消注释分配最终分数的代码 *[tmp_sim] = 3(应该是第 60,61,67,68 行,上面有 !!!)并再次运行。
> # ====== Distinct Random Numbers ======
> length(unique(out$random)) # expectation: 22000
[1] 10360
> length(unique(out$random[out$part == "base"])) # expectation: 10000
[1] 10000
> length(unique(out$random[out$part == "dt_ex1"])) # expectation: 6000
[1] 180
> length(unique(out$random[out$part == "dt_ex2"])) # expectation: 6000
[1] 180
那就是它变得一团糟,对我来说没有意义。在将几个数字添加到这些数据帧中时,iter 中的 iter 中的 dt_ex1 和 dt_ex2 始终相同。
您是否遇到相同的效果?请问大家知道怎么回事吗?
我尝试了 R 版本 3.5.3 和 3.6.3。还尝试了doRNG 包。总是同样的问题。
联赛模拟代码
# League Simulation
rm(list = ls())
set.seed(666)
cat("\014")
library(sqldf)
library(plyr)
library(dplyr)
# ====== User Functions ======
comb4 = function(x, ...) { #function for combining foreach output
Map(rbind, x, ...)
}
# ====== Data Preparation ======
dt_base = data.frame(id = 1:10,
part = rep("base",10),
random = NA)
dt_ex1 = data.frame(id = 1:10,
part = rep("dt_ex1",10),
HG = c(1,3,6,NA,NA,2,NA,NA,NA,NA), # Home Goals
AG = c(1,3,6,NA,NA,2,NA,NA,NA,NA), # Away Goals
random = NA)
dt_ex2 = data.frame(id = 1:10,
part = rep("dt_ex2",10),
HG = c(1,3,6,NA,NA,2,NA,NA,NA,NA), # Home Goals
AG = c(1,3,6,NA,NA,2,NA,NA,NA,NA), # Away Goals
random = NA)
# ====== Set Parallel Computing ======
library(foreach)
library(doParallel)
cl = makeCluster(3, outfile = "")
registerDoParallel(cl)
# ====== SIMULATION ======
nsim = 1000 # number of simulations
iterChunk = 100 # split nsim into this many chunks
out = data.frame() # prepare output DF
for(iter in 1:ceiling(nsim/iterChunk)){
strt = Sys.time()
out_iter =
foreach(i = 1:iterChunk, .combine = comb4, .multicombine = TRUE, .maxcombine = 100000, .inorder = FALSE, .verbose = FALSE,
.packages = c("plyr", "dplyr", "sqldf")) %dopar% {
## PART 1
# simulation number
id_sim = iterChunk * (iter - 1) + i
# First random numbers set
dt_base[,"random"] = runif(nrow(dt_base))
## PART 2
tmp_sim = is.na(dt_ex1$HG) # no results yet
dt_ex1$random[tmp_sim] = runif(sum(tmp_sim))
# dt_ex1$HG[tmp_sim] = 3 # !!!
# dt_ex1$AG[tmp_sim] = 3 # !!!
## PART 3
tmp_sim = is.na(dt_ex2$HG) # no results yet
dt_ex2$random[tmp_sim] = runif(sum(tmp_sim))
# dt_ex2$HG[tmp_sim] = 3 # !!!
# dt_ex2$AG[tmp_sim] = 3 # !!!
# ---- Save Results
zapasy = rbind.data.frame(dt_base[,c("id","part","random")],
dt_ex1[,c("id","part","random")]
,dt_ex2[,c("id","part","random")]
)
zapasy$id_sim = id_sim
zapasy$iter = iter
zapasy$i = i
out_i = list(zapasy = zapasy)
print(Sys.time())
return(out_i)
}#i;sim_forcycle
out = rbind.data.frame(out,subset(out_iter$zapasy, !is.na(random)))
fnsh = Sys.time()
cat(" [",iter,"] ",fnsh - strt, sep = "")
}#iter
# ====== Distinct Random Numbers ======
length(unique(out$random)) # expectation: 22000
length(unique(out$random[out$part == "base"])) # expectation: 10000
length(unique(out$random[out$part == "dt_ex1"])) # expectation: 6000
length(unique(out$random[out$part == "dt_ex2"])) # expectation: 6000
# ====== Stop Parallel Computing ======
stopCluster(cl)
【问题讨论】:
标签: r random foreach doparallel