由于我的声望不足,无法在 Jeff 的回答下发表评论,因此我将其作为单独的答案发布:
我发现在 Windows 中使用 makePSOCKcluster 自动启动集群节点不起作用的原因是:内部 parallel 函数 newPSOCKnode 中的 arg 和 outfile 参数被 shQuote 函数加上了引号。这会导致 cmd.exe 和 Rscript.exe 的组合返回错误,从而使 makePSOCKcluster 永远挂起。
以下两个函数定义允许使用 makePSOCKcluster 自动启动集群节点,前提是已正确配置 ssh 或 putty/plink 以进行基于密钥的无密码登录:
# Build a socket cluster, starting one worker node per entry of `names`.
#
# Arguments:
#   names  either a number (only the first element is used; that many
#          workers are started on "localhost") or a vector/list of hosts,
#          each passed on to newPSOCKnode().
#   ...    extra cluster options, merged over parallel's default options.
#
# Returns a list of node objects with class c("SOCKcluster", "cluster").
# Stops with an error when a numeric `names` is NA or smaller than 1.
makePSOCKcluster <- function (names, ...)
{
    # A numeric request is expanded into that many local workers.
    if (is.numeric(names)) {
        n_workers <- as.integer(names[1L])
        if (is.na(n_workers) || n_workers < 1L)
            stop("numeric 'names' must be >= 1")
        names <- rep("localhost", n_workers)
    }
    host_count <- length(names)
    parallel:::.check_ncores(host_count)
    cluster_opts <- parallel:::addClusterOptions(parallel:::defaultClusterOptions,
        list(...))
    # Nodes are started one after another; rank is the 1-based position.
    nodes <- lapply(seq_len(host_count), function(idx) {
        newPSOCKnode(names[[idx]], options = cluster_opts, rank = idx)
    })
    structure(nodes, class = c("SOCKcluster", "cluster"))
}
# Start a single PSOCK worker and return the master-side node object.
#
# Patched variant of the internal parallel:::newPSOCKnode. On a Windows
# master the worker expression and the `outfile` value are placed on the
# command line WITHOUT shQuote(), because the quoted form makes the
# cmd.exe / Rscript.exe combination fail and the cluster start-up hang.
# On every other platform the values are still shell-quoted: an unquoted
# `parallel:::.slaveRSOCK()` contains parentheses that a POSIX shell
# would reject as a syntax error.
#
# Arguments:
#   machine  host name, or a list whose `host` element is the host name and
#            whose elements are merged into the cluster options.
#   ...      additional cluster options merged into `options`.
#   options  cluster options object (defaults to parallel's defaults).
#   rank     index of this node in the cluster; stored in the result.
#
# Returns a list(con, host, rank) with class "SOCKnode" when the `useXDR`
# option is TRUE and "SOCK0node" otherwise.
newPSOCKnode <- function (machine = "localhost", ..., options = parallel:::defaultClusterOptions,
    rank)
{
    options <- parallel:::addClusterOptions(options, list(...))
    if (is.list(machine)) {
        options <- parallel:::addClusterOptions(options, machine)
        machine <- machine$host
    }
    outfile <- parallel:::getClusterOption("outfile", options)
    # A local worker always connects back to "localhost".
    master <- if (machine == "localhost")
        "localhost"
    else parallel:::getClusterOption("master", options)
    port <- parallel:::getClusterOption("port", options)
    setup_timeout <- parallel:::getClusterOption("setup_timeout", options)
    manual <- parallel:::getClusterOption("manual", options)
    timeout <- parallel:::getClusterOption("timeout", options)
    methods <- parallel:::getClusterOption("methods", options)
    useXDR <- parallel:::getClusterOption("useXDR", options)

    # Quoting is skipped only on a Windows master (see header comment).
    on_windows <- .Platform$OS.type == "windows"
    quote_arg <- if (on_windows) identity else shQuote

    # NAME=value settings appended to the worker's command line.
    env <- paste0("MASTER=", master, " PORT=", port, " OUT=",
        quote_arg(outfile), " SETUPTIMEOUT=", setup_timeout, " TIMEOUT=",
        timeout, " XDR=", useXDR)
    arg <- "parallel:::.slaveRSOCK()"
    rscript <- if (parallel:::getClusterOption("homogeneous", options)) {
        shQuote(parallel:::getClusterOption("rscript", options))
    }
    else "Rscript"
    rscript_args <- parallel:::getClusterOption("rscript_args", options)
    if (methods)
        rscript_args <- c("--default-packages=datasets,utils,grDevices,graphics,stats,methods",
            rscript_args)
    cmd <- if (length(rscript_args))
        paste(rscript, paste(rscript_args, collapse = " "), "-e",
            quote_arg(arg), env)
    else paste(rscript, "-e", quote_arg(arg), env)
    renice <- parallel:::getClusterOption("renice", options)
    # A renice value of NA or 0 leaves the command untouched.
    if (!is.na(renice) && renice)
        cmd <- sprintf("nice +%d %s", as.integer(renice), cmd)
    if (manual) {
        # Manual mode: only print the command for the user to run.
        cat("Manually start worker on", machine, "with\n    ",
            cmd, "\n")
        utils::flush.console()
    }
    else {
        if (machine != "localhost") {
            # Remote worker: wrap the whole command for the remote shell
            # launcher (the `rshcmd` option).
            rshcmd <- parallel:::getClusterOption("rshcmd", options)
            user <- parallel:::getClusterOption("user", options)
            cmd <- shQuote(cmd)
            cmd <- paste(rshcmd, "-l", user, machine, cmd)
        }
        if (on_windows) {
            system(cmd, wait = FALSE, input = "")
        }
        else system(cmd, wait = FALSE)
    }
    # Listen on `port` for the worker to connect back.
    con <- socketConnection("localhost", port = port, server = TRUE,
        blocking = TRUE, open = "a+b", timeout = timeout)
    structure(list(con = con, host = machine, rank = rank), class = if (useXDR)
        "SOCKnode"
    else "SOCK0node")
}
我计划在有机会时使用更完整的设置说明更新此回复。