【问题标题】:Parallel processing in R doParallel foreach save data(R doParallel foreach 中的并行处理保存数据)
【发布时间】:2016-06-19 18:29:47
【问题描述】:

在使并行处理部分正常工作方面取得了进展,但保存存有 fetch 距离(风区长度)的向量这一步无法正常工作。我得到的错误是:

df_Test_Fetch <- data.frame(x_lake_length)
Error in data.frame(x_lake_length) : object 'x_lake_length' not found
write.table(df_Test_Fetch,file="C:/tempTest_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
Error in is.data.frame(x) : object 'df_Test_Fetch' not found

我已尝试更改以下代码,以便将 foreach 步骤的结果输出到 x_lake_length,但这并没有像我希望的那样输出向量。如何将实际结果保存到 csv 文件中?我使用的是 Windows 8 计算机,运行 R x64 3.3.0。

提前谢谢你 仁

这是完整的代码。

 # Make sure there is no preexisting data from an earlier run.
# NOTE: rm() emits a warning when x_lake_length does not exist yet.
rm(x_lake_length)

# Libraries ---------------------------------------------------------------
# Install pacman if it is missing, then load the remaining packages through
# pacman::p_load (doParallel is listed twice).
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
               doParallel)

# HPC ---------------------------------------------------------------------
# Create a cluster with two fewer workers than detected cores and register it
# as the backend for foreach's %dopar%.
cores_2_use <- detectCores() - 2
cl          <- makeCluster(cores_2_use, useXDR = F)
# Seed the random-number streams of the workers.
clusterSetRNGStream(cl, 9956)
registerDoParallel(cl, cores_2_use)

# Data --------------------------------------------------------------------

# List the OGR drivers known to rgdal.
ogrDrivers()

# The "vectors" directory inside the installed rgdal package.
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# NOTE(review): setwd() returns the *previous* working directory, so the
# assignment below would not store the new path in dsn -- confirm intent.
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
# Inspect the layers in dsn, addressing the layer by directory first ...
ogrListLayers(dsn)
ogrInfo(dsn=dsn, layer="trin_inca_pl03")
# ... then by .shp file name, temporarily switching the working directory
# to dsn and restoring it afterwards.
owd <- getwd()
setwd(dsn)
ogrInfo(dsn="trin_inca_pl03.shp", layer="trin_inca_pl03")
setwd(owd)
# Read the layer into x and print a summary of it.
x <- readOGR(dsn=dsn, layer="trin_inca_pl03")
summary(x)

# Analysis ----------------------------------------------------------------  
# myfun(x, i): builds a lakeMorphoClass object from row i of x and stores
# lakeMaxLength(tmp, 200) in element i of a fresh numeric vector.
# NOTE(review): x_lake_length is created inside the function, so it is local
# to each call and never reaches the calling environment. The function's last
# expression is Sys.sleep(0.1), so the filled vector is not returned either.
myfun <- function(x,i){tmp<-lakeMorphoClass(x[i,],NULL,NULL,NULL)
x_lake_length<-vector("numeric",length = nrow(x))
x_lake_length[i]<-lakeMaxLength(tmp,200)
print(i)
Sys.sleep(0.1)}

# Call myfun for every row index of x through %dopar%, combining the
# per-iteration values with cbind.
# NOTE(review): the value of this foreach() call is not assigned to anything,
# so no object named x_lake_length is created by it.
foreach(i = 1:nrow(x),.combine=cbind,.packages=c("lakemorpho","rgdal"))  %dopar% (
  myfun(x,i)
)
options(digits=10)
# This is the line that fails with "object 'x_lake_length' not found":
# nothing above defines x_lake_length at this level (see the notes above).
df_Test_Fetch <- data.frame(x_lake_length)
# Write the data frame as comma-separated text with row and column names.
write.table(df_Test_Fetch,file="C:/temp/Test_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
# Print the process timings.
print(proc.time())

【问题讨论】:

    标签: r parallel-foreach


    【解决方案1】:

    我认为这就是您想要的,但如果不了解主题,我无法 100% 确定。

    我所做的是在您的并行函数中添加一个return(),并在您调用foreach 时将该返回对象的值分配给x_lake_length。但我只是猜测这就是你想要做的,所以如果我错了,请纠正我。

    # Make sure there is no preexisting result from an earlier run.
    # rm() warns when the object is missing, so only remove it if it exists
    # in the current environment.
    if (exists("x_lake_length", inherits = FALSE)) rm(x_lake_length)
    
    # Libraries ---------------------------------------------------------------
    if (!require("pacman")) install.packages("pacman")
    pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
                   doParallel)
    
    # HPC ---------------------------------------------------------------------
    # Leave two cores free, but always start at least one worker:
    # detectCores() may return NA, 1 or 2, which would make "- 2" unusable
    # as a worker count.
    cores_2_use <- max(1L, detectCores() - 2L, na.rm = TRUE)
    cl          <- makeCluster(cores_2_use, useXDR = FALSE)
    # Seed the workers' random-number streams so runs are reproducible.
    clusterSetRNGStream(cl, 9956)
    # Register the cluster as the %dopar% backend. The worker count comes from
    # the cluster object itself, so no separate cores argument is passed.
    registerDoParallel(cl)
    
    # Data --------------------------------------------------------------------

    # List the OGR drivers known to rgdal.
    ogrDrivers()

    # The "vectors" directory inside the installed rgdal package.
    dsn <- system.file("vectors", package = "rgdal")[1]
    # To run the script on your own data, point dsn at the folder that holds
    # the shapefile instead, e.g. dsn <- "J:/Elodea/ByHUC6", and change
    # layer_name below. Do not write dsn <- setwd(...): setwd() returns the
    # *previous* working directory, not the one just set.
    layer_name <- "trin_inca_pl03"

    # Inspect the layer, addressed first by directory and then by .shp file.
    # Building the file path with file.path() avoids changing the working
    # directory, which would stay changed if ogrInfo() raised an error.
    ogrListLayers(dsn)
    ogrInfo(dsn = dsn, layer = layer_name)
    ogrInfo(dsn = file.path(dsn, paste0(layer_name, ".shp")), layer = layer_name)

    # Read the layer into x and summarise it.
    x <- readOGR(dsn = dsn, layer = layer_name)
    summary(x)
    
    # Analysis ----------------------------------------------------------------
    # myfun(x, i): value of lakeMaxLength() for row i of x.
    #   x  the spatial object read with readOGR() above
    #   i  row index into x
    # Returns only the value for row i (presumably a single number -- confirm
    # against the lakemorpho documentation), so foreach can collect the results
    # into a plain vector. Returning a full-length vector per task, combined
    # with cbind, would produce an n-by-n matrix that is zero off the diagonal.
    # print() and Sys.sleep() were dropped: they only slow each task down and
    # contribute nothing to the returned result.
    myfun <- function(x, i) {
      tmp <- lakeMorphoClass(x[i, ], NULL, NULL, NULL)
      lakeMaxLength(tmp, 200)
    }

    # One task per row of x; results are concatenated in iteration order.
    # seq_len() is safe when x has zero rows (1:0 would iterate over c(1, 0)).
    x_lake_length <- foreach(
      i = seq_len(nrow(x)),
      .combine = c,
      .packages = c("lakemorpho", "rgdal")
    ) %dopar% {
      myfun(x, i)
    }

    # The workers are no longer needed; release them.
    stopCluster(cl)

    options(digits = 10)
    # One row per lake, in the same order as the rows of x.
    df_Test_Fetch <- data.frame(x_lake_length = x_lake_length)
    write.table(df_Test_Fetch, file = "C:/temp/Test_Fetch.csv",
                row.names = TRUE, col.names = TRUE, sep = ",")
    print(proc.time())
    

    【讨论】:

      猜你喜欢
      • 2016-10-08
      • 1970-01-01
      • 1970-01-01
      • 2016-04-02
      • 2015-09-28
      • 1970-01-01
      • 2016-05-09
      • 1970-01-01
      • 2018-10-17
      相关资源
      最近更新 更多