【问题标题】:R Iterate dataset generation(R 迭代数据集生成)
【发布时间】:2021-01-08 17:46:24
【问题描述】:

我想基于存储在长度为 21 的 num/chr 向量([1:21])中的 21 组参数,生成 21 个各含两个变量的随机正态分布 data.frame(使用 mvrnorm)。另外,每个 data.frame 还应包含两列字符串标签,同一个数据集内每一行的取值都相同(等我用 rbind 把它们合并到一起之后,这两列就有意义了)。

假设 AGQ.mu2.mat 存储第一个变量 (stddev = 1) 的平均值,Std.mu2.mat 存储第二个变量 (stddev = 20) 的平均值,ORIG 和 ID 分别存储两个字符串向量(我用一些 LotR 名称替换了数据),并让 corr = .30,n 始终为 300:

# Target correlation between the two simulated variables.
r <- 0.30

# Origin label per data set: seven origins, three consecutive sets each.
ORIG <- rep(c("Ang", "DGu", "Har", "Isg", "Mor", "MMo", "Rhu"), each = 3)

# Batch ID per data set: "a", "b", "c" repeated for every origin.
ID <- rep(c("a", "b", "c"), times = 7)

# Means of the first variable, one per data set (three per origin).
AGQ.mu2.mat <- c(
  -1.06, -1.27, -1.97,
  -3.27, -3.32, -2.32,
  -1.05, -0.36, -1.85,
  -2.30, -2.49, -2.90,
  -0.80, -0.62, -0.56,
  1.22, 1.62, 0.44,
  -1.43, -1.08, -2.34
)

# Means of the second variable, one per data set (three per origin).
Std.mu2.mat <- c(
  107, 100, 102,
  124, 118, 120,
  147, 152, 141,
  154, 160, 155,
  125, 123, 116,
  103, 106, 107,
  123, 126, 117
)

我想迭代

# Prototype for one data set: draw 300 bivariate-normal rows around the first
# pair of means, name the two columns, then tag every row with the first
# origin / batch labels.
# NOTE(review): rename() is presumably dplyr::rename (new = old); the snippet
# shows no library() call — confirm.
dt <- rename(
  data.frame(
    mvrnorm(
      n = 300,
      mu = c(AGQ.mu2.mat[1], Std.mu2.mat[1]),
      # Variances 1 and 20^2 on the diagonal, r * 20 off the diagonal.
      Sigma = matrix(c(1, r * 20, r * 20, 20^2), nrow = 2),
      empirical = TRUE
    )
  ),
  "AGQ" = "X1", "Std" = "X2"
)
dt[["Herkunft"]] <- ORIG[1]
dt[["Bat_ID"]] <- ID[1]

在参数向量的所有 21 个条目上,产生 21 个唯一命名的数据帧。

我试过了

# Attempted loop over all 21 parameter pairs.
# Every pass assigns its result to the same name `dt`, so each iteration
# replaces the previous one and only the data frame for i = 21 is left once
# the loop finishes; nothing is collected across iterations.
for (i in 1:21)
  {
# Draw 300 rows from a bivariate normal centred on the i-th pair of means.
dt<-data.frame(mvrnorm(n=300, mu=c(AGQ.mu2.mat[i],Std.mu2.mat[i]),
                       Sigma=matrix(c(1, r*20, r*20, (20^2)),
                                  nrow=2), empirical=TRUE))
# NOTE(review): rename() is presumably dplyr::rename (new = old); no
# library() call is shown in this snippet — confirm.
dt<-rename(dt, "AGQ" = "X1","Std" = "X2")
# Label every row with the i-th origin and batch ID.
dt$Herkunft<-ORIG[i]
dt$Bat_ID<-ID[i]
}

# Second attempt: hand all means to a single mvrnorm() call instead of looping.
# rep(..., each=1) returns each vector unchanged, so `mu` has 42 entries here
# while Sigma is still 2 x 2.
# NOTE(review): mvrnorm() needs length(mu) to match the dimension of Sigma, so
# this call is expected to stop with an error rather than yield 21 data sets —
# confirm against the MASS documentation.
dt<-data.frame(mvrnorm(n=300, mu=c(rep(AGQ.mu2.mat,each=1),rep(Std.mu2.mat, each=1)),
                       Sigma=matrix(c(1, r*20, r*20, (20^2)),
                                    nrow=2), empirical=TRUE))
# NOTE(review): the first column is called "Aggr" here but "AGQ" in the other
# snippets — confirm which name is intended.
dt<-rename(dt, "Aggr" = "X1","Std" = "X2")
# rep(..., each=1) is a no-op, so these assign the full 21-element label vectors.
dt$Herkunft<-rep(ORIG,each=1)
dt$Bat_ID<-rep(ID,each=1)

但都无济于事;这些写法只会一遍又一遍地生成同一个数据集,而不会创建各自唯一命名的新数据集。

我非常感谢任何有关如何迭代它的帮助或方法。提前致谢!

【问题讨论】:

  • 编辑:添加了 AGQ.mu2.mat 和 Std.mu2.mat。

标签: r loops iteration normal-distribution


【解决方案1】:

您需要用某种方式来收集每次迭代的结果。我选择把结果逐个追加到一个 list 中;另一种做法是使用 rbind。此外,需要用 nrow/ncol 参数把协方差矩阵构造成正确的形状。

library(MASS)

# Correlation between the two simulated variables.
r <- 0.30

# Origin and batch labels, one entry per data set.
ORIG <- c("Ang", "Ang", "Ang", "DGu", "DGu", "DGu", "Har",
          "Har", "Har", "Isg", "Isg", "Isg", "Mor", "Mor",
          "Mor", "MMo", "MMo", "MMo", "Rhu", "Rhu", "Rhu")
ID <- c("a", "b", "c", "a", "b", "c", "a", "b", "c",
        "a", "b", "c", "a", "b", "c", "a", "b", "c", "a", "b", "c")

# Placeholder means (1..21) for the two variables; substitute the real mean
# vectors here when they are available.
AGQ.mu2.mat <- 1:21
Std.mu2.mat <- 1:21

# All four parameter vectors are indexed in parallel, so they must have the
# same length; a mismatch would otherwise silently produce NA means or labels.
stopifnot(
  length(Std.mu2.mat) == length(AGQ.mu2.mat),
  length(ORIG) == length(AGQ.mu2.mat),
  length(ID) == length(AGQ.mu2.mat)
)

# The covariance matrix is identical for every data set (variances 1 and
# 20^2, covariance r * 20), so build it once outside the loop.
Sigma <- matrix(c(1, r * 20, r * 20, 20^2), nrow = 2)

# Preallocate one slot per data set instead of growing the list.
results <- vector("list", length(AGQ.mu2.mat))

for (i in seq_along(AGQ.mu2.mat)) {
  # empirical = TRUE makes the sample mean and covariance match mu and Sigma.
  dt <- data.frame(mvrnorm(n = 300,
                           mu = c(AGQ.mu2.mat[i], Std.mu2.mat[i]),
                           Sigma = Sigma,
                           empirical = TRUE))
  colnames(dt) <- c("AGQ", "Std")
  dt$Herkunft <- ORIG[i]
  dt$Bat_ID <- ID[i]

  # Store each data frame under its own index so nothing is overwritten.
  results[[i]] <- dt
}

合并所有结果:

# Stack every data frame stored in `results` row-wise into a single data frame.
do.call(rbind, results)

验证:

> cor(results[[1]][,1:2]);apply(results[[1]][,1:2], 2, mean);apply(results[[1]][,1:2], 2, sd)
    AGQ Std
AGQ 1.0 0.3
Std 0.3 1.0
AGQ Std 
  1   1 
AGQ Std 
  1  20 

【讨论】:

    【解决方案2】:

    这个使用 dplyr 和 purrr 的解决方案可能会有所帮助...

    library(MASS)
    library(dplyr)
    library(purrr)

    # Correlation shared by every simulated data set.
    r <- 0.30

    # Origin and batch labels, one entry per data set.
    ORIG <- rep(c("Ang", "DGu", "Har", "Isg", "Mor", "MMo", "Rhu"), each = 3)
    ID <- rep(c("a", "b", "c"), times = 7)

    # Means of the two variables, one pair per data set.
    AGQ.mu2.mat <- c(
      -1.06, -1.27, -1.97, -3.27, -3.32, -2.32, -1.05,
      -0.36, -1.85, -2.30, -2.49, -2.90, -0.80, -0.62,
      -0.56, 1.22, 1.62, 0.44, -1.43, -1.08, -2.34
    )
    Std.mu2.mat <- c(
      107, 100, 102, 124, 118, 120, 147,
      152, 141, 154, 160, 155, 125, 123,
      116, 103, 106, 107, 123, 126, 117
    )

    # Walk the four vectors in parallel: each step simulates 300 rows for one
    # pair of means and attaches that data set's origin and batch labels.
    listofdf <- purrr::pmap(
      list(AGQ.mu2.mat, Std.mu2.mat, ORIG, ID),
      function(agq_mean, std_mean, origin, batch) {
        data.frame(MASS::mvrnorm(
          n = 300,
          mu = c(agq_mean, std_mean),
          Sigma = matrix(c(1, r * 20, r * 20, 20^2), nrow = 2),
          empirical = TRUE
        )) %>%
          mutate(Herkunft = origin, Bat_ID = batch)
      }
    )

    # Peek at the fourth data set. No seed is set, so values differ per run.
    # NOTE(review): the X2 values printed below are nowhere near the fourth
    # Std mean (124), so this printout looks older than the mean vectors
    # above — rerun to refresh.
    head(listofdf[[4]])
    #>          X1         X2 Herkunft Bat_ID
    #> 1 0.4329040  17.616240      DGu      a
    #> 2 2.6589457  -9.385099      DGu      a
    #> 3 2.3626722   9.815020      DGu      a
    #> 4 1.8998816 -24.551505      DGu      a
    #> 5 0.2506713   0.792565      DGu      a
    #> 6 3.4961960   8.112878      DGu      a

    ### since you mentioned binding them all together
    onebigdf <- bind_rows(listofdf)
    

    【讨论】:

    • 我多么愚蠢。我编辑了问题以添加它们。感谢您的解决方案!
    • 我进行了编辑以使其成为有效代码,我的解决方案仍然有效
    猜你喜欢
    • 1970-01-01
    • 2021-11-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2020-10-09
    • 2018-05-16
    • 1970-01-01
    相关资源
    最近更新 更多