【问题标题】:Calculate the values of a probability density function for multiple columns of means and a single column of sigma计算多列均值和单列 sigma 的概率密度函数值
【发布时间】:2019-07-04 05:56:18
【问题描述】:

给定下面的示例数据 sampleDT,其中包含均值列 mean1 到 mean10 以及标准差列 sd2,希望有人能帮我创建一个函数,实现以下目标:

对于每一列均值估计(mean1 到 mean10),以该列为均值、以 sd2 为标准差,计算条件正态分布的密度函数在变量 dollar.wage_1 的观测值处的取值,并将结果添加到数据框中。

使用下面的代码,我可以成功地针对单列均值计算密度值,但我不知道如何编写一个函数来同时对每一列均值进行计算。

#样本数据

# Sample data in dput() form: a 10-row tibble (class "tbl_df"/"tbl"/"data.frame").
# Columns: id, N, A, B, C, employer, F, G, H, dollar.wage_1 (the observed value
# at which densities are evaluated), mean1..mean10 (candidate means), sd1
# (constant across rows) and sd2 (row-specific, ranging from about 1.75 up to
# about 2.46e+49).
# The `spec` attribute holds per-column collector classes and looks like a
# readr column specification left over from reading the data -- presumably it
# is not needed for the density calculation.
sampleDT<-structure(list(id = 1:10, N = c(10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L), A = c(62L, 96L, 17L, 41L, 212L, 143L, 143L, 
143L, 73L, 73L), B = c(3L, 1L, 0L, 2L, 170L, 21L, 0L, 33L, 62L, 
17L), C = c(0.05, 0.01, 0, 0.05, 0.8, 0.15, 0, 0.23, 0.85, 0.23
), employer = c(1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L), F = c(0L, 
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L), G = c(1.94, 1.19, 1.16, 
1.16, 1.13, 1.13, 1.13, 1.13, 1.12, 1.12), H = c(0.14, 0.24, 
0.28, 0.28, 0.21, 0.12, 0.17, 0.07, 0.14, 0.12), dollar.wage_1 = c(1.94, 
1.19, 3.16, 3.16, 1.13, 1.13, 2.13, 1.13, 1.12, 1.12), mean1 = c(1.936652081, 
3.688171386, 3.160993574, 3.768485048, 1.311370546, 0.313760016, 
-1.621000294, 1.13182676, 1.114458025, 1.119315775), mean2 = c(1.946806222, 
3.688885811, 3.15903495, 3.767778705, 1.309663497, 0.316394741, 
-1.618552806, 1.134088181, 1.117600968, 1.120688482), mean3 = c(1.893627954, 
3.689341572, 3.157622975, 3.771231512, 1.324985578, 0.318026311, 
-1.620565712, 1.13301769, 1.120760085, 1.119426932), mean4 = c(1.887509366, 
3.660243949, 3.160911994, 3.738992465, 1.331637143, 0.284716279, 
-1.655368774, 1.137338962, 1.122096234, 1.120837428), mean5 = c(7.071170501, 
3.458558276, 3.156676637, 3.160692822, 1.131841192, 1.126997224, 
1.028924299, 1.219378155, 0.118097115, 1.118108075), mean6 = c(7.010141264, 
3.434098438, 3.160978044, 3.161388054, 1.131706507, 1.131073576, 
1.044957033, 1.202376831, 0.088502176, 1.120101488), mean7 = c(6.918631396, 
3.455412441, 3.064840549, 3.158657611, 1.134281965, 1.131677907, 
1.035688483, 1.181551066, 0.542276222, 1.121549931), mean8 = c(6.980214117, 
3.513440689, 3.175191087, 3.158919334, 1.130088008, 1.131692248, 
1.12222788, 1.235102249, 0.281700405, 1.118473791), mean9 = c(6.708505027, 
3.504542699, 3.173629275, 3.158457814, 1.134560107, 1.129357587, 
1.151489857, 1.219991269, 0.364343124, 1.120228667), mean10 = c(6.883206883, 
3.467216323, 3.174805298, 3.160917024, 1.128835398, 1.128265912, 
1.084046983, 1.214981489, 0.160046133, 1.118496504), sd1 = c(2.6334129999306, 
2.6334129999306, 2.6334129999306, 2.6334129999306, 2.6334129999306, 
2.6334129999306, 2.6334129999306, 2.6334129999306, 2.6334129999306, 
2.6334129999306), sd2 = c(514.02608349227, 101.976862386691, 
8.70627514696715, 4.79710442214283, 2.45930925299156e+49, 2.01406038865916e+30, 
1.8980055884822e+34, 1.65244344266379e+28, 26.9398910547703, 
1.74978644797635)), row.names = c(NA, -10L), spec = structure(list(
    cols = list(id = structure(list(), class = c("collector_integer", 
    "collector")), N = structure(list(), class = c("collector_integer", 
    "collector")), A = structure(list(), class = c("collector_integer", 
    "collector")), B = structure(list(), class = c("collector_integer", 
    "collector")), C = structure(list(), class = c("collector_double", 
    "collector")), employer = structure(list(), class = c("collector_integer", 
    "collector")), F = structure(list(), class = c("collector_integer", 
    "collector")), G = structure(list(), class = c("collector_double", 
    "collector")), H = structure(list(), class = c("collector_double", 
    "collector")), dollar.wage_1 = structure(list(), class = c("collector_double", 
    "collector")), mean1 = structure(list(), class = c("collector_double", 
    "collector")), mean2 = structure(list(), class = c("collector_double", 
    "collector")), mean3 = structure(list(), class = c("collector_double", 
    "collector")), mean4 = structure(list(), class = c("collector_double", 
    "collector")), mean5 = structure(list(), class = c("collector_double", 
    "collector")), mean6 = structure(list(), class = c("collector_double", 
    "collector")), mean7 = structure(list(), class = c("collector_double", 
    "collector")), mean8 = structure(list(), class = c("collector_double", 
    "collector")), mean9 = structure(list(), class = c("collector_double", 
    "collector")), mean10 = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector"))), class = "col_spec"), class = c("tbl_df", 
"tbl", "data.frame"))

#我的方法

# Row-wise normal density of dollar.wage_1, using mean1 as the mean and sd2 as
# the standard deviation; stored as a new column `dens_test`.
sampleDT[["dens_test"]] <- with(
  sampleDT,
  dnorm(dollar.wage_1, mean = mean1, sd = sd2)
)

提前感谢您的帮助。

【问题讨论】:

    标签: r function data.table tidyverse probability-density


    【解决方案1】:

    我们可能会进一步采用您的方法:

    # Gather every column whose name contains "mean" into one numeric matrix.
    is_mean_col <- grepl("mean", names(sampleDT))
    means <- as.matrix(sampleDT[, is_mean_col])
    # One density per element of `means`; dollar.wage_1 and sd2 (one value per
    # row) are recycled down each column of the matrix.
    with(sampleDT, dnorm(dollar.wage_1, mean = means, sd = sd2))
    

    通过这种方式,我们传入的是一个均值矩阵,而 dollar.wage_1 和 sd2 也会借助 R 的向量循环补齐(recycling)机制被正确地逐行使用。

    然后你可以简单地用 cbind 将这个结果添加到 sampleDT 中。

    【讨论】:

      【解决方案2】:

      这是我使用 data.table 的方法:

      # Requires the data.table package to be attached.
      sampleDT <- as.data.table(sampleDT)

      # Discover the mean columns instead of hard-coding 1:10, so the code
      # keeps working when the number of mean columns changes.
      mean_cols <- grep("^mean[0-9]+$", names(sampleDT), value = TRUE)
      # Output names mirror the inputs: mean1 -> dnorm1, ..., mean10 -> dnorm10.
      dens_cols <- sub("^mean", "dnorm", mean_cols)

      # dnorm() is already vectorised over x, mean and sd, so no mapply() is
      # needed; all density columns are added by reference in a single `:=`.
      if (length(mean_cols) > 0L) {
        sampleDT[,
          (dens_cols) := lapply(.SD, function(mu) {
            dnorm(dollar.wage_1, mean = mu, sd = sd2)
          }),
          .SDcols = mean_cols
        ]
      }
      

      【讨论】:

        【解决方案3】:

        或者放入一个 sapply()。

        # vapply() is the type-stable form of sapply(): it always returns a
        # numeric matrix with one row per observation and one column per mean
        # column, even when no column matches (sapply() would return list()).
        vapply(
          grep("^mean", names(sampleDT)),
          function(x) {
            # Density of the observed wage under N(mean column x, sd2).
            dnorm(sampleDT$dollar.wage_1, sampleDT[[x]], sampleDT$sd2)
          },
          FUN.VALUE = numeric(nrow(sampleDT))
        )
        

        【讨论】:

          【解决方案4】:

          这是dplyr::mutate_at()的典型案例:

          # Requires dplyr. Operates on sampleDT: a bare `df` would resolve to
          # stats::df (the F-distribution density function) unless the data
          # had been assigned to that name.
          # funs() is deprecated; a named list of formula lambdas is its
          # replacement and yields the same output names (mean1_dens, ...).
          # On dplyr >= 1.0.0 the equivalent is
          # mutate(across(matches("^mean"), ..., .names = "{.col}_dens")).
          sampleDT %>%
            mutate_at(
              vars(matches("^mean")),
              list(dens = ~ dnorm(dollar.wage_1, mean = ., sd = sd2))
            )
          

          输出将是一个完整的数据集,你不需要绑定任何东西。

          【讨论】:

            猜你喜欢
            • 2020-04-22
            • 1970-01-01
            • 2017-09-20
            • 1970-01-01
            • 1970-01-01
            • 1970-01-01
            • 2017-09-07
            • 2020-04-17
            • 2021-05-25
            相关资源
            最近更新 更多