【问题标题】:Assign multiple results from function when grouping分组时从函数中分配多个结果
【发布时间】:2019-05-08 07:41:30
【问题描述】:

我的代码完全符合我的要求,但我必须为每个组调用我的函数三次,这似乎非常低效。

library(data.table)

myRegr = function(x, y) {
    regr = lm.fit(cbind(1, x), y)
    coefs = regr$coef
    k = coefs[[2]]
    m = coefs[[1]]
    r2 = 1 - var(regr$residuals) / var(y)

    return (c(k = k, m = m, r2 = r2))
}

dt = data.table(a = c(0, 0, 0, 1, 1, 1), 
                x = c(12, 21, 15, 34, 32, 31), 
                y = c(3, 1, 6, 4, 2, 8))

result = dt[,list(minX = min(x),
                    minY = min(y),
                    k = myRegr(x, y)["k"],
                    m = myRegr(x, y)["m"],
                    r2 = myRegr(x, y)["r2"]
                ),
                by = list(a)]


print(result)

输出:

a minX minY          k         m        r2
0   12    1 -0.3095238  8.285714 0.3176692
1   31    2 -1.0000000 37.000000 0.2500000

知道如何将其重写为只调用一次函数吗?


更新: 我的示例没有涵盖完整的问题,因为我选择了第四列,这是一个更好的示例:

library(data.table)

myRegr = function(x, y) {
    regr = lm.fit(cbind(1, x), y)
    coefs = regr$coef
    k = coefs[[2]]
    m = coefs[[1]]
    r2 = 1 - var(regr$residuals) / var(y)

    return (c(k = k, m = m, r2 = r2))
}

df = data.frame(a = c(0, 0, 0, 1, 1, 1), 
                x = c(12, 21, 15, 34, 32, 31), 
                y = c(3, 1, 6, 4, 2, 8),
                time = as.POSIXct(c("2019-01-01 08:12:00", "2019-01-01 08:13:00", "2019-01-01 08:14:00", "2019-01-01 08:12:00", "2019-01-01 08:13:00", "2019-01-01 08:14:00")))

dt = data.table(df)

result = dt[, list(firstX = x[time == min(time)],
                firstY = y[time == min(time)],
                k = myRegr(x, y)["k"],
                m = myRegr(x, y)["m"],
                r2 = myRegr(x, y)["r2"]
            ),
            by = a]


print(result)

输出:

a firstX firstY          k         m        r2
0     12      3 -0.3095238  8.285714 0.3176692
1     34      4 -1.0000000 37.000000 0.2500000

尝试将它全部包装在一个函数中,但它实际上减慢了速度:

library(data.table)

myRegrList = function(group) {
    firstX = group[,x[time == min(time)]]
    firstY = group[,y[time == min(time)]]

    regr = lm.fit(cbind(1, group$x), group$y)
    coefs = regr$coef
    k = coefs[[2]]
    m = coefs[[1]]
    r2 = 1 - var(regr$residuals) / var(group$y)

    return (list(firstX = firstX, firstY = firstY, k = k, m = m, r2 = r2))
}

result = dt[, myRegrList(.SD), by = a]

print(result)

【问题讨论】:

    标签: r data.table


    【解决方案1】:

    如果你让你的函数返回一个列表,你只需要调用

    dt[, myRegr(x, y), by = a]
    #   a minX minY          k         m        r2
    #1: 0   12    1 -0.3095238  8.285714 0.3176692
    #2: 1   31    2 -1.0000000 37.000000 0.2500000
    

    myRegr = function(x, y) {
      regr = lm.fit(cbind(1, x), y)
      coefs = regr$coef
      k = coefs[[2]]
      m = coefs[[1]]
      r2 = 1 - var(regr$residuals) / var(y)
    
      return (list(# minX = min(x),
                   # minY = min(y),
                   k = k,
                   m = m,
                   r2 = r2))
    }
    

    更新

    您可以为 xy 值设置子集,然后加入您的函数结果

    result <- dt[dt[, .I[which.min(time)], by = a]$V1, .(a, x, y)]
    result <- result[dt[, myRegr(x, y), by = a], on = .(a)]
    result
    #   a  x y          k         m        r2
    #1: 0 12 3 -0.3095238  8.285714 0.3176692
    #2: 1 34 4 -1.0000000 37.000000 0.2500000
    

    【讨论】:

      【解决方案2】:

      您可以修改您的函数以返回一个向量和dcast 最终结果:

      library(data.table)
      myRegr = function(x, y) {
        regr <- lm.fit(cbind(1, x), y)
        c(
          regr$coef[[1]],
          regr$coef[[2]],
          1 - var(regr$residuals) / var(y)
        )
      }
      result <- df[, .(minX = min(x), minY = min(y), myRegr(x, y), c("m", "k", "r2")), a]
      dcast(result, a + minX + minY ~ V4, value.var = "V3")
      

      这个解决方案并不完美,因为我必须创建V4(添加c("m", "k", "r2") 向量)。应该有更好的方法来做到这一点(甚至可能不使用dcast)。也许更有经验的data.table 用户可以就此提出建议?


      数据:

      df <- data.table(
        a = c(0, 0, 0, 1, 1, 1), 
        x = c(12, 21, 15, 34, 32, 31), 
        y = c(3, 1, 6, 4, 2, 8)
      )
      

      【讨论】:

        猜你喜欢
        • 2018-02-02
        • 2013-05-02
        • 1970-01-01
        • 2015-02-17
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多