【问题标题】:How to perform regression analysis by groups and get the estimated coefficients for each group separately in R如何按组进行回归分析并在R中分别获得每个组的估计系数
【发布时间】:2022-11-27 20:57:15
【问题描述】:

我有这样的数据 (数据是作为示例给出的,因此两组具有相同的值)

    dat=structure(list(sku = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), period = c("30.09.2021", 
    "14.03.2019", "01.04.2022", "18.02.2022", "07.07.2021", "09.10.2020", 
    "17.01.2019", "10.11.2020", "14.07.2021", "10.09.2019", "31.01.2019", 
    "01.07.2021", "30.09.2021", "14.03.2019", "01.04.2022", "18.02.2022", 
    "07.07.2021", "09.10.2020", "17.01.2019", "10.11.2020", "14.07.2021", 
    "10.09.2019", "31.01.2019", "01.07.2021"), hist.prices = c(3728.16, 
    34899.84, 6126, 1789.44, 18098.4, 15633.6, 26174.88, 2401.56, 
    12668.88, 239500.8, 26174.88, 5429.52, 3728.16, 34899.84, 6126, 
    1789.44, 18098.4, 15633.6, 26174.88, 2401.56, 12668.88, 239500.8, 
    26174.88, 5429.52), hist.revenue = c(178951.68, 20102307.84, 
    367560, 42946.56, 4343616, 3752064, 11307548.16, 86456.16, 2128371.84, 
    965667225.6, 11307548.16, 390925.44, 178951.68, 20102307.84, 
    367560, 42946.56, 4343616, 3752064, 11307548.16, 86456.16, 2128371.84, 
    965667225.6, 11307548.16, 390925.44), hist.demand = c(254L, 276L, 
    272L, 250L, 299L, 297L, 291L, 260L, 270L, 275L, 295L, 279L, 254L, 
    276L, 272L, 250L, 299L, 297L, 291L, 260L, 270L, 275L, 295L, 279L
    ), hist.cost = c(12572.6698, 10498.9848, 14949.392, 13160.5, 
    14557.9512, 12443.3199, 10692.3294, 10893.116, 13145.976, 10222.6025, 
    10982.9975, 13584.1752, 12572.6698, 10498.9848, 14949.392, 13160.5, 
    14557.9512, 12443.3199, 10692.3294, 10893.116, 13145.976, 10222.6025, 
    10982.9975, 13584.1752), unity.cost = c(49.4987, 38.0398, 54.961, 
    52.642, 48.6888, 41.8967, 36.7434, 41.8966, 48.6888, 37.1731, 
    37.2305, 48.6888, 49.4987, 38.0398, 54.961, 52.642, 48.6888, 
    41.8967, 36.7434, 41.8966, 48.6888, 37.1731, 37.2305, 48.6888
    ), hist.profit = c(1336L, 1592L, 1128L, 1882L, 1387L, 1818L, 
    1357L, 1087L, 1253L, 1009L, 1092L, 1804L, 1336L, 1592L, 1128L, 
    1882L, 1387L, 1818L, 1357L, 1087L, 1253L, 1009L, 1092L, 1804L
    )), class = "data.frame", row.names = c(NA, -24L))

我需要进行回归分析并分别计算每个 sku(组变量)的系数。所有 sku 的需求函数都相同。然后我执行回归:

    # example of linear demand curve (first equation) 
    demand = function(p, alpha = -40, beta = 500, sd = 10) {
      error = rnorm(length(p), sd = sd)
      q = p*alpha + beta + error
      return(q)
    }

在此示例中,这仅适用于一个 sku,但对于所有可用的 sku 都是必需的。

    library(stargazer)
    model.fit = lm(hist.demand ~ hist.prices)
    stargazer(model.fit, type = 'html', header = FALSE) # output
    # estimated parameters
    beta = model.fit$coefficients[1]
    alpha = model.fit$coefficients[2]  
    p.revenue = -beta/(2*alpha) # estimated price for revenue
    p.profit = (alpha*unity.cost - beta)/(2*alpha) # estimated price for profit
    
    true.revenue = function(p) p*(-40*p + 500) # Revenue with true parameters (chunck demand)
    true.profit = function(p) (p - unity.cost)*(-40*p + 500) # price with true parameters
    # estimated curves
    estimated.revenue = function(p) p*(model.fit$coefficients[2]*p + model.fit$coefficients[1])
    estimated.profit = function(p) (p - unity.cost)*(model.fit$coefficients[2]*p + model.fit$coefficients[1])
    opt.revenue = true.revenue(p.revenue) # Revenue with estimated optimum price
    opt.profit = true.profit(p.profit) # Profit with estimated optimum price

如何分别为所有 sku 执行此代码,以便所需的输出是这样的

    sku opt.profit  opt.revenue
    1   722.0413    1562.041
    2   722.0413    1562.041

感谢您的宝贵帮助

【问题讨论】:

    标签: r dplyr


    【解决方案1】:

    我们可以通过以下方式按组从 lm 中获得估计值:

    library(tidyverse)
    library(broom)
    dat %>%
      group_split(sku) %>% 
      map_dfr(.f = function(df){
        lm(hist.demand ~ hist.prices, data = df) %>% 
          tidy() %>% 
          add_column(group = unique(df$sku), .before=1)
      })
    
      group term           estimate std.error statistic  p.value
      <int> <chr>             <dbl>     <dbl>     <dbl>    <dbl>
    1     1 (Intercept) 276.        5.64         48.9   3.09e-13
    2     1 hist.prices   0.0000198 0.0000793     0.249 8.08e- 1
    3     2 (Intercept) 276.        5.64         48.9   3.09e-13
    4     2 hist.prices   0.0000198 0.0000793     0.249 8.08e- 1
    

    【讨论】:

      猜你喜欢
      • 2021-10-04
      • 1970-01-01
      • 2018-07-26
      • 1970-01-01
      • 2021-08-14
      • 2011-10-21
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多