【问题标题】:Calculated (row-wise) columns using column ranges or select pattern使用列范围或选择模式计算的(逐行)列
【发布时间】:2023-04-03 17:40:01
【问题描述】:
gl <- tibble::tribble(
                   ~X4010, ~X4020, ~X4030,  ~X5010, ~X5020, ~X5030, ~X5040,
                     1000,    100,     10,     500,    100,     50,      5,
                     1100,    110,     11,     550,    110,     55,    5.5,
                     1210,    121,   12.1,     605,    121,   60.5,   6.05,
                     1331,  133.1,  13.31,   665.5,  133.1,  66.55,   6.65,
                   1464.1, 146.41,  14.64,  732.05, 146.41,   73.2,   7.32,
                  1610.51, 161.05,   16.1,  805.25, 161.05,  80.52,   8.05,
                  1771.56, 177.15,  17.71,  885.78, 177.15,  88.57,   8.85,
                  1948.71, 194.87,  19.48,  974.35, 194.87,  97.43,   9.74,
                  2143.58, 214.35,  21.43, 1071.79, 214.35, 107.17,  10.71,
                  2357.94, 235.79,  23.57, 1178.97, 235.79, 117.89,  11.78
                  )

需要创建两个计算列,按行添加列,如下所示:

gl %>% 
  mutate(C1 = X4010+X4020+X4030, C2 =  X5010+X5020+X5030+X5040 )

但是,由于 X 变量的数量非常多,我不想单独添加列,而是指定范围,例如 X4010:X4030select(starts_with("X40"))

如何修改C1C2 的公式以使用范围或模式?谢谢。

想要的输出如下:

result <- tibble::tribble(
                   ~X4010, ~X4020, ~X4030,  ~X5010, ~X5020, ~X5030, ~X5040,     ~C1,     ~C2,
                     1000,    100,     10,     500,    100,     50,      5,    1110,     655,
                     1100,    110,     11,     550,    110,     55,    5.5,    1221,   720.5,
                     1210,    121,   12.1,     605,    121,   60.5,   6.05,  1343.1,  792.55,
                     1331,  133.1,  13.31,   665.5,  133.1,  66.55,   6.65, 1477.41,   871.8,
                   1464.1, 146.41,  14.64,  732.05, 146.41,   73.2,   7.32, 1625.15,  958.98,
                  1610.51, 161.05,   16.1,  805.25, 161.05,  80.52,   8.05, 1787.66, 1054.87,
                  1771.56, 177.15,  17.71,  885.78, 177.15,  88.57,   8.85, 1966.42, 1160.35,
                  1948.71, 194.87,  19.48,  974.35, 194.87,  97.43,   9.74, 2163.06, 1276.39,
                  2143.58, 214.35,  21.43, 1071.79, 214.35, 107.17,  10.71, 2379.36, 1404.02,
                  2357.94, 235.79,  23.57, 1178.97, 235.79, 117.89,  11.78,  2617.3, 1544.43
                  )

【问题讨论】:

    标签: r dplyr


    【解决方案1】:

    我希望这是您正在寻找的:

    library(dplyr)
    
    gl %>%
      rowwise() %>%
      mutate(C1 = sum(c_across(starts_with("X40"))),
             C2 = sum(c_across(starts_with("X50"))))
    
    # A tibble: 10 x 9
    # Rowwise: 
       X4010 X4020 X4030 X5010 X5020 X5030 X5040    C1    C2
       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
     1 1000   100   10    500   100   50    5    1110   655 
     2 1100   110   11    550   110   55    5.5  1221   720.
     3 1210   121   12.1  605   121   60.5  6.05 1343.  793.
     4 1331   133.  13.3  666.  133.  66.6  6.65 1477.  872.
     5 1464.  146.  14.6  732.  146.  73.2  7.32 1625.  959.
     6 1611.  161.  16.1  805.  161.  80.5  8.05 1788. 1055.
     7 1772.  177.  17.7  886.  177.  88.6  8.85 1966. 1160.
     8 1949.  195.  19.5  974.  195.  97.4  9.74 2163. 1276.
     9 2144.  214.  21.4 1072.  214. 107.  10.7  2379. 1404.
    10 2358.  236.  23.6 1179.  236. 118.  11.8  2617. 1544.
    

    【讨论】:

      【解决方案2】:

      使用split.default -

      cbind(gl, sapply(split.default(gl, sub('(X\\d).*', '\\1', names(gl))), rowSums))
      
      #     X4010  X4020 X4030   X5010  X5020  X5030 X5040      X4      X5
      #1  1000.00 100.00 10.00  500.00 100.00  50.00  5.00 1110.00  655.00
      #2  1100.00 110.00 11.00  550.00 110.00  55.00  5.50 1221.00  720.50
      #3  1210.00 121.00 12.10  605.00 121.00  60.50  6.05 1343.10  792.55
      #4  1331.00 133.10 13.31  665.50 133.10  66.55  6.65 1477.41  871.80
      #5  1464.10 146.41 14.64  732.05 146.41  73.20  7.32 1625.15  958.98
      #6  1610.51 161.05 16.10  805.25 161.05  80.52  8.05 1787.66 1054.87
      #7  1771.56 177.15 17.71  885.78 177.15  88.57  8.85 1966.42 1160.35
      #8  1948.71 194.87 19.48  974.35 194.87  97.43  9.74 2163.06 1276.39
      #9  2143.58 214.35 21.43 1071.79 214.35 107.17 10.71 2379.36 1404.02
      #10 2357.94 235.79 23.57 1178.97 235.79 117.89 11.78 2617.30 1544.43
      

      【讨论】:

        【解决方案3】:

        我们还可以使用pivot_longer 重塑为“长”格式,并为每列获取sum 并将它们绑定到原始数​​据

        library(dplyr)
        library(tidyr)
        gl %>% 
             mutate(rn = row_number()) %>%
             pivot_longer(cols = -rn, names_to = c('.value', 'grp'), 
                names_pattern = '^(X\\d)(.*)') %>%
             group_by(rn) %>% 
             summarise(across(starts_with('X'), sum, na.rm = TRUE)) %>% 
             select(-rn) %>% 
             bind_cols(gl, .)
        # A tibble: 10 x 9
        #   X4010 X4020 X4030 X5010 X5020 X5030 X5040    X4    X5
        #   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
        # 1 1000   100   10    500   100   50    5    1110   655 
        # 2 1100   110   11    550   110   55    5.5  1221   720.
        # 3 1210   121   12.1  605   121   60.5  6.05 1343.  793.
        # 4 1331   133.  13.3  666.  133.  66.6  6.65 1477.  872.
        # 5 1464.  146.  14.6  732.  146.  73.2  7.32 1625.  959.
        # 6 1611.  161.  16.1  805.  161.  80.5  8.05 1788. 1055.
        # 7 1772.  177.  17.7  886.  177.  88.6  8.85 1966. 1160.
        # 8 1949.  195.  19.5  974.  195.  97.4  9.74 2163. 1276.
        # 9 2144.  214.  21.4 1072.  214. 107.  10.7  2379. 1404.
        #10 2358.  236.  23.6 1179.  236. 118.  11.8  2617. 1544.
        

        【讨论】:

          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2021-07-07
          • 2020-12-23
          • 2022-03-06
          • 1970-01-01
          • 2021-11-30
          • 2022-01-24
          相关资源
          最近更新 更多