【问题标题】:Creating a dataframe from vectors for mean, min, and max in r(在 R 中用均值、最小值和最大值的向量创建数据框)
【发布时间】:2020-06-23 02:39:46
【问题描述】:

我正在尝试创建一个新数据帧,其中包含现有数据帧 (top_SIM_weekly) 和另一个数据帧 (df_obs_weekly) 的平均值、最小值和最大值。

我已包含可重现的数据集:dput(top_SIM_weekly)

structure(list(X1 = c(18.3039606, 18.3039606, 18.3039606, 18.3039606, 
    18.3039606, 18.3039606), X2 = c(30.73478281, 30.73478281, 30.73478281, 
    30.73478281, 30.73478281, 30.73478281), X3 = c(50.88565679, 50.88565679, 
    50.88565679, 50.88565679, 50.88565679, 50.88565679), X4 = c(35.40195165, 
    35.40195165, 35.40195165, 35.40195165, 35.40195165, 35.40195165 ), X5 = c(23.12681287, 23.12681287, 23.12681287, 23.12681287, 
    23.12681287, 23.12681287), X6 = c(41.35229054, 41.35229054, 41.35229054, 
    41.35229054, 41.35229054, 41.35229054), X7 = c(67.48505048, 67.48505048, 
    67.48505048, 67.48505048, 67.48505048, 67.48505048), X8 = c(53.04008984, 
    53.04008984, 53.04008984, 53.04008984, 53.04008984, 53.04008984 ), X9 = c(58.26971959, 58.26971959, 58.26971959, 58.26971959, 
    58.26971959, 58.26971959), X10 = c(111.1841342, 111.1841342, 
    111.1841342, 111.1841342, 111.1841342, 111.1841342), X11 = c(100.7146155, 
    100.7146155, 100.7146155, 100.7146155, 100.7146155, 100.7146155 ), X12 = c(224.9074216, 224.9074216, 224.9074216, 224.9074216, 
    224.9074216, 224.9074216), X13 = c(255.2490203, 255.2490203, 
    255.2490203, 255.2490203, 255.2490203, 255.2490203), X14 = c(404.1588161, 
    404.1588161, 404.1588161, 404.1588161, 404.1588161, 404.1588161 ), X15 = c(443.9414483, 443.9414483, 443.9414483, 443.9414483, 
    443.9414483, 443.9414483), X16 = c(886.3440158, 886.3440158, 
    886.3440158, 886.3440158, 886.3440158, 886.3440158), X17 = c(1120.305186, 
    1120.305186, 1120.305186, 1120.305186, 1120.305186, 1120.305186 ), X18 = c(1295.250279, 1295.250279, 1295.250279, 1295.250279, 
    1295.250279, 1295.250279), X19 = c(1227.257398, 1227.257398, 
    1227.257398, 1227.257398, 1227.257398, 1227.257398), X20 = c(462.4336653, 
    462.4336653, 462.4336653, 462.4336653, 462.4336653, 462.4336653 ), X21 = c(533.812084, 533.812084, 533.812084, 533.812084, 533.812084, 
    533.812084), X22 = c(-0.895926113, -0.895926113, -0.895926113, 
    -0.895926113, -0.895926113, -0.895926113), X23 = c(0, 0, 0, 0,  0, 0), X24 = c(0.00587596, 0.00587596, 0.00587596, 0.00587596, 
    0.00587596, 0.00587596), X25 = c(0, 0, 0, 0, 0, 0), X26 = c(0,  0, 0, 0, 0, 0), X27 = c(0, 0, 0, 0, 0, 0), X28 = c(0, 0, 0, 0,  0, 0), X29 = c(0, 0, 0, 0, 0, 0), X30 = c(0, 0, 0, 0, 0, 0), 
        X31 = c(0, 0, 0, 0, 0, 0), X32 = c(0, 0, 0, 0, 0, 0), X33 = c(0, 
        0, 0, 0, 0, 0), X34 = c(0, 0, 0, 0, 0, 0), X35 = c(0, 0, 
        0, 0, 0, 0), X36 = c(0, 0, 0, 0, 0, 0), X37 = c(0, 0, 0, 
        0, 0, 0), X38 = c(0, 0, 0, 0, 0, 0), X39 = c(0, 0, 0, 0, 
        0, 0), X40 = c(0, 0, 0, 0, 0, 0), X41 = c(0.076302837, 0.076302837, 
        0.076302837, 0.076302837, 0.076302837, 0.076302837), X42 = c(0.04497379, 
        0.04497379, 0.04497379, 0.04497379, 0.04497379, 0.04497379
        ), X43 = c(0.47941265, 0.47941265, 0.47941265, 0.47941265, 
        0.47941265, 0.47941265), X44 = c(1.70405395, 1.70405395, 
        1.70405395, 1.70405395, 1.70405395, 1.70405395), X45 = c(1.069342585, 
        1.069342585, 1.069342585, 1.069342585, 1.069342585, 1.069342585
        ), X46 = c(0.642559657, 0.642559657, 0.642559657, 0.642559657, 
        0.642559657, 0.642559657), X47 = c(1.423669878, 1.423669878, 
        1.423669878, 1.423669878, 1.423669878, 1.423669878), X48 = c(2.947726076, 
        2.947726076, 2.947726076, 2.947726076, 2.947726076, 2.947726076
        ), X49 = c(2.083167482, 2.083167482, 2.083167482, 2.083167482, 
        2.083167482, 2.083167482), X50 = c(3.001766438, 3.001766438, 
        3.001766438, 3.001766438, 3.001766438, 3.001766438), X51 = c(4.02263428, 
        4.02263428, 4.02263428, 4.02263428, 4.02263428, 4.02263428
        ), X52 = c(5.236041751, 5.236041751, 5.236041751, 5.236041751, 
        5.236041751, 5.236041751), year = c(2018, 2018, 2018, 2018, 
        2018, 2018), corr = c(0.598566601816163, 0.598566601816163, 
        0.598566601816163, 0.598566601816163, 0.598566601816163, 
        0.598566601816163)), row.names = 272:277, class = "data.frame")

还有dput(df_obs_weekly)

structure(list(Epiweek = structure(1L, .Label = "n", class = "factor"), 
X1 = 119L, X2 = 103L, X3 = 96L, X4 = 99L, X5 = 53L, X6 = 91L, 
X7 = 94L, X8 = 101L, X9 = 106L, X10 = 132L, X11 = 134L, X12 = 164L, 
X13 = 189L, X14 = 275L, X15 = 302L, X16 = 416L, X17 = 493L, 
X18 = 201L, X19 = NaN, X20 = NaN, X21 = 410L, X22 = 370L, 
X23 = 426L, X24 = 311L, X25 = 253L, X26 = 264L, X27 = 175L, 
X28 = 158L, X29 = 128L, X30 = 124L, X31 = 96L, X32 = 83L, 
X33 = 69L, X34 = 47L, X35 = 72L, X36 = 64L, X37 = 62L, X38 = 61L, 
X39 = 53L, X40 = 70L, X41 = 75L, X42 = 62L, X43 = 75L, X44 = 44L, 
X45 = 66L, X46 = 86L, X47 = 91L, X48 = 88L, X49 = 86L, X50 = 135L, 
X51 = 95L, X52 = 99L, year = 2018, corr = 1), row.names = 1L, class = "data.frame")

到目前为止,我已尝试使用以下代码从均值、最小值和最大值的向量创建矩阵:

# Computes the mean, max and min of every numeric column of top_SIM_weekly
# and stacks the three one-row results into `y`, one row per statistic.
# NOTE(review): the results are stored under the names `mean`, `max` and
# `min`. Once these lines have run, `mean` in the workspace is a data frame,
# so re-running the first line passes that data frame (not the function) as
# the second argument of summarise_if(). This is the likely cause of the
# "expecting a one sided formula, a function, or a function name" error;
# confirm by running rm(mean, max, min) and retrying.
mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean)
max <- top_SIM_weekly %>% summarise_if(is.numeric, max)
min <- top_SIM_weekly %>% summarise_if(is.numeric, min)
y <- rbind(mean,max,min)
# Label the rows in the same order in which they were bound above.
row.names(y) <- c("mean","max","min")

但我收到以下错误:Error: expecting a one sided formula, a function, or a function name. 这个错误之前没有出现过,所以我不确定发生了什么。另外,我还想转置最终的数据帧(交换行和列),并以第一行作为列名。

【问题讨论】:

  • 传给 rbind 的 mean、max、min 对象并不存在
  • df_obs_weekly 用在哪里?
  • 我调整了代码以正确反映 mean、max、min 对象。

标签: r dplyr


【解决方案1】:

也许,我们需要

# Row-binds three objects, naming the arguments mean, max and min.
# NOTE(review): x1, x2 and x3 are not defined in this snippet; presumably they
# hold the mean, max and min summaries computed beforehand -- confirm.
rbind(mean = x1, max = x2, min = x3)

因为在调用 rbind 之前并没有创建 mean、max、min 这几个对象


由于'year'和'corr'变量也是numeric,使用starts_with可能会更好

library(dplyr)
library(tidyr)

# For every column whose name starts with "X", pack its mean, max and min
# (in that order) into a single list cell, then unnest all columns so that
# each statistic ends up on its own row.
top_SIM_weekly %>%
  summarise(
    across(
      starts_with("X"),
      function(col) list(c(mean = mean(col), max = max(col), min = min(col)))
    )
  ) %>%
  unnest(cols = everything())

另一种方法是使用 select 选取 numeric 变量,用 pivot_longer 转换为“长”格式,然后按变量名分组,分别计算 mean、min 和 max

library(dplyr) # >= 1.0.0 (where() and the .groups argument)
library(tidyr)
library(tibble)

# Mean / max / min of every numeric column of top_SIM_weekly, returned as a
# matrix with one row per statistic (Mean, Max, Min) and one column per
# input variable.
top_SIM_weekly %>%
  select(where(is.numeric)) %>%
  # Long format: one row per (column name, value) pair, stored in the
  # default `name` and `value` columns.
  pivot_longer(cols = everything()) %>%
  group_by(name) %>%
  summarise(
    Mean = mean(value),
    Max = max(value),
    Min = min(value),
    .groups = "drop"
  ) %>%
  # group_by() sorts its keys as text ("X1", "X10", "X11", ..., "X2"), so put
  # the rows back in the column order of the input data frame.
  arrange(match(name, names(top_SIM_weekly))) %>%
  column_to_rownames("name") %>%
  # Transpose so that the statistics become rows and the variables columns.
  t()

【讨论】:

  • 即使在创建对象 mean、min、max 之后,我仍然遇到同样的错误。当我尝试运行第一行代码时出现错误:mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean)
  • @Ktass 我用过dplyr 1.0.0
  • library(dplyr); top_SIM_weekly <- top_SIM_weekly[,-c(53:55)]; mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean) 我昨天尝试了这段代码,当时可以运行,但现在又出现了同样的错误。有什么想法吗?
  • @Ktass 你的 dplyr 版本是多少?
  • @Ktass 0.8.3 是旧版本。你能把它更新到 1.0.0 吗?
【解决方案2】:
library(dplyr)
library(purrr)

# Apply each summary function to every "X" column within each (year, corr)
# group, then stack the per-function results into one data frame whose
# `stat` column records which function produced each row.
# NOTE: the question asks for mean / max / min; replace `sum = sum` with
# `max = max` to get exactly those statistics.
list(mean = mean, sum = sum, min = min) %>%
  map(function(stat_fn) {
    top_SIM_weekly %>%
      group_by(year, corr) %>%
      summarise(across(starts_with("X"), stat_fn), .groups = "drop")
  }) %>%
  # The list names (mean, sum, min) become the values of the `stat` column.
  bind_rows(.id = "stat")

您可以将 map_df 函数与一组函数名一起使用。此代码并不完全符合您的要求,但我相信它会有所帮助。

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 2012-01-21
    • 1970-01-01
    • 1970-01-01
    • 2013-06-05
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多