# Tabulate how many rows of GazaTB1 fall in each year 81-91.
#
# Results (same names as before, created in the current environment):
#   SampleSizeGaza81 ... SampleSizeGaza91 - the rows of GazaTB1 for that year
#   df - one row per year, columns `year` and `YearSampleSize`
years <- 81:91

# which() drops rows whose year is NA; a bare logical index would return an
# all-NA row for each of them and inflate every year's count.
# drop = FALSE keeps each subset a data frame even if GazaTB1 has one column.
year_rows <- lapply(years, function(yr) {
  GazaTB1[which(GazaTB1$year == yr), , drop = FALSE]
})

# Publish the per-year subsets under their dynamic names.
for (k in seq_along(years)) {
  assign(paste0("SampleSizeGaza", years[k]), year_rows[[k]])
}

# Build the summary in one call instead of growing it with rbind() per year.
df <- data.frame(
  year = years,
  YearSampleSize = vapply(year_rows, NROW, integer(1))
)
以上代码全部使用 base R 实现。不过需要指出的是，与 dplyr/tidyverse 的写法相比，这种逐年取子集的做法效率很低。下面是在一个非常大的数据框（5000 万行）上的耗时对比：
> ### Create large dataframe
> GazaTB1 <- data.frame(year = sample(81:91, size = 50000000, replace = TRUE))
> ### for loop
> ptm <- proc.time()
> df <- data.frame()
> for(i in 81:91){
+ assign(paste0("SampleSizeGaza",i), GazaTB1[GazaTB1$year==i,])
+ dfi <- data.frame(year = i, YearSampleSize = NROW(eval(as.name(paste0("SampleSizeGaza", i)))))
+ df <- rbind(df, dfi)
+ }
> proc.time() - ptm
user system elapsed
2.80 1.27 4.08
>
> ### tidyverse
> library(tidyverse)
>
> ptm2 <- proc.time()
> df2 <- GazaTB1 %>%
+ group_by(year) %>%
+ count()
> proc.time() - ptm2
user system elapsed
0.64 0.14 0.78