【问题标题】:How to pivot wider in R on one column value(如何在 R 中根据某一列的值执行 pivot_wider,将长表转为宽表)
【发布时间】:2021-04-22 19:52:51
【问题描述】:

以下是示例数据以及我目前所做的处理。我已经尝试过其他方法,但想到一个也许能让事情更简单的思路。预期的结果放在代码下方。我想要的是:当 smb 列的值为 Total 时,据此执行 pivot_wider(长表转宽表)。smb 有五个可能的取值:1、2、3、4 和 Total。我希望新增一列 smb.Total,用来存放每个 smb/year/qtr/area 组合对应的总数。我尝试过在 pivot_wider 语句前面加一个 filter(见最下方的代码)。

  library(readxl)
  library(dplyr)
  library(stringr)
  library(tidyverse)
  library(gt)


 # Sample data: 32 worksites = 2 areas x 2 quarters x 8 rows, all in 2020.
 # Each row of `employment` below is one area/quarter group of 8 worksites.
 employment <- c(
   1, 45, 125, 130, 165, 260, 600, 601,   # area 1, qtr 1
   2, 46, 127, 132, 167, 265, 601, 602,   # area 1, qtr 2
   50, 61, 110, 121, 170, 305, 55, 603,   # area 3, qtr 1
   52, 66, 112, 123, 172, 310, 604, 605   # area 3, qtr 2
 )
 # `small` repeats the same 8-value pattern (two trailing NAs) in every group.
 small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
 # Area codes are plain numerics: the literals 001 / 003 are just 1 and 3.
 area <- rep(c(1, 3), each = 16)
 year <- rep(2020, times = 32)
 qtr <- rep(rep(c(1, 2), each = 8), times = 2)

 smbtest <- data.frame(employment, small, area, year, qtr)

 # Label every worksite with a size bucket derived from `employment`.
 # case_when() uses the first branch that matches, so after the leading guard
 # each bucket only needs its upper bound. The 500+ bucket is labelled
 # "Total"; negative, missing or >= 100000 values end up as NA.
 smbtest <- smbtest %>%
   mutate(
     smb = case_when(
       employment < 0 ~ NA_character_,
       employment < 100 ~ "1",
       employment < 150 ~ "2",
       employment < 250 ~ "3",
       employment < 500 ~ "4",
       employment < 100000 ~ "Total"
     )
   )


# Summarise employment and worksite counts per area / period / smb bucket,
# then accumulate them across the buckets of each area/period. Because the
# rows come back ordered by bucket ("1".."4", then "Total"), the "Total" row
# ends up holding the running total of the whole area/period.
smbsummary2 <- smbtest %>%
  mutate(period = paste0(year, "q", qtr)) %>%
  group_by(area, period, smb) %>%
  summarise(
    employment = sum(employment),
    worksites = n(),
    # keep the area/period grouping so cumsum() below restarts per period
    .groups = "drop_last"
  ) %>%
  mutate(
    employment = cumsum(employment),
    worksites = cumsum(worksites)
  )

# Compare each area/smb row with the preceding row of the same area/smb
# (lag() looks one row back, i.e. the previous period in this data):
#   empprevyear    - employment in the preceding row (NA for the first one)
#   empprevyearpp  - absolute change versus that row
#   empprevyearpct - relative change, formatted as a percentage string
smbsummary2 <- smbsummary2 %>%
  group_by(area, smb) %>%
  mutate(
    empprevyear = lag(employment),
    empprevyearpp = employment - empprevyear,
    empprevyearpct = scales::percent(
      (employment / empprevyear) - 1,
      accuracy = 0.01
    )
  )

 area   period    smb      employment     worksites     smb.Total
   1    2020q1     1          46            2              1927
   1    2020q1     2         301            4              1927
   1    2020q1     3         466            5              1927
   1    2020q1     4         726            6              1927
   1    2020q1    Total     1927            8              1927 

# Add smb.Total: the employment of the "Total" row, repeated on every smb row
# of the same area/period (this is the layout of the expected output).
#
# The earlier filter() + pivot_wider() attempt could not work because:
#   * `small` no longer exists after summarise(); the bucket label is `smb`;
#   * the `%>%` between filter() and pivot_wider() was missing, so
#     pivot_wider() was called without any data;
#   * filtering down to the Total rows throws away buckets 1-4, which the
#     expected result still needs.
# match() returns NA when an area/period has no "Total" row, so such groups
# get NA instead of raising an error.
smbsummary2 <- smbsummary2 %>%
  group_by(area, period) %>%
  mutate(smb.Total = employment[match("Total", smb)]) %>%
  ungroup()

【问题讨论】:

    标签: r dplyr pivot


    【解决方案1】:

    也许这段代码可以解决你的问题:

    # Sample data: 2 areas x 2 quarters x 8 worksites, all in 2020.
    # One row of `employment` per area/quarter group.
    employment <- c(
      1, 45, 125, 130, 165, 260, 600, 601,   # area 1, qtr 1
      2, 46, 127, 132, 167, 265, 601, 602,   # area 1, qtr 2
      50, 61, 110, 121, 170, 305, 55, 603,   # area 3, qtr 1
      52, 66, 112, 123, 172, 310, 604, 605   # area 3, qtr 2
    )
    # The same 8-value pattern (ending in two NAs) repeats for every group.
    small <- rep(c(1, 1, 2, 2, 3, 4, NA, NA), times = 4)
    # 001 / 003 are ordinary numeric literals, i.e. 1 and 3.
    area <- rep(c(1, 3), each = 16)
    year <- rep(2020, times = 32)
    qtr <- rep(rep(c(1, 2), each = 8), times = 2)
    
    # Build the tibble, bucket every worksite by employment, and move the
    # identifying columns to the front.
    # cut(right = FALSE) produces the half-open ranges [0, 100), [100, 150),
    # [150, 250), [250, 500) and [500, 100000); anything outside them (or NA)
    # stays NA.
    smbtest <- tibble(employment, small, area, year, qtr) %>%
      mutate(
        smb = as.character(cut(
          employment,
          breaks = c(0, 100, 150, 250, 500, 100000),
          labels = c("1", "2", "3", "4", "Total"),
          right = FALSE
        ))
      ) %>%
      relocate(smb, year, qtr, area, small, employment)
    
    # Employment and worksite counts per area / period / smb bucket, then
    # accumulated across the buckets of each area/period, so the "Total" row
    # carries the full area/period total.
    smbsummary2 <- smbtest %>%
      mutate(period = paste0(year, "q", qtr)) %>%
      group_by(area, period, smb) %>%
      summarise(
        employment = sum(employment),
        worksites = n(),
        # State the grouping explicitly: the cumsum() below depends on the
        # result still being grouped by area/period, and this also silences
        # summarise()'s "regrouping output" message.
        .groups = "drop_last"
      ) %>%
      mutate(
        employment = cumsum(employment),
        worksites = cumsum(worksites)
      )
    
    # Share (in %) of each smb bucket in the "Total" employment of its
    # area/period, e.g. 46 / 1927 * 100 for area 1, 2020q1, bucket "1".
    # match() picks the "Total" row of the group and yields NA when a group
    # has none, instead of the zero-length vector that would make mutate()
    # fail.
    smbsummary2 %>%
      group_by(area, period) %>%
      mutate(
        `employ/period (%)` = employment / employment[match("Total", smb)] * 100
      ) %>%
      ungroup()
    

    这可能不是最佳答案,但就您的数据而言,我认为它是可行的。如果不行,请告诉我。

    干得好!

    【讨论】:

      【解决方案2】:

      我不确定我是否理解正确。

      你希望 smb.total 表示什么?是就业人数(employment)变量的总计吗?如果是,请使用以下代码生成对象 “smbsummary2”:

      # Add smb.total: the total employment of each smb/year/qtr/area group,
      # repeated on every row of that group.
      # n() would only count the rows in the group; summing the employment
      # variable requires sum().
      smbsummary2 <- smbtest %>%
        relocate(smb, year, qtr, area, small, employment) %>%
        group_by(smb, year, qtr, area) %>%
        mutate(smb.total = sum(employment)) %>%
        ungroup()
      

      如果不是这样,你能不能给我解释一下?

      【讨论】:

      • 手头的任务是算出每个 smb(规模)类别在总就业人数中所占的份额。因此,第一行就是 46/1927,约 2.39%。不过我在想,如果把总数(1927)单独作为一列,那么直接用一列除以另一列会更容易。这样说得通吗?
      猜你喜欢
      • 1970-01-01
      • 2022-01-24
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2021-10-13
      • 2021-12-31
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多