【问题标题】:Data Wrangling: Reshaping(数据整理:重塑)
【发布时间】:2018-11-14 13:15:21
【问题描述】:

如何将数据从 df1 的形式转换为 df2 的形式?

# Input: one row per match. `Winner` holds the winning team's name, or
# "no result" when the match produced no winner.
a <- c("New Zealand", "Afghanistan", "Afghanistan", "New Zealand",
       "Afghanistan", "Australia")
b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
d <- c("no result", "Zimbabwe", "Zimbabwe", "New Zealand", "Afghanistan",
       "Australia")

# Keep team names as character on every R version. Before R 4.0 the default
# turned them into factors whose level sets differ between columns, which
# breaks comparisons such as Winner == Team1.
df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

# Desired output: one row per country with matches played, wins, losses and
# draws (a "no result" match counts as a draw for both teams).
Country <- c("New Zealand", "Sri Lanka", "Afghanistan", "Zimbabwe",
             "Australia", "India")
Match <- c(2, 2, 3, 3, 1, 1)
Win <- c(1, 0, 1, 2, 1, 0)
Loss <- c(0, 1, 2, 1, 0, 1)
Draw <- c(1, 1, 0, 0, 0, 0)

df2 <- data.frame(Country, Match, Win, Loss, Draw, stringsAsFactors = FALSE)

提前致谢。

【问题讨论】:

  • 你尝试过哪些方法?

标签: r dplyr data.table reshape2


【解决方案1】:

下面是使用 data.table 的一个大致思路:

library(data.table)

# Long format: one row per (match, participating team). Winner is carried
# along as the id column; the team name lands in `Country`.
# Note: setDT() converts df1 to a data.table by reference.
df1_melted <- melt(setDT(df1), id.vars = "Winner", value.name = "Country")

# Per-country tally. A "no result" match counts as a draw for both teams.
df2b <- df1_melted[, {
  is_draw <- Winner == "no result"
  is_win <- Winner == Country
  .(Matches = .N,
    Win = sum(is_win),
    Loss = sum(!is_win & !is_draw),
    Draw = sum(is_draw))
}, by = Country]
df2b

       Country Matches Win Loss Draw
1: New Zealand       2   1    0    1
2: Afghanistan       3   1    2    0
3:   Australia       1   1    0    0
4:   Sri Lanka       2   0    1    1
5:    Zimbabwe       3   2    1    0
6:       India       1   0    1    0

【讨论】:

    【解决方案2】:

    使用 dplyr(tidyverse)可以得到相同的结果:

    library(tidyverse)

    # Input: one row per match. Winner is a team name, or "no result" when
    # the match produced no winner.
    a <- c("New Zealand", "Afghanistan", "Afghanistan", "New Zealand",
           "Afghanistan", "Australia")
    b <- c("Sri Lanka", "Zimbabwe", "Zimbabwe", "Sri Lanka", "Zimbabwe", "India")
    d <- c("no result", "Zimbabwe", "Zimbabwe", "New Zealand", "Afghanistan",
           "Australia")

    df1 <- data.frame(Team1 = a, Team2 = b, Winner = d, stringsAsFactors = FALSE)

    df1 %>%
      # Long format: one row per (match, participating country).
      gather(Team1, Team2, key = Team, value = Country) %>%
      mutate(
        Result = ifelse(Country == Winner, "Win", "Loss"),
        Result = replace(Result, Winner == "no result", "Draw"),
        # Fixed levels so that every outcome gets its own column below, even
        # one that never occurs in the data.
        Result = factor(Result, levels = c("Win", "Loss", "Draw"))
      ) %>%
      group_by(Country, Result) %>%
      summarise(count = n()) %>%
      # drop = FALSE keeps unused factor levels as zero-filled columns;
      # without it Win + Loss + Draw fails when an outcome is absent.
      spread(key = Result, value = count, fill = 0, drop = FALSE) %>%
      mutate(Match = Win + Loss + Draw) %>%
      select(Country, Match, Win, Loss, Draw)
    
    
    # A tibble: 6 x 5
    # Groups:   Country [6]
        Country     Match   Win  Loss  Draw
        <chr>       <dbl>  <dbl> <dbl> <dbl>
    1 Afghanistan     3     1     2     0
    2   Australia     1     1     0     0
    3       India     1     0     1     0
    4 New Zealand     2     1     0     1
    5   Sri Lanka     2     0     1     1
    6    Zimbabwe     3     2     1     0
    

    【讨论】:

      【解决方案3】:

      这是一个使用 dplyr 的方法

      # Attach tidyverse up front: the pipeline code below uses %>% and dplyr
      # verbs at top level, not only inside tableresults().
      library(tidyverse)

      # Summarise one team's record from a match table.
      #
      # team: name of the team to summarise (single string).
      # df:   data frame with columns Team1, Team2 and Winner, one row per
      #       match; Winner is a team name or 'no result'.
      #
      # Returns a one-row tibble (zero rows if the team played no match) with
      # columns country, match, win, loss and draw.
      tableresults <- function(team, df) {
        df %>%
          filter(Team1 == team | Team2 == team) %>%
          mutate(win = ifelse(Winner == team, 1, 0),
                 draw = ifelse(Winner == 'no result', 1, 0),
                 # A loss is any decided match won by the other side.
                 loss = ifelse(!Winner %in% c('no result', team), 1, 0),
                 country = team) %>%
          group_by(country) %>%
          summarize(match = n(),
                    win = sum(win),
                    loss = sum(loss),
                    draw = sum(draw)) %>%
          ungroup()
      }

      # Every team exactly once, in order of first appearance (Team1 column
      # first, then Team2). union() de-duplicates, so a team that faced several
      # different opponents does not produce repeated rows.
      countries <- union(as.character(df1$Team1), as.character(df1$Team2))

      # One summary row per team, stacked in a single step; this also works
      # when `countries` is empty.
      results_tbl <- bind_rows(lapply(countries, tableresults, df = df1))
      

      结果:

      > results_tbl
      # A tibble: 6 x 5
        country     match   win  loss  draw
        <chr>       <int> <dbl> <dbl> <dbl>
      1 New Zealand     2     1     0     1
      2 Afghanistan     3     1     2     0
      3 Australia       1     1     0     0
      4 Sri Lanka       2     0     1     1
      5 Zimbabwe        3     2     1     0
      6 India           1     0     1     0
      

      【讨论】:

        猜你喜欢
        • 2018-02-08
        • 1970-01-01
        • 1970-01-01
        • 2013-01-27
        • 2019-11-15
        • 2013-07-21
        • 2015-10-07
        • 1970-01-01
        相关资源
        最近更新 更多