将分类数据从长格式转换为宽格式（问答）

【问题标题】：Convert from long to wide format from categorical data（将分类数据从长格式转换为宽格式）
【发布时间】：2020-01-13 08:38:49
【问题描述】：

有这样的分类数据：

 # Example input: one row per id, four categorical columns coded as numbers.
 # Bound to `df` because the later snippets (setDT(df), df %>% ...) refer to
 # the data by that name.
 df <- data.frame(
   id = c(1, 2, 3, 4, 5),
   stock1 = c(1, 2, 0, 1, 2),
   stock2 = c(0, 1, 0, 1, 1),
   end = c(0, 1, 3, 0, 3),
   start = c(2, 3, 0, 1, 0)
 )
 df  # print it, as the bare data.frame() call did

id stock1 stock2 end start
1  1      1      0   0     2
2  2      2      1   1     3
3  3      0      0   3     0
4  4      1      1   0     1
5  5      2      1   3     0

如何将它们从长格式转换为宽格式，使每个“列名_取值”组合（例如 stock1_0、stock1_1）成为单独的一列，并用 1/0 表示该取值在对应行中是否出现？

预期输出示例：

# Expected wide (one-hot) result: one `<column>_<value>` indicator per value
# observed in each input column; 1 where the row holds that value, 0 otherwise.
data.frame(
  id = c(1, 2, 3, 4, 5),
  stock1_0 = c(0, 0, 1, 0, 0),
  stock1_1 = c(1, 0, 0, 1, 0),
  stock1_2 = c(0, 1, 0, 0, 1),
  stock2_0 = c(1, 0, 1, 0, 0),
  # ids 2, 4 and 5 have stock2 == 1 in the input, so all three are flagged;
  # every row must have exactly one stock2_* indicator set.
  stock2_1 = c(0, 1, 0, 1, 1),
  end_0 = c(1, 0, 0, 1, 0),
  end_1 = c(0, 1, 0, 0, 0),
  end_3 = c(0, 0, 1, 0, 1),
  start_0 = c(0, 0, 1, 0, 1),
  start_1 = c(0, 0, 0, 1, 0),
  start_2 = c(1, 0, 0, 0, 0),
  start_3 = c(0, 1, 0, 0, 0)
)

#   id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0 start_1 start_2 start_3
# 1  1        0        1        0        1        0     1     0     0       0       0       1       0
# 2  2        0        0        1        0        1     0     1     0       0       0       0       1
# 3  3        1        0        0        1        0     0     0     1       1       0       0       0
# 4  4        0        1        0        0        1     1     0     0       0       1       0       0
# 5  5        0        0        1        0        1     0     0     1       1       0       0       0

【问题讨论】：

标签： r

【解决方案1】：

你可以使用model.matrix。

# One-hot encode every column except the first (id) with model.matrix().
# Each column is converted to a factor; `~ f - 1` drops the intercept so that
# every level gets its own indicator column.
data.frame(
  dat[1],
  do.call(cbind, lapply(seq_along(dat)[-1], function(x) {
    f <- as.factor(dat[[x]])
    m <- model.matrix(~ f - 1)
    # Label columns with the actual level values (e.g. stock1_0, end_3) rather
    # than with their positions, so the suffix names the value being flagged.
    colnames(m) <- paste(names(dat)[x], levels(f), sep = "_")
    m
  }))
)


#   id stock1_0 stock1_1 stock1_2 stock2_0 stock2_1 end_0 end_1 end_3 start_0
# 1  1        0        1        0        1        0     1     0     0       0
# 2  2        0        0        1        0        1     0     1     0       0
# 3  3        1        0        0        1        0     0     0     1       1
# 4  4        0        1        0        0        1     1     0     0       0
# 5  5        0        0        1        0        1     0     0     1       1
#   start_1 start_2 start_3
# 1       0       1       0
# 2       0       0       1
# 3       0       0       0
# 4       1       0       0
# 5       0       0       0

数据：

# Sample data: five ids, each with four numerically coded categorical columns.
dat <- data.frame(
  id = c(1, 2, 3, 4, 5),
  stock1 = c(1, 2, 0, 1, 2),
  stock2 = c(0, 1, 0, 1, 1),
  end = c(0, 1, 3, 0, 3),
  start = c(2, 3, 0, 1, 0)
)

【讨论】：

【解决方案2】：

library(data.table)
setDT(df)  # converts df to a data.table by reference

# Melt everything except id to long form, then cast back to wide with one
# column per "<variable>_<value>" key; counting rows per cell (length) yields
# the 1/0 indicator, with absent combinations counted as 0.
dcast(
  melt(df, id.vars = "id"),
  id ~ paste0(variable, "_", value),
  value.var = "value",
  fun.aggregate = length
)


#    id end_0 end_1 end_3 start_0 start_1 start_2 start_3 stock1_0
# 1:  1     1     0     0       0       0       1       0        0
# 2:  2     0     1     0       0       0       0       1        0
# 3:  3     0     0     1       1       0       0       0        1
# 4:  4     1     0     0       0       1       0       0        0
# 5:  5     0     0     1       1       0       0       0        0
#    stock1_1 stock1_2 stock2_0 stock2_1
# 1:        1        0        1        0
# 2:        0        1        0        1
# 3:        0        0        1        0
# 4:        1        0        0        1
# 5:        0        1        0        1

【讨论】：

【解决方案3】：

一种方法是获取长格式数据，将列名与值结合起来，然后以宽格式获取数据。

library(dplyr)
library(tidyr)

# Go long, fuse each column name with its value into a single key
# ("stock1_1", "end_0", ...), flag every observed key with 1, then spread the
# keys back out to columns, filling the combinations a row lacks with 0.
df %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "value") %>%
  unite(col = "name", name, value, sep = "_") %>%
  mutate(value = 1) %>%
  pivot_wider(
    names_from = name,
    values_from = value,
    values_fill = list(value = 0)
  )

# A tibble: 5 x 13
#     id stock1_1 stock2_0 end_0 start_2 stock1_2 stock2_1 end_1 start_3 stock1_0 end_3 start_0 start_1
#  <dbl>    <dbl>    <dbl> <dbl>   <dbl>    <dbl>    <dbl> <dbl>   <dbl>    <dbl> <dbl>   <dbl>   <dbl>
#1     1        1        1     1       1        0        0     0       0        0     0       0       0
#2     2        0        0     0       0        1        1     1       1        0     0       0       0
#3     3        0        1     0       0        0        0     0       0        1     1       1       0
#4     4        1        0     1       0        0        1     0       0        0     0       0       1
#5     5        0        0     0       0        1        1     0       0        0     1       1       0

【讨论】：