【问题标题】:Find and summarise co-occurring events between timestamps by group in R在 R 中按组查找和总结时间戳之间同时发生的事件
【发布时间】:2021-07-14 13:00:44
【问题描述】:

我正在处理点播电视日志数据集,我想识别和汇总两个不同时间戳之间的活动设备数量。我在下面创建了一个模拟数据框;每行都是一个“会话”,包含唯一的客户标识符、唯一的设备标识符、正在观看的节目类型以及每个会话的开始/结束时间:-


df<-structure(list(CustomerID = c("0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef"), DeviceID = c("b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "66a9e7dd-57ee-4c8a-a090-950cae9b02a1", 
"a8f4bb78-a0f2-476b-9303-2761b06a65fc", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"c293d135-800e-4a62-898d-f0959bf0870d", "66a9e7dd-57ee-4c8a-a090-950cae9b02a1", 
"a8f4bb78-a0f2-476b-9303-2761b06a65fc", "09109879-1061-4325-ae85-9c853dbf7882", 
"09109879-1061-4325-ae85-9c853dbf7882", "c293d135-800e-4a62-898d-f0959bf0870d", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "09109879-1061-4325-ae85-9c853dbf7882", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"41f98340-0724-4dcc-b9bd-1bdd12307f87", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"a481ffe1-9bfe-4cac-9da8-553c4da2e224", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"a481ffe1-9bfe-4cac-9da8-553c4da2e224", "07d11a53-9b21-4fa1-b055-41f0247c642f", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "c1ceebbb-24cf-4b1d-8576-8bcb3aaa4534", 
"29277218-9798-406e-b9ee-717184bf6f0e", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"41f98340-0724-4dcc-b9bd-1bdd12307f87", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "66b65dcb-5416-4bcc-ac9e-2222e2d50a28", 
"29277218-9798-406e-b9ee-717184bf6f0e", "c1ceebbb-24cf-4b1d-8576-8bcb3aaa4534", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "04ba4776-8afc-4e86-86de-7b85668bf075", 
"a58aa3cc-a231-4a82-8377-56b34306a446", "04ba4776-8afc-4e86-86de-7b85668bf075", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "d2832ac1-f3fd-468a-ace9-efa6a4e25e41", 
"d2832ac1-f3fd-468a-ace9-efa6a4e25e41", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"a58aa3cc-a231-4a82-8377-56b34306a446", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "d2832ac1-f3fd-468a-ace9-efa6a4e25e41", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"4eb8bf81-1f5c-4593-8205-2d0a0d77d0d0", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"58286c18-2df6-461b-8a04-096625f678d2", "58286c18-2df6-461b-8a04-096625f678d2", 
"58286c18-2df6-461b-8a04-096625f678d2", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"fc5d9b88-a545-4f69-9c55-7b57103a165c", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "58286c18-2df6-461b-8a04-096625f678d2", 
"fc5d9b88-a545-4f69-9c55-7b57103a165c", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "58286c18-2df6-461b-8a04-096625f678d2", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "3007c886-8fde-4b05-8ae5-b4f8df0467a1", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1"), ShowGenre = c("Music", 
"Music", "Sport", "Drama", "Kids", "Documentary", "News", "Movie", 
"Drama", "News", "News", "Kids", "Documentary", "Movie", "Movie", 
"Documentary", "Movie", "Music", "Sport", "Movie", "Movie", "Movie", 
"Drama", "News", "Movie", "Movie", "Documentary", "Movie", "Music", 
"Drama", "News", "News", "Movie", "Drama", "News", "Documentary", 
"Documentary", "Drama", "Music", "Sport", "Sport", "Movie", "Music", 
"Drama", "Sport", "Drama", "Drama", "Kids", "Drama", "Documentary", 
"Sport", "Music", "Music", "Documentary", "Drama", "News", "Music", 
"Music", "Movie", "Documentary", "Documentary", "Documentary", 
"Sport", "Music", "News", "News", "Sport", "Documentary", "Music", 
"Documentary", "News", "Drama", "Drama", "Documentary", "News", 
"Music", "Kids", "Drama", "Documentary", "News", "Drama", "Documentary", 
"Movie", "News", "Kids", "Movie", "Music", "Kids", "Kids", "Movie", 
"Music", "News", "Movie", "Kids", "Music", "Music", "Kids", "Kids", 
"News", "Kids", "Movie", "Documentary"), SessionStart = structure(c(1612132904, 
1612133106, 1612136282, 1612139373, 1612139378, 1612140041, 1612140405, 
1612143192, 1612143292, 1612143854, 1612143976, 1612144065, 1612144220, 
1612144263, 1612144334, 1612144356, 1612146166, 1612146226, 1612146248, 
1612146440, 1612146989, 1612147206, 1612148624, 1612152735, 1612153241, 
1612153475, 1612154929, 1612155104, 1612155562, 1612155992, 1612159668, 
1612159851, 1612160073, 1612165858, 1612168664, 1612169607, 1612169662, 
1612169779, 1612171481, 1612172015, 1612172166, 1612172358, 1612172446, 
1612172505, 1612172544, 1612172601, 1612172607, 1612172969, 1612173898, 
1612175729, 1612177333, 1612178891, 1612180467, 1612180651, 1612181087, 
1612181168, 1612181233, 1612186335, 1612186358, 1612186740, 1612187098, 
1612187181, 1612187519, 1612187704, 1612187730, 1612187890, 1612187936, 
1612188139, 1612188486, 1612188494, 1612188580, 1612192309, 1612192504, 
1612193382, 1612194334, 1612194365, 1612194396, 1612194579, 1612194762, 
1612194984, 1612195094, 1612195096, 1612195252, 1612195837, 1612196401, 
1612199002, 1612200677, 1612200762, 1612200829, 1612201556, 1612201802, 
1612202166, 1612202555, 1612202852, 1612203272, 1612204749, 1612204989, 
1612205005, 1612205067, 1612206077, 1612206260, 1612206263), tzone = "Europe/London", class = c("POSIXct", 
"POSIXt")), SessionEnd = structure(c(1612137925, 1612139792, 
1612140039, 1612141093, 1612139380, 1612143136, 1612140640, 1612143256, 
1612146067, 1612144022, 1612152403, 1612144131, 1612144270, 1612144284, 
1612144337, 1612144652, 1612146227, 1612146238, 1612146439, 1612146493, 
1612152522, 1612148610, 1612149051, 1612153217, 1612153464, 1612154778, 
1612155086, 1612155551, 1612155877, 1612156110, 1612159851, 1612160072, 
1612160227, 1612168654, 1612171480, 1612169607, 1612169740, 1612172007, 
1612172194, 1612172104, 1612172337, 1612172465, 1612172496, 1612172520, 
1612172599, 1612172604, 1612172653, 1612175721, 1612174311, 1612177318, 
1612177340, 1612178923, 1612180650, 1612180839, 1612181167, 1612181232, 
1612181276, 1612186398, 1612186358, 1612186885, 1612187809, 1612187184, 
1612187704, 1612187890, 1612187789, 1612187899, 1612188138, 1612188485, 
1612188498, 1612189623, 1612188597, 1612192404, 1612193479, 1612195723, 
1612194375, 1612194396, 1612194578, 1612194761, 1612194984, 1612195044, 
1612195251, 1612195517, 1612195252, 1612195864, 1612196431, 1612200445, 
1612201368, 1612200786, 1612200896, 1612201633, 1612202122, 1612204649, 
1612205037, 1612203929, 1612203278, 1612204749, 1612205014, 1612205834, 
1612205067, 1612206261, 1612206305, 1612206343), tzone = "Europe/London", class = c("POSIXct", 
"POSIXt"))), class = "data.frame", row.names = c(NA, -102L))

因此,在此数据框中,有 6 个唯一的客户标识符,每个标识符都有不同数量的唯一设备。以下是按客户划分的独特设备数量:-

* <chr>                                <int>
1 0289d477-427e-4b91-bd4d-8fd579ef2b87     4
2 12a6dd1e-484c-4c94-a7ab-6443a58b4159     4
3 aeffea0a-fdbf-4c88-8a47-8eaeee4339ef     3
4 e4f0a5ef-f808-4869-9370-c7fcee63ea98     5
5 fb442c22-2595-4245-9f49-a2ea3581ee88     1
6 fc20bfb6-172e-4f55-9467-12ed99579503     6

您可以想象,在现代点播电视服务中,家庭中的某个人可能正在大屏幕电视上观看节目,而与此同时,另一个房间里的另一个人正在平板电脑、手机等设备上观看不同的节目。

鉴于这种情况,我希望能够总结以下几点:-

  1. 每位客户一次有多少会话处于活动状态?使用第一个活动会话的“SessionStart”时间戳和最后一个活动会话的“SessionEnd”时间戳作为定义的时间段。
  2. 在此会话期间处于活动状态的唯一设备的计数
  3. 在活动会话中观看的独特类型的计数

期望的输出

这是数据框的前两行:-

如果这只是我们必须处理的两行,那么所需的输出将如下所示:-

CustomerID                                Num_Unique_Devices     Num_Unique_Genre       Genres       
0289d477-427e-4b91-bd4d-8fd579ef2b87               2                    1                 Music

StartTime_FirstSession      EndTime_LastSession
2021-01-31 22:41:44         2021-02-01 00:36:32


谁能帮忙想出一个解决方案来得到这个结果?这目前超出了我的专业能力范围,因此非常感谢任何帮助。谢谢:)

【问题讨论】:

    标签: r dplyr timestamp grouping


    【解决方案1】:
    library(tidyverse)
    

    我们可以先添加一个指示列,用来表示当前会话是否与之前的会话重叠。可以这样实现:

    # Flag, per customer, every session that starts while an earlier session of
    # the same customer is still running.
    #
    # The start time is compared with the running maximum of all earlier end
    # times, not only with the end of the immediately preceding row. A short
    # session can sit between a long-running session and the current one;
    # comparing with `lag(SessionEnd)` alone would then wrongly start a new
    # block (e.g. a session starting at 00:40:41 while one ending at 00:58:13
    # is still active, with a 2-second session listed in between).
    # `cummax()` is not defined for POSIXct, hence the `as.numeric()` calls.
    # `overlap` stays NA on each customer's first row.
    #
    # NOTE: the printed outputs shown in this answer were generated with the
    # previous-row comparison, so they can differ from what this code prints
    # (for instance, row 6 below becomes TRUE).
    (step1 <- df %>% 
      as_tibble() %>% 
      group_by(CustomerID) %>% 
      arrange(SessionStart) %>% 
      mutate(
        overlap = as.numeric(SessionStart) <
          lag(cummax(as.numeric(SessionEnd)))
      )
    )
    #> # A tibble: 102 x 6
    #> # Groups:   CustomerID [6]
    #>    CustomerID DeviceID ShowGenre SessionStart        SessionEnd          overlap
    #>    <chr>      <chr>    <chr>     <dttm>              <dttm>              <lgl>  
    #>  1 0289d477-~ b8d7b4a~ Music     2021-01-31 22:41:44 2021-02-01 00:05:25 NA     
    #>  2 0289d477-~ b8136ab~ Music     2021-01-31 22:45:06 2021-02-01 00:36:32 TRUE   
    #>  3 0289d477-~ 420dc9b~ Sport     2021-01-31 23:38:02 2021-02-01 00:40:39 TRUE   
    #>  4 0289d477-~ b8136ab~ Drama     2021-02-01 00:29:33 2021-02-01 00:58:13 TRUE   
    #>  5 0289d477-~ ccb94e1~ Kids      2021-02-01 00:29:38 2021-02-01 00:29:40 TRUE   
    #>  6 0289d477-~ b8136ab~ Document~ 2021-02-01 00:40:41 2021-02-01 01:32:16 FALSE  
    #>  7 0289d477-~ b8d7b4a~ News      2021-02-01 00:46:45 2021-02-01 00:50:40 TRUE   
    #>  8 0289d477-~ b8d7b4a~ Movie     2021-02-01 01:33:12 2021-02-01 01:34:16 FALSE  
    #>  9 0289d477-~ ccb94e1~ Drama     2021-02-01 01:34:52 2021-02-01 02:21:07 FALSE  
    #> 10 0289d477-~ b8d7b4a~ News      2021-02-01 01:44:14 2021-02-01 01:47:02 TRUE   
    #> # ... with 92 more rows
    

    overlap 列中的 NA 应改为 FALSE,这可以用 coalesce() 完成。接下来,我们要创建一个 SessionID,每当一个不重叠的会话开始时就递增。一种方法是 cumsum(!overlap)

    # Number the blocks of overlapping sessions within each customer.
    # `step1` is still grouped by CustomerID, so the cumulative sum restarts
    # for every customer. A row whose `overlap` is FALSE, or NA (treated as
    # FALSE via coalesce()), opens a new block and bumps the counter.
    (step2 <- mutate(
      step1,
      SessionID = cumsum(!coalesce(overlap, FALSE))
    ))
    #> # A tibble: 102 x 7
    #> # Groups:   CustomerID [6]
    #>    CustomerID DeviceID ShowGenre SessionStart        SessionEnd          overlap
    #>    <chr>      <chr>    <chr>     <dttm>              <dttm>              <lgl>  
    #>  1 0289d477-~ b8d7b4a~ Music     2021-01-31 22:41:44 2021-02-01 00:05:25 NA     
    #>  2 0289d477-~ b8136ab~ Music     2021-01-31 22:45:06 2021-02-01 00:36:32 TRUE   
    #>  3 0289d477-~ 420dc9b~ Sport     2021-01-31 23:38:02 2021-02-01 00:40:39 TRUE   
    #>  4 0289d477-~ b8136ab~ Drama     2021-02-01 00:29:33 2021-02-01 00:58:13 TRUE   
    #>  5 0289d477-~ ccb94e1~ Kids      2021-02-01 00:29:38 2021-02-01 00:29:40 TRUE   
    #>  6 0289d477-~ b8136ab~ Document~ 2021-02-01 00:40:41 2021-02-01 01:32:16 FALSE  
    #>  7 0289d477-~ b8d7b4a~ News      2021-02-01 00:46:45 2021-02-01 00:50:40 TRUE   
    #>  8 0289d477-~ b8d7b4a~ Movie     2021-02-01 01:33:12 2021-02-01 01:34:16 FALSE  
    #>  9 0289d477-~ ccb94e1~ Drama     2021-02-01 01:34:52 2021-02-01 02:21:07 FALSE  
    #> 10 0289d477-~ b8d7b4a~ News      2021-02-01 01:44:14 2021-02-01 01:47:02 TRUE   
    #> # ... with 92 more rows, and 1 more variable: SessionID <int>
    

    最后,我们按 CustomerID 和 SessionID 分组,并创建带有汇总结果的最终数据框。

    # Collapse every (CustomerID, SessionID) block into a single summary row:
    # distinct devices, distinct genres, earliest start and latest end.
    # `.groups = "drop"` returns an ungrouped tibble.
    (step3 <- summarise(
      group_by(step2, CustomerID, SessionID),
      n_unique_devices = n_distinct(DeviceID),
      n_unique_genres = n_distinct(ShowGenre),
      first_session_start = min(SessionStart),
      last_session_end = max(SessionEnd),
      .groups = "drop"
    ))
    #> # A tibble: 69 x 6
    #>    CustomerID     SessionID n_unique_devices n_unique_genres first_session_start
    #>  * <chr>              <int>            <int>           <int> <dttm>             
    #>  1 0289d477-427e~         1                4               4 2021-01-31 22:41:44
    #>  2 0289d477-427e~         2                2               2 2021-02-01 00:40:41
    #>  3 0289d477-427e~         3                1               1 2021-02-01 01:33:12
    #>  4 0289d477-427e~         4                3               3 2021-02-01 01:34:52
    #>  5 0289d477-427e~         5                2               2 2021-02-01 01:50:20
    #>  6 0289d477-427e~         6                1               1 2021-02-01 01:52:14
    #>  7 0289d477-427e~         7                1               1 2021-02-01 01:52:36
    #>  8 0289d477-427e~         8                1               1 2021-02-01 02:22:46
    #>  9 12a6dd1e-484c~         1                1               1 2021-02-01 02:23:46
    #> 10 12a6dd1e-484c~         2                1               1 2021-02-01 02:24:08
    #> # ... with 59 more rows, and 1 more variable: last_session_end <dttm>
    

    reprex package (v1.0.0) 于 2021-07-14 创建

    【讨论】:

    • 这太棒了,非常感谢您的解决方案,它运行良好。我想知道如何添加一个额外的列来提供在步骤 3 中观看的独特流派的字符串?
    • 您可以在 summarise() 中添加 genres = str_c(unique(ShowGenre), collapse = ",")
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2020-06-03
    • 2021-11-27
    • 1970-01-01
    • 2017-02-05
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多