【发布时间】:2019-12-19 01:23:58
【问题描述】:
我有一份来自自动电话调查的数据。当受访者中途挂断时,该调查会进行 2 次回拨。
这是来自一位受访者的数据。不过,我有数千个类似的数据需要处理。
第一列是受访者的 ID;第二列是排序 ID(用来确定行的先后顺序,因为时间戳没有精确到毫秒);第三列是调查工具与受访者手机(本质上就是受访者)之间的交互;第四列是日期时间戳;最后一列是我事先计算好的相邻两行之间的时间差(单位:秒)。
# Event log for a single respondent (ID 111) of an automated phone survey.
# Columns:
#   Respondent.ID - respondent identifier
#   order.ID      - ordering key for the rows (timestamps are only precise to
#                   the second, so they cannot order rows on their own)
#   Interaction   - event exchanged between the survey tool and the respondent
#   Datetime      - timestamp string, month/day/year hour:minute:second
#   difftime      - seconds elapsed since the previous row (NA on the first)
#
# A matrix holds a single type, so mixing strings and numbers in `c()` coerces
# every cell to character. `stringsAsFactors = FALSE` keeps the text columns as
# plain character on every R version, and the numeric columns are converted
# back after construction so that `difftime` can actually be summed.
test <- data.frame(matrix(c(
111, 2340, 'Enqueueing call', '12/2/19 14:53:57', NA,
111, 6174, 'Call expired, will be retried in next schedule window', '12/2/19 20:14:22', 19225,
111, 10386, 'Answer', '12/3/19 15:48:56', 70474,
111, 10387, 'Contacted', '12/3/19 15:48:56', 0,
111, 10388, 'Intro', '12/3/19 15:48:56', 0,
111, 10389, 'Timeout', '12/3/19 15:49:16', 20,
111, 10390, 'Intro', '12/3/19 15:49:16', 0,
111, 10391, '1', '12/3/19 15:49:30', 14,
111, 10392, 'Started', '12/3/19 15:49:30', 0,
111, 10393, 'Q1', '12/3/19 15:49:30', 0,
111, 10394, '1', '12/3/19 15:49:45', 15,
111, 10395, 'Q2', '12/3/19 15:49:45', 0,
111, 10396, 'Timeout', '12/3/19 15:49:54', 9,
111, 10397, 'Q2', '12/3/19 15:49:54', 0,
111, 10398, 'Timeout', '12/3/19 15:50:03', 9,
111, 10399, 'Q2', '12/3/19 15:50:03', 0,
111, 10400, 'Timeout', '12/3/19 15:50:11', 8,
111, 17658, 'Timeout. Call failed.', '12/4/19 17:50:27', 93616,
111, 19932, 'Call expired, will be retried in next schedule window', '12/4/19 20:45:17', 10490,
111, 25647, 'Call expired, will be retried in next schedule window', '12/5/19 20:33:27', 85690,
111, 31516, 'Call expired, will be retried in next schedule window', '12/6/19 20:17:18', 85431,
111, 36781, 'Call expired, will be retried in next schedule window', '12/7/19 20:02:16', 85498,
111, 38718, 'Answer', '12/8/19 10:24:07', 51711,
111, 38719, 'Q2', '12/8/19 10:24:07', 0,
111, 38720, 'Timeout', '12/8/19 10:24:16', 9,
111, 38721, 'Q2', '12/8/19 10:24:16', 0,
111, 38722, 'Timeout', '12/8/19 10:24:24', 8,
111, 38723, 'Q2', '12/8/19 10:24:24', 0,
111, 38724, 'Timeout', '12/8/19 10:24:33', 9,
111, 45339, 'Timeout. Call failed.', '12/9/19 12:25:08', 93635,
111, 49026, 'Call expired, will be retried in next schedule window', '12/9/19 20:31:26', 29178,
111, 53972, 'Call expired, will be retried in next schedule window', '12/10/19 20:14:38', 85392,
111, 58277, 'Answer', '12/11/19 17:55:29', 78051,
111, 58278, 'Q2', '12/11/19 17:55:29', 0,
111, 58279, 'Timeout', '12/11/19 17:55:38', 9,
111, 58280, 'Q2', '12/11/19 17:55:38', 0,
111, 58281, 'Timeout', '12/11/19 17:55:46', 8,
111, 58282, 'Q2', '12/11/19 17:55:46', 0,
111, 58283, '61', '12/11/19 17:55:57', 11,
111, 58284, 'Q3', '12/11/19 17:55:57', 0,
111, 58285, '4', '12/11/19 17:56:13', 16,
111, 58286, 'Q4', '12/11/19 17:56:13', 0,
111, 58288, '3', '12/11/19 17:56:42', 29,
111, 58289, 'Interim partial', '12/11/19 17:56:42', 0,
111, 58290, 'Q5', '12/11/19 17:56:42', 0,
111, 58291, '3', '12/11/19 17:56:59', 17,
111, 58292, 'Q6', '12/11/19 17:56:59', 0,
111, 58293, '3', '12/11/19 17:57:25', 26,
111, 58294, 'Q7', '12/11/19 17:57:25', 0,
111, 58295, '3', '12/11/19 17:57:38', 13,
111, 58296, 'Q8', '12/11/19 17:57:38', 0,
111, 58297, '3', '12/11/19 17:57:50', 12,
111, 58298, 'Q9', '12/11/19 17:57:50', 0,
111, 58299, 'Timeout', '12/11/19 17:58:09', 19,
111, 58300, 'Q9', '12/11/19 17:58:09', 0,
111, 58301, '10', '12/11/19 17:58:32', 23,
111, 58302, 'Q10', '12/11/19 17:58:32', 0,
111, 58303, '1', '12/11/19 17:58:49', 17,
111, 58304, 'Q11', '12/11/19 17:58:49', 0,
111, 58307, '3', '12/11/19 17:59:02', 13,
111, 58308, 'Q12', '12/11/19 17:59:02', 0,
111, 58309, 'Timeout', '12/11/19 17:59:23', 21,
111, 58310, 'Q13', '12/11/19 17:59:23', 0,
111, 58311, 'Timeout', '12/11/19 17:59:44', 21,
111, 58312, 'Q13', '12/11/19 17:59:44', 0,
111, 58313, '4', '12/11/19 17:59:51', 7,
111, 58314, 'Q14', '12/11/19 17:59:51', 0,
111, 58318, '2', '12/11/19 18:00:19', 28,
111, 58319, 'Q15', '12/11/19 18:00:19', 0,
111, 58320, '3', '12/11/19 18:00:36', 17,
111, 58321, 'Q16', '12/11/19 18:00:36', 0,
111, 58322, '4', '12/11/19 18:01:04', 28,
111, 58323, 'Q17', '12/11/19 18:01:04', 0,
111, 58324, '1', '12/11/19 18:01:18', 14,
111, 58325, 'Q18', '12/11/19 18:01:18', 0,
111, 58327, '4', '12/11/19 18:01:42', 24,
111, 58328, 'Q19', '12/11/19 18:01:42', 0,
111, 58329, '1', '12/11/19 18:01:56', 14,
111, 58330, 'Q20', '12/11/19 18:01:56', 0,
111, 58331, '1', '12/11/19 18:02:19', 23,
111, 58332, 'Q21', '12/11/19 18:02:19', 0,
111, 58333, '1', '12/11/19 18:02:28', 9,
111, 58334, 'Q22', '12/11/19 18:02:28', 0,
111, 58335, '2', '12/11/19 18:02:52', 24,
111, 58336, 'Completed', '12/11/19 18:02:52', 0,
111, 58337, 'Complete Message', '12/11/19 18:02:52', 0,
111, 58338, 'Thank you', '12/11/19 18:02:52', 0),
nrow = 87, ncol = 5, byrow = TRUE,  # TRUE, not T: T is an ordinary variable
dimnames = list(
  NULL,
  c("Respondent.ID", "order.ID", "Interaction", "Datetime", "difftime")
)),
stringsAsFactors = FALSE)
# Undo the character coercion for the columns that are really numbers.
test[c("Respondent.ID", "order.ID", "difftime")] <-
  lapply(test[c("Respondent.ID", "order.ID", "difftime")], as.numeric)
我需要对 difftime 求和,但只针对满足特定条件的行。本质上,我希望得到如下所示的数据,这样就可以对 Include = 1 的行的时间求和。
条件 1:从 Interaction = "Answer" 的下一行开始,到 Interaction = "Timeout. Call failed." 的上一行为止,这之间的所有行都应为 Include = 1。条件 2:从 Interaction = "Answer" 的下一行开始,到 Interaction = "Completed" 的上一行为止,这之间的所有行也都应为 Include = 1。
# Desired result: the same 87 events plus an `Include` flag.
# Include is 1 for the rows strictly between an 'Answer' row and the next
# 'Timeout. Call failed.' or 'Completed' row, and 0 everywhere else.
#
# NOTE(review): the third column is named "Response" here; the same events
# elsewhere in this file use "Interaction" for it. The Datetime strings in this
# block are also padded with a leading and trailing space. Both are kept
# exactly as written because they are part of the data.
#
# A matrix holds a single type, so mixing strings and numbers in `c()` coerces
# every cell to character. `stringsAsFactors = FALSE` keeps the text columns as
# plain character on every R version, and the numeric columns (including the
# Include flag) are converted back after construction so they can be filtered
# and summed.
new <- data.frame(matrix(c(
111, 2340, 'Enqueueing call', ' 12/2/19 14:53:57 ', NA, 0,
111, 6174, 'Call expired, will be retried in next schedule window', ' 12/2/19 20:14:22 ', 19225, 0,
111, 10386, 'Answer', ' 12/3/19 15:48:56 ', 70474, 0,
111, 10387, 'Contacted', ' 12/3/19 15:48:56 ', 0, 1,
111, 10388, 'Intro', ' 12/3/19 15:48:56 ', 0, 1,
111, 10389, 'Timeout', ' 12/3/19 15:49:16 ', 20, 1,
111, 10390, 'Intro', ' 12/3/19 15:49:16 ', 0, 1,
111, 10391, '1', ' 12/3/19 15:49:30 ', 14, 1,
111, 10392, 'Started', ' 12/3/19 15:49:30 ', 0, 1,
111, 10393, 'Q1', ' 12/3/19 15:49:30 ', 0, 1,
111, 10394, '1', ' 12/3/19 15:49:45 ', 15, 1,
111, 10395, 'Q2', ' 12/3/19 15:49:45 ', 0, 1,
111, 10396, 'Timeout', ' 12/3/19 15:49:54 ', 9, 1,
111, 10397, 'Q2', ' 12/3/19 15:49:54 ', 0, 1,
111, 10398, 'Timeout', ' 12/3/19 15:50:03 ', 9, 1,
111, 10399, 'Q2', ' 12/3/19 15:50:03 ', 0, 1,
111, 10400, 'Timeout', ' 12/3/19 15:50:11 ', 8, 1,
111, 17658, 'Timeout. Call failed.', ' 12/4/19 17:50:27 ', 93616, 0,
111, 19932, 'Call expired, will be retried in next schedule window', ' 12/4/19 20:45:17 ', 10490, 0,
111, 25647, 'Call expired, will be retried in next schedule window', ' 12/5/19 20:33:27 ', 85690, 0,
111, 31516, 'Call expired, will be retried in next schedule window', ' 12/6/19 20:17:18 ', 85431, 0,
111, 36781, 'Call expired, will be retried in next schedule window', ' 12/7/19 20:02:16 ', 85498, 0,
111, 38718, 'Answer', ' 12/8/19 10:24:07 ', 51711, 0,
111, 38719, 'Q2', ' 12/8/19 10:24:07 ', 0, 1,
111, 38720, 'Timeout', ' 12/8/19 10:24:16 ', 9, 1,
111, 38721, 'Q2', ' 12/8/19 10:24:16 ', 0, 1,
111, 38722, 'Timeout', ' 12/8/19 10:24:24 ', 8, 1,
111, 38723, 'Q2', ' 12/8/19 10:24:24 ', 0, 1,
111, 38724, 'Timeout', ' 12/8/19 10:24:33 ', 9, 1,
111, 45339, 'Timeout. Call failed.', ' 12/9/19 12:25:08 ', 93635, 0,
111, 49026, 'Call expired, will be retried in next schedule window', ' 12/9/19 20:31:26 ', 29178, 0,
111, 53972, 'Call expired, will be retried in next schedule window', ' 12/10/19 20:14:38 ', 85392, 0,
111, 58277, 'Answer', ' 12/11/19 17:55:29 ', 78051, 0,
111, 58278, 'Q2', ' 12/11/19 17:55:29 ', 0, 1,
111, 58279, 'Timeout', ' 12/11/19 17:55:38 ', 9, 1,
111, 58280, 'Q2', ' 12/11/19 17:55:38 ', 0, 1,
111, 58281, 'Timeout', ' 12/11/19 17:55:46 ', 8, 1,
111, 58282, 'Q2', ' 12/11/19 17:55:46 ', 0, 1,
111, 58283, '61', ' 12/11/19 17:55:57 ', 11, 1,
111, 58284, 'Q3', ' 12/11/19 17:55:57 ', 0, 1,
111, 58285, '4', ' 12/11/19 17:56:13 ', 16, 1,
111, 58286, 'Q4', ' 12/11/19 17:56:13 ', 0, 1,
111, 58288, '3', ' 12/11/19 17:56:42 ', 29, 1,
111, 58289, 'Interim partial', ' 12/11/19 17:56:42 ', 0, 1,
111, 58290, 'Q5', ' 12/11/19 17:56:42 ', 0, 1,
111, 58291, '3', ' 12/11/19 17:56:59 ', 17, 1,
111, 58292, 'Q6', ' 12/11/19 17:56:59 ', 0, 1,
111, 58293, '3', ' 12/11/19 17:57:25 ', 26, 1,
111, 58294, 'Q7', ' 12/11/19 17:57:25 ', 0, 1,
111, 58295, '3', ' 12/11/19 17:57:38 ', 13, 1,
111, 58296, 'Q8', ' 12/11/19 17:57:38 ', 0, 1,
111, 58297, '3', ' 12/11/19 17:57:50 ', 12, 1,
111, 58298, 'Q9', ' 12/11/19 17:57:50 ', 0, 1,
111, 58299, 'Timeout', ' 12/11/19 17:58:09 ', 19, 1,
111, 58300, 'Q9', ' 12/11/19 17:58:09 ', 0, 1,
111, 58301, '10', ' 12/11/19 17:58:32 ', 23, 1,
111, 58302, 'Q10', ' 12/11/19 17:58:32 ', 0, 1,
111, 58303, '1', ' 12/11/19 17:58:49 ', 17, 1,
111, 58304, 'Q11', ' 12/11/19 17:58:49 ', 0, 1,
111, 58307, '3', ' 12/11/19 17:59:02 ', 13, 1,
111, 58308, 'Q12', ' 12/11/19 17:59:02 ', 0, 1,
111, 58309, 'Timeout', ' 12/11/19 17:59:23 ', 21, 1,
111, 58310, 'Q13', ' 12/11/19 17:59:23 ', 0, 1,
111, 58311, 'Timeout', ' 12/11/19 17:59:44 ', 21, 1,
111, 58312, 'Q13', ' 12/11/19 17:59:44 ', 0, 1,
111, 58313, '4', ' 12/11/19 17:59:51 ', 7, 1,
111, 58314, 'Q14', ' 12/11/19 17:59:51 ', 0, 1,
111, 58318, '2', ' 12/11/19 18:00:19 ', 28, 1,
111, 58319, 'Q15', ' 12/11/19 18:00:19 ', 0, 1,
111, 58320, '3', ' 12/11/19 18:00:36 ', 17, 1,
111, 58321, 'Q16', ' 12/11/19 18:00:36 ', 0, 1,
111, 58322, '4', ' 12/11/19 18:01:04 ', 28, 1,
111, 58323, 'Q17', ' 12/11/19 18:01:04 ', 0, 1,
111, 58324, '1', ' 12/11/19 18:01:18 ', 14, 1,
111, 58325, 'Q18', ' 12/11/19 18:01:18 ', 0, 1,
111, 58327, '4', ' 12/11/19 18:01:42 ', 24, 1,
111, 58328, 'Q19', ' 12/11/19 18:01:42 ', 0, 1,
111, 58329, '1', ' 12/11/19 18:01:56 ', 14, 1,
111, 58330, 'Q20', ' 12/11/19 18:01:56 ', 0, 1,
111, 58331, '1', ' 12/11/19 18:02:19 ', 23, 1,
111, 58332, 'Q21', ' 12/11/19 18:02:19 ', 0, 1,
111, 58333, '1', ' 12/11/19 18:02:28 ', 9, 1,
111, 58334, 'Q22', ' 12/11/19 18:02:28 ', 0, 1,
111, 58335, '2', ' 12/11/19 18:02:52 ', 24, 1,
111, 58336, 'Completed', ' 12/11/19 18:02:52 ', 0, 0,
111, 58337, 'Complete Message', ' 12/11/19 18:02:52 ', 0, 0,
111, 58338, 'Thank you', ' 12/11/19 18:02:52 ', 0, 0),
nrow = 87, ncol = 6, byrow = TRUE,  # TRUE, not T: T is an ordinary variable
dimnames = list(
  NULL,
  c("Respondent.ID", "order.ID", "Response", "Datetime", "difftime", "Include")
)),
stringsAsFactors = FALSE)
# Undo the character coercion for the columns that are really numbers.
new[c("Respondent.ID", "order.ID", "difftime", "Include")] <-
  lapply(new[c("Respondent.ID", "order.ID", "difftime", "Include")], as.numeric)
我尝试添加指示开始和停止位置的列,但不知道如何索引开始和停止之间的行。
更新
我发现了一种情况:我们中途终止了调查,结果有些受访者的通话被切断。发生这种情况时,他们最后一段通话的交互数据中不会出现结束代码。结果,rle 无法捕获我们想要的最后一段游程(run)。我一直在尝试解决这个问题,但这对我来说是新东西,所以我发布了上面测试数据集的一个修改版本来模拟这种情况。
# Variant of the single-respondent event log (68 rows) in which the last call
# is cut off: the rows after the final 'Answer' end on an ordinary response,
# with no 'Timeout. Call failed.' or 'Completed' row to close the run.
# Columns:
#   Respondent.ID - respondent identifier
#   order.ID      - ordering key for the rows
#   Interaction   - event exchanged between the survey tool and the respondent
#   Datetime      - timestamp string, month/day/year hour:minute:second
#   difftime      - seconds elapsed since the previous row (NA on the first)
#
# A matrix holds a single type, so mixing strings and numbers in `c()` coerces
# every cell to character. `stringsAsFactors = FALSE` keeps the text columns as
# plain character on every R version, and the numeric columns are converted
# back after construction so that `difftime` can actually be summed.
test2 <- data.frame(matrix(c(
111, 2340, 'Enqueueing call', '12/2/19 14:53:57', NA,
111, 6174, 'Call expired, will be retried in next schedule window', '12/2/19 20:14:22', 19225,
111, 10386, 'Answer', '12/3/19 15:48:56', 70474,
111, 10387, 'Contacted', '12/3/19 15:48:56', 0,
111, 10388, 'Intro', '12/3/19 15:48:56', 0,
111, 10389, 'Timeout', '12/3/19 15:49:16', 20,
111, 10390, 'Intro', '12/3/19 15:49:16', 0,
111, 10391, '1', '12/3/19 15:49:30', 14,
111, 10392, 'Started', '12/3/19 15:49:30', 0,
111, 10393, 'Q1', '12/3/19 15:49:30', 0,
111, 10394, '1', '12/3/19 15:49:45', 15,
111, 10395, 'Q2', '12/3/19 15:49:45', 0,
111, 10396, 'Timeout', '12/3/19 15:49:54', 9,
111, 10397, 'Q2', '12/3/19 15:49:54', 0,
111, 10398, 'Timeout', '12/3/19 15:50:03', 9,
111, 10399, 'Q2', '12/3/19 15:50:03', 0,
111, 10400, 'Timeout', '12/3/19 15:50:11', 8,
111, 17658, 'Timeout. Call failed.', '12/4/19 17:50:27', 93616,
111, 19932, 'Call expired, will be retried in next schedule window', '12/4/19 20:45:17', 10490,
111, 25647, 'Call expired, will be retried in next schedule window', '12/5/19 20:33:27', 85690,
111, 31516, 'Call expired, will be retried in next schedule window', '12/6/19 20:17:18', 85431,
111, 36781, 'Call expired, will be retried in next schedule window', '12/7/19 20:02:16', 85498,
111, 38718, 'Answer', '12/8/19 10:24:07', 51711,
111, 38719, 'Q2', '12/8/19 10:24:07', 0,
111, 38720, 'Timeout', '12/8/19 10:24:16', 9,
111, 38721, 'Q2', '12/8/19 10:24:16', 0,
111, 38722, 'Timeout', '12/8/19 10:24:24', 8,
111, 38723, 'Q2', '12/8/19 10:24:24', 0,
111, 38724, 'Timeout', '12/8/19 10:24:33', 9,
111, 45339, 'Timeout. Call failed.', '12/9/19 12:25:08', 93635,
111, 49026, 'Call expired, will be retried in next schedule window', '12/9/19 20:31:26', 29178,
111, 53972, 'Call expired, will be retried in next schedule window', '12/10/19 20:14:38', 85392,
111, 58277, 'Answer', '12/11/19 17:55:29', 78051,
111, 58278, 'Q2', '12/11/19 17:55:29', 0,
111, 58279, 'Timeout', '12/11/19 17:55:38', 9,
111, 58280, 'Q2', '12/11/19 17:55:38', 0,
111, 58281, 'Timeout', '12/11/19 17:55:46', 8,
111, 58282, 'Q2', '12/11/19 17:55:46', 0,
111, 58283, '61', '12/11/19 17:55:57', 11,
111, 58284, 'Q3', '12/11/19 17:55:57', 0,
111, 58285, '4', '12/11/19 17:56:13', 16,
111, 58286, 'Q4', '12/11/19 17:56:13', 0,
111, 58288, '3', '12/11/19 17:56:42', 29,
111, 58289, 'Interim partial', '12/11/19 17:56:42', 0,
111, 58290, 'Q5', '12/11/19 17:56:42', 0,
111, 58291, '3', '12/11/19 17:56:59', 17,
111, 58292, 'Q6', '12/11/19 17:56:59', 0,
111, 58293, '3', '12/11/19 17:57:25', 26,
111, 58294, 'Q7', '12/11/19 17:57:25', 0,
111, 58295, '3', '12/11/19 17:57:38', 13,
111, 58296, 'Q8', '12/11/19 17:57:38', 0,
111, 58297, '3', '12/11/19 17:57:50', 12,
111, 58298, 'Q9', '12/11/19 17:57:50', 0,
111, 58299, 'Timeout', '12/11/19 17:58:09', 19,
111, 58300, 'Q9', '12/11/19 17:58:09', 0,
111, 58301, '10', '12/11/19 17:58:32', 23,
111, 58302, 'Q10', '12/11/19 17:58:32', 0,
111, 58303, '1', '12/11/19 17:58:49', 17,
111, 58304, 'Q11', '12/11/19 17:58:49', 0,
111, 58307, '3', '12/11/19 17:59:02', 13,
111, 58308, 'Q12', '12/11/19 17:59:02', 0,
111, 58309, 'Timeout', '12/11/19 17:59:23', 21,
111, 58310, 'Q13', '12/11/19 17:59:23', 0,
111, 58311, 'Timeout', '12/11/19 17:59:44', 21,
111, 58312, 'Q13', '12/11/19 17:59:44', 0,
111, 58313, '4', '12/11/19 17:59:51', 7,
111, 58314, 'Q14', '12/11/19 17:59:51', 0,
111, 58318, '2', '12/11/19 18:00:19', 28
),
nrow = 68, ncol = 5, byrow = TRUE,  # TRUE, not T: T is an ordinary variable
dimnames = list(
  NULL,
  c("Respondent.ID", "order.ID", "Interaction", "Datetime", "difftime")
)),
stringsAsFactors = FALSE)
# Undo the character coercion for the columns that are really numbers.
test2[c("Respondent.ID", "order.ID", "difftime")] <-
  lapply(test2[c("Respondent.ID", "order.ID", "difftime")], as.numeric)
【问题讨论】: