【问题标题】:Pick Different values from a column and write values correspondingly in R从列中选择不同的值并在 R 中相应地写入值
【发布时间】:2019-03-15 04:08:29
【问题描述】:

我有一个看起来像这样的数据框:

可重复的数据:

# Sample data as dput() output: a 20-row data.frame with factor columns
# User (single level "Jibran") and Event (levels "IN"/"OUT"), plus a
# character column Time holding "mm/dd/YYYY HH:MM" strings.
structure(list(User = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Jibran", class = "factor"), 
    Event = structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 
    1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("IN", 
    "OUT"), class = "factor"), Time = c("04/15/2015 00:31", "04/16/2015 20:10", 
    "04/21/2015 14:59", "04/22/2015 01:01", "04/22/2015 10:46", 
    "04/23/2015 00:58", "04/23/2015 14:50", "04/24/2015 01:37", 
    "04/25/2015 01:01", "04/27/2015 00:57", "04/17/2015 10:32", 
    "04/29/2015 15:03", "05/01/2015 00:44", "05/02/2015 01:19", 
    "05/02/2015 15:08", "05/03/2015 01:08", "05/03/2015 15:06", 
    "05/04/2015 01:01", "05/04/2015 15:11", "05/05/2015 01:08"
    )), row.names = c(NA, -20L), class = "data.frame")

我正在寻找的值是事件发生变化的值, 例如:

用户 |活动 |时间

Jibran | IN | 2015 年 4 月 21 日 14:59;Jibran | OUT | 2015 年 4 月 22 日 1:01;…… 即下一条事件与当前事件不同时对应的值(仅当 IN 之后紧跟 OUT 时才取值)

我解决这个问题的方法是:

# Attempt to collect Login/Logout pairs from a time log.
# Reads the log, creates an empty three-column result frame, then loops over
# the events and tries to record each IN that is directly followed by an OUT.
x = read.csv("TimeLog2.csv",header=TRUE)
# Result frame starts with 3 columns and 0 rows.
df <- data.frame(matrix(ncol = 3, nrow = 0))

# Name the three result columns.
names(df)[1]<-paste("UserName")
names(df)[2]<-paste("Login")
names(df)[3]<-paste("Logout")


# NOTE(review): i runs up to length(x$Event); when the last event is 'IN',
# x$Event[[i+1]] indexes one element past the end of the column.
for(i in 1:length(x$Event))
{
   # True only when the current event is IN and the very next one is OUT.
   if(x$Event[[i]]== 'IN' && x$Event[[i+1]]== 'OUT'){
   # NOTE(review): df has 0 rows, so writing element i of a column via `$<-`
   # produces a column longer than the frame; this assignment is what raises
   # the "replacement has ... rows, data has 0" error.
   df$Login[[i]]<-(x$Time[[i]])
   df$Logout[[i]]<-(x$Time[[i+1]])
   }
}

返回:

`$<-.data.frame`(`*tmp*`, "Login", value = c(NA, NA, 4L)) 中的错误: 替换有 3 行,数据有 0 行

所需的输出应如下所示:

要确保的一件事是,只有在同一天或第二天(根据日期)发生的事件更改才应写入下一个数据帧,以获得准确的登录/注销值。


【问题讨论】:

  • 您需要一个解决方案。我要数据。
  • 附图是数据集。 @AndreElrico
  • 我不能处理图片,我只能看它们。
  • 发布一个尽可能接近您的真实数据集的示例也很好。例如,为多个用户发布一个示例数据集(2 个用户就足够了)。
  • 您可以通过复制以下结果来共享您的数据:dput(YOURDATA)

标签: r excel datetime dataframe time


【解决方案1】:
# Pair every IN with the OUT that closes it, keeping only pairs whose IN date
# is less than two days before the OUT date, and return one row per pair with
# IN and OUT as columns.
#
# Expects `df1` to be a data.frame with columns User, Event ("IN"/"OUT") and
# Time ("mm/dd/YYYY HH:MM"), e.g.:
# df1 <- read.csv2("TimeLog2.csv", sep = ",")[, 1:3]

library(data.table)

# Calendar date of each event; the clock time is dropped on purpose so the gap
# below is measured in whole days. Called directly instead of through `%>%`,
# which data.table does not provide.
df1$Time2 <- as.Date(df1$Time, format = "%m/%d/%Y", tz = "GMT")

# Every OUT closes a group: the running count of OUTs, lagged by one row, gives
# all rows up to and including an OUT the same group id.
df1$grp <- shift(cumsum(df1$Event == "OUT"), n = 1, fill = 0)

# Within each group, flag rows dated less than two days before the group's
# last row. The last row itself is flagged FALSE.
setDT(df1)[, dataDiff := c(Time2[-.N] - Time2[.N] > -2, FALSE), by = grp]

# Keep each group from its first flagged row onwards (the cumulative sum stays
# non-zero after the first TRUE), then drop the helper columns.
df1 <- df1[, .SD[as.logical(cumsum(dataDiff))], by = grp][
  , `:=`(dataDiff = NULL, Time2 = NULL)][]

# Reshape to one row per group with IN and OUT columns; value.var is named
# explicitly so dcast() does not have to guess it.
dcast(df1, User + grp ~ Event, value.var = "Time")[, `:=`(grp = NULL)][]

结果:

#     User              IN            OUT
#1: Jibran 4/21/2015 14:59 4/22/2015 1:01
#2: Jibran 4/22/2015 10:46 4/23/2015 0:58
#3: Jibran 4/23/2015 14:50 4/24/2015 1:37
#4: Jibran  5/2/2015 15:08  5/3/2015 1:08
#5: Jibran  5/3/2015 15:06  5/4/2015 1:01
#6: Jibran  5/4/2015 15:11  5/5/2015 1:08

【讨论】:

    【解决方案2】:

    数据

    # Sample data: a 20-row data.frame with factor columns User, Event
    # (levels "IN"/"OUT") and Time. The Time labels mix zero-padded
    # ("05/01/2015 00:44") and non-padded ("4/15/2015 0:31") date-time strings.
    df = structure(list(User = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Jibran", class = "factor"), 
    Event = structure(c(1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 
    1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("IN", 
    "OUT"), class = "factor"), Time = structure(c(9L, 10L, 12L, 
    13L, 14L, 15L, 16L, 17L, 18L, 19L, 11L, 20L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L), .Label = c("05/01/2015 00:44", "05/02/2015 01:19", 
    "05/02/2015 15:08", "05/03/2015 01:08", "05/03/2015 15:06", 
    "05/04/2015 01:01", "05/04/2015 15:11", "05/05/2015 01:08", 
    "4/15/2015 0:31", "4/16/2015 20:10", "4/17/2015 10:32", "4/21/2015 14:59", 
    "4/22/2015 1:01", "4/22/2015 10:46", "4/23/2015 0:58", "4/23/2015 14:50", 
    "4/24/2015 1:37", "4/25/2015 1:01", "4/27/2015 0:57", "4/29/2015 15:03"
    ), class = "factor")), class = "data.frame", row.names = c(NA, 
    -20L))
    

    解决方案

    library(tidyverse)   # dplyr verbs and tidyr::pivot_wider()
    library(lubridate)   # mdy_hm(); not attached by library(tidyverse) before 2.0.0
    library(data.table)  # rleid()

    # Pair the last IN of every run of INs with the first OUT of the run of
    # OUTs that follows it, and keep only pairs whose OUT falls on the same
    # calendar day as the IN or on the day after.
    df %>%
      mutate(Time = mdy_hm(as.character(Time))) %>%             # parse the text timestamps
      group_by(id = rleid(Event)) %>%                           # one group per run of equal events
      filter((Event == "IN" & Time == max(Time)) |              # keep the latest IN of a run
             (Event == "OUT" & Time == min(Time))) %>%          # keep the earliest OUT of a run
      ungroup() %>%                                             # forget the run grouping
      mutate(id = cumsum(Event == "IN")) %>%                    # each IN starts a new pair id
      pivot_wider(names_from = Event, values_from = Time) %>%   # one row per pair: IN, OUT
      # Compare calendar dates rather than elapsed time: an OUT on the next
      # day must be kept even when it is more than 24 hours after the IN.
      filter(as.Date(OUT) - as.Date(IN) < 2) %>%
      select(-id)                                               # drop the pair id
    
    # # A tibble: 6 x 3
    #     User   IN                  OUT                
    #    <fct>  <dttm>              <dttm>             
    # 1 Jibran 2015-04-21 14:59:00 2015-04-22 01:01:00
    # 2 Jibran 2015-04-22 10:46:00 2015-04-23 00:58:00
    # 3 Jibran 2015-04-23 14:50:00 2015-04-24 01:37:00
    # 4 Jibran 2015-05-02 15:08:00 2015-05-03 01:08:00
    # 5 Jibran 2015-05-03 15:06:00 2015-05-04 01:01:00
    # 6 Jibran 2015-05-04 15:11:00 2015-05-05 01:08:00
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-12-20
      • 2021-10-25
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多