【问题标题】:I want to assign "day" and "night" variables based on maximum duration inside and outside "08:00:00-20:00:00"(我想根据时间段在“08:00:00-20:00:00”之内和之外的持续时间长短,分配“白天”和“夜晚”变量)
【发布时间】:2019-09-03 14:02:56
【问题描述】:

我正在尝试在一个日期时间(DateTime)数据集中添加一个新变量:当时间段不跨越“08:00:00”或“20:00:00”这两个时间点时,直接根据它所在的时段标记为“白天”或“夜晚”;当时间段跨越这两个时间点时,我想比较它在 08:00-20:00(白天)和 20:00-08:00(夜晚)内各自花费的时间,并按较长的一方标记为“白天”或“夜晚”。

    #Current input
    pacman::p_load(pacman,dplyr,lubridate,chron)
    # Sample intervals: one id per row, with start/end timestamps written as
    # "YYYY-MM-DD HH:MM:SS" strings and parsed as UTC date-times.
    id <- c("m1", "m1", "m1", "m2", "m2", "m2", "m3", "m4", "m4")
    x <- c(
      "1998-01-03 10:00:00", "1998-01-03 16:00:00", "1998-01-03 19:20:00",
      "1998-01-04 00:50:00", "1998-01-06 11:20:00", "1998-01-06 20:50:00",
      "1998-01-06 22:00:00", "1998-01-07 06:30:00", "1998-01-07 07:50:00"
    )
    y <- c(
      "1998-01-03 16:00:00", "1998-01-03 19:20:00", "1998-01-04 00:50:00",
      "1998-01-06 11:20:00", "1998-01-06 20:50:00", "1998-01-06 22:00:00",
      "1998-01-07 07:40:00", "1998-01-07 07:50:00", "1998-01-07 08:55:00"
    )
    # x holds the interval starts, y the matching interval ends.
    start <- as.POSIXct(x, tz = "UTC", format = "%Y-%m-%d %H:%M:%S")
    end <- as.POSIXct(y, tz = "UTC", format = "%Y-%m-%d %H:%M:%S")
    mydata <- data.frame(id, start, end)

    #Current output
    # Attempt at labelling every row of `mydata` as "day", "night" or "mixed".
    # Requires dplyr for %>%, mutate() and case_when().
    #
    # NOTE(review): start1/end1 are built by replacing the date part of each
    # timestamp with today's date (Sys.Date()), so only the clock time is kept.
    # An interval that ends on a later calendar day therefore gets an end1 that
    # is earlier than (or unrelated to) start1, and the number of days spanned
    # is lost. This is why intervals crossing 08:00 or 20:00 can be mislabelled.
    df1 <- mydata %>%
      mutate(start1 = as.POSIXct(sub("\\d+-\\d+-\\d+", Sys.Date(), start)),
             end1 = as.POSIXct(sub("\\d+-\\d+-\\d+", Sys.Date(), end)),
             # as.POSIXct("HH:MM:SS", format = "%T") yields that clock time on
             # the current date, which makes it comparable with start1/end1.
             # case_when() uses the first condition that is TRUE for a row.
             #
             # Branch 1 -> "day": start at/after 08:00 and end within [08:00, 20:00).
             day.night = case_when(start1 >= as.POSIXct('08:00:00', format = "%T") &
                 end1 >= as.POSIXct('08:00:00', format = "%T") &
                 end1 < as.POSIXct('20:00:00', format = "%T") ~ "day",
             # Branch 2 -> "night". `&` binds tighter than `|`, so this reads as
             #   (start >= 20:00 & (start < 08:00 | end < 23:00)) |
             #   (start < 08:00 & end < 08:00)
               start1 >= as.POSIXct('20:00:00', format = "%T") &
              (start1 < as.POSIXct('08:00:00', format = "%T") | end1 < as.POSIXct('23:00:00', format = "%T"))|
                (start1 < as.POSIXct('08:00:00', format = "%T") & end1 < as.POSIXct('08:00:00', format = "%T")) ~ "night",
             # Branches 3 and 4: compare the time from start1 up to 20:00 with
             # the time from 20:00 up to end1; the larger side decides the label.
             # Only the 20:00 boundary is considered here, not 08:00.
              difftime(as.POSIXct('20:00:00', format = "%T"), start1) > difftime(end1, as.POSIXct('20:00:00', format = "%T")) ~ "day",
              difftime(as.POSIXct('20:00:00', format = "%T"), start1) < difftime(end1, as.POSIXct('20:00:00', format = "%T")) ~ "night",
             # Fallback when none of the conditions above is TRUE.
                       TRUE ~ "mixed"))

当前输出对所有跨越 08:00 或 20:00 这两个边界的时段都给出了错误的标记。

例如,第 3 行应该是 "night",因为其中 4 小时 50 分钟属于 "night",只有 40 分钟属于 "day"。

第 4 行也应该是 "night",因为其中 31 小时 10 分钟属于 "night",而 27 小时 20 分钟属于 "day"(该时段总长 58 小时 30 分钟)。

    #Current table 
    id               start                 end              start1                end1 day.night
    1 m1 1998-01-03 10:00:00 1998-01-03 16:00:00 2019-09-03 10:00:00 2019-09-03 16:00:00       day
    2 m1 1998-01-03 16:00:00 1998-01-03 19:20:00 2019-09-03 16:00:00 2019-09-03 19:20:00       day
    3 m1 1998-01-03 19:20:00 1998-01-04 00:50:00 2019-09-03 19:20:00 2019-09-03 00:50:00       day
    4 m2 1998-01-04 00:50:00 1998-01-06 11:20:00 2019-09-03 00:50:00 2019-09-03 11:20:00       day
    5 m2 1998-01-06 11:20:00 1998-01-06 20:50:00 2019-09-03 11:20:00 2019-09-03 20:50:00       day
    6 m2 1998-01-06 20:50:00 1998-01-06 22:00:00 2019-09-03 20:50:00 2019-09-03 22:00:00     night
    7 m3 1998-01-06 22:00:00 1998-01-07 07:40:00 2019-09-03 22:00:00 2019-09-03 07:40:00     night
    8 m4 1998-01-07 06:30:00 1998-01-07 07:50:00 2019-09-03 06:30:00 2019-09-03 07:50:00     night
    9 m4 1998-01-07 07:50:00 1998-01-07 08:55:00 2019-09-03 07:50:00 2019-09-03 08:55:00       day

【问题讨论】:

    标签: r dataframe datetime lubridate


    【解决方案1】:
    library(dplyr)
    library(lubridate)
    library(chron)
    
    # Sample intervals: one id per row, with start/end timestamps written as
    # "YYYY-MM-DD HH:MM:SS" strings and parsed as UTC date-times.
    id <- c("m1", "m1", "m1", "m2", "m2", "m2", "m3", "m4", "m4")
    x <- c(
      "1998-01-03 10:00:00", "1998-01-03 16:00:00", "1998-01-03 19:20:00",
      "1998-01-04 00:50:00", "1998-01-06 11:20:00", "1998-01-06 20:50:00",
      "1998-01-06 22:00:00", "1998-01-07 06:30:00", "1998-01-07 07:50:00"
    )
    y <- c(
      "1998-01-03 16:00:00", "1998-01-03 19:20:00", "1998-01-04 00:50:00",
      "1998-01-06 11:20:00", "1998-01-06 20:50:00", "1998-01-06 22:00:00",
      "1998-01-07 07:40:00", "1998-01-07 07:50:00", "1998-01-07 08:55:00"
    )
    # x holds the interval starts, y the matching interval ends.
    start <- as.POSIXct(x, tz = "UTC", format = "%Y-%m-%d %H:%M:%S")
    end <- as.POSIXct(y, tz = "UTC", format = "%Y-%m-%d %H:%M:%S")
    mydata <- data.frame(id, start, end)
    
    # Label each interval in `mydata` as "day" or "night" according to where it
    # spends more of its time: inside the daily daytime window
    # (day_start_hour to day_end_hour, i.e. 08:00-20:00) or outside it.
    # A tie (equal daytime and night-time hours) is labelled "day".
    #
    # Input:  `mydata` with date-time columns `start` and `end`.
    # Output: `df1` = `mydata` plus the helper columns computed below and the
    #         final `day_night` label.
    day_start_hour <- 8
    day_end_hour <- 20

    df1 <- mydata %>%
      mutate(
        # The interval itself and its total length in hours
        i = interval(start, end),
        total_interval_length = time_length(i, unit = "hour"),

        # Calendar days on which the interval starts and ends
        first_day = floor_date(start, unit = "day"),
        last_day = floor_date(end, unit = "day"),

        # Daytime window on the first and on the last calendar day
        first_day_daytime = interval(
          update(first_day, hour = day_start_hour),
          update(first_day, hour = day_end_hour)
        ),
        last_day_daytime = interval(
          update(last_day, hour = day_start_hour),
          update(last_day, hour = day_end_hour)
        ),

        # Hours of the interval that fall inside each daytime window.
        # intersect() gives NA when the two intervals do not overlap, hence
        # the coalesce() to 0. The lubridate:: prefix keeps this working even
        # if another attached package (e.g. dplyr) masks intersect().
        first_day_overlap = coalesce(
          as.numeric(
            as.duration(lubridate::intersect(first_day_daytime, i)),
            "hour"
          ),
          0
        ),
        last_day_overlap = coalesce(
          as.numeric(
            as.duration(lubridate::intersect(last_day_daytime, i)),
            "hour"
          ),
          0
        ),

        # Total daytime hours:
        # - interval within a single date: the overlap on that date
        #   (first_day_overlap and last_day_overlap are the same there);
        # - interval across several dates: first-day overlap + last-day
        #   overlap + one full daytime window for every date strictly
        #   in between.
        daytime_length = if_else(
          first_day == last_day,
          first_day_overlap,
          first_day_overlap + last_day_overlap +
            (day_end_hour - day_start_hour) *
              (as.numeric(as.duration(interval(first_day, last_day)), "day") - 1)
        ),

        # Night-time hours are whatever is left of the interval; the larger
        # share decides the label.
        day_night = if_else(
          daytime_length >= total_interval_length - daytime_length,
          "day",
          "night"
        )
      )
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2013-07-12
      • 1970-01-01
      • 2011-06-27
      • 2019-12-07
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多