【发布时间】:2022-06-18 05:19:57
【问题描述】:
/* Data Setup
|  Builds the sample job-history temp table used by the queries that follow.
|  One row per job; a NULL EndDate marks a job with no end date recorded
|  (the queries below substitute the current date for it).
*/
DROP TABLE IF EXISTS #DaysPerJob;

CREATE TABLE #DaysPerJob
(
    GroupID   INT,
    JobDesc   VARCHAR(100),
    StartDate DATE,
    EndDate   DATE
);

INSERT INTO #DaysPerJob (GroupID, JobDesc, StartDate, EndDate)
VALUES
    (23293, 'Food Prep',                          '2017-03-01', '2017-07-17'),
    (23293, 'Finisher',                           '2021-11-19', NULL),
    (23293, 'Cashier',                            '2021-12-06', '2021-12-10'),
    (26208, '3rd SHift Stocker',                  '2019-09-25', '2020-11-05'),
    (26208, 'Order Fulfillment Assoc',            '2020-08-05', '2021-04-16'),
    (26208, 'Customer Service Rep',               '2021-05-10', '2021-10-15'),
    (26208, 'Delivery Driver',                    '2021-11-15', NULL),
    (26208, 'Another Job',                        '2022-02-23', '2022-03-02'),
    (26208, 'Same Day Job Start as Prev Job End', '2022-03-01', NULL);

-- Uncomment to inspect the raw rows in processing order:
--SELECT * FROM #DaysPerJob dpj ORDER BY dpj.GroupID, dpj.StartDate, dpj.EndDate
/* Days Per Job Calculations - Attempts
|  Lists every job with the end date of the job that sorts immediately before it
|  in the same group, and the raw length of the job in days. Days shared by
|  overlapping jobs are NOT de-duplicated by this query.
*/
SELECT
    dj.GroupID,
    dj.JobDesc,
    dj.StartDate,
    dj.EndDate,
    -- NULL for the first job of a group, or when the preceding job has no end date.
    LAG(dj.EndDate, 1) OVER (PARTITION BY dj.GroupID
                             ORDER BY dj.StartDate, dj.EndDate) AS PreviousJobEndDate,
    DATEDIFF(DAY, dj.StartDate, eff.EffectiveEndDate)           AS daysPerJob
FROM #DaysPerJob AS dj
/* A job without an end date is measured up to the current date. */
CROSS APPLY (SELECT ISNULL(dj.EndDate, GETDATE()) AS EffectiveEndDate) AS eff
ORDER BY
    dj.GroupID,
    dj.StartDate,
    dj.EndDate
我如何获得每组独特天数的总和?
上面的 SQL 将为您提供一个工作记录表。每个职位都有一个开始日期,但并非所有职位都有一个结束日期,这意味着他们仍然受雇于该职位。
我一直在努力解决的问题是如何计算不重复的工作天数。使用 DATEDIFF 函数计算每份工作的天数非常容易,但是当多份工作的日期范围重叠时,简单相加会把重叠的那些天计算两次,我目前还无法正确处理这种情况。
我按开始日期排序,然后使用 LAG 将上一份工作的结束日期与当前工作的开始日期进行比较。如果当前工作的开始日期 <= 上一份工作的结束日期,就改用上一份工作的结束日期作为起点,计算到当前工作结束日期为止的天数。
但是上述做法存在问题……如果上一份工作没有结束日期,或者上一份工作的结束日期 > 当前工作的结束日期怎么办?这意味着当前工作完全落在上一份工作的日期范围之内,因此不应再计算任何天数:该工作的天数应为 0,汇总总天数时也就不会重复计入这份工作。这正是我一直无法解决的最后一个问题,也是我在 Stack Overflow 上发布这个问题的原因。
/* Some SQL below of some things I have tried */
/* Days Per Job Calculations - Attempts
|  For each job, compares the end date of the job that sorts immediately before it
|  (same group, ordered by StartDate, EndDate) with the job's own date range.
|
|  Known limitations of this attempt:
|    - Only the immediately preceding row is inspected, not every earlier job.
|    - When PreviousJobEndDate or EndDate is NULL the range test evaluates to UNKNOWN,
|      so the row falls through to the 'NewSet' / dj.StartDate branch.
*/
SELECT
    prev.GroupID,
    prev.JobDesc,
    prev.StartDate,
    prev.EndDate,
    prev.PreviousJobEndDate,
    /* When the previous job ended inside this job's range, this job is treated as
    |  starting on the previous job's end date; otherwise its own start date is kept.
    */
    adj.StartDateForSet,
    /* Same test as StartDateForSet, surfaced as a label: 'InRange' when the previous
    |  job ended inside this job's range, 'NewSet' otherwise.
    */
    CASE WHEN chk.IsInRange = 1 THEN 'InRange' ELSE 'NewSet' END                 AS withinRangeCheck,
    /* Raw job length; a job without an end date runs to the current date. */
    DATEDIFF(DAY, prev.StartDate, ISNULL(prev.EndDate, GETDATE()))               AS daysPerJob,
    /* Intended to be SUMmed per group later: job length measured from the adjusted
    |  start date, so days already covered by the previous job are not counted again.
    */
    DATEDIFF(DAY, adj.StartDateForSet, ISNULL(prev.EndDate, GETDATE()))          AS DaysEmployedWithinSet
FROM
(
    /* Evaluate the LAG once and reuse it in every expression above. */
    SELECT
        dj.GroupID,
        dj.JobDesc,
        dj.StartDate,
        dj.EndDate,
        LAG(dj.EndDate) OVER (PARTITION BY dj.GroupID
                              ORDER BY dj.StartDate, dj.EndDate) AS PreviousJobEndDate
    FROM #DaysPerJob AS dj
) AS prev
CROSS APPLY
(
    /* 1 when the previous job's end date lies within [StartDate, EndDate], else 0. */
    SELECT CASE WHEN prev.PreviousJobEndDate BETWEEN prev.StartDate AND prev.EndDate
                THEN 1
                ELSE 0
           END AS IsInRange
) AS chk
CROSS APPLY
(
    SELECT CASE WHEN chk.IsInRange = 1
                THEN prev.PreviousJobEndDate
                ELSE prev.StartDate
           END AS StartDateForSet
) AS adj
ORDER BY
    prev.GroupID,
    prev.StartDate,
    prev.EndDate
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-| -|-|-|-|-|-|-|-|-|-|-|-|
此问题的解决方案如下,基于选择的正确发布答案
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-| -|-|-|-|-|-|-|-|-|-|-|-|
我原以为这个问题会有更多答案,但它并不简单……至少对我来说不简单,我的同事也没能解答。最终这个问题收到了两个答案。其中一个回答虽然很接近,但没有准确计算出受雇天数。我对数据以及 Excel 中的计算做了三次核对;基于本示例提供的数据集,总计应该如下文所示。下面的 SQL Server 版本使用递归 CTE 来创建日期表。
/* SUM Unique Days in Multiple Date Range Records (SQL Server).sql
|  SQL Server Example
|  Desc: Shows how to obtain the unique days employed, i.e. the same calendar day is
|        not counted twice when an individual holds more than one job at a time.
*/

/* Data Setup
|  One row per job; a NULL EndDate marks a job with no end date recorded.
*/
DROP TABLE IF EXISTS #DaysPerJob;

CREATE TABLE #DaysPerJob
(
    GroupID   INT,
    JobDesc   VARCHAR(100),
    StartDate DATE,
    EndDate   DATE
);

INSERT INTO #DaysPerJob (GroupID, JobDesc, StartDate, EndDate)
VALUES
    (23293, 'Food Prep',                          '2017-03-01', '2017-07-17'),
    (23293, 'Finisher',                           '2021-11-19', NULL),
    (23293, 'Starter',                            '2021-11-21', '2021-12-13'),
    (23293, 'Cashier',                            '2021-12-06', '2021-12-10'),
    (26208, '3rd SHift Stocker',                  '2019-09-25', '2020-11-05'),
    (26208, 'Order Fulfillment Assoc',            '2020-08-05', '2021-04-16'),
    (26208, 'Customer Service Rep',               '2021-05-10', '2021-10-15'),
    (26208, 'Delivery Driver',                    '2021-11-15', NULL),
    (26208, 'Another Job',                        '2022-02-23', '2022-03-02'),
    (26208, 'Same Day Job Start as Prev Job End', '2022-03-01', NULL)
;
/* Unique days employed per GroupID.
|
|  Approach: expand the period from the earliest StartDate up to today into one row per
|  calendar day (recursive CTE), join every job onto the days it covers, keep a single
|  row per (GroupID, day) and count what is left.
|
|  Output columns:
|    daysEmployed     - number of distinct calendar days covered by at least one job
|    minStartDate     - first employed day
|    maxEndDate       - last employed day
|    TotalDaysInRange - calendar days from minStartDate through maxEndDate, INCLUSIVE
|    unEmployedDays   - days inside that range not covered by any job
*/
WITH Dates(date) AS
(
    /* Recursive calendar: one row per day from the earliest StartDate through today. */
    SELECT MIN(StartDate) AS date
    FROM #DaysPerJob
    UNION ALL
    SELECT DATEADD(DAY, 1, date)
    FROM Dates
    /* Compare DATE with DATE. Comparing against the datetime GETDATE() would let the
    |  recursion emit one extra row for tomorrow.
    */
    WHERE date < CAST(GETDATE() AS DATE)
)
, ranked AS
(
    /* One row per (job, covered day). ranker numbers the jobs covering the same day
    |  within a group so that overlapping jobs can be collapsed to a single row later.
    */
    SELECT j.GroupID
         , j.JobDesc
         , j.StartDate
         , j.EndDate
         , d.date
         , ROW_NUMBER() OVER (PARTITION BY j.GroupID, d.date
                              ORDER BY j.StartDate, ISNULL(j.EndDate, GETDATE())) AS ranker
    FROM Dates d
    /* Inner join: days on which no job was held produce no row at all.
    |  A job without an end date is treated as running through today.
    */
    INNER JOIN #DaysPerJob j
            ON j.StartDate <= d.date
           AND ISNULL(j.EndDate, GETDATE()) >= d.date
    --WHERE j.GroupID = 26208 --23293
)
/* Non Aggregate Data - UnComment to view */
/*
SELECT * FROM ranked r
ORDER BY r.date, r.StartDate, IsNull(r.EndDate, GetDate()), r.GroupID
OPTION (MaxRecursion 0)
*/
/* Aggregated Data */
SELECT r.GroupID
     , COUNT(*)    AS daysEmployed
     , MIN(r.date) AS minStartDate
     , MAX(r.date) AS maxEndDate
     /* DATEDIFF counts day boundaries crossed, so + 1 makes the range inclusive of both
     |  end points, matching daysEmployed, which counts calendar days.
     */
     , DATEDIFF(DAY, MIN(r.date), MAX(r.date)) + 1            AS TotalDaysInRange
     /* Days NOT employed = inclusive range length minus the days employed.
     |  Without the + 1 this is off by one (-1 for a group employed every day).
     */
     , DATEDIFF(DAY, MIN(r.date), MAX(r.date)) + 1 - COUNT(*) AS unEmployedDays
FROM ranked r
WHERE r.ranker = 1 /* keep exactly one row per (GroupID, day) */
GROUP BY r.GroupID
ORDER BY r.GroupID
OPTION (MAXRECURSION 0) /* The default MaxRecursion setting is 100. Generating more than 100 dates using this method will require the Option (MaxRecursion N) segment of the query, where N is the desired MaxRecursion setting. Setting this to 0 will remove the MaxRecursion limitation altogether */
根据今天(2022 年 6 月 2 日)发布的屏幕截图,总数为:
GroupID 23293 : 工作 335 天
GroupID 26208 : 工作 929 天
这篇 SO Post 提供了有关如何填充日期表的出色示例,其中一些答案无需使用 Option (MaxRecursion) 即可完成这一壮举
【问题讨论】:
标签: sql-server