【问题标题】:Aggregate SQL query across multiple tables by grouping通过分组跨多个表聚合 SQL 查询
【发布时间】:2020-08-05 01:36:51
【问题描述】:

我想对多个表中的记录进行分组。

样本数据:

create table UserTable (
  Id integer not null,
  Name varchar(12) not null
);

insert into UserTable  values (1,  'A B');
insert into UserTable  values (2,  'A C');
insert into UserTable  values (3,  'A C A C');
insert into UserTable  values (4,  'A C C');
insert into UserTable  values (5,  'A C B');
insert into UserTable  values (6,  'A C C');
insert into UserTable  values (7,  'A C D');
insert into UserTable  values (8,  'A C E');
insert into UserTable  values (9,  'A C F');


create table LogTable (
  LogId integer not null,
  Username varchar(12) not null,
  Event varchar(12) not null
);

insert into LogTable  values (1, 'A C A C', 'Read');
insert into LogTable  values (2, 'A C F', 'Write');
insert into LogTable  values (3, 'A C F', 'Read');
insert into LogTable  values (4, 'A C C', 'Update');
insert into LogTable  values (5,'A C C', 'Read');
insert into LogTable  values (6,'A C F', 'Read');
insert into LogTable  values (7,'A C F', 'Update');
insert into LogTable  values (7,'A C F', 'Write');
insert into LogTable  values (7,'A C E','Update');
insert into LogTable  values (7,'A C F', 'Delete');
insert into LogTable  values (10,'A C B', 'Delete');
insert into LogTable  values (11, 'A C F','Copy');
insert into LogTable  values (12, 'A C B','Read');
insert into LogTable  values (13, 'A C F','Update');
insert into LogTable  values (14, 'A C F','Copy');
insert into LogTable  values (15, 'A C F','Read');
insert into LogTable  values (16, 'A C F','Update');
insert into LogTable  values (17, 'A C F','Copy');
insert into LogTable  values (18, 'A C C','Read');
insert into LogTable  values (19, 'A C D','Update');

create table Activity (
  Id integer not null,
  ActivityType varchar(12) not null,
  UserId varchar(12) not null
);

insert into Activity  values (1, 'Videos', 8);
insert into Activity  values (2, 'Text',   7);
insert into Activity  values (3, 'Page',   7);
insert into Activity  values (4, 'Text',   7);
insert into Activity  values (5, 'Text',   9);
insert into Activity  values (6, 'Chat',   8);
insert into Activity  values (7, 'Chat',   5);
insert into Activity  values (7, 'File',   8);
insert into Activity  values (7, 'Videos', 1);
insert into Activity  values (7, 'Text',   4);
insert into Activity  values (10, 'Image', 4);
insert into Activity  values (11, 'Image', 6);
insert into Activity  values (12, 'Chat',  3);
insert into Activity  values (13, 'Chat',  2);
insert into Activity  values (14, 'Page',  1);
insert into Activity  values (15, 'Vidoes',1);
insert into Activity  values (16, 'Vidoes',6);
insert into Activity  values (17, 'Vidoes',5);
insert into Activity  values (18, 'Vidoes',5);
insert into Activity  values (19, 'Chat',  5);

我尝试过的:

SELECT UT.Id,UT.Name,
SUM(CASE 
WHEN LT.Event = 'Read'  THEN 1 
ELSE 0 END
)  AS [USER READ],
SUM(CASE 
WHEN LT.Event = 'Delete'    THEN 1 
ELSE 0 END
)  AS [USER DELETE],
SUM(CASE 
WHEN AC.ActivityType = 'Videos' THEN 1 
WHEN AC.ActivityType = 'Text'   THEN 1  
WHEN AC.ActivityType = 'Page'   THEN 1  
WHEN AC.ActivityType = 'Image'  THEN 1 
ELSE 0 END
)  AS [LEARNING ACTIVITY],
SUM(CASE WHEN AC.ActivityType = 'Chat' THEN 1 ELSE 0 END)  AS [Chat]
FROM UserTable UT
LEFT JOIN Activity AC ON UT.Id = AC.UserId
LEFT JOIN LogTable LT ON LT.Username = UT.Name
GROUP BY UT.Id, UT.Name
        

期望的输出:

Id  |   Name    |   LEARNING ACTIVITY | Chat  | USER READ | USER DELETE|
------------------------------------------------------------------------
1   |   A B     |   2                 | 0     |     0     |     0      |
2   |   A C     |   0                 | 1     |     0     |     0      |
3   |   A C A C |   0                 | 1     |     1     |     0      |
4   |   A C C   |   2                 | 0     |     2     |     0      |
5   |   A C B   |   0                 | 2     |     1     |     1      |
6   |   A C R   |   1                 | 0     |     0     |     0      |
7   |   A C D   |   3                 | 0     |     0     |     0      |
8   |   A C E   |   1                 | 1     |     0     |     0      |
9   |   A C F   |   1                 | 0     |     3     |     1      |

如何聚合两个不相关的表并按IdName 分组?

  1. 用户一起加入和汇总活动
  • 学习活动是(视频、文本、页面和图像)作为ActivityType的总和
  • Chat 是 Chat 为 ActivityType 的所有行
  1. Users 一起加入和聚合 LogTable

【问题讨论】:

    标签: sql sql-server


    【解决方案1】:

    你应该在加入之前进行聚合,这样可以避免多对多的加入导致过度计数:

    SELECT UT.Id,UT.Name, 
      coalesce([LEARNING ACTIVITY],0),
      coalesce([Chat],0),
      coalesce([USER READ],0),
      coalesce([USER DELETE],0)
    FROM UserTable UT
    LEFT JOIN 
     (
       select UserId, 
          SUM(CASE 
                WHEN ActivityType = 'Videos' THEN 1 
                WHEN ActivityType = 'Text'   THEN 1  
                WHEN ActivityType = 'Page'   THEN 1  
                WHEN ActivityType = 'Image'  THEN 1 
                ELSE 0
              END)  AS [LEARNING ACTIVITY],
           SUM(CASE WHEN ActivityType = 'Chat' THEN 1 ELSE 0 END)  AS [Chat]
       from Activity
       group by UserId
     ) AC
    ON UT.Id = AC.UserId
    LEFT JOIN 
     (
       select Username, 
          SUM(CASE 
                WHEN Event = 'Read'  THEN 1 
                ELSE 0 END
             )  AS [USER READ],
          SUM(CASE 
                WHEN Event = 'Delete'    THEN 1 
                ELSE 0 END
             )  AS [USER DELETE]
       from LogTable
       group by UserName
      ) LT
    ON LT.Username = UT.Name
    

    fiddle

    简化 CASE(COALESCE 处理 NULL):

    SELECT UT.Id,UT.Name, 
      coalesce([LEARNING ACTIVITY],0),
      coalesce([Chat],0),
      coalesce([USER READ],0),
      coalesce([USER DELETE],0)
    FROM UserTable UT
    LEFT JOIN 
     (
       select UserId, 
          SUM(CASE WHEN ActivityType IN ('Videos','Text','Page','Image') THEN 1 END)  AS [LEARNING ACTIVITY],
          SUM(CASE WHEN ActivityType = 'Chat' THEN 1 END)  AS [Chat]
       from Activity
       group by UserId
     ) AC
    ON UT.Id = AC.UserId
    LEFT JOIN 
     (
       select Username, 
          SUM(CASE WHEN Event = 'Read'   THEN 1 END)  AS [USER READ],
          SUM(CASE WHEN Event = 'Delete' THEN 1 END)  AS [USER DELETE]
       from LogTable
       group by UserName
      ) LT
    ON LT.Username = UT.Name
    

    【讨论】:

      【解决方案2】:

      我建议您使用窗口函数和外部应用来执行您需要的聚合。这是我提出的与您的预期结果相匹配的查询:

      https://docs.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver15

      https://docs.microsoft.com/en-us/u-sql/statements-and-expressions/select/from/select-selecting-from-cross-apply-and-outer-apply

      select distinct
          UserId,
          UT.Name,
          sum(case when A.ActivityType in ('Videos','Text','Page','Image') then 1 else 0 end) over (partition by UserId) [Learning Activity],
          sum(case when A.ActivityType = 'Chat' then 1 else 0 end) over (partition by UserId) Chat,
          coalesce(LT2.[User Read],0) as [User Read],
          coalesce(LT2.[User Delete],0) as [User Delete]
      from Activity A
      join UserTable UT
          on A.UserId = UT.Id
      outer apply
      (
          select distinct
              sum(case when LT.[Event] = 'Read' then 1 else 0 end) over (partition by UserId) [User Read],
              sum(case when LT.[Event] = 'Delete' then 1 else 0 end) over (partition by UserId) [User Delete]
          from LogTable LT
          where LT.Username = UT.Name
      
      ) LT2
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2017-08-20
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2013-08-22
        • 1970-01-01
        相关资源
        最近更新 更多