【发布时间】:2020-10-18 06:39:07
【问题描述】:
我在表 'QWERTY' 中有一个 VARIANT 列调用 'REQUEST',其中包含类似 JSON 的数组对象
{
"ID": "123123",
"workflowHistory": [
{
"id": "666",
"workflowType": "CCC",
"entityId": "123123",
"creator": {
"id": "503081",
"displayName": "AGENT2",
"email": "AGENT2@SOMETHING.com",
"userAvatarUrl": "XXXXXXX"
},
"createdDate": "2020-04-30T21:58:09Z",
"deletor": null,
"deletedDate": null,
"clientId": "000000000",
"value": "00000000"
},
{
"id": "555",
"workflowType": "AAA",
"entityId": "123123",
"creator": {
"id": "503080",
"displayName": "AGENT1",
"email": "AGENT1@SOMETHING.com",
"userAvatarUrl": "XXXXXXX"
},
"createdDate": "2020-04-30T21:55:09Z",
"deletor": null,
"deletedDate": null,
"clientId": "000000000",
"value": "00000000"
},
{
"id": "444",
"workflowType": "xyz",
"entityId": "123123",
"creator": {
"id": "503080",
"displayName": "AGENT1",
"email": "AGENT1@SOMETHING.com",
"userAvatarUrl": "XXXXXXX"
},
"createdDate": "2020-04-30T21:19:09Z",
"deletor": null,
"deletedDate": null,
"clientId": "000000000",
"value": "00000000"
},
{
"id": "333",
"workflowType": "BBB",
"entityId": "123123",
"creator": {
"id": "503079",
"displayName": "AGENT0",
"email": "AGENT0@SOMETHING.com",
"userAvatarUrl": "XXXXXXX"
},
"createdDate": "2020-04-30T21:10:09Z",
"deletor": null,
"deletedDate": null,
"clientId": "000000000",
"value": "00000000"
},
{
"id": "222",
"workflowType": "ZZZ",
"entityId": "123123",
"creator": {
"id": "503079",
"displayName": "AGENT0",
"email": "AGENT0@SOMETHING.com",
"userAvatarUrl": "XXXXXXX"
},
"createdDate": "2020-04-30T21:08:09Z",
"deletor": null,
"deletedDate": null,
"clientId": "000000000",
"value": "00000000"
}
]
}
另外,'QWERTY' 表有 HAVERST_DATE 和 PK ARTICLE_ID(与 REQUEST:workflowHistory.ID 相同),我正在尝试获取包含以下列的输出:
- 身份证
- AGENTn 的上次创建日期
- AGENTn 的首次创建日期
- 上一个由 AGENTn-1 创建的 createdDate
- 下一个由 AGENTn+1 创建的 createdDate
我想要这样的输出:
为此,我正在构建如下查询:
使用 WorkFlow_Parsed AS(
SELECT ARTICLE_ID,
HARVEST_DATE,
value:createdDate::timestamp_tz AS create_date,
value:creator:email AS email,
value:workflowType AS workflowType,
value:value AS value
FROM 'QWERTY', lateral flatten( input => REQUEST:workflowHistory )
),
lag_Agent_timing AS
(SELECT
WorkFlow_Parsed.ARTICLE_ID AS ARTICLE_ID,WorkFlow_Parsed.email,LAG(WorkFlow_Parsed.create_date) IGNORE NULLS over (partition by WorkFlow_Parsed.email,WorkFlow_Parsed.ARTICLE_ID order by WorkFlow_Parsed.create_date) AS lag_date_value
FROM WorkFlow_Parsed),
lead_agent_timing AS
(SELECT
WorkFlow_Parsed.ARTICLE_ID AS ARTICLE_ID,WorkFlow_Parsed.email,LEAD(WorkFlow_Parsed.create_date) IGNORE NULLS over (partition by WorkFlow_Parsed.email,WorkFlow_Parsed.ARTICLE_ID order by WorkFlow_Parsed.create_date) AS lead_date_value
FROM WorkFlow_Parsed)
SELECT
DISTINCT
WorkFlow_Parsed.ARTICLE_ID AS _ARTICLE_ID,
WorkFlow_Parsed.email AS _email,
last_value(WorkFlow_Parsed.create_date) over (partition by WorkFlow_Parsed.email,WorkFlow_Parsed.ARTICLE_ID order by WorkFlow_Parsed.create_date) AS last_date_value,
first_value(WorkFlow_Parsed.create_date) over (partition by WorkFlow_Parsed.email,WorkFlow_Parsed.ARTICLE_ID order by WorkFlow_Parsed.create_date) AS first_date_value,
MAX(lag_Agent_timing.lag_date_value),
MIN(lead_agent_timing.lead_date_value)
FROM WorkFlow_Parsed
JOIN lag_Agent_timing ON WorkFlow_Parsed.ARTICLE_ID=lag_Agent_timing.ARTICLE_ID AND lag_Agent_timing.email=WorkFlow_Parsed.email
JOIN lead_agent_timing ON WorkFlow_Parsed.ARTICLE_ID=lead_agent_timing.ARTICLE_ID AND lead_agent_timing.email=WorkFlow_Parsed.email
GROUP BY _ARTICLE_ID,_email
但我收到错误消息:“[SYS_VW.CREATE_DATE_1] 不是有效的按表达式分组”`
我该如何解决?
【问题讨论】:
标签: sql json snowflake-cloud-data-platform lag flatten