【发布时间】:2021-10-21 10:41:58
【问题描述】:
我正在尝试将此 pl/pgSQL 函数转换为在 Snowflake 上工作。不幸的是,我刚开始使用 Snowflake,无法正确转换它。 Snowflake 计划在今年年底之前支持 pgSQL 查询,不幸的是,目前还没有实现..
这里简要介绍一下,让您了解我的表存储的内容以及函数的作用。 我有三张桌子。您可以在此问题的底部找到所有表和示例数据的 DDL 语句。
- Events - 这是源表。
- 持续时间 - 这是目标表。
- 属性 - 这是一个用于引用的表格,每当调用函数时都会更新。
我每天一次将数据导入事件表。我们感兴趣的事件是 device_types 1 和 2(进入/退出)的事件。然后我运行我的函数,它计算具有相同 card_nr 的事件之间的正确持续时间。之后,我将这些持续时间导入到我的持续时间表中并更新属性表。
以下是事件示例:
下面是函数被调用后的持续时间示例:
我需要介绍的最重要的事情是:
- 我应该过滤制造商始终为“XX”,因为有像 YY 或 ZZ 这样的制造商
- 我应该只计算属性中 events.event_time >= durationLimitDate 的持续时间
- 我需要确保 event_id_arrival 和 event_id_departure 不在目标表(持续时间)中以避免重复!
- 将计算结果插入持续时间表后,我必须更新属性中的 durationLimitDate。知道
durationLimitDate = (Max(event_time) - durationLimitDays))
功能
CREATE OR REPLACE FUNCTION calculateduration() RETURNS void AS $function$
WITH cte AS (SELECT e.id, e.card_nr, e.event_time, e.ticket_type, e.manufacturer, e.carpark_id, e.device_type,
ROW_NUMBER() OVER (ORDER BY e.card_nr, e.carpark_id, e.event_time, e.device_type) AS rn
FROM events e
LEFT JOIN durations d ON d.event_id_arrival = e.id OR d.event_id_departure = e.id
WHERE e.event_time >= (SELECT PROP_VALUE::timestamp FROM properties WHERE prop_key = 'DURATION.LIMIT.DATE')
AND e.device_type IN (1, 2)
AND event_type = 2
AND e.manufacturer LIKE 'XX%'
AND d.id IS NULL)
INSERT INTO durations (id, odb_created_at, event_id_arrival, event_id_departure,
event_time_arrival, event_time_departure,
card_nr, ticket_type, duration, manufacturer, carpark_id)
SELECT nextval('durations_id_seq'),
current_timestamp,
arrived_entry.id,
departed_entry.id,
arrived_entry.event_time,
departed_entry.event_time,
arrived_entry.card_nr,
arrived_entry.ticket_type,
date_part('epoch', departed_entry.event_time::timestamp - arrived_entry.event_time::timestamp),
arrived_entry.manufacturer,
arrived_entry.carpark_id
FROM (SELECT * FROM cte WHERE cte.device_type = 1) AS arrived_entry
INNER JOIN (SELECT * FROM cte WHERE cte.device_type = 2) AS departed_entry ON arrived_entry.card_nr = departed_entry.card_nr
AND arrived_entry.carpark_id = departed_entry.carpark_id
AND arrived_entry.rn + 1 = departed_entry.rn;
UPDATE properties
SET PROP_VALUE = (SELECT (MAX(event_time) - ((SELECT PROP_VALUE FROM properties WHERE prop_key = 'DURATION.LIMIT.DAYS') ||' day')::interval) FROM events WHERE event_time >= (SELECT PROP_VALUE::timestamp FROM properties WHERE prop_key = 'DURATION.LIMIT.DATE'))
WHERE PROP_KEY ='DURATION.LIMIT.DATE';
$function$
LANGUAGE sql;
DDL 脚本
-- events
CREATE TABLE IF NOT EXISTS events (
id bigint NOT NULL autoincrement start 1 increment 1 PRIMARY KEY,
odb_created_at timestamp without time zone NOT NULL,
event_time timestamp without time zone NOT NULL,
device_type integer NOT NULL,
event_type integer NOT NULL,
ticket_type integer NOT NULL,
card_nr character varying(100),
count integer DEFAULT 1 NOT NULL,
manufacturer character varying(200),
carpark_id bigint
);
-- durations
CREATE TABLE IF NOT EXISTS durations (
id bigint NOT NULL autoincrement start 1 increment 1 PRIMARY KEY,
odb_created_at timestamp without time zone NOT NULL,
event_id_arrival bigint,
event_id_departure bigint,
event_time_arrival timestamp without time zone,
event_time_departure timestamp without time zone,
card_nr character varying(100),
ticket_type integer,
duration integer,
manufacturer character varying(200),
carpark_id bigint
);
--properties
create or replace TABLE PROPERTIES (
PROP_KEY VARCHAR(80) NOT NULL,
PROP_VALUE VARCHAR(250),
primary key (PROP_KEY)
);
样本数据:
INSERT INTO properties (prop_key,prop_value) VALUES
('DURATION.LIMIT.DAYS','30'),
('DURATION.LIMIT.DATE','2021-08-01 00:00:00.00');
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188160996, '2021-10-02 04:28:26.338', '2021-10-01 09:14:41.32', 1, 2, 11, '03998988030897300007782', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188160790, '2021-10-02 04:28:26.248', '2021-10-01 09:31:10.94', 2, 2, 11, '03998988030897300007782', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188146489, '2021-10-02 04:26:55.069', '2021-10-01 10:03:01.57', 1, 2, 500, '01479804030429500089598', 1, 'XX', 1563);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188146069, '2021-10-02 04:26:54.852', '2021-10-01 11:49:58.45', 2, 2, 500, '01479804030429500089598', 1, 'XX', 1563);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188161161, '2021-10-02 04:28:26.372', '2021-10-01 18:44:33.62', 1, 2, 11, '03998988030897300007782', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188160950, '2021-10-02 04:28:26.329', '2021-10-01 18:45:51.903', 2, 2, 11, '03998988030897300007782', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188161227, '2021-10-02 04:28:26.374', '2021-10-01 23:21:18.58', 1, 2, 11, '04139733030897300003136', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188160974, '2021-10-02 04:28:26.334', '2021-10-01 23:24:03.29', 2, 2, 11, '04139733030897300003136', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188239864, '2021-10-03 04:24:43.345', '2021-10-02 06:49:55.97', 1, 2, 11, '01719400030897300061410', 1, 'XX', 1852);
INSERT INTO public.events (id, odb_created_at, event_time, device_type, event_type, ticket_type, card_nr, count, manufacturer, carpark_id) VALUES(188239649, '2021-10-03 04:24:43.308', '2021-10-02 07:02:08.72', 2, 2, 11, '01719400030897300061410', 1, 'XX', 1852);
谢谢!
独特的测试
CREATE TABLE IF NOT EXISTS test (
id bigint NOT NULL AUTOINCREMENT PRIMARY KEY -- Check the syntax!
, odb_created_at timestamp without time zone NOT NULL
, event_time timestamp without time zone NOT NULL
, device_type integer NOT NULL
, event_type integer NOT NULL
, ticket_type integer NOT NULL
, card_nr character varying(100)
, count integer DEFAULT 1 NOT NULL
, manufacturer character varying(200)
, carpark_id bigint
, UNIQUE (card_nr, event_time) -- NATURAL KEY
);
INSERT INTO EUILOGS_DEV.ILOGS.TEST (ODB_CREATED_AT, EVENT_TIME, DEVICE_TYPE, EVENT_TYPE, TICKET_TYPE, CARD_NR, COUNT, MANUFACTURER, CARPARK_ID)
VALUES(current_timestamp(), '2021-01-01 15:00:00.000', 1, 1, 1, 'CARD1', 1, 'MAN1', 1);
INSERT INTO EUILOGS_DEV.ILOGS.TEST (ODB_CREATED_AT, EVENT_TIME, DEVICE_TYPE, EVENT_TYPE, TICKET_TYPE, CARD_NR, COUNT, MANUFACTURER, CARPARK_ID)
VALUES(current_timestamp(), '2021-01-01 15:00:00.000', 1, 1, 1, 'CARD1', 1, 'MAN1', 1);
【问题讨论】:
-
如果我理解正确,您的
durations和properties表都可以从events计算,因此它们基本上是多余的。 [另外:您对自动增量的处理是错误的] -
嗨@wildplasser。我需要属性始终是最大 event_time 减去 30 天。我发现将它们保存在特定表中更容易。持续时间将收集基本上过滤事件数据,以便匹配 2 个事件,这在某些时候可能是多余的,但我需要将它分成两个表
-
另外:
events和durations上都需要 自然键。可能类似于(card_number, event_time)这个自然键将允许检测/避免重复。
标签: postgresql snowflake-cloud-data-platform plpgsql