【发布时间】:2021-12-27 11:34:20
【问题描述】:
我正在尝试转换数据。
输入:
| col01 | col02 | col03 |
|---|---|---|
| 1/2/3 | John/Smith/David | 2000/2500/1000 |
输出:
| col01 | col02 | col03 |
|---|---|---|
| 1 | John | 2000 |
| 2 | Smith | 2500 |
| 3 | David | 1000 |
提前致谢。
【问题讨论】:
我正在尝试转换数据。
输入:
| col01 | col02 | col03 |
|---|---|---|
| 1/2/3 | John/Smith/David | 2000/2500/1000 |
输出:
| col01 | col02 | col03 |
|---|---|---|
| 1 | John | 2000 |
| 2 | Smith | 2500 |
| 3 | David | 1000 |
提前致谢。
【问题讨论】:
你可以使用:
-- Split the '/'-delimited lists stored in col01, col02 and col03 into one
-- output row per list position, using only INSTR/SUBSTR (no regular
-- expressions).
--
-- For every column the recursion tracks two positions:
--   pN_from : position where the current element starts (0 = list exhausted)
--   pN_to   : position of the '/' that ends the current element
--             (0 = no further '/', i.e. the current element is the last one)
WITH token_bounds (col01, col02, col03,
                   p1_from, p1_to,
                   p2_from, p2_to,
                   p3_from, p3_to) AS (
  -- Anchor member: the first element of each list starts at position 1.
  SELECT col01,
         col02,
         col03,
         1, INSTR(col01, '/', 1),
         1, INSTR(col02, '/', 1),
         1, INSTR(col03, '/', 1)
  FROM   input
  UNION ALL
  -- Recursive member: step each column past its current delimiter.
  -- A column that has already run out of delimiters is pinned to 0/0.
  SELECT col01,
         col02,
         col03,
         CASE WHEN p1_to = 0 THEN 0 ELSE p1_to + 1 END,
         CASE WHEN p1_to = 0 THEN 0 ELSE INSTR(col01, '/', p1_to + 1) END,
         CASE WHEN p2_to = 0 THEN 0 ELSE p2_to + 1 END,
         CASE WHEN p2_to = 0 THEN 0 ELSE INSTR(col02, '/', p2_to + 1) END,
         CASE WHEN p3_to = 0 THEN 0 ELSE p3_to + 1 END,
         CASE WHEN p3_to = 0 THEN 0 ELSE INSTR(col03, '/', p3_to + 1) END
  FROM   token_bounds
  -- Keep recursing while at least one column still has a delimiter ahead.
  WHERE  p1_to > 0
  OR     p2_to > 0
  OR     p3_to > 0
)
SELECT CASE
         WHEN p1_from = 0 THEN NULL                      -- list exhausted
         WHEN p1_to   = 0 THEN SUBSTR(col01, p1_from)    -- last element
         ELSE SUBSTR(col01, p1_from, p1_to - p1_from)    -- inner element
       END AS col01,
       CASE
         WHEN p2_from = 0 THEN NULL
         WHEN p2_to   = 0 THEN SUBSTR(col02, p2_from)
         ELSE SUBSTR(col02, p2_from, p2_to - p2_from)
       END AS col02,
       CASE
         WHEN p3_from = 0 THEN NULL
         WHEN p3_to   = 0 THEN SUBSTR(col03, p3_from)
         ELSE SUBSTR(col03, p3_from, p3_to - p3_from)
       END AS col03
FROM   token_bounds;
或者,使用正则表达式(写起来更短,但运行效率低得多):
-- Split the '/'-delimited lists stored in col01, col02 and col03 into one
-- output row per list position, using regular expressions.
--
-- A trailing '/' is appended to every list so that each element, including
-- the last one, is terminated by a delimiter; the non-greedy pattern
-- '(.*?)/' then matches one element per occurrence and sub-expression 1
-- returns the element without its delimiter.
--   idx     : list position extracted by the current row (starts at 1)
--   max_idx : largest element count among the three lists of the row
WITH input_items (col01, col02, col03, v1, v2, v3, idx, max_idx) AS (
  -- Anchor member: first element of every list, plus the recursion bound.
  SELECT col01,
         col02,
         col03,
         REGEXP_SUBSTR(col01 || '/', '(.*?)/', 1, 1, NULL, 1),
         REGEXP_SUBSTR(col02 || '/', '(.*?)/', 1, 1, NULL, 1),
         REGEXP_SUBSTR(col03 || '/', '(.*?)/', 1, 1, NULL, 1),
         1,
         GREATEST(
           REGEXP_COUNT(col01 || '/', '(.*?)/'),
           REGEXP_COUNT(col02 || '/', '(.*?)/'),
           REGEXP_COUNT(col03 || '/', '(.*?)/')
         )
  FROM   input
  UNION ALL
  -- Recursive member: extract the next position; a list shorter than
  -- max_idx has no such occurrence and yields NULL for that column.
  SELECT col01,
         col02,
         col03,
         REGEXP_SUBSTR(col01 || '/', '(.*?)/', 1, idx + 1, NULL, 1),
         REGEXP_SUBSTR(col02 || '/', '(.*?)/', 1, idx + 1, NULL, 1),
         REGEXP_SUBSTR(col03 || '/', '(.*?)/', 1, idx + 1, NULL, 1),
         idx + 1,
         max_idx
  FROM   input_items
  WHERE  idx < max_idx
)
-- Output columns keep the source column names (col01..col03) so the result
-- has the same shape as the requested output.
SELECT v1 AS col01,
       v2 AS col02,
       v3 AS col03
FROM   input_items;
其中,对于样本数据:
-- Sample data: one row whose three columns each hold a '/'-delimited list.
CREATE TABLE input AS
SELECT '1/2/3'            AS col01,
       'John/Smith/David' AS col02,
       '2000/2500/1000'   AS col03
FROM   DUAL;
两个查询的输出均为:
| COL01 | COL02 | COL03 |
|---|---|---|
| 1 | John | 2000 |
| 2 | Smith | 2500 |
| 3 | David | 1000 |
db<>fiddle 在线演示见此处
【讨论】:
您也可以使用下面的查询来实现同样的目的。如您所见,它使用两个正则函数(regexp_count 和 regexp_substr)以及模式 '[^/]+'(匹配一段连续的非斜线字符),分别统计每个列表中的元素个数并提取由斜线分隔的各个元素。
-- Split the '/'-delimited lists of every input row into one output row per
-- list position, using a hierarchical (CONNECT BY) row generator.
--
-- '[^/]+' matches one run of non-slash characters, so occurrence number
-- LEVEL of that pattern is the LEVEL-th element of a list.
-- NOTE(review): the pattern cannot match an empty element (e.g. 'a//b'),
-- so empty elements appear to be skipped rather than returned as NULL.
WITH your_input_tab (col01, col02, col03) AS (
  -- Inline sample rows with lists of 3, 2 and 5 elements.
  SELECT '1/2/3', 'John/Smith/David', '2000/2500/1000'
  FROM   DUAL
  UNION ALL
  SELECT '5/6', 'Chris/Kim', '2800/1570'
  FROM   DUAL
  UNION ALL
  SELECT '8/9/10/11/12', 'Eric/Tidiane/Salim/Joe/Steven', '2700/1800/4000/2000/1450'
  FROM   DUAL
)
SELECT REGEXP_SUBSTR(col01, '[^/]+', 1, LEVEL) AS col01,
       REGEXP_SUBSTR(col02, '[^/]+', 1, LEVEL) AS col02,
       REGEXP_SUBSTR(col03, '[^/]+', 1, LEVEL) AS col03
FROM   your_input_tab
CONNECT BY
       -- Stay on the same input row while generating levels.
       -- NOTE(review): rows are told apart by col01 only; confirm col01 is
       -- unique, otherwise rows sharing a col01 value would be combined.
       PRIOR col01 = col01
       -- Common Oracle idiom: referencing a non-deterministic function
       -- through PRIOR keeps the self-referencing condition above from
       -- being reported as a CONNECT BY loop.
AND    PRIOR SYS_GUID() IS NOT NULL
       -- Generate as many levels as the longest of the three lists.
AND    LEVEL <= GREATEST(
                  REGEXP_COUNT(col01, '[^/]+'),
                  REGEXP_COUNT(col02, '[^/]+'),
                  REGEXP_COUNT(col03, '[^/]+')
                )
;
这里有一个链接,对这种方法作了非常详细的解释。
【讨论】: