【发布时间】:2016-05-31 17:00:30
【问题描述】:
从 csv 文件读取数据后,我得到了以下格式的字符串
v_lastline = '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';
我只是想把它转换成一个包含 6 个值的数组;其中 Inc 之前的逗号位于双引号内,需要转义,不能当作分隔符。
谁能建议在 PL/SQL 中最好的方法是什么?
【问题讨论】:
从 csv 文件读取数据后,我得到了以下格式的字符串
v_lastline = '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';
我只是想把它转换成一个包含 6 个值的数组;其中 Inc 之前的逗号位于双引号内,需要转义,不能当作分隔符。
谁能建议在 PL/SQL 中最好的方法是什么?
【问题讨论】:
这与 this question 类似,但您的列表中含有空元素;如果直接套用我在那个问题里试过的一种模式,空元素会被跳过:
-- SQL*Plus bind variable holding one raw CSV line.
-- Sized at 200 because the 58-character sample below does not fit in
-- varchar2(50); the assignment would fail with ORA-06502.
var v_lastline varchar2(200);
exec :v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';

-- Naive split: one row per match of either a double-quoted section or a run
-- of non-comma characters. Because [^,]+ needs at least one character, empty
-- elements (two adjacent commas) produce no match and are skipped.
select level as lvl,
       regexp_substr(:v_lastline, '("[^"]*"|[^,]+)', 1, level) as element
from dual
connect by level <= regexp_count(:v_lastline, '("[^"]*"|[^,]+)');
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4 CLS
5 FAX
如果您能找到一个永远不会出现在数据中的特殊字符,就可以这样解决这个问题:把每个逗号替换为"逗号 + 该字符",使空元素里也至少有一个字符,拆分之后再把该字符删除:
-- Split :v_lastline into one row per CSV field, keeping empty fields.
-- Every field is tagged with a leading '§' marker (a character assumed never
-- to occur in the data): one marker is prepended for the first field and one
-- is inserted after every comma. An empty field therefore still contains the
-- marker and matches [^,]+, and a double-quoted field always appears as §"..."
-- so the commas inside it are not treated as separators. Prepending the marker
-- also covers a quoted FIRST field, which would otherwise be split at its
-- embedded comma. The outer replace() removes the markers again.
select level as lvl,
       replace(regexp_substr('§' || replace(:v_lastline, ',', ',§'),
                             '(§"[^"]*"|[^,]+)', 1, level), '§', null) as element
from dual
-- Keep generating levels until there is no further token.
connect by regexp_substr('§' || replace(:v_lastline, ',', ',§'),
                         '(§"[^"]*"|[^,]+)', 1, level) is not null;
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4 CLS
5
6 FAX
这是拆分分隔字符串的一种常用方法(which is explained in detail here)的扩展。
replace(:v_lastline, ',', ',§') 将 ...,CLS,,FAX 更改为 ...,§CLS,§,§FAX,其中 § 是一个在数据中永远不会出现的字符。regexp_substr(..., '(§"[^"]*"|[^,]+)', 1, level) 使用正则表达式对修改后的值进行分词,该正则表达式匹配任何用双引号括起来的值(现在还带有前导的特殊字符)或一串非逗号字符;由于备选项按先后顺序求值,引号部分内的逗号会被忽略。level 是分层查询语法的一部分,其中 connect by regexp_substr(<same value and pattern>) is not null 只是用来确定一共有多少个标记。replace(regexp_substr(...), '§', null) 删除第一步中加入的特殊字符。如果您愿意或有需要,还可以在外层再套一个 replace() 来删除双引号,并用 trim 去掉空格。
您还没有完全说明数组的含义,但是您可以在 PL/SQL 中运行该查询并将批量收集到一个集合中(如果您打算使用它)。例如,使用内置的 ODCIVARCHAR2LIST 集合类型:
set serveroutput on
declare
  -- One raw CSV line. The 58-character sample below overflows varchar2(50)
  -- (ORA-06502), so the variable is declared with ample room.
  v_lastline varchar2(4000);
  -- Parsed fields, in the order they appear in the line.
  v_array sys.odcivarchar2list;
begin
  v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX';

  -- Tokenise the LOCAL variable (not a SQL*Plus bind variable of the same
  -- name). Each field is tagged with a leading '§' marker, assumed never to
  -- occur in the data, so that empty fields still match and quoted fields,
  -- including a quoted first field, keep their embedded commas. The markers
  -- and the double quotes are then removed and the result is trimmed.
  select trim(replace(replace(
           regexp_substr('§' || replace(v_lastline, ',', ',§'),
                         '(§"[^"]*"|[^,]+)', 1, level), '§', null), '"', null))
  bulk collect into v_array
  from dual
  connect by regexp_substr('§' || replace(v_lastline, ',', ',§'),
                           '(§"[^"]*"|[^,]+)', 1, level) is not null;

  dbms_output.put_line('Number of elements: ' || v_array.count);
  for i in 1..v_array.count loop
    dbms_output.put_line('Index ' || i || ' has: ' || v_array(i));
  end loop;
end;
/
Number of elements: 6
Index 1 has: 29218368
Index 2 has: 8062115
Index 3 has: Benedict Canyon Equities, Inc
Index 4 has: CLS
Index 5 has:
Index 6 has: FAX
对于多个空元素,这也(现在)有效:
-- (Re)declare the bind variable wide enough for this sample: the value is
-- well over 50 characters, so a varchar2(50) bind variable cannot hold it.
var v_lastline varchar2(200);
exec :v_lastline := '29218368,8062115," Benedict Canyon Equities, Inc",,,,,,,CLS,,,,,FAX,,,,,,,,,,,,,,,,,,INVOICE';

-- Same marker-based split as before: every field (the first one included) is
-- prefixed with '§', assumed never to occur in the data, so runs of empty
-- fields each yield their own row. The markers are removed from the output.
select level as lvl,
       replace(regexp_substr('§' || replace(:v_lastline, ',', ',§'),
                             '(§"[^"]*"|[^,]+)', 1, level), '§', null) as element
from dual
connect by regexp_substr('§' || replace(:v_lastline, ',', ',§'),
                         '(§"[^"]*"|[^,]+)', 1, level) is not null;
LVL ELEMENT
---------- ----------------------------------------
1 29218368
2 8062115
3 " Benedict Canyon Equities, Inc"
4
...
9
10 CLS
11
...
14
15 FAX
16
...
32
33 INVOICE
【讨论】:
对于 '29218368,8062115," Benedict Canyon Equities, Inc",,,' 这样以连续逗号结尾的字符串,这种方法会失败:返回的不是 6 个值,而是 4 个值。
如果你的 CSV 的结构是固定的,你可以试试这样的:
-- Splits one CSV line with a FIXED layout of six fields, the third of which
-- is double-quoted. Each field position is extracted by its own rule, chosen
-- by the row number (level) that connect by generates.
-- The CTE supplies the sample line as a single row with a single column.
with text(text) as ( select '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX' from dual)
select level,
-- Strip leading/trailing commas left on the extracted piece by the patterns below.
trim(',' from
case
-- Fields 1 and 2: the level-th match, searching from position 1, of
-- "some characters followed by a comma".
-- NOTE(review): '.*??' is presumably meant as a non-greedy '.*?' -- confirm.
when level in (1,2) then
regexp_substr(text, '(.*??)\,', 1, level)
-- Field 3: the first double-quoted section. No subexpression argument is
-- passed, so the match is returned with its surrounding quotes.
when level = 3 then
regexp_substr(text, '"(.*??)"', 1, 1)
-- Fields 4 and 5: the search starts at the second double quote in the line
-- and takes occurrence level-2 (i.e. 2 and 3) of the same comma-terminated
-- pattern; presumably occurrence 1 is the closing quote plus its comma.
when level in (4,5) then
regexp_substr(text, '(.*??)\,', instr(text, '"', 1, 2), level -2)
-- Field 6: from the same start position, the third match of
-- "a comma followed by zero or more non-comma characters".
when level = 6 then
regexp_substr(text, '\,([^\,]*)', instr(text, '"', 1, 2), 3)
end
)
from text
-- Exactly six rows: one per expected field.
connect by level <= 6
通过以不同的方式处理每个部分,这对 CSV 的结构做出了强有力的假设,但在我看来,很难找到一个真正通用的问题解决方案。
【讨论】:
这里是一个没有正则表达式的解决方案,首先创建两个辅助函数
/* CAR: head of a separated list -- everything before the first separator.
   select car('hello,world,bla') from dual --> hello
   When the separator does not occur, the whole input is returned. */
create or replace function car(PI_STR in varchar2,
                               PI_SEPARATOR in varchar2 default ',')
  return varchar2
is
  -- Position of the first separator; 0 when absent, null for null arguments.
  l_sep_at number := instr(PI_STR, PI_SEPARATOR);
begin
  return case
           when l_sep_at > 0 then substr(PI_STR, 1, l_sep_at - 1)
           else PI_STR
         end;
end;
/* CDR: tail of a separated list -- everything after the first separator.
   select cdr('hello,world,bla') from dual --> world,bla
   When the separator does not occur, an empty string (null) is returned. */
create or replace function cdr(PI_STR in varchar2,
                               PI_SEPARATOR in varchar2 default ',')
  return varchar2
is
  -- Position of the first separator; 0 when absent, null for null arguments.
  l_sep_at number := instr(PI_STR, PI_SEPARATOR);
begin
  -- Guard: nothing follows when there is no separator at all.
  if l_sep_at is null or l_sep_at <= 0 then
    return '';
  end if;

  -- Skip the whole separator, which may be longer than one character.
  return substr(PI_STR, l_sep_at + length(PI_SEPARATOR));
end;
现在按 ',' 逐个提取字段;如果某个字段中出现转义字符(这里是双引号),该字段就一直延续到下一个转义字符为止,其间的分隔符不算作分隔符:
-- Collection type returned by get_columns: a nested table with one
-- varchar2(4000) element per extracted CSV field.
create or replace type csv_col is table of varchar2(4000);
/* GET_COLUMNS: splits one delimited line into its fields, one piped row each.

   Parameters
     PI_STR        the line to split; a null/empty line yields no rows
     PI_SEPARATOR  field separator, e.g. ','
     PI_ESC_CHAR   quoting character, e.g. '"'; separators between a pair of
                   these belong to the field, and the quoting characters
                   themselves are not returned

   Returns (pipelined) one csv_col element per field, in order. Empty fields,
   including trailing ones, are returned as null rows.

   The line is scanned by position rather than by repeatedly cutting
   head/tail strings: an empty remainder and "no remainder" are both null in
   Oracle, which made trailing empty fields indistinguishable from the end of
   the line. Scanning by position also keeps a quoted field that contains no
   separator from being glued to the field that follows it. */
create or replace function get_columns(PI_STR in varchar2,
                                       PI_SEPARATOR in varchar2,
                                       PI_ESC_CHAR in varchar2)
  return csv_col pipelined
is
  l_sep_len  constant pls_integer := nvl(length(PI_SEPARATOR), 0);
  l_esc_len  constant pls_integer := nvl(length(PI_ESC_CHAR), 0);
  l_pos      pls_integer := 1;  -- start of the field currently being read
  l_sep_at   pls_integer;       -- separator that ends the current field (0 = none)
  l_open_at  pls_integer;       -- next opening quote at or after l_pos (0 = none)
  l_close_at pls_integer;       -- matching closing quote (0 = unterminated)
begin
  if PI_STR is null then
    return;
  end if;

  loop
    -- nvl(): instr() yields null when the separator/quote argument is null.
    l_sep_at  := nvl(instr(PI_STR, PI_SEPARATOR, l_pos), 0);
    l_open_at := nvl(instr(PI_STR, PI_ESC_CHAR, l_pos), 0);

    if l_open_at > 0 and (l_sep_at = 0 or l_open_at < l_sep_at) then
      -- Quoted field: the quote opens before the next separator.
      l_close_at := instr(PI_STR, PI_ESC_CHAR, l_open_at + l_esc_len);

      if l_close_at = 0 then
        -- Unterminated quote: the rest of the line is the last field.
        pipe row(substr(PI_STR, l_open_at + l_esc_len));
        exit;
      end if;

      pipe row(substr(PI_STR,
                      l_open_at + l_esc_len,
                      l_close_at - l_open_at - l_esc_len));
      -- The field ends at the first separator AFTER the closing quote.
      l_sep_at := nvl(instr(PI_STR, PI_SEPARATOR, l_close_at + l_esc_len), 0);
    elsif l_sep_at > 0 then
      -- Plain field; a zero-length substr gives null for an empty field.
      pipe row(substr(PI_STR, l_pos, l_sep_at - l_pos));
    else
      -- Last field: everything that is left (null after a trailing separator).
      pipe row(substr(PI_STR, l_pos));
    end if;

    exit when l_sep_at = 0;
    l_pos := l_sep_at + l_sep_len;
  end loop;

  return;
end;
这样称呼它:
-- Example call: split the sample line on ',' with '"' as the quoting
-- character. column_value is the single column exposed by a collection of
-- scalars in a table() expression.
select column_value
from table(
       get_columns(
         '29218368,8062115," Benedict Canyon Equities, Inc",CLS,,FAX',
         ',',
         '"'
       )
     );
--> 结果
29218368
8062115
Benedict Canyon Equities, Inc
CLS
FAX
【讨论】: