1.数据脚本
-- Start from a clean slate: remove the demo tables left by an earlier run.
DROP TABLE test1;
DROP TABLE test2;
DROP TABLE test3;

-- test1: a 1-in-20 sample of the dba_objects rows whose owner is not
-- SYS, SYSTEM or PUBLIC. rn numbers the filtered rows via ROWNUM, only rows
-- whose rn is a multiple of 20 are kept, and rn stays as an extra column.
CREATE TABLE test1 AS
SELECT *
FROM (
    SELECT
        d.*,
        ROWNUM AS rn
    FROM dba_objects d
    WHERE d.owner NOT IN ('SYS', 'SYSTEM', 'PUBLIC')
)
WHERE MOD(rn, 20) = 0;

-- test2 and test3 are plain copies of test1 (same columns, same rows).
CREATE TABLE test2 AS SELECT * FROM test1;
CREATE TABLE test3 AS SELECT * FROM test1;

-- object_id is the primary key of each table; this script creates no index
-- on any other column.
ALTER TABLE test1 ADD CONSTRAINT pk_test1 PRIMARY KEY (object_id);
ALTER TABLE test2 ADD CONSTRAINT pk_test2 PRIMARY KEY (object_id);
ALTER TABLE test3 ADD CONSTRAINT pk_test3 PRIMARY KEY (object_id);
2.SQL优化
2.1 优化前
-- Baseline: count the test1 rows for which the rn total of the test2 rows
-- sharing its data_object_id is smaller than the rn total of the test3 rows
-- sharing its object_name. Both totals are correlated scalar subqueries; a
-- subquery that matches nothing yields NULL, so that row is not counted.
SELECT COUNT(src.object_id)
FROM test1 src
WHERE (
          SELECT SUM(by_id.rn)
          FROM test2 by_id
          WHERE by_id.data_object_id = src.data_object_id
      ) < (
          SELECT SUM(by_name.rn)
          FROM test3 by_name
          WHERE by_name.object_name = src.object_name
      );
该SQL的执行计划是Filter连接:由于关联列data_object_id和object_name上没有索引(只在object_id上建了主键),被驱动表test2、test3每次被访问时都只能走全表扫描,性能不好。
(※如果关联条件是owner这类重复值很多的列,Filter也可以是很好的连接方式,因为Filter内部会维护一个匹配用的临时表,缓存已经计算过的关联值及其子查询结果,相同的关联值不必重复执行子查询)
2.2 with优化
-- WITH-based rewrite of the correlated-subquery count.
--   t2_totals: per test1 row, the rn total of the test2 rows that share its
--              data_object_id (test1 rows without a match produce no row).
--   t3_totals: per test1 row, the rn total of the test3 rows that share its
--              object_name (test1 rows without a match produce no row).
-- The final query joins test1 to both totals on object_id and counts the
-- distinct object_id values whose test2 total is below the test3 total.
WITH t2_totals AS (
    SELECT
        src.object_id,
        lk.data_object_id,
        SUM(lk.rn) AS sum_rn
    FROM test1 src
    INNER JOIN test2 lk
        ON src.data_object_id = lk.data_object_id
    GROUP BY
        src.object_id,
        lk.data_object_id
),
t3_totals AS (
    SELECT
        src.object_id,
        lk.object_name,
        SUM(lk.rn) AS sum_rn
    FROM test1 src
    INNER JOIN test3 lk
        ON src.object_name = lk.object_name
    GROUP BY
        src.object_id,
        lk.object_name
)
SELECT COUNT(object_id)
FROM (
    SELECT DISTINCT src.object_id
    FROM test1 src
    INNER JOIN t2_totals
        ON src.object_id = t2_totals.object_id
    INNER JOIN t3_totals
        ON src.object_id = t3_totals.object_id
    WHERE t2_totals.sum_rn < t3_totals.sum_rn
);
为了避开Filter连接,我们先用WITH改写SQL。但这带来了另一个问题:test1被扫描了3次。虽然性能有所提高,但同一张表扫描三次,显然不是最优的改写方式。
2.3 分析函数优化
-- Analytic-function rewrite: test1 is read once and joined to both lookup
-- tables; the per-row totals are computed with window SUMs.
--
-- Joining test2 and test3 in the same query fans out: a test1 row with
-- a matches in test2 and b matches in test3 becomes a * b joined rows.
-- To keep each total equal to the one the correlated subquery would return:
--   * the test2 total is summed inside (test1 row, one fixed test3 row),
--     so every matching test2 row is added exactly once;
--   * the test3 total is summed inside (test1 row, one fixed test2 row),
--     so every matching test3 row is added exactly once.
-- object_id is the primary key of all three tables, so it identifies the
-- individual rows in the PARTITION BY lists.
--
-- Inner joins are sufficient: a test1 row lacking a match on either side
-- would get a NULL total, and NULL never satisfies the "<" filter.
-- COUNT(DISTINCT ...) collapses the a * b fan-out back to one per test1 row.
SELECT COUNT(DISTINCT object_id)
FROM (
    SELECT
        t1.object_id,
        SUM(t2.rn) OVER (PARTITION BY t1.object_id, t3.object_id) AS t2_sum_rn,
        SUM(t3.rn) OVER (PARTITION BY t1.object_id, t2.object_id) AS t3_sum_rn
    FROM test1 t1
    INNER JOIN test2 t2
        ON t1.data_object_id = t2.data_object_id
    INNER JOIN test3 t3
        ON t1.object_name = t3.object_name
)
WHERE t2_sum_rn < t3_sum_rn;
改为分析函数后,既避开了Filter连接,又避免了对test1的重复扫描,性能相比优化前提高了接近一倍。