基于calcite做傻瓜式的sql优化(三)

 

 

 上一篇:基于calcite做傻瓜式的sql优化(二)

上一篇说到的是Hive是如何对sql进行解析并生成ASTNode的。

那么Hive拿到ASTNode之后,就会触发:BaseSemanticAnalyzer.analyze这个方法;

这个方法非常重要,从AST到task的生成这一系列的操作,都是在这个调用栈下进行的。

 如下图:

基于calcite做傻瓜式的sql优化(三)

 按照:基于calcite做傻瓜式的sql优化给出的sql示例,我们提前看下,经过hive各阶段优化后,会变成什么样子。

sql:

-- Example query: aggregate student_ext per (Sname, Sex, Sage, Sdept), then
-- left-join that aggregate back to student_ext on Sname.
SELECT *
FROM (
    -- t1: one row per distinct (Sname, Sex, Sage, Sdept) plus its row count.
    SELECT Sname, Sex, Sage, Sdept, COUNT(1) AS num
    FROM student_ext
    GROUP BY Sname, Sex, Sage, Sdept
) t1
LEFT JOIN student_ext t2
    ON t1.Sname = t2.Sname
-- Both predicates are evaluated after the join. The t2 predicate is never true
-- for NULL-extended rows, so t1 rows without a matching t2 row are dropped.
WHERE t1.Sage > 10
  AND t2.Sdept = 'MA';
##########################Gen Calcite Plan##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################applyPreJoinOrderingTransforms-0##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################Push Down Semi Joins##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################JOIN Add not null filters##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################Constant propagation, common filter extraction, and PPD##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################basePlan##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])

##########################Projection Pruning##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])

##########################Apply Pre Join Order optimizations##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])

##########################优化后的执行计划##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])
View Code

相关文章: