上一篇讲的是Hive如何对sql进行解析,生成ASTNode。
那么Hive拿到ASTNode之后,就会触发:BaseSemanticAnalyzer.analyze这个方法;
这个方法非常重要,从AST到task的生成这一系列操作,都是在这个调用栈下进行的;
如下图:
按照《基于calcite做傻瓜式的sql优化》给出的sql示例,我们提前看下,经过hive各阶段优化后,执行计划会变成什么样子:
sql:
select * from ( select Sname, Sex, Sage, Sdept, count(1) as num from student_ext group by Sname, Sex, Sage, Sdept ) t1 left join student_ext t2 on t1.Sname = t2.Sname where t1.Sage > 10 and t2.Sdept = 'MA';
##########################Gen Calcite Plan##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################applyPreJoinOrderingTransforms-0##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################Push Down Semi Joins##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################JOIN Add not null filters##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################Constant propagation, common filter extraction, and PPD##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################basePlan##############################################
HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4], sno=[$5], sname1=[$6], sex1=[$7], sage1=[$8], sdept1=[$9])
  HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
    HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
      HiveProject(sname=[$0], sex=[$1], sage=[$2], sdept=[$3], num=[$4])
        HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
          HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[$4], $f4=[1])
            HiveTableScan(table=[[default.student_ext]])
      HiveTableScan(table=[[default.student_ext]])
##########################Projection Pruning##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])
##########################Apply Pre Join Order optimizations##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])
##########################优化后的执行计划##############################################
HiveFilter(condition=[AND(>($2, 10), =($9, 'MA'))])
  HiveJoin(condition=[=($0, $6)], joinType=[left], algorithm=[none], cost=[not available])
    HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[count($4)])
      HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[1])
        HiveProject(sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
          HiveTableScan(table=[[default.student_ext]])
    HiveProject(sno=[$0], sname=[$1], sex=[$2], sage=[$3], sdept=[$4])
      HiveTableScan(table=[[default.student_ext]])