一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
drop table if exists ads_uv_count; create external table ads_uv_count( `dt` string COMMENT \'统计日期\', `day_count` bigint COMMENT \'当日用户数量\', `wk_count` bigint COMMENT \'当周用户数量\', `mn_count` bigint COMMENT \'当月用户数量\', `is_weekend` string COMMENT \'Y,N 是否是周末,用于得到本周最终结果\', `is_monthend` string COMMENT \'Y,N 是否是月末,用于得到本月最终结果\' ) COMMENT \'活跃设备数\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_uv_count/\';
2)导入数据
insert into table ads_uv_count select \'2020-03-10\' dt, daycount.ct, wkcount.ct, mncount.ct, if(date_add(next_day(\'2020-03-10\',\'MO\'),-1)=\'2020-03-10\',\'Y\',\'N\') , if(last_day(\'2020-03-10\')=\'2020-03-10\',\'Y\',\'N\') from ( select \'2020-03-10\' dt, count(*) ct from dwt_uv_topic where login_date_last=\'2020-03-10\' )daycount join ( select \'2020-03-10\' dt, count (*) ct from dwt_uv_topic where login_date_last>=date_add(next_day(\'2020-03-10\',\'MO\'),-7) and login_date_last<= date_add(next_day(\'2020-03-10\',\'MO\'),-1) ) wkcount on daycount.dt=wkcount.dt join ( select \'2020-03-10\' dt, count (*) ct from dwt_uv_topic where date_format(login_date_last,\'yyyy-MM\')=date_format(\'2020-03-10\',\'yyyy-MM\') )mncount on daycount.dt=mncount.dt;
3)查询导入结果
1.1.2 每日新增设备
1)建表语句
drop table if exists ads_new_mid_count; create external table ads_new_mid_count ( `create_date` string comment \'创建时间\' , `new_mid_count` BIGINT comment \'新增设备数量\' ) COMMENT \'每日新增设备信息数量\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_new_mid_count/\';
2)导入数据
insert into table ads_new_mid_count select login_date_first, count(*) from dwt_uv_topic where login_date_first=\'2020-03-10\' group by login_date_first;
3)查询导入数据
select * from ads_new_mid_count;
1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句
drop table if exists ads_silent_count; create external table ads_silent_count( `dt` string COMMENT \'统计日期\', `silent_count` bigint COMMENT \'沉默设备数\' ) row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_silent_count\';
2)导入 2020-03-20 数据
insert into table ads_silent_count select \'2020-03-15\', count(*) from dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add(\'2020-03-15\',-7);
3)查询导入数据
select * from ads_silent_count;
1.1.4 本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
drop table if exists ads_back_count; create external table ads_back_count( `dt` string COMMENT \'统计日期\', `wk_dt` string COMMENT \'统计日期所在周\', `wastage_count` bigint COMMENT \'回流设备数\' ) row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_back_count\';
2)导入数据:
insert into table ads_back_count select \'2020-03-15\', count(*) from ( select mid_id from dwt_uv_topic where login_date_last>=date_add(next_day(\'2020-03-15\',\'MO\'),-7) and login_date_last<= date_add(next_day(\'2020-03-15\',\'MO\'),-1) and login_date_first<date_add(next_day(\'2020-03-15\',\'MO\'),-7) )current_wk left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'2020-03-15\',\'MO\'),-7*2) and dt<= date_add(next_day(\'2020-03-15\',\'MO\'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null;
3)查询结果
select * from ads_back_count;
1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句
drop table if exists ads_wastage_count; create external table ads_wastage_count( `dt` string COMMENT \'统计日期\', `wastage_count` bigint COMMENT \'流失设备数\' ) row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_wastage_count\';
2)导入 2020-03-20 数据
insert into table ads_wastage_count select \'2020-03-20\', count(*) from ( select mid_id from dwt_uv_topic where login_date_last<=date_add(\'2020-03-20\',-7) group by mid_id )t1;
3)查询结果
select * from ads_wastage_count;
1.1.6 留存率
1)建表语句
drop table if exists ads_user_retention_day_rate; create external table ads_user_retention_day_rate ( `stat_date` string comment \'统计日期\', `create_date` string comment \'设备新增日期\', `retention_day` int comment \'截止当前日期留存天数\', `retention_count` bigint comment \'留存数量\', `new_mid_count` bigint comment \'设备新增数量\', `retention_ratio` decimal(10,2) comment \'留存率\' ) COMMENT \'每日用户留存情况\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_user_retention_day_rate/\';
2)导入数据
insert into table ads_user_retention_day_rate select \'2020-03-10\',--统计日期 date_add(\'2020-03-10\',-1),--新增日期 1,--留存天数 sum(if(login_date_first=date_add(\'2020-03-10\',-1) and login_date_last=\'2020-03-10\',1,0)),--2020-03-09 的 1 日留存数 sum(if(login_date_first=date_add(\'2020-03-10\',-1),1,0)),--2020-03-09 新增 sum(if(login_date_first=date_add(\'2020-03-10\',-1) and login_date_last=\'2020-03-10\',1,0))/sum(if(login_date_first=date_add(\'2020-03-10\',- 1),1,0))*100 from dwt_uv_topic union all select \'2020-03-10\',--统计日期 date_add(\'2020-03-10\',-2),--新增日期 2,--留存天数 sum(if(login_date_first=date_add(\'2020-03-10\',-2) and login_date_last=\'2020-03-10\',1,0)),--2020-03-08 的 2 日留存数 sum(if(login_date_first=date_add(\'2020-03-10\',-2),1,0)),--2020-03-08 新增 sum(if(login_date_first=date_add(\'2020-03-10\',-2) and login_date_last=\'2020-03-10\',1,0))/sum(if(login_date_first=date_add(\'2020-03-10\',- 2),1,0))*100 from dwt_uv_topic union all select \'2020-03-10\',--统计日期 date_add(\'2020-03-10\',-3),--新增日期 3,--留存天数 sum(if(login_date_first=date_add(\'2020-03-10\',-3) and login_date_last=\'2020-03-10\',1,0)),--2020-03-07 的 3 日留存数 sum(if(login_date_first=date_add(\'2020-03-10\',-3),1,0)),--2020-03-07 新增 sum(if(login_date_first=date_add(\'2020-03-10\',-3) and login_date_last=\'2020-03-10\',1,0))/sum(if(login_date_first=date_add(\'2020-03-10\',- 3),1,0))*100 from dwt_uv_topic;
3)查询导入数据
select * from ads_user_retention_day_rate;
1.1.7 最近连续三周活跃用户数
1)建表语句
drop table if exists ads_continuity_wk_count; create external table ads_continuity_wk_count( `dt` string COMMENT \'统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日 期\', `wk_dt` string COMMENT \'持续时间\', `continuity_count` bigint COMMENT \'活跃次数\' ) row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_continuity_wk_count\';
2)导入 2020-03-20 所在周的数据
insert into table ads_continuity_wk_count select \'2020-03-15\', concat(date_add(next_day(\'2020-03-15\',\'MO\'),-7*3),\'_\',date_add(next_day(\' 2020-03-15\',\'MO\'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'2020-03-10\',\'monday\'),-7) and dt<=date_add(next_day(\'2020-03-10\',\'monday\'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'2020-03-10\',\'monday\'),-7*2) and dt<=date_add(next_day(\'2020-03-10\',\'monday\'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'2020-03-10\',\'monday\'),-7*3) and dt<=date_add(next_day(\'2020-03-10\',\'monday\'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2
3)查询
select * from ads_continuity_wk_count;
1.1.8 最近七天内连续三天活跃用户数
1)建表语句
drop table if exists ads_continuity_uv_count; create external table ads_continuity_uv_count( `dt` string COMMENT \'统计日期\', `wk_dt` string COMMENT \'最近 7 天日期\', `continuity_count` bigint ) COMMENT \'连续活跃设备数\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_continuity_uv_count\';
2)写出导入数据的 SQL 语句
insert into table ads_continuity_uv_count select \'2020-03-12\', concat(date_add(\'2020-03-12\',-6),\'_\',\'2020-03-12\'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add(\'2020-03-12\',-6) and dt<=\'2020-03-12\' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4;
7.2 会员主题
7.2.1 会员主题信息
1)建表
drop table if exists ads_user_topic; create external table ads_user_topic( `dt` string COMMENT \'统计日期\', `day_users` string COMMENT \'活跃会员数\', `day_new_users` string COMMENT \'新增会员数\', `day_new_payment_users` string COMMENT \'新增消费会员数\', `payment_users` string COMMENT \'总付费会员数\', `users` string COMMENT \'总会员数\', `day_users2users` decimal(10,2) COMMENT \'会员活跃率\', `payment_users2users` decimal(10,2) COMMENT \'会员付费率\', `day_new_users2users` decimal(10,2) COMMENT \'会员新鲜度\' ) COMMENT \'会员主题信息表\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_user_topic\';
2)导入数据
insert into table ads_user_topic select \'2020-03-10\', sum(if(login_date_last=\'2020-03-10\',1,0)), sum(if(login_date_first=\'2020-03-10\',1,0)), sum(if(payment_date_first=\'2020-03-10\',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last=\'2020-03-10\',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first=\'2020-03-10\',1,0))/sum(if(login_date_last=\'2020-03-10\',1,0)) from dwt_user_topic
3)查询数据
hive (gmall)> select * from ads_user_topic;
4)vim ads_user_topic.sh
添加如下内容:
#!/bin/bash APP=gmall hive=/opt/modules/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" with tmp_day_users as ( select \'$do_date\' dt, count(*) day_users from ${APP}.dwt_user_topic where login_date_last=\'$do_date\' ), tmp_day_new_users as ( select \'$do_date\' dt, count(*) day_new_users from ${APP}.dwt_user_topic where login_date_last=\'$do_date\' and login_date_first=\'$do_date\' ), tmp_day_new_payment_users as ( select \'$do_date\' dt, count(*) day_new_payment_users from ${APP}.dwt_user_topic where payment_date_first=\'$do_date\' ), tmp_payment_users as ( select \'$do_date\' dt, count(*) payment_users from ${APP}.dwt_user_topic where payment_date_first is not null ), tmp_users as ( select \'$do_date\' dt, count(*) users from ${APP}.dwt_user_topic tmp_users ) insert into table ${APP}.ads_user_topic select \'$do_date\' dt, day_users, day_new_users, day_new_payment_users, payment_users, users, day_users/users, payment_users/users, day_new_users/users from tmp_day_users join tmp_day_new_users on tmp_day_users.dt=tmp_day_new_users.dt join tmp_day_new_payment_users on tmp_day_users.dt=tmp_day_new_payment_users.dt join tmp_payment_users on tmp_day_users.dt=tmp_payment_users.dt join tmp_users on tmp_day_users.dt=tmp_users.dt; " $hive -e "$sql"
5)增加脚本执行权限
chmod 770 ads_user_topic.sh
6)执行脚本导入数据
ads_user_topic.sh 2020-03-11
7)查看导入数据
select * from ads_user_topic;
7.2.2 漏斗分析
统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句
drop table if exists ads_user_action_convert_day; create external table ads_user_action_convert_day( `dt` string COMMENT \'统计日期\', `total_visitor_m_count` bigint COMMENT \'总访问人数\', `cart_u_count` bigint COMMENT \'加入购物车的人数\', `visitor2cart_convert_ratio` decimal(10,2) COMMENT \'访问到加入购物车转化率\', `order_u_count` bigint COMMENT \'下单人数\', `cart2order_convert_ratio` decimal(10,2) COMMENT \'加入购物车到下单转化率\', `payment_u_count` bigint COMMENT \'支付人数\', `order2payment_convert_ratio` decimal(10,2) COMMENT \'下单到支付的转化率\' ) COMMENT \'用户行为漏斗分析\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_user_action_convert_day/\';
2)数据装载
insert into table ads_user_action_convert_day select \'2020-03-10\', uv.day_count, ua.cart_count, cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio, ua.order_count, cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio, ua.payment_count, cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio from ( select dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt=\'2020-03-10\' group by dt )ua join ads_uv_count uv on uv.dt=ua.dt;
3)查询加载数据
select * from ads_user_action_convert_day;
7.3 商品主题
7.3.1 商品个数信息
1)建表语句
drop table if exists ads_product_info; create external table ads_product_info( `dt` string COMMENT \'统计日期\', `sku_num` string COMMENT \'sku 个数\', `spu_num` string COMMENT \'spu 个数\' ) COMMENT \'商品个数信息\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_product_info\';
2)导入数据
insert into table ads_product_info select \'2020-03-10\' dt, sku_num, spu_num from ( select \'2020-03-10\' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select \'2020-03-10\' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt;
3)查询结果数据
select * from ads_product_info;
7.3.2 商品销量排名
1)建表语句
drop table if exists ads_product_sale_topN; create external table ads_product_sale_topN( `dt` string COMMENT \'统计日期\', `sku_id` string COMMENT \'商品 ID\', `payment_amount` bigint COMMENT \'销量\' ) COMMENT \'商品个数信息\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_product_sale_topN\';
2)导入数据
insert into table ads_product_sale_topN select \'2020-03-10\' dt, sku_id, payment_amount from dws_sku_action_daycount where dt=\'2020-03-10\' order by payment_amount desc limit 10;
3)查询结果数据
select * from ads_product_sale_topN;
7.3.3 商品收藏排名
1)建表语句
drop table if exists ads_product_favor_topN; create external table ads_product_favor_topN( `dt` string COMMENT \'统计日期\', `sku_id` string COMMENT \'商品 ID\', `favor_count` bigint COMMENT \'收藏量\' ) COMMENT \'商品收藏 TopN\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_product_favor_topN\';
2)导入数据
insert into table ads_product_favor_topN select \'2020-03-10\' dt, sku_id, favor_count from dws_sku_action_daycount where dt=\'2020-03-10\' order by favor_count desc limit 10;
3)查询数据
select * from ads_product_favor_topN;
7.3.4 商品加入购物车排名
1)建表语句
drop table if exists ads_product_cart_topN; create external table ads_product_cart_topN( `dt` string COMMENT \'统计日期\', `sku_id` string COMMENT \'商品 ID\', `cart_num` bigint COMMENT \'加入购物车数量\' ) COMMENT \'商品加入购物车 TopN\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_product_cart_topN\';
2)导入数据
insert into table ads_product_cart_topN select \'2020-03-10\' dt, sku_id, cart_num from dws_sku_action_daycount where dt=\'2020-03-10\' order by cart_num desc limit 10;
3)查询数据
select * from ads_product_cart_topN;
7.3.5 商品退款率排名(最近 30 天)
1)建表语句
drop table if exists ads_product_refund_topN; create external table ads_product_refund_topN( `dt` string COMMENT \'统计日期\', `sku_id` string COMMENT \'商品 ID\', `refund_ratio` decimal(10,2) COMMENT \'退款率\' ) COMMENT \'商品退款率 TopN\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_product_refund_topN\';
2)导入数据
insert into table ads_product_refund_topN select \'2020-03-10\', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10;
3)查询数据
select * from ads_product_refund_topN;
7.3.6 商品差评率
1)建表语句
drop table if exists ads_appraise_bad_topN; create external table ads_appraise_bad_topN( `dt` string COMMENT \'统计日期\', `sku_id` string COMMENT \'商品 ID\', `appraise_bad_ratio` decimal(10,2) COMMENT \'差评率\' ) COMMENT \'商品差评率 TopN\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_appraise_bad_topN\';
2)导入数据
insert into table ads_appraise_bad_topN select \'2020-03-10\' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun t+appraise_default_count) appraise_bad_ratio from dws_sku_action_daycount where dt=\'2020-03-10\' order by appraise_bad_ratio desc limit 10;
3)查询数据
select * from ads_appraise_bad_topN;
7.4 营销主题(用户+商品+购买行为)
7.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句
drop table if exists ads_order_daycount; create external table ads_order_daycount( dt string comment \'统计日期\', order_count bigint comment \'单日下单笔数\', order_amount decimal(10,2) comment \'单日下单金额\', order_users bigint comment \'单日下单用户数\' ) comment \'每日订单总计表\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_order_daycount\';
2)导入数据
insert into table ads_order_daycount select \'2020-03-10\', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt=\'2020-03-10\';
3)查询数据
select * from ads_order_daycount;
7.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表
drop table if exists ads_payment_daycount; create external table ads_payment_daycount( dt string comment \'统计日期\', payment_count bigint comment \'单日支付笔数\', payment_amount decimal(10,2) comment \'单日支付金额\', payment_user_count bigint comment \'单日支付人数\', payment_sku_count bigint comment \'单日支付商品数\', payment_avg_time double comment \'下单到支付的平均时长,取分钟数\' ) comment \'每日订单总计表\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_payment_daycount\';
2)导入数据
insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select \'2020-03-15\' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt=\'2020-03-15\' )tmp_payment join ( select \'2020-03-15\' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt=\'2020-03-15\' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select \'2020-03-15\' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt=\'2020-03-15\' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt
3)查询数据
select * from ads_payment_daycount;
7.4.3 复购率
1)建表语句
drop table ads_sale_tm_category1_stat_mn; create external table ads_sale_tm_category1_stat_mn ( tm_id string comment \'品牌 id\', category1_id string comment \'1 级品类 id \', category1_name string comment \'1 级品类名称 \', buycount bigint comment \'购买人数\', buy_twice_last bigint comment \'两次以上购买人数\', buy_twice_last_ratio decimal(10,2) comment \'单次复购率\', buy_3times_last bigint comment \'三次以上购买人数\', buy_3times_last_ratio decimal(10,2) comment \'多次复购率\', stat_mn string comment \'统计月份\', stat_date string comment \'统计日期\' ) COMMENT \'复购率统计\' row format delimited fields terminated by \'\t\' location \'/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/\';
2)数据导入
insert into table ads_sale_tm_category1_stat_mn select mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name, sum(if(mn.order_count>=1,1,0)) buycount, sum(if(mn.order_count>=2,1,0)) buyTwiceLast, sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio, sum(if(mn.order_count>=3,1,0)) buy3timeLast , sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio , date_format(\'2020-03-10\' ,\'yyyy-MM\') stat_mn, \'2020-03-10\' stat_date from ( select user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name, sum(order_count) order_count from dws_sale_detail_daycount sd where date_format(dt,\'yyyy-MM\')=date_format(\'2020-03-10\' ,\'yyyy-MM\') group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name ) mn group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
7.5 ADS 层导入脚本
1)vim dwt_to_ads.sh
在脚本中填写如下内容
#!/bin/bash hive=/opt/modules/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql="use gmall; insert into table ads_uv_count select \'$do_date\', sum(if(login_date_last=\'$do_date\',1,0)), sum(if(login_date_last>=date_add(next_day(\'$do_date\',\'monday\'),-7) and login_date_last<=date_add(next_day(\'$do_date\',\'monday\'),-1) ,1,0)), sum(if(date_format(login_date_last,\'yyyy-MM\')=date_format(\'$do_date\',\'yyyy-M M\'),1,0)), if(\'$do_date\'=date_add(next_day(\'$do_date\',\'monday\'),-1),\'Y\',\'N\'), if(\'$do_date\'=last_day(\'$do_date\'),\'Y\',\'N\') from dwt_uv_topic; insert into table ads_new_mid_count select \'$do_date\', count(*) from dwt_uv_topic where login_date_first=\'$do_date\'; insert into table ads_silent_count select \'$do_date\', count(*) from dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add(\'$do_date\',-7); insert into table ads_back_count select \'$do_date\', concat(date_add(next_day(\'2020-03-10\',\'MO\'),-7),\'_\',date_add(next_day(\'2020- 03-10\',\'MO\'),-1)), count(*) from ( select mid_id from dwt_uv_topic where login_date_last>=date_add(next_day(\'$do_date\',\'MO\'),-7) and login_date_last<= date_add(next_day(\'$do_date\',\'MO\'),-1) and login_date_first<date_add(next_day(\'$do_date\',\'MO\'),-7) )current_wk left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'$do_date\',\'MO\'),-7*2) and dt<= date_add(next_day(\'$do_date\',\'MO\'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null; insert into table ads_wastage_count select \'$do_date\', count(*) from dwt_uv_topic where login_date_last<=date_add(\'$do_date\',-7); insert into table ads_user_retention_day_rate select \'$do_date\', date_add(\'$do_date\',-3), 3, sum(if(login_date_first=date_add(\'$do_date\',-3) and login_date_last=\'$do_date\',1,0)), sum(if(login_date_first=date_add(\'$do_date\',-3),1,0)), sum(if(login_date_first=date_add(\'$do_date\',-3) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\', -3),1,0))*100 from dwt_uv_topic union all select \'$do_date\', date_add(\'$do_date\',-2), 2, sum(if(login_date_first=date_add(\'$do_date\',-2) and login_date_last=\'$do_date\',1,0)), sum(if(login_date_first=date_add(\'$do_date\',-2),1,0)), sum(if(login_date_first=date_add(\'$do_date\',-2) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\', -2),1,0))*100 from dwt_uv_topic union all select \'$do_date\', date_add(\'$do_date\',-1), 1, sum(if(login_date_first=date_add(\'$do_date\',-1) and login_date_last=\'$do_date\',1,0)), sum(if(login_date_first=date_add(\'$do_date\',-1),1,0)), sum(if(login_date_first=date_add(\'$do_date\',-1) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\', -1),1,0))*100 from dwt_uv_topic; insert into table ads_continuity_wk_count select \'$do_date\', concat(date_add(next_day(\'$do_date\',\'MO\'),-7*3),\'_\',date_add(next_day(\'$do_d ate\',\'MO\'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7) and dt<=date_add(next_day(\'$do_date\',\'monday\'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7*2) and dt<=date_add(next_day(\'$do_date\',\'monday\'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7*3) and dt<=date_add(next_day(\'$do_date\',\'monday\'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2; insert into table ads_continuity_uv_count select \'$do_date\', concat(date_add(\'$do_date\',-6),\'_\',\'$do_date\'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add(\'$do_date\',-6) and dt<=\'$do_date\' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4; insert into table ads_user_topic select \'$do_date\', sum(if(login_date_last=\'$do_date\',1,0)), sum(if(login_date_first=\'$do_date\',1,0)), sum(if(payment_date_first=\'$do_date\',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last=\'$do_date\',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first=\'$do_date\',1,0))/sum(if(login_date_last=\'$do_date\',1, 0)) from dwt_user_topic; insert into table ads_user_action_convert_day select \'$do_date\', uv.day_count, ua.cart_count, ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio, ua.order_count, ua.order_count/ua.cart_count*100 visitor2order_convert_ratio, ua.payment_count, ua.payment_count/ua.order_count*100 order2payment_convert_ratio from ( select \'$do_date\' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt=\'$do_date\' )ua join ads_uv_count uv on uv.dt=ua.dt; insert into table ads_product_info select \'$do_date\' dt, sku_num, spu_num from ( select \'$do_date\' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select \'$do_date\' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt; insert into table ads_product_sale_topN select \'$do_date\', sku_id, payment_amount from dws_sku_action_daycount where dt=\'$do_date\' order by payment_amount desc limit 10; insert into table ads_product_favor_topN select \'$do_date\', sku_id, favor_count from dws_sku_action_daycount where dt=\'$do_date\' order by favor_count limit 10; insert into table ads_product_cart_topN select \'$do_date\' dt, sku_id, cart_num from dws_sku_action_daycount where dt=\'$do_date\' order by cart_num limit 10; insert into table ads_product_refund_topN select \'$do_date\', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10; insert into table ads_appraise_bad_topN select \'$do_date\' dt, sku_id, appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun t+appraise_default_count)*100 appraise_bad_ratio from dws_sku_action_daycount where dt=\'$do_date\' order by appraise_bad_ratio desc limit 10; insert into table ads_order_daycount select \'$do_date\', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt=\'$do_date\'; insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select \'$do_date\' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt=\'$do_date\' )tmp_payment join ( select \'$do_date\' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt=\'$do_date\' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select \'$do_date\' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt=\'$do_date\' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt; insert into table ads_sale_tm_category1_stat_mn select mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name, sum(if(mn.order_count>=1,1,0)) buycount, sum(if(mn.order_count>=2,1,0)) buyTwiceLast, sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio, sum(if(mn.order_count>=3,1,0)) buy3timeLast , sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio , date_format(\'$do_date\' ,\'yyyy-MM\') stat_mn, \'$do_date\' stat_date from ( select user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name, sum(order_count) order_count from dws_sale_detail_daycount sd where date_format(dt,\'yyyy-MM\')=date_format(\'$do_date\' ,\'yyyy-MM\') group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name ) mn group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name; " $hive -e "$sql"
2)增加脚本执行权限
chmod 770 dwt_to_ads.sh
3)执行脚本导入数据
dwt_to_ads.sh 2020-03-10