huanghanyu

一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)

需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句

-- ADS table holding active-device counts per statistics date:
-- one count each for the day, the week and the month, plus week-end / month-end flags.
DROP TABLE IF EXISTS ads_uv_count;
CREATE EXTERNAL TABLE ads_uv_count (
    `dt`          STRING COMMENT \'统计日期\',
    `day_count`   BIGINT COMMENT \'当日用户数量\',
    `wk_count`    BIGINT COMMENT \'当周用户数量\',
    `mn_count`    BIGINT COMMENT \'当月用户数量\',
    `is_weekend`  STRING COMMENT \'Y,N 是否是周末,用于得到本周最终结果\',
    `is_monthend` STRING COMMENT \'Y,N 是否是月末,用于得到本月最终结果\'
)
COMMENT \'活跃设备数\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_uv_count/\';

2)导入数据

-- Load active-device counts for 2020-03-10.
-- A device counts as active in a period when its last login date falls inside it:
--   day   : last login on the statistics date
--   week  : last login between the Monday and the Sunday of the statistics week
--   month : last login in the same yyyy-MM as the statistics date
-- The three single-row aggregates are joined on the constant dt column.
WITH day_active AS (
    SELECT
        \'2020-03-10\' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE login_date_last = \'2020-03-10\'
),
week_active AS (
    SELECT
        \'2020-03-10\' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE login_date_last >= date_add(next_day(\'2020-03-10\',\'MO\'), -7)
      AND login_date_last <= date_add(next_day(\'2020-03-10\',\'MO\'), -1)
),
month_active AS (
    SELECT
        \'2020-03-10\' AS dt,
        COUNT(*) AS ct
    FROM dwt_uv_topic
    WHERE date_format(login_date_last,\'yyyy-MM\') = date_format(\'2020-03-10\',\'yyyy-MM\')
)
INSERT INTO TABLE ads_uv_count
SELECT
    \'2020-03-10\' AS dt,
    day_active.ct,
    week_active.ct,
    month_active.ct,
    -- Y when the statistics date is the Sunday that closes its week
    if(date_add(next_day(\'2020-03-10\',\'MO\'), -1) = \'2020-03-10\', \'Y\', \'N\'),
    -- Y when the statistics date is the last day of its month
    if(last_day(\'2020-03-10\') = \'2020-03-10\', \'Y\', \'N\')
FROM day_active
JOIN week_active
    ON day_active.dt = week_active.dt
JOIN month_active
    ON day_active.dt = month_active.dt;

3)查询导入结果

1.1.2 每日新增设备
1)建表语句

-- ADS table holding the number of newly seen devices per creation date.
DROP TABLE IF EXISTS ads_new_mid_count;
CREATE EXTERNAL TABLE ads_new_mid_count (
    `create_date`   STRING COMMENT \'创建时间\',
    `new_mid_count` BIGINT COMMENT \'新增设备数量\'
)
COMMENT \'每日新增设备信息数量\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_new_mid_count/\';

2)导入数据

-- Load the number of devices whose first login date is 2020-03-10.
-- Deliberately no GROUP BY: a bare aggregate always returns exactly one row, so a day
-- without new devices is stored as 0 instead of producing no row at all. This is the
-- same form the dwt_to_ads.sh loader uses for this table.
insert into table ads_new_mid_count
select
    \'2020-03-10\',
    count(*)
from dwt_uv_topic
where login_date_first=\'2020-03-10\';

3)查询导入数据

select * from ads_new_mid_count;

1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句

-- ADS table holding the number of silent devices per statistics date.
DROP TABLE IF EXISTS ads_silent_count;
CREATE EXTERNAL TABLE ads_silent_count (
    `dt`           STRING COMMENT \'统计日期\',
    `silent_count` BIGINT COMMENT \'沉默设备数\'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_silent_count\';

2)导入 2020-03-15 数据

-- Load the silent-device count for 2020-03-15.
-- A device is silent when its first and last login dates are equal (it was only ever
-- active on one day) and that day is at least 7 days before the statistics date.
INSERT INTO TABLE ads_silent_count
SELECT
    \'2020-03-15\',
    COUNT(*)
FROM dwt_uv_topic
WHERE login_date_first = login_date_last
  AND login_date_last <= date_add(\'2020-03-15\', -7);

3)查询导入数据

select * from ads_silent_count;

1.1.4 本周回流用户数

需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句

-- ADS table holding the number of returning devices per statistics date and week.
-- NOTE(review): the count column is named wastage_count although its comment describes
-- returning devices; it looks copied from ads_wastage_count - confirm before renaming.
DROP TABLE IF EXISTS ads_back_count;
CREATE EXTERNAL TABLE ads_back_count (
    `dt`            STRING COMMENT \'统计日期\',
    `wk_dt`         STRING COMMENT \'统计日期所在周\',
    `wastage_count` BIGINT COMMENT \'回流设备数\'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_back_count\';

2)导入数据:

-- Load the returning-device count for the week containing 2020-03-15.
-- Returning device: active this week, first seen before this week, not active last week.
-- The select list supplies all three target columns (dt, wk_dt, count); wk_dt is the
-- Monday_Sunday range of the statistics week, built the same way as in dwt_to_ads.sh.
insert into table ads_back_count
select
    \'2020-03-15\',
    concat(date_add(next_day(\'2020-03-15\',\'MO\'),-7),\'_\',date_add(next_day(\'2020-03-15\',\'MO\'),-1)),
    count(*)
from
(
    -- devices whose last login is in the current week and that are not new this week
    select
        mid_id
    from dwt_uv_topic
    where login_date_last>=date_add(next_day(\'2020-03-15\',\'MO\'),-7)
    and login_date_last<= date_add(next_day(\'2020-03-15\',\'MO\'),-1)
    and login_date_first<date_add(next_day(\'2020-03-15\',\'MO\'),-7)
)current_wk
left join
(
    -- devices with at least one daily record in the previous week
    select
        mid_id
    from dws_uv_detail_daycount
    where dt>=date_add(next_day(\'2020-03-15\',\'MO\'),-7*2)
    and dt<= date_add(next_day(\'2020-03-15\',\'MO\'),-7-1)
    group by mid_id
)last_wk
on current_wk.mid_id=last_wk.mid_id
-- anti-join: keep only current-week devices that have no match in the previous week
where last_wk.mid_id is null;

3)查询结果

select * from ads_back_count;

1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句

-- ADS table holding the number of churned devices per statistics date.
DROP TABLE IF EXISTS ads_wastage_count;
CREATE EXTERNAL TABLE ads_wastage_count (
    `dt`            STRING COMMENT \'统计日期\',
    `wastage_count` BIGINT COMMENT \'流失设备数\'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_wastage_count\';

2)导入 2020-03-20 数据

-- Load the churned-device count for 2020-03-20: distinct devices whose last login
-- is 7 or more days before the statistics date.
INSERT INTO TABLE ads_wastage_count
SELECT
    \'2020-03-20\',
    COUNT(*)
FROM (
    SELECT DISTINCT
        mid_id
    FROM dwt_uv_topic
    WHERE login_date_last <= date_add(\'2020-03-20\', -7)
) churned;

3)查询结果

select * from ads_wastage_count;

1.1.6 留存率



1)建表语句

-- ADS table holding day-N retention figures: one row per statistics date and cohort
-- (device creation date).
DROP TABLE IF EXISTS ads_user_retention_day_rate;
CREATE EXTERNAL TABLE ads_user_retention_day_rate (
    `stat_date`       STRING        COMMENT \'统计日期\',
    `create_date`     STRING        COMMENT \'设备新增日期\',
    `retention_day`   INT           COMMENT \'截止当前日期留存天数\',
    `retention_count` BIGINT        COMMENT \'留存数量\',
    `new_mid_count`   BIGINT        COMMENT \'设备新增数量\',
    `retention_ratio` DECIMAL(10,2) COMMENT \'留存率\'
)
COMMENT \'每日用户留存情况\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_user_retention_day_rate/\';

2)导入数据

-- Load 1-, 2- and 3-day retention as observed on 2020-03-10.
-- For each cohort (devices first seen N days before the statistics date):
--   retained = cohort devices whose last login is the statistics date
--   ratio    = retained / cohort size * 100
-- Each branch scans dwt_uv_topic once and yields a single row.
INSERT INTO TABLE ads_user_retention_day_rate
SELECT
    \'2020-03-10\',                 -- statistics date
    date_add(\'2020-03-10\', -1),   -- cohort creation date
    1,                              -- retention day
    SUM(if(login_date_first = date_add(\'2020-03-10\', -1) AND login_date_last = \'2020-03-10\', 1, 0)),  -- retained devices of the 2020-03-09 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -1), 1, 0)),                                      -- size of the 2020-03-09 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -1) AND login_date_last = \'2020-03-10\', 1, 0))
        / SUM(if(login_date_first = date_add(\'2020-03-10\', -1), 1, 0)) * 100
FROM dwt_uv_topic
UNION ALL
SELECT
    \'2020-03-10\',                 -- statistics date
    date_add(\'2020-03-10\', -2),   -- cohort creation date
    2,                              -- retention day
    SUM(if(login_date_first = date_add(\'2020-03-10\', -2) AND login_date_last = \'2020-03-10\', 1, 0)),  -- retained devices of the 2020-03-08 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -2), 1, 0)),                                      -- size of the 2020-03-08 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -2) AND login_date_last = \'2020-03-10\', 1, 0))
        / SUM(if(login_date_first = date_add(\'2020-03-10\', -2), 1, 0)) * 100
FROM dwt_uv_topic
UNION ALL
SELECT
    \'2020-03-10\',                 -- statistics date
    date_add(\'2020-03-10\', -3),   -- cohort creation date
    3,                              -- retention day
    SUM(if(login_date_first = date_add(\'2020-03-10\', -3) AND login_date_last = \'2020-03-10\', 1, 0)),  -- retained devices of the 2020-03-07 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -3), 1, 0)),                                      -- size of the 2020-03-07 cohort
    SUM(if(login_date_first = date_add(\'2020-03-10\', -3) AND login_date_last = \'2020-03-10\', 1, 0))
        / SUM(if(login_date_first = date_add(\'2020-03-10\', -3), 1, 0)) * 100
FROM dwt_uv_topic;

3)查询导入数据

select * from ads_user_retention_day_rate;

1.1.7 最近连续三周活跃用户数

1)建表语句

-- ADS table holding the number of devices active in each of the last three weeks.
-- The dt column comment is kept on a single line: a line break inside the literal
-- would become part of the stored comment text.
drop table if exists ads_continuity_wk_count;
create external table ads_continuity_wk_count(
    `dt` string COMMENT \'统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期\',
    `wk_dt` string COMMENT \'持续时间\',
    `continuity_count` bigint COMMENT \'活跃次数\'
)
row format delimited fields terminated by \'\t\'
location \'/warehouse/gmall/ads/ads_continuity_wk_count\';

2)导入 2020-03-15 所在周的数据

-- Load the number of devices active in each of the three most recent weeks,
-- anchored on 2020-03-15.
-- Every date expression uses the same anchor date, and each date literal is kept on one
-- line: a line break inside the quotes would become part of the date string.
-- Each branch emits one row per device for one week; a device present in all three
-- branches has count(*) = 3.
insert into table ads_continuity_wk_count
select
    \'2020-03-15\',
    concat(date_add(next_day(\'2020-03-15\',\'MO\'),-7*3),\'_\',date_add(next_day(\'2020-03-15\',\'MO\'),-1)),
    count(*)
from
(
    select
        mid_id
    from
    (
        -- current week
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'2020-03-15\',\'monday\'),-7)
        and dt<=date_add(next_day(\'2020-03-15\',\'monday\'),-1)
        group by mid_id
        union all
        -- previous week
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'2020-03-15\',\'monday\'),-7*2)
        and dt<=date_add(next_day(\'2020-03-15\',\'monday\'),-7-1)
        group by mid_id
        union all
        -- two weeks ago
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'2020-03-15\',\'monday\'),-7*3)
        and dt<=date_add(next_day(\'2020-03-15\',\'monday\'),-7*2-1)
        group by mid_id
    )t1
    group by mid_id
    having count(*)=3
)t2;

3)查询

select * from ads_continuity_wk_count;

1.1.8 最近七天内连续三天活跃用户数

1)建表语句

-- ADS table holding the number of devices with a run of consecutive active days
-- inside a 7-day window.
DROP TABLE IF EXISTS ads_continuity_uv_count;
CREATE EXTERNAL TABLE ads_continuity_uv_count (
    `dt`               STRING COMMENT \'统计日期\',
    `wk_dt`            STRING COMMENT \'最近 7 天日期\',
    `continuity_count` BIGINT
)
COMMENT \'连续活跃设备数\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_continuity_uv_count\';

2)写出导入数据的 SQL 语句

-- Load the number of devices active on 3 or more consecutive days within the 7 days
-- ending 2020-03-12.
-- Technique: rank each device's active dates in ascending order and subtract the rank
-- from the date. Dates that follow each other day by day share the same difference,
-- so grouping by (device, difference) isolates each consecutive run.
INSERT INTO TABLE ads_continuity_uv_count
SELECT
    \'2020-03-12\',
    concat(date_add(\'2020-03-12\', -6), \'_\', \'2020-03-12\'),
    COUNT(*)
FROM (
    -- one row per device that has at least one qualifying run
    SELECT mid_id
    FROM (
        -- runs of 3 or more consecutive days
        SELECT mid_id
        FROM (
            SELECT
                mid_id,
                date_sub(dt, rk) AS date_dif
            FROM (
                SELECT
                    mid_id,
                    dt,
                    rank() OVER (PARTITION BY mid_id ORDER BY dt) AS rk
                FROM dws_uv_detail_daycount
                WHERE dt >= date_add(\'2020-03-12\', -6)
                  AND dt <= \'2020-03-12\'
            ) ranked_days
        ) day_offsets
        GROUP BY mid_id, date_dif
        HAVING COUNT(*) >= 3
    ) long_runs
    GROUP BY mid_id
) continuous_devices;

7.2 会员主题
7.2.1 会员主题信息

1)建表

-- ADS table holding member-topic metrics per statistics date: member counts plus
-- three derived ratios.
-- NOTE(review): the count columns are declared STRING although the loaders write
-- integer aggregates - confirm downstream consumers before changing the type.
DROP TABLE IF EXISTS ads_user_topic;
CREATE EXTERNAL TABLE ads_user_topic (
    `dt`                    STRING        COMMENT \'统计日期\',
    `day_users`             STRING        COMMENT \'活跃会员数\',
    `day_new_users`         STRING        COMMENT \'新增会员数\',
    `day_new_payment_users` STRING        COMMENT \'新增消费会员数\',
    `payment_users`         STRING        COMMENT \'总付费会员数\',
    `users`                 STRING        COMMENT \'总会员数\',
    `day_users2users`       DECIMAL(10,2) COMMENT \'会员活跃率\',
    `payment_users2users`   DECIMAL(10,2) COMMENT \'会员付费率\',
    `day_new_users2users`   DECIMAL(10,2) COMMENT \'会员新鲜度\'
)
COMMENT \'会员主题信息表\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_user_topic\';

2)导入数据

-- Load member-topic metrics for 2020-03-10 in a single scan of dwt_user_topic.
-- The statement is terminated with a semicolon so it can run as part of a script.
insert into table ads_user_topic
select
    \'2020-03-10\',
    sum(if(login_date_last=\'2020-03-10\',1,0)),        -- active members
    sum(if(login_date_first=\'2020-03-10\',1,0)),       -- new members
    sum(if(payment_date_first=\'2020-03-10\',1,0)),     -- members paying for the first time
    sum(if(payment_count>0,1,0)),                       -- members with any payment
    count(*),                                           -- all members
    sum(if(login_date_last=\'2020-03-10\',1,0))/count(*),   -- active / all
    sum(if(payment_count>0,1,0))/count(*),                  -- paying / all
    sum(if(login_date_first=\'2020-03-10\',1,0))/sum(if(login_date_last=\'2020-03-10\',1,0))  -- new / active
from dwt_user_topic;

3)查询数据

hive (gmall)> select * from ads_user_topic;

4)vim ads_user_topic.sh
添加如下内容:

#!/bin/bash
# ads_user_topic.sh - load one row of member-topic metrics into ${APP}.ads_user_topic.
#
# Usage: ads_user_topic.sh [yyyy-MM-dd]
#   $1  statistics date; when omitted, yesterday (date -d "-1 day" +%F) is used.
#
# Each metric is computed in its own single-row CTE; the CTEs are joined on the
# constant dt column. The last ratio is new members / active members, the same
# definition the other ads_user_topic loaders in this document use.
APP=gmall
hive=/opt/modules/hive/bin/hive

# Use the date passed on the command line, otherwise fall back to yesterday.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

sql="
with
tmp_day_users as
(
    select
        \'$do_date\' dt,
        count(*) day_users
    from
        ${APP}.dwt_user_topic
    where
        login_date_last=\'$do_date\'
),
tmp_day_new_users as
(
    select
        \'$do_date\' dt,
        count(*) day_new_users
    from
        ${APP}.dwt_user_topic
    where
        login_date_last=\'$do_date\' and login_date_first=\'$do_date\'
),
tmp_day_new_payment_users as
(
    select
        \'$do_date\' dt,
        count(*) day_new_payment_users
    from
        ${APP}.dwt_user_topic
    where
        payment_date_first=\'$do_date\'
),
tmp_payment_users as
(
    select
        \'$do_date\' dt,
        count(*) payment_users
    from
        ${APP}.dwt_user_topic
    where
        payment_date_first is not null
),
tmp_users as
(
    select
        \'$do_date\' dt,
        count(*) users
    from
        ${APP}.dwt_user_topic
)
insert into table ${APP}.ads_user_topic
select
    \'$do_date\' dt,
    day_users,
    day_new_users,
    day_new_payment_users,
    payment_users,
    users,
    day_users/users,
    payment_users/users,
    day_new_users/day_users
from
    tmp_day_users
join
    tmp_day_new_users
on
    tmp_day_users.dt=tmp_day_new_users.dt
join
    tmp_day_new_payment_users
on
    tmp_day_users.dt=tmp_day_new_payment_users.dt
join
    tmp_payment_users
on
    tmp_day_users.dt=tmp_payment_users.dt
join
    tmp_users
on
    tmp_day_users.dt=tmp_users.dt;
"
$hive -e "$sql"

5)增加脚本执行权限

chmod 770 ads_user_topic.sh

6)执行脚本导入数据

ads_user_topic.sh 2020-03-11

7)查看导入数据

select * from ads_user_topic;

7.2.2 漏斗分析

统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句

-- ADS table holding the daily visit -> cart -> order -> payment funnel:
-- the head count at each step and the conversion ratio between adjacent steps.
DROP TABLE IF EXISTS ads_user_action_convert_day;
CREATE EXTERNAL TABLE ads_user_action_convert_day (
    `dt`                          STRING        COMMENT \'统计日期\',
    `total_visitor_m_count`       BIGINT        COMMENT \'总访问人数\',
    `cart_u_count`                BIGINT        COMMENT \'加入购物车的人数\',
    `visitor2cart_convert_ratio`  DECIMAL(10,2) COMMENT \'访问到加入购物车转化率\',
    `order_u_count`               BIGINT        COMMENT \'下单人数\',
    `cart2order_convert_ratio`    DECIMAL(10,2) COMMENT \'加入购物车到下单转化率\',
    `payment_u_count`             BIGINT        COMMENT \'支付人数\',
    `order2payment_convert_ratio` DECIMAL(10,2) COMMENT \'下单到支付的转化率\'
)
COMMENT \'用户行为漏斗分析\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_user_action_convert_day/\';

2)数据装载

-- Load the conversion funnel for 2020-03-10.
-- Visitor count comes from ads_uv_count.day_count, so that table must already hold
-- the row for the same date. Ratios are stored as percentages (x100), the same scale
-- the dwt_to_ads.sh loader writes into this table.
insert into table ads_user_action_convert_day
select
    \'2020-03-10\',
    uv.day_count,
    ua.cart_count,
    cast(ua.cart_count/uv.day_count*100 as decimal(10,2)) visitor2cart_convert_ratio,
    ua.order_count,
    cast(ua.order_count/ua.cart_count*100 as decimal(10,2)) cart2order_convert_ratio,
    ua.payment_count,
    cast(ua.payment_count/ua.order_count*100 as decimal(10,2)) order2payment_convert_ratio
from
(
    -- number of users with at least one cart add / order / payment on the day
    select
        dt,
        sum(if(cart_count>0,1,0)) cart_count,
        sum(if(order_count>0,1,0)) order_count,
        sum(if(payment_count>0,1,0)) payment_count
    from dws_user_action_daycount
    where dt=\'2020-03-10\'
    group by dt
)ua join ads_uv_count uv on uv.dt=ua.dt;

3)查询加载数据

select * from ads_user_action_convert_day;

7.3 商品主题
7.3.1 商品个数信息
1)建表语句

-- ADS table holding the number of SKUs and SPUs per statistics date.
-- NOTE(review): both counters are declared STRING although the loader writes
-- count(*) results - confirm downstream consumers before changing the type.
DROP TABLE IF EXISTS ads_product_info;
CREATE EXTERNAL TABLE ads_product_info (
    `dt`      STRING COMMENT \'统计日期\',
    `sku_num` STRING COMMENT \'sku 个数\',
    `spu_num` STRING COMMENT \'spu 个数\'
)
COMMENT \'商品个数信息\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_product_info\';

2)导入数据

-- Load SKU and SPU totals for 2020-03-10.
-- sku_num counts the rows of dwt_sku_topic; spu_num counts its distinct spu_id groups.
-- The two single-row results are joined on the constant dt column.
WITH sku_total AS (
    SELECT
        \'2020-03-10\' AS dt,
        COUNT(*) AS sku_num
    FROM dwt_sku_topic
),
spu_total AS (
    SELECT
        \'2020-03-10\' AS dt,
        COUNT(*) AS spu_num
    FROM (
        SELECT
            spu_id
        FROM dwt_sku_topic
        GROUP BY spu_id
    ) spu_ids
)
INSERT INTO TABLE ads_product_info
SELECT
    \'2020-03-10\' AS dt,
    sku_num,
    spu_num
FROM sku_total
JOIN spu_total
    ON sku_total.dt = spu_total.dt;

3)查询结果数据

select * from ads_product_info;

7.3.2 商品销量排名

1)建表语句

-- ADS table holding the top-selling SKUs per statistics date.
-- The table comment now describes this table; it previously repeated the comment of
-- ads_product_info. It follows the naming of the sibling TopN tables.
drop table if exists ads_product_sale_topN;
create external table ads_product_sale_topN(
    `dt` string COMMENT \'统计日期\',
    `sku_id` string COMMENT \'商品 ID\',
    `payment_amount` bigint COMMENT \'销量\'
) COMMENT \'商品销量 TopN\'
row format delimited fields terminated by \'\t\'
location \'/warehouse/gmall/ads/ads_product_sale_topN\';

2)导入数据

-- Load the 10 SKUs with the highest payment_amount on 2020-03-10.
INSERT INTO TABLE ads_product_sale_topN
SELECT
    \'2020-03-10\' AS dt,
    sku_id,
    payment_amount
FROM dws_sku_action_daycount
WHERE dt = \'2020-03-10\'
ORDER BY payment_amount DESC
LIMIT 10;

3)查询结果数据

select * from ads_product_sale_topN;

7.3.3 商品收藏排名

1)建表语句

-- ADS table holding the most-favorited SKUs per statistics date.
DROP TABLE IF EXISTS ads_product_favor_topN;
CREATE EXTERNAL TABLE ads_product_favor_topN (
    `dt`          STRING COMMENT \'统计日期\',
    `sku_id`      STRING COMMENT \'商品 ID\',
    `favor_count` BIGINT COMMENT \'收藏量\'
)
COMMENT \'商品收藏 TopN\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_product_favor_topN\';

2)导入数据

-- Load the 10 SKUs with the highest favor_count on 2020-03-10.
INSERT INTO TABLE ads_product_favor_topN
SELECT
    \'2020-03-10\' AS dt,
    sku_id,
    favor_count
FROM dws_sku_action_daycount
WHERE dt = \'2020-03-10\'
ORDER BY favor_count DESC
LIMIT 10;

3)查询数据

select * from ads_product_favor_topN;

7.3.4 商品加入购物车排名

1)建表语句

-- ADS table holding the SKUs most often added to carts per statistics date.
DROP TABLE IF EXISTS ads_product_cart_topN;
CREATE EXTERNAL TABLE ads_product_cart_topN (
    `dt`       STRING COMMENT \'统计日期\',
    `sku_id`   STRING COMMENT \'商品 ID\',
    `cart_num` BIGINT COMMENT \'加入购物车数量\'
)
COMMENT \'商品加入购物车 TopN\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_product_cart_topN\';

2)导入数据

-- Load the 10 SKUs with the highest cart_num on 2020-03-10.
INSERT INTO TABLE ads_product_cart_topN
SELECT
    \'2020-03-10\' AS dt,
    sku_id,
    cart_num
FROM dws_sku_action_daycount
WHERE dt = \'2020-03-10\'
ORDER BY cart_num DESC
LIMIT 10;

3)查询数据

select * from ads_product_cart_topN;

7.3.5 商品退款率排名(最近 30 天)

1)建表语句

-- ADS table holding the SKUs with the highest refund ratio per statistics date.
DROP TABLE IF EXISTS ads_product_refund_topN;
CREATE EXTERNAL TABLE ads_product_refund_topN (
    `dt`           STRING        COMMENT \'统计日期\',
    `sku_id`       STRING        COMMENT \'商品 ID\',
    `refund_ratio` DECIMAL(10,2) COMMENT \'退款率\'
)
COMMENT \'商品退款率 TopN\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_product_refund_topN\';

2)导入数据

-- Load the 10 SKUs with the highest 30-day refund ratio, labelled 2020-03-10.
-- Ratio = refund_last_30d_count / payment_last_30d_count * 100.
INSERT INTO TABLE ads_product_refund_topN
SELECT
    \'2020-03-10\',
    sku_id,
    refund_last_30d_count / payment_last_30d_count * 100 AS refund_ratio
FROM dwt_sku_topic
ORDER BY refund_ratio DESC
LIMIT 10;

3)查询数据

select * from ads_product_refund_topN;

7.3.6 商品差评率
1)建表语句

-- ADS table holding the SKUs with the highest bad-review ratio per statistics date.
DROP TABLE IF EXISTS ads_appraise_bad_topN;
CREATE EXTERNAL TABLE ads_appraise_bad_topN (
    `dt`                 STRING        COMMENT \'统计日期\',
    `sku_id`             STRING        COMMENT \'商品 ID\',
    `appraise_bad_ratio` DECIMAL(10,2) COMMENT \'差评率\'
)
COMMENT \'商品差评率 TopN\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_appraise_bad_topN\';

2)导入数据

-- Load the 10 SKUs with the highest bad-review ratio on 2020-03-10.
-- Ratio = bad reviews / all reviews (good + mid + bad + default) * 100; the x100 scale
-- is the one the dwt_to_ads.sh loader writes into this table.
-- The denominator is kept on one line so that no column name is split by a line break.
insert into table ads_appraise_bad_topN
select
    \'2020-03-10\' dt,
    sku_id,
    appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count)*100 appraise_bad_ratio
from
    dws_sku_action_daycount
where
    dt=\'2020-03-10\'
order by appraise_bad_ratio desc
limit 10;

3)查询数据

select * from ads_appraise_bad_topN;

7.4 营销主题(用户+商品+购买行为)
7.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句

-- ADS table holding daily order totals: order count, order amount and ordering users.
DROP TABLE IF EXISTS ads_order_daycount;
CREATE EXTERNAL TABLE ads_order_daycount (
    dt           STRING        COMMENT \'统计日期\',
    order_count  BIGINT        COMMENT \'单日下单笔数\',
    order_amount DECIMAL(10,2) COMMENT \'单日下单金额\',
    order_users  BIGINT        COMMENT \'单日下单用户数\'
)
COMMENT \'每日订单总计表\'
ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\'
LOCATION \'/warehouse/gmall/ads/ads_order_daycount\';

2)导入数据

-- Load order totals for 2020-03-10 from the per-user daily action rows:
-- total orders, total order amount, and the number of users with at least one order.
INSERT INTO TABLE ads_order_daycount
SELECT
    \'2020-03-10\',
    SUM(order_count),
    SUM(order_amount),
    SUM(if(order_count > 0, 1, 0))
FROM dws_user_action_daycount
WHERE dt = \'2020-03-10\';

3)查询数据

select * from ads_order_daycount;

7.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表

-- ADS table holding daily payment totals: payment count, amount, paying users,
-- paid SKUs and the average order-to-payment time in minutes.
-- The table comment now describes payments; it previously repeated the comment of
-- ads_order_daycount.
drop table if exists ads_payment_daycount;
create external table ads_payment_daycount(
    dt string comment \'统计日期\',
    payment_count bigint comment \'单日支付笔数\',
    payment_amount decimal(10,2) comment \'单日支付金额\',
    payment_user_count bigint comment \'单日支付人数\',
    payment_sku_count bigint comment \'单日支付商品数\',
    payment_avg_time double comment \'下单到支付的平均时长,取分钟数\'
) comment \'每日支付总计表\'
row format delimited fields terminated by \'\t\'
location \'/warehouse/gmall/ads/ads_payment_daycount\';

2)导入数据

-- Load payment totals for 2020-03-15.
-- Three single-row aggregates are joined on the constant dt column:
--   tmp_payment  : payment count, amount and paying users from the per-user daily rows
--   tmp_skucount : number of SKUs with at least one payment from the per-SKU daily rows
--   tmp_time     : average seconds between order creation and payment, divided by 60
-- The statement is terminated with a semicolon so it can run as part of a script.
insert into table ads_payment_daycount
select
    tmp_payment.dt,
    tmp_payment.payment_count,
    tmp_payment.payment_amount,
    tmp_payment.payment_user_count,
    tmp_skucount.payment_sku_count,
    tmp_time.payment_avg_time
from
(
    select
        \'2020-03-15\' dt,
        sum(payment_count) payment_count,
        sum(payment_amount) payment_amount,
        sum(if(payment_count>0,1,0)) payment_user_count
    from dws_user_action_daycount
    where dt=\'2020-03-15\'
)tmp_payment
join
(
    select
        \'2020-03-15\' dt,
        sum(if(payment_count>0,1,0)) payment_sku_count
    from dws_sku_action_daycount
    where dt=\'2020-03-15\'
)tmp_skucount on tmp_payment.dt=tmp_skucount.dt
join
(
    select
        \'2020-03-15\' dt,
        sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time
    from dwd_fact_order_info
    where dt=\'2020-03-15\'
    and payment_time is not null
)tmp_time on tmp_payment.dt=tmp_time.dt;

3)查询数据

select * from ads_payment_daycount;

7.4.3 复购率
1)建表语句

-- ADS table holding monthly repurchase statistics per brand and level-1 category.
-- IF EXISTS keeps the script runnable on a fresh warehouse where the table has not
-- been created yet, like every other DROP in this document.
drop table if exists ads_sale_tm_category1_stat_mn;
create external table ads_sale_tm_category1_stat_mn
(
    tm_id string comment \'品牌 id\',
    category1_id string comment \'1 级品类 id \',
    category1_name string comment \'1 级品类名称 \',
    buycount bigint comment \'购买人数\',
    buy_twice_last bigint comment \'两次以上购买人数\',
    buy_twice_last_ratio decimal(10,2) comment \'单次复购率\',
    buy_3times_last bigint comment \'三次以上购买人数\',
    buy_3times_last_ratio decimal(10,2) comment \'多次复购率\',
    stat_mn string comment \'统计月份\',
    stat_date string comment \'统计日期\'
) COMMENT \'复购率统计\'
row format delimited fields terminated by \'\t\'
location \'/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/\';

2)数据导入

-- Load repurchase statistics for the month containing 2020-03-10.
-- Step 1 (monthly_orders): total order_count per user, brand and level-1 category
--                          over all daily rows of that month.
-- Step 2: per brand and category, count users with >= 1, >= 2 and >= 3 orders and
--         divide the latter two by the first.
WITH monthly_orders AS (
    SELECT
        user_id,
        sd.sku_tm_id,
        sd.sku_category1_id,
        sd.sku_category1_name,
        SUM(order_count) AS order_count
    FROM dws_sale_detail_daycount sd
    WHERE date_format(dt,\'yyyy-MM\') = date_format(\'2020-03-10\' ,\'yyyy-MM\')
    GROUP BY
        user_id,
        sd.sku_tm_id,
        sd.sku_category1_id,
        sd.sku_category1_name
)
INSERT INTO TABLE ads_sale_tm_category1_stat_mn
SELECT
    mn.sku_tm_id,
    mn.sku_category1_id,
    mn.sku_category1_name,
    SUM(if(mn.order_count >= 1, 1, 0)) AS buycount,
    SUM(if(mn.order_count >= 2, 1, 0)) AS buyTwiceLast,
    SUM(if(mn.order_count >= 2, 1, 0)) / SUM(if(mn.order_count >= 1, 1, 0)) AS buyTwiceLastRatio,
    SUM(if(mn.order_count >= 3, 1, 0)) AS buy3timeLast,
    SUM(if(mn.order_count >= 3, 1, 0)) / SUM(if(mn.order_count >= 1, 1, 0)) AS buy3timeLastRatio,
    date_format(\'2020-03-10\' ,\'yyyy-MM\') AS stat_mn,
    \'2020-03-10\' AS stat_date
FROM monthly_orders mn
GROUP BY
    mn.sku_tm_id,
    mn.sku_category1_id,
    mn.sku_category1_name;

7.5 ADS 层导入脚本

1)vim dwt_to_ads.sh
在脚本中填写如下内容

#!/bin/bash
# dwt_to_ads.sh - load every ADS-layer table of the gmall database for one statistics date.
#
# Usage: dwt_to_ads.sh [yyyy-MM-dd]
#   $1  statistics date; when omitted, yesterday (date -d "-1 day" +%F) is used.
#
# Every statement appends (insert into), so running the script twice for the same date
# writes the rows twice.
#
# Notes on the SQL text below (kept out of the string so Hive receives plain statements):
#   * Every literal, variable reference and column name sits on a single line. A line
#     break inside \'yyyy-MM\', $do_date or a column name changes what Hive receives.
#   * All week ranges, including ads_back_count.wk_dt, are derived from $do_date.
#   * All TopN statements sort descending, so LIMIT 10 keeps the highest values.
#   * ads_user_action_convert_day reads ads_uv_count, which is loaded first in this script.
hive=/opt/modules/hive/bin/hive

# Use the date passed on the command line, otherwise fall back to yesterday.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

sql="use gmall;

insert into table ads_uv_count
select
    \'$do_date\',
    sum(if(login_date_last=\'$do_date\',1,0)),
    sum(if(login_date_last>=date_add(next_day(\'$do_date\',\'monday\'),-7) and login_date_last<=date_add(next_day(\'$do_date\',\'monday\'),-1),1,0)),
    sum(if(date_format(login_date_last,\'yyyy-MM\')=date_format(\'$do_date\',\'yyyy-MM\'),1,0)),
    if(\'$do_date\'=date_add(next_day(\'$do_date\',\'monday\'),-1),\'Y\',\'N\'),
    if(\'$do_date\'=last_day(\'$do_date\'),\'Y\',\'N\')
from dwt_uv_topic;


insert into table ads_new_mid_count
select
    \'$do_date\',
    count(*)
from dwt_uv_topic
where login_date_first=\'$do_date\';


insert into table ads_silent_count
select
    \'$do_date\',
    count(*)
from dwt_uv_topic
where login_date_first=login_date_last
and login_date_last<=date_add(\'$do_date\',-7);


insert into table ads_back_count
select
    \'$do_date\',
    concat(date_add(next_day(\'$do_date\',\'MO\'),-7),\'_\',date_add(next_day(\'$do_date\',\'MO\'),-1)),
    count(*)
from
(
    select
        mid_id
    from dwt_uv_topic
    where login_date_last>=date_add(next_day(\'$do_date\',\'MO\'),-7)
    and login_date_last<= date_add(next_day(\'$do_date\',\'MO\'),-1)
    and login_date_first<date_add(next_day(\'$do_date\',\'MO\'),-7)
)current_wk
left join
(
    select
        mid_id
    from dws_uv_detail_daycount
    where dt>=date_add(next_day(\'$do_date\',\'MO\'),-7*2)
    and dt<= date_add(next_day(\'$do_date\',\'MO\'),-7-1)
    group by mid_id
)last_wk
on current_wk.mid_id=last_wk.mid_id
where last_wk.mid_id is null;


insert into table ads_wastage_count
select
    \'$do_date\',
    count(*)
from dwt_uv_topic
where login_date_last<=date_add(\'$do_date\',-7);


insert into table ads_user_retention_day_rate
select
    \'$do_date\',
    date_add(\'$do_date\',-3),
    3,
    sum(if(login_date_first=date_add(\'$do_date\',-3) and login_date_last=\'$do_date\',1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-3),1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-3) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\',-3),1,0))*100
from dwt_uv_topic
union all
select
    \'$do_date\',
    date_add(\'$do_date\',-2),
    2,
    sum(if(login_date_first=date_add(\'$do_date\',-2) and login_date_last=\'$do_date\',1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-2),1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-2) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\',-2),1,0))*100
from dwt_uv_topic
union all
select
    \'$do_date\',
    date_add(\'$do_date\',-1),
    1,
    sum(if(login_date_first=date_add(\'$do_date\',-1) and login_date_last=\'$do_date\',1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-1),1,0)),
    sum(if(login_date_first=date_add(\'$do_date\',-1) and login_date_last=\'$do_date\',1,0))/sum(if(login_date_first=date_add(\'$do_date\',-1),1,0))*100
from dwt_uv_topic;


insert into table ads_continuity_wk_count
select
    \'$do_date\',
    concat(date_add(next_day(\'$do_date\',\'MO\'),-7*3),\'_\',date_add(next_day(\'$do_date\',\'MO\'),-1)),
    count(*)
from
(
    select
        mid_id
    from
    (
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7)
        and dt<=date_add(next_day(\'$do_date\',\'monday\'),-1)
        group by mid_id
        union all
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7*2)
        and dt<=date_add(next_day(\'$do_date\',\'monday\'),-7-1)
        group by mid_id
        union all
        select
            mid_id
        from dws_uv_detail_daycount
        where dt>=date_add(next_day(\'$do_date\',\'monday\'),-7*3)
        and dt<=date_add(next_day(\'$do_date\',\'monday\'),-7*2-1)
        group by mid_id
    )t1
    group by mid_id
    having count(*)=3
)t2;


insert into table ads_continuity_uv_count
select
    \'$do_date\',
    concat(date_add(\'$do_date\',-6),\'_\',\'$do_date\'),
    count(*)
from
(
    select mid_id
    from
    (
        select mid_id
        from
        (
            select
                mid_id,
                date_sub(dt,rank) date_dif
            from
            (
                select
                    mid_id,
                    dt,
                    rank() over(partition by mid_id order by dt) rank
                from dws_uv_detail_daycount
                where dt>=date_add(\'$do_date\',-6) and dt<=\'$do_date\'
            )t1
        )t2
        group by mid_id,date_dif
        having count(*)>=3
    )t3
    group by mid_id
)t4;


insert into table ads_user_topic
select
    \'$do_date\',
    sum(if(login_date_last=\'$do_date\',1,0)),
    sum(if(login_date_first=\'$do_date\',1,0)),
    sum(if(payment_date_first=\'$do_date\',1,0)),
    sum(if(payment_count>0,1,0)),
    count(*),
    sum(if(login_date_last=\'$do_date\',1,0))/count(*),
    sum(if(payment_count>0,1,0))/count(*),
    sum(if(login_date_first=\'$do_date\',1,0))/sum(if(login_date_last=\'$do_date\',1,0))
from dwt_user_topic;


insert into table ads_user_action_convert_day
select
    \'$do_date\',
    uv.day_count,
    ua.cart_count,
    ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio,
    ua.order_count,
    ua.order_count/ua.cart_count*100 cart2order_convert_ratio,
    ua.payment_count,
    ua.payment_count/ua.order_count*100 order2payment_convert_ratio
from
(
    select
        \'$do_date\' dt,
        sum(if(cart_count>0,1,0)) cart_count,
        sum(if(order_count>0,1,0)) order_count,
        sum(if(payment_count>0,1,0)) payment_count
    from dws_user_action_daycount
    where dt=\'$do_date\'
)ua join ads_uv_count uv on uv.dt=ua.dt;


insert into table ads_product_info
select
    \'$do_date\' dt,
    sku_num,
    spu_num
from
(
    select
        \'$do_date\' dt,
        count(*) sku_num
    from
        dwt_sku_topic
) tmp_sku_num
join
(
    select
        \'$do_date\' dt,
        count(*) spu_num
    from
    (
        select
            spu_id
        from
            dwt_sku_topic
        group by
            spu_id
    ) tmp_spu_id
) tmp_spu_num
on tmp_sku_num.dt=tmp_spu_num.dt;


insert into table ads_product_sale_topN
select
    \'$do_date\',
    sku_id,
    payment_amount
from dws_sku_action_daycount
where dt=\'$do_date\'
order by payment_amount desc
limit 10;


insert into table ads_product_favor_topN
select
    \'$do_date\',
    sku_id,
    favor_count
from dws_sku_action_daycount
where dt=\'$do_date\'
order by favor_count desc
limit 10;


insert into table ads_product_cart_topN
select
    \'$do_date\' dt,
    sku_id,
    cart_num
from dws_sku_action_daycount
where dt=\'$do_date\'
order by cart_num desc
limit 10;


insert into table ads_product_refund_topN
select
    \'$do_date\',
    sku_id,
    refund_last_30d_count/payment_last_30d_count*100 refund_ratio
from dwt_sku_topic
order by refund_ratio desc
limit 10;


insert into table ads_appraise_bad_topN
select
    \'$do_date\' dt,
    sku_id,
    appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_count+appraise_default_count)*100 appraise_bad_ratio
from dws_sku_action_daycount
where dt=\'$do_date\'
order by appraise_bad_ratio desc
limit 10;


insert into table ads_order_daycount
select
    \'$do_date\',
    sum(order_count),
    sum(order_amount),
    sum(if(order_count>0,1,0))
from dws_user_action_daycount
where dt=\'$do_date\';


insert into table ads_payment_daycount
select
    tmp_payment.dt,
    tmp_payment.payment_count,
    tmp_payment.payment_amount,
    tmp_payment.payment_user_count,
    tmp_skucount.payment_sku_count,
    tmp_time.payment_avg_time
from
(
    select
        \'$do_date\' dt,
        sum(payment_count) payment_count,
        sum(payment_amount) payment_amount,
        sum(if(payment_count>0,1,0)) payment_user_count
    from dws_user_action_daycount
    where dt=\'$do_date\'
)tmp_payment
join
(
    select
        \'$do_date\' dt,
        sum(if(payment_count>0,1,0)) payment_sku_count
    from dws_sku_action_daycount
    where dt=\'$do_date\'
)tmp_skucount on tmp_payment.dt=tmp_skucount.dt
join
(
    select
        \'$do_date\' dt,
        sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time
    from dwd_fact_order_info
    where dt=\'$do_date\'
    and payment_time is not null
)tmp_time on tmp_payment.dt=tmp_time.dt;


insert into table ads_sale_tm_category1_stat_mn
select
    mn.sku_tm_id,
    mn.sku_category1_id,
    mn.sku_category1_name,
    sum(if(mn.order_count>=1,1,0)) buycount,
    sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
    sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio,
    sum(if(mn.order_count>=3,1,0)) buy3timeLast ,
    sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio ,
    date_format(\'$do_date\' ,\'yyyy-MM\') stat_mn,
    \'$do_date\' stat_date
from
(
    select
        user_id,
        sd.sku_tm_id,
        sd.sku_category1_id,
        sd.sku_category1_name,
        sum(order_count) order_count
    from dws_sale_detail_daycount sd
    where date_format(dt,\'yyyy-MM\')=date_format(\'$do_date\' ,\'yyyy-MM\')
    group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name
) mn
group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
"
$hive -e "$sql"

2)增加脚本执行权限

chmod 770 dwt_to_ads.sh

3)执行脚本导入数据

dwt_to_ads.sh  2020-03-10

 

分类:

技术点:

相关文章:

  • 2021-09-25
  • 2021-11-24
  • 2021-09-25
  • 2021-12-10
  • 2021-11-12
  • 2021-10-05
  • 2021-10-01
  • 2021-09-11
猜你喜欢
  • 2021-09-25
  • 2021-09-25
  • 2021-09-25
  • 2021-10-26
  • 2021-10-26
  • 2021-09-25
  • 2021-09-25
相关资源
相似解决方案