五一的三天假期期间,ETL逻辑上出现了一些问题,导致每天需要装入DW的增量数据没有按设计装入。所以需要在ETL后对产生的增量数据进行检查,避免出现某天的增量数据出现丢失后自己被动的处理问题。
需求:如当天ETL的增量数据出现问题,需要让两方面的人或程序(DBA与BI)知晓并及时处理。
1. 在ODS服务器上建立如下库、表:
CREATE DATABASE `imm` /*!40100 DEFAULT CHARACTER SET utf8 */;
-- item_info记录不同项目名称与项目内的抽查表(前提:此表每天均有数据产生),以后如有新项目,只需在此表中添加记录
CREATE TABLE `item_info` (
`id` int(11) DEFAULT NULL,
`itemname` varchar(64) DEFAULT NULL,
`tabNameChk` char(32) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- icc表记录每个项目的检查时间与是否成功ETL的状态标志
CREATE TABLE `icc` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`entertime` datetime DEFAULT NULL,
`itemname` varchar(64) DEFAULT NULL,
`flag` smallint(6) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- item_info记录不同项目名称与项目内的抽查表(前提:此表每天均有数据产生),以后如有新项目,只需在此表中添加记录
CREATE TABLE `item_info` (
`id` int(11) DEFAULT NULL,
`itemname` varchar(64) DEFAULT NULL,
`tabNameChk` char(32) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- icc表记录每个项目的检查时间与是否成功ETL的状态标志
CREATE TABLE `icc` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`entertime` datetime DEFAULT NULL,
`itemname` varchar(64) DEFAULT NULL,
`flag` smallint(6) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
2.建立定时检查的procedure与event:
CREATE PROCEDURE cc_pro()
BEGIN
DECLARE v_max smallint;
DECLARE v_title bigint;
DECLARE v_itemN char(32);
DECLARE v_tab char(64);
DECLARE i smallint;
SET i=1;
SELECT max(id) INTO v_max FROM imm.item_info;
WHILE i <= v_max DO
SELECT itemname,tabNameChk INTO v_itemN,v_tab FROM imm.item_info WHERE id=i;
SET @dbname=v_itemN;
SET @tabname=v_tab;
SET @s_name = CONCAT(@dbname,'.',@tabname);
SET @tempsql = concat('SELECT count(db_id) INTO @v_title FROM ',@s_name,';');
PREPARE _stmt FROM @tempsql;
EXECUTE _stmt;
DEALLOCATE PREPARE _stmt;
IF(@v_title > 0) THEN
INSERT INTO imm.icc(entertime,itemname,flag) VALUES(now(),v_itemN,1);
ELSE
INSERT INTO imm.icc(entertime,itemname,flag) VALUES(now(),v_itemN,0);
END IF;
SET i=i+1;
END WHILE;
END
BEGIN
DECLARE v_max smallint;
DECLARE v_title bigint;
DECLARE v_itemN char(32);
DECLARE v_tab char(64);
DECLARE i smallint;
SET i=1;
SELECT max(id) INTO v_max FROM imm.item_info;
WHILE i <= v_max DO
SELECT itemname,tabNameChk INTO v_itemN,v_tab FROM imm.item_info WHERE id=i;
SET @dbname=v_itemN;
SET @tabname=v_tab;
SET @s_name = CONCAT(@dbname,'.',@tabname);
SET @tempsql = concat('SELECT count(db_id) INTO @v_title FROM ',@s_name,';');
PREPARE _stmt FROM @tempsql;
EXECUTE _stmt;
DEALLOCATE PREPARE _stmt;
IF(@v_title > 0) THEN
INSERT INTO imm.icc(entertime,itemname,flag) VALUES(now(),v_itemN,1);
ELSE
INSERT INTO imm.icc(entertime,itemname,flag) VALUES(now(),v_itemN,0);
END IF;
SET i=i+1;
END WHILE;
END
定时调用procedure,在此之前检查事件调度器是否开启:
+-----------------+-------+
| Variable_name | Value |
+-----------------+-------+
| event_scheduler | ON |
+-----------------+-------+
| Variable_name | Value |
+-----------------+-------+
| event_scheduler | ON |
+-----------------+-------+
CREATE EVENT `ccpro_exec` ON SCHEDULE
EVERY 1 DAY STARTS '2011-05-05 07:00:00'
ON COMPLETION NOT PRESERVE
ENABLE
COMMENT '在ETL执行完成后进行增量数据质量检查'
DO BEGIN
call imm.cc_pro2;
END
EVERY 1 DAY STARTS '2011-05-05 07:00:00'
ON COMPLETION NOT PRESERVE
ENABLE
COMMENT '在ETL执行完成后进行增量数据质量检查'
DO BEGIN
call imm.cc_pro2;
END
以上事件的作用是每天7点钟检视一次各项目的增量情况
3.接下来可以随便写个shell脚本,作用是对当天的检查结果进行邮件发送(以前我喜欢发送到移动139邮箱,不过时下有太多的邮箱提供手机绑定功能。所以象我这样偷懒的可以只发封邮件。)
#!/bin/bash
#scriptname:icc_result.sh
#author:liuming
#datetime:2011-05-05
function send_message(){
mysql -u$dbuser -p$passwd -D$dbname -e'select * from icc where date(entertime)=date(now());' > $r_txt
mail -s "`date +%Y-%m-%d_%H:%M` ETL(increment data) status " $to_email <$r_txt
}
function main(){
r_txt="`pwd`/icc_result.txt"
dbuser='xxxx'
passwd='xxxx'
dbname='imm'
tabname='icc'
to_email='xxxx@xxx.com'
send_message
}
main
#scriptname:icc_result.sh
#author:liuming
#datetime:2011-05-05
function send_message(){
mysql -u$dbuser -p$passwd -D$dbname -e'select * from icc where date(entertime)=date(now());' > $r_txt
mail -s "`date +%Y-%m-%d_%H:%M` ETL(increment data) status " $to_email <$r_txt
}
function main(){
r_txt="`pwd`/icc_result.txt"
dbuser='xxxx'
passwd='xxxx'
dbname='imm'
tabname='icc'
to_email='xxxx@xxx.com'
send_message
}
main
4.对于app方面,可提供相关表名,让其在计算之前进行判断。此处省去N个字...