192.168.0.6 日志服务器端脚本:
#!/bin/bash
# hadoop_log.sh -- listing obtained with `cat hadoop_log.sh` on the log server.
# Date: 2013-09-17
# Created by ZhangLuYa
# Info: hadoop log
#
# Runs log.awk over yesterday's stat-z.xywy.com access log to produce
# newlog.log, then packs that file into newlog.tar.gz.

source /etc/profile

Path="/www/logs/back"
new_dir="${Path}/hadoop/new"
new_log="${new_dir}/newlog.log"
archive="${new_dir}/newlog.tar.gz"
Yesterday=$(date --date="1 days ago" +%Y%m%d)
src_log="${Path}/stat-z.xywy.com_${Yesterday}.log"

# Start from a truly empty file. log.awk appends to it, and the previous
# `echo " " >` left a one-space line at the top that every line-oriented
# consumer of newlog.log would treat as an extra record.
: > "${new_log}" || {
  echo "cannot truncate ${new_log}" >&2
  exit 1
}

echo "awk start time:$(date +'%F %T')"
/bin/awk -f "${Path}/hadoop/log.awk" "${src_log}" || {
  echo "awk failed on ${src_log}" >&2
  exit 1
}
echo "awk end time:$(date +'%F %T')"

echo "tar start time:$(date +'%F %T')"
# rm -f is a no-op when the archive does not exist, so no existence test
# (and no duplicated tar branch) is needed.
rm -f -- "${archive}"
# -P keeps the absolute member name (/www/logs/back/hadoop/new/newlog.log).
tar -czvPf "${archive}" "${new_log}" || {
  echo "tar failed for ${archive}" >&2
  exit 1
}
echo "tar end time:$(date +'%F %T')"

# NOTE(review): world-writable permissions are kept from the original script;
# presumably another account fetches the archive -- confirm and tighten.
chmod -R 777 "${new_dir}"
awk 对nginx日志处理脚本:
#!/bin/awk -f
# log.awk -- listing obtained with `cat log.awk`.
#
# Reads an access log split on whitespace. For every record whose 7th field
# contains "z.png" it appends one "|"-separated line to the output file:
#   ip|date|redoman|url|browser|pageid|sid|pid|clientID|loginID|spider|ref|dt|<raw record>
#
# The output file defaults to /www/logs/back/hadoop/new/newlog.log and can be
# overridden with:  awk -v outfile=/some/path -f log.awk access.log
#
# Names listed after the extra spaces in a function's parameter list are
# awk-style local variables; callers never pass them.

# Map an English three-letter month abbreviation to "01".."12".
# Returns "" for anything else.
function getMonth(month_en,    pos)
{
    if (length(month_en) != 3) return ""
    pos = index("JanFebMarAprMayJunJulAugSepOctNovDec", month_en)
    # Only offsets 1, 4, 7, ... are real month boundaries ("anF" is not a month).
    if (pos == 0 || pos % 3 != 1) return ""
    return sprintf("%02d", (pos + 2) / 3)
}

# Convert "[17/Sep/2013:10:11:12" into "2013-09-17 10:11:12".
# The first character of the day part (the "[") is dropped.
function changeDate(date,    a1, a2, day, month)
{
    split(date, a1, "/")
    split(a1[3], a2, ":")
    day = substr(a1[1], 2)
    month = getMonth(a1[2])
    return a2[1] "-" month "-" day " " a2[2] ":" a2[3] ":" a2[4]
}

# Extract the value of the "redoman=" query parameter from s.
# The value runs up to the next "&", or to the end of s when no "&" follows.
# Returns "" when the parameter is absent.
# (Earlier revisions always read from offset 16, stopped at the first "&" of
# the whole string and could return the value of a previous record.)
function getRedoman(s,    start, rest, amp)
{
    start = index(s, "redoman=")
    if (start == 0) return ""
    rest = substr(s, start + 8)          # 8 == length("redoman=")
    amp = index(rest, "&")
    if (amp == 0) return rest
    return substr(rest, 1, amp - 1)
}

# Classify a raw log record by substrings of its user agent.
# Checks run top to bottom and a later match overrides an earlier one, so the
# order below is significant. Returns "not found" when nothing matches.
function getBrowser(s,    str)
{
    str = "not found"
    # NOTE(review): any Mozilla/AppleWebKit/Safari agent is labelled "Chrome",
    # including ones without "Chrome/" -- presumably Safari was meant; confirm.
    if (index(s, "Mozilla/") != 0 && index(s, "AppleWebKit/") != 0 && index(s, "Safari/") != 0) str = "Chrome"
    if (index(s, "TaoBrowser") != 0) str = "TaoBao"
    if (index(s, "LBBROWSER") != 0) str = "LieBao"
    if (index(s, "QQBrowser") != 0) str = "QQ"
    if (index(s, "360SE") != 0) str = "360"
    if (index(s, "Opera") != 0) str = "Opera"
    if (index(s, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1") != 0) str = "360"
    if (index(s, "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1") != 0) str = "360"
    if (index(s, "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0") != 0) str = "Sougou"
    if (index(s, "Firefox/") != 0) str = "Firefox"
    # The patterns used to be spelled "andorid"/"Andorid" and could never match.
    if (index(s, "android") != 0 || index(s, "Android") != 0) str = "Android"
    if (index(s, "UC") != 0) str = "UC"
    if (index(s, "WAP") != 0) str = "WAP_Browser"
    if (index(s, "Ipad") != 0 || index(s, "iPad") != 0) str = "iPad"
    if (index(s, "iPhone") != 0) str = "iPhone"
    if (index(s, "MSIE 6.0") != 0) str = "MSIE 6.0"
    if (index(s, "MSIE 7.0") != 0) str = "MSIE 7.0"
    if (index(s, "MSIE 8.0") != 0) str = "MSIE 8.0"
    if (index(s, "MSIE 9.0") != 0) str = "MSIE 9.0"
    return str
}

# Return "1" when the record contains "spider", "bot" or "Yahoo! Slurp",
# otherwise "0".
function isSpider(s)
{
    if (index(s, "spider") != 0) return "1"
    if (index(s, "bot") != 0) return "1"
    if (index(s, "Yahoo! Slurp") != 0) return "1"
    return "0"
}

# Return the text found between the first occurrence of key and the next
# occurrence of lastkey after it. Returns "" when key is absent, when lastkey
# does not follow, or when the text in between is empty.
# (One-character values used to be dropped by an off-by-one length check.)
function getP(s, key, lastkey,    start, stop)
{
    start = index(s, key)
    if (start == 0) return ""
    s = substr(s, start + length(key))
    stop = index(s, lastkey)
    if (stop <= 1) return ""
    return substr(s, 1, stop - 1)
}

# Position of the last occurrence of find in string, or 0 when absent.
function rindex(string, find,    ns, nf, k)
{
    ns = length(string)
    nf = length(find)
    for (k = ns + 1 - nf; k >= 1; k--) {
        if (substr(string, k, nf) == find) return k
    }
    return 0
}

# Return the text between the last "/" of s and the first ".htm" of s,
# e.g. ".../a/123.htm" gives "123". Returns "" when either marker is missing.
function getPageId(s,    dot, slash)
{
    dot = index(s, ".htm")
    slash = rindex(s, "/")
    if (dot == 0 || slash == 0) return ""
    return substr(s, slash + 1, dot - slash - 1)
}

BEGIN {
    # Allow `-v outfile=...`; fall back to the historical fixed location.
    if (outfile == "") outfile = "/www/logs/back/hadoop/new/newlog.log"
    remoteip = ""
    date = ""
    redoman = ""
    ref = ""
    url = ""
    browser = ""
    pageid = ""
}

# Only records for the z.png resource are kept; all others are ignored.
index($7, "z.png") != 0 {
    remoteip = $1
    date = changeDate($4)
    dt = substr(date, 1, index(date, " ") - 1)      # date part only
    redoman = getRedoman($7)
    url = getP($7, "AcT=", "AcT")
    browser = getBrowser($0)
    pageid = getPageId($11)
    sid = getP($7, "Sid=", "Sid")
    pid = getP($7, "Pid=", "Pid")
    clientID = getP($7, "clientID=", "clientID")
    loginID = getP($7, "loginID=", "&")
    spider = isSpider($0)
    ref = getP($7, "ref=", "&clientID=")

    x = (remoteip "|" date "|" redoman "|" url "|" browser "|" pageid "|" sid "|" pid "|" clientID "|" loginID "|" spider "|" ref "|" dt "|" $0)
    print x >> outfile
}
10.0.1.53 服务器脚本备份:
#!/bin/bash
# hadoop_log.sh -- listing obtained with `cat hadoop_log.sh` on the Hadoop server.
# Author: ZhangLuYa
# Date: 2013-09-23
#
# Copies newlog.tar.gz from SRC into DST, unpacks it there and installs the
# contained newlog.log as ${DST}today.log.

SRC="/data/logs/source_log/newlog.tar.gz"
DST="/data/logs/hadoop_log/"

# Report that an earlier today.log is about to be replaced.
if [[ -e "${DST}today.log" ]]; then
  echo "${DST}today.log"
fi

# rm -f succeeds when the files are absent, so first and repeated runs share
# one code path.
rm -f -- "${DST}today.log" "${DST}newlog.tar.gz"

/bin/cp -- "${SRC}" "${DST}" || {
  echo "cannot copy ${SRC} to ${DST}" >&2
  exit 1
}

# The archive member unpacks to www/logs/back/hadoop/new/newlog.log below DST.
tar zvfx "${DST}newlog.tar.gz" -C "${DST}" || {
  echo "cannot unpack ${DST}newlog.tar.gz" >&2
  exit 1
}

/bin/mv -- "${DST}www/logs/back/hadoop/new/newlog.log" "${DST}today.log" || {
  echo "newlog.log not found after unpacking" >&2
  exit 1
}

# Remove the unpack scratch tree and the copied archive.
rm -rf -- "${DST}www" "${DST}newlog.tar.gz"

# chown takes owner:group and chmod takes the mode; the two commands
# previously had each other's arguments.
chown -R hadoop:hadoop "${DST}"
# NOTE(review): mode 777 is kept from the original -- confirm it is needed.
chmod -R 777 "${DST}"
hive脚本备份:
#!/bin/bash
# sql_hive.sh -- listing obtained with `cat sql_hive.sh`.
# Info: hive sql
#
# Exports the Hadoop environment variables and then runs hive.sql with the
# hive command-line client.

# Hadoop location; its bin directory is appended to PATH.
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_HOME_WARN_SUPPRESS=1
export PATH="${PATH}:${HADOOP_HOME}/bin"

hive_bin='/usr/local/hive-0.10.0/bin/hive'
hive_script='/data/logs/shell/hive.sql'

# hive is the last command, so its exit status is the script's exit status.
"${hive_bin}" -f "${hive_script}"
hive.sql备份:
-- hive.sql -- listing obtained with `cat hive.sql`.
--
-- Loads today.log into the pv and pv_temp tables, computes daily figures for
-- the previous calendar day from pv_temp, and pushes each result into MySQL
-- through the dboutput UDF.
--
-- NOTE(review): the MySQL user and password are stored in plain text in every
-- dboutput call below. Move them out of this file and rotate the password.

-- pv keeps the accumulated history while pv_temp holds only the current load.
load data local inpath "/data/logs/hadoop_log/today.log" into table pv;
load data local inpath "/data/logs/hadoop_log/today.log" overwrite into table pv_temp;

-- Daily page views.
insert overwrite table pv_day_temp
select count(*), date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;

add jar /usr/local/hive-0.10.0/lib/hive-contrib-0.10.0.jar;
add jar /usr/local/hive-0.10.0/lib/mysql-connector-java-5.1.26-bin.jar;
create temporary function dboutput as 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_day(pv,time)VALUES(?,?)',pv,time) from pv_day_temp;

-- Daily unique visitors (distinct clientID).
insert overwrite table uv_temp
select count(distinct clientID), date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_day(uv,time)VALUES(?,?)',uv,time) from uv_temp;

-- Daily distinct remote IPs.
insert overwrite table ip_temp
select count(distinct remoteip), date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO ip_day(ipnum,time)VALUES(?,?)',ip,time) from ip_temp;

-- Top 10 pid values by page views.
insert overwrite table pid_pv
select count(pid) num, pid, date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)
from pv_temp group by pid having pid!='' order by num desc limit 10;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_pid_day(pv,pid,time)VALUES(?,?,?)', pv,pid,time) from pid_pv;

-- Top 10 pid values by unique visitors.
-- This used to be count(pid) grouped by (pid, clientID), which yields page
-- views per visitor and can list the same pid several times. Unique visitors
-- per pid is the number of distinct clientID values within each pid.
insert overwrite table pid_uv
select count(distinct clientID) num, pid, date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)
from pv_temp group by pid having pid!='' order by num desc limit 10;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_pid_day(uv,pid,time)VALUES(?,?,?)', uv,pid,time) from pid_uv;

-- Top 10 sid values by page views.
insert overwrite table sid_pv
select count(sid) num, sid, date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)
from pv_temp group by sid having sid!='' order by num desc limit 10;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_sid_day(pv,sid,time)VALUES(?,?,?)', pv,sid,time) from sid_pv;

-- Top 10 sid values by unique visitors (same correction as for pid_uv).
insert overwrite table sid_uv
select count(distinct clientID) num, sid, date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)
from pv_temp group by sid having sid!='' order by num desc limit 10;

select dboutput('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_sid_day(uv,sid,time)VALUES(?,?,?)', uv,sid,time) from sid_uv;