192.168.0.6 日志服务器端脚本

 1 [root@localhost hadoop]# cat hadoop_log.sh 
 2 #!/bin/bash
 3 #Date:2013-09-17
 4 #Created by ZhangLuYa
 5 #Info:hadoop log
 6 
 7 source /etc/profile
 8 Path="/www/logs/back"
 9 echo " " > ${Path}/hadoop/new/newlog.log
10 Yesterday=`date --date="1 days ago" +%Y%m%d`
11 echo "awk start time:$(date +'%F %T')"
12 /bin/awk -f ${Path}/hadoop/log.awk ${Path}/stat-z.xywy.com_${Yesterday}.log
13 echo "awk end time:$(date +'%F %T')"
14 
15 if [ -e ${Path}/hadoop/new/newlog.tar.gz ];then
16    echo "tar start time:$(date +'%F %T')"
17    rm -rf ${Path}/hadoop/new/newlog.tar.gz
18    tar -czvPf ${Path}/hadoop/new/newlog.tar.gz ${Path}/hadoop/new/newlog.log
19    echo "tar end time:$(date +'%F %T')"
20 else
21    echo "tar start time:$(date +'%F %T')"
22    tar -czvPf ${Path}/hadoop/new/newlog.tar.gz ${Path}/hadoop/new/newlog.log
23    echo "tar end time:$(date +'%F %T')"
24 
25 fi
26 
27 chmod -R 777 ${Path}/hadoop/new

awk 对nginx日志处理脚本:

  1 [root@localhost hadoop]# cat log.awk 
  2 #!/bin/awk -f
  3 function getMonth(month_en)
  4 {
  5         if(month_en=="Jan")return "01"
  6         if(month_en=="Feb")return "02"
  7         if(month_en=="Mar")return "03"
  8         if(month_en=="Apr")return "04"
  9         if(month_en=="May")return "05"
 10         if(month_en=="Jun")return "06"
 11         if(month_en=="Jul")return "07"
 12         if(month_en=="Aug")return "08"
 13         if(month_en=="Sep")return "09"
 14         if(month_en=="Oct")return "10"
 15         if(month_en=="Nov")return "11"
 16         if(month_en=="Dec")return "12"
 17     return ""
 18 }
 19 
 20 function changeDate(date)
 21 {
 22     n = split(date,a1,"/")
 23     m = split(a1[3],a2,":")
 24     day = substr(a1[1],2);
 25     month = getMonth(a1[2])
 26     year = a2[1]
 27     hour = a2[2]
 28     minuts = a2[3]
 29     seconds = a2[4]
 30     return year"-"month"-"day" "hour":"minuts":"seconds
 31 }
 32 
 33 function getRedoman(s)
 34 {
 35     if(index(s,"redoman")!=0)
 36         {                    
 37             r1 = index(s,"redoman=")        
 38             r2 = index(s,"&")
 39             if(r2!=0)
 40             {
 41                 offset = r2-r1-8
 42                 s2=substr(s,16,offset)
 43             }
 44             return s2
 45         }
 46         else    
 47         {    
 48             return ""
 49         }
 50 }
 51 
 52 function getBrowser(s)
 53 {    
 54     str = "not found"
 55     if(index(s,"Mozilla/")!=0&&index(s,"AppleWebKit/")!=0&&index(s,"Safari/")!=0&&index(s,"Chrome/")!=0)str ="Chrome"
 56     if(index(s,"Mozilla/")!=0&&index(s,"AppleWebKit/")!=0&&index(s,"Safari/")!=0&&index(s,"/")!=0)str ="Chrome"
 57     if(index(s,"TaoBrowser")!=0)str ="TaoBao"
 58         if(index(s,"LBBROWSER")!=0){str = "LieBao"}
 59         if(index(s,"QQBrowser")!=0)str = "QQ"
 60         if(index(s,"360SE")!=0)str = "360"
 61     if(index(s,"Opera")!=0)str = "Opera"
 62         if(index(s,"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1")!=0)str = "360"
 63         if(index(s,"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1")!=0)str = "360"
 64     if(index(s,"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0")!=0)str = "Sougou"
 65         if(index(s,"Firefox/")!=0)str = "Firefox"
 66         if(index(s,"andorid")!=0)str = "andorid"
 67         if(index(s,"Andorid")!=0)str = "Andorid"
 68     if(index(s,"UC")!=0)str = "UC"
 69     if(index(s,"WAP")!=0)str = "WAP_Browser"
 70     if(index(s,"Ipad")!=0)str = "iPad"
 71     if(index(s,"iPad")!=0)str = "iPad"
 72     if(index(s,"iPhone")!=0)str = "iPhone"
 73         if(index(s,"MSIE 6.0")!=0)str = "MSIE 6.0"
 74         if(index(s,"MSIE 7.0")!=0)str = "MSIE 7.0"
 75         if(index(s,"MSIE 8.0")!=0)str = "MSIE 8.0"
 76     if(index(s,"MSIE 9.0")!=0)str = "MSIE 9.0"
 77     return str
 78 }
 79 
 80 function isSpider(s)
 81 {    
 82     str = "0"
 83     if(index(s,"spider")!=0)str ="1"
 84     if(index(s,"bot")!=0)str ="1"
 85     if(index(s,"Yahoo! Slurp")!=0)str ="1"
 86     return str
 87 }
 88 
 89 
 90 function getP(s,key,lastkey){    
 91     result = ""
 92     if(index(s,key)!=0)
 93     {
 94         r1=index(s,key)
 95         s=substr(s,r1+length(key))
 96         r2=index(s,lastkey)-1
 97         if(r2!=0&&r2>1)
 98         {
 99             result=substr(s,1,r2)
100         }
101     }
102     return result
103 }
104 
# rindex(string, find): position (1-based) of the LAST occurrence of find in
# string, or 0 when find does not occur. Counterpart of the built-in index().
# The scan position and both lengths are locals, so calling this inside a
# caller's own loop cannot clobber a global k, ns or nf.
function rindex(string, find,    slen, flen, pos)
{
    slen = length(string)
    flen = length(find)
    # Walk backwards from the last position where find could still fit.
    for (pos = slen + 1 - flen; pos >= 1; pos--) {
        if (substr(string, pos, flen) == find)
            return pos
    }
    return 0
}
115 
# getPageId(s): return the file-name stem of the first ".htm" page in s,
# i.e. the text between the closest preceding "/" and ".htm".
#   s - URL-like string, e.g. "http://host/path/12345.htm"  ->  "12345"
# Returns "" when s has no ".htm" or no "/" before it.
# The "/" is searched only in the part of s that precedes ".htm", so a slash
# later in the string (for instance inside a query string) no longer makes
# the result empty.
function getPageId(s,    dot, slash)
{
    dot = index(s, ".htm")
    if (dot == 0)
        return ""

    slash = rindex(substr(s, 1, dot - 1), "/")
    if (slash == 0)
        return ""

    return substr(s, slash + 1, dot - slash - 1)
}
129 
BEGIN {
    # Destination file for the extracted records. Defaults to the original
    # fixed path; override with:  awk -v outfile=PATH -f log.awk access.log
    if (outfile == "")
        outfile = "/www/logs/back/hadoop/new/newlog.log"
}

# Only records whose 7th field mentions z.png are exported; every other
# record is ignored.
index($7, "z.png") != 0 {
    remoteip = $1
    date     = changeDate($4)
    dt       = substr(date, 1, index(date, " ") - 1)    # date part only
    redoman  = getRedoman($7)
    url      = getP($7, "AcT=", "AcT")
    browser  = getBrowser($0)
    pageid   = getPageId($11)
    sid      = getP($7, "Sid=", "Sid")
    pid      = getP($7, "Pid=", "Pid")
    clientID = getP($7, "clientID=", "clientID")
    loginID  = getP($7, "loginID=", "&")
    spider   = isSpider($0)
    ref      = getP($7, "ref=", "&clientID=")

    # One "|"-separated record per hit; the raw log line is kept as the
    # final column.
    x = (remoteip "|" date "|" redoman "|" url "|" browser "|" pageid "|" sid "|" pid "|" clientID "|" loginID "|" spider "|" ref "|" dt "|" $0)
    print x >> outfile
}


10.0.1.53服务器脚本备份

 1 [root@CHN-BJ-03-DR720-Hadoop shell]# cat hadoop_log.sh 
 2 #!/bin/bash
 3 #Author:ZhangLuYa
 4 #Date:2013-09-23
 5 
 6 SRC="/data/logs/source_log/newlog.tar.gz"
 7 DST="/data/logs/hadoop_log/"
 8 
 9 if [ -e ${DST}today.log ];then
10    echo ${DST}today.log
11    rm -rf ${DST}today.log
12    rm -rf ${DST}newlog.tar.gz
13    /bin/cp ${SRC} ${DST}
14    #tar zvfx /data/logs/hadoop_log/newlog.tar.gz -C /data/logs/hadoop_log/
15    tar zvfx ${DST}newlog.tar.gz -C ${DST}
16    /bin/mv ${DST}www/logs/back/hadoop/new/newlog.log ${DST}today.log
17    rm -rf ${DST}www
18    rm -rf ${DST}newlog.tar.gz
19 else
20    /bin/cp ${SRC} ${DST}
21    #tar zvfx /data/logs/hadoop_log/newlog.tar.gz -C /data/logs/hadoop_log/
22    tar zvfx ${DST}newlog.tar.gz -C ${DST}
23    /bin/mv ${DST}www/logs/back/hadoop/new/newlog.log ${DST}today.log
24    rm -rf ${DST}www
25 fi
26 chmod -R hadoop:hadoop ${DST}
27 chown -R 777 ${DST}   

hive脚本备份:

1 [root@CHN-BJ-03-DR720-Hadoop shell]# cat sql_hive.sh 
#!/bin/bash
# Info: hive sql
# Puts the Hadoop binaries on PATH, silences the HADOOP_HOME deprecation
# warning and runs the statements in /data/logs/shell/hive.sql through the
# Hive CLI.

# Set Hadoop Path
HADOOP_HOME=/usr/local/hadoop
HADOOP_HOME_WARN_SUPPRESS=1
PATH=$PATH:$HADOOP_HOME/bin
export HADOOP_HOME HADOOP_HOME_WARN_SUPPRESS PATH

/usr/local/hive-0.10.0/bin/hive -f /data/logs/shell/hive.sql


hive.sql备份:

 1 [root@CHN-BJ-03-DR720-Hadoop shell]# cat hive.sql 
 2 load data local inpath "/data/logs/hadoop_log/today.log" into table pv;
 3 load data local inpath "/data/logs/hadoop_log/today.log" overwrite into table pv_temp;
 4 insert overwrite table pv_day_temp  
 5 select count(*),date_sub(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;
 6 add jar /usr/local/hive-0.10.0/lib/hive-contrib-0.10.0.jar;
 7 add jar /usr/local/hive-0.10.0/lib/mysql-connector-java-5.1.26-bin.jar;
 8 create temporary function dboutput as 'org.apache.hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';
 9 select dboutput
10 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_day(pv,time)VALUES(?,?)',pv,time) from  pv_day_temp;
11 insert overwrite table uv_temp  
12 select count(distinct clientID), date_sub (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;
13 select dboutput
14 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_day(uv,time)VALUES(?,?)',uv,time) from  uv_temp;
15 insert overwrite table ip_temp  
16 select count(distinct remoteip), date_sub (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp;
17 select dboutput
18 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO ip_day(ipnum,time)VALUES(?,?)',ip,time) from  ip_temp;
19 insert overwrite table pid_pv  
20 select count(pid)num,pid, date_sub
21 (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)from pv_temp group by pid having pid!='' order by num desc limit 10;
22 select dboutput
23 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_pid_day(pv,pid,time)VALUES(?,?,?)', pv,pid,time) from pid_pv;
24 insert overwrite table pid_uv  
25 select count(pid)num,pid, date_sub
26 (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp group by pid,clientID having pid!='' order by num desc limit 10;
27 select dboutput
28 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_pid_day(uv,pid,time)VALUES(?,?,?)', uv,pid,time) from pid_uv;
29 insert overwrite table sid_pv  
30 select count(sid)num,sid, date_sub
31 (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1)from pv_temp group by sid having sid!='' order by num desc limit 10;
32 select dboutput
33 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO pv_sid_day(pv,sid,time)VALUES(?,?,?)', pv,sid,time) from sid_pv;
34 insert overwrite table sid_uv  
35 select count(sid)num,sid, date_sub
36 (from_unixtime(unix_timestamp(),'yyyy-MM-dd'),1) from pv_temp group by sid,clientID having sid!='' order by num desc limit 10;
37 select dboutput
38 ('jdbc:mysql://10.0.1.53/reports','zhangyi_hd4','oVQrij#Xp5kS@yb.mT','INSERT INTO uv_sid_day(uv,sid,time)VALUES(?,?,?)', uv,sid,time) from sid_uv;

 

 

 

 

相关文章: