onlyou13
 1 # coding: utf-8
 2 
 3 import urllib2
 4 import re
 5 import time
 6 
 7 def getDL(page):
 8     url = \'http://www.xicidaili.com/nt/{}\'.format(page)
 9     header = {
10         \'User-Agent\':\'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36\'
11     }
12 
13     req=urllib2.Request(url, headers=header)
14     res=urllib2.urlopen(req)
15     html=res.read()
16 
17     srclist=re.findall(r\'<tr class=(.|\n)*?<td>(\d+\.\d+\.\d+\.\d+)</td>(.|\n)*?<td>(\d+)</td>(.|\n)*?<td>(HTTP|HTTPS)</td>\', html)
18     xlist = []
19     for item in srclist:
20         xlist.append((item[5],item[1],item[3]))
21     return xlist
22 
def testDL(ipstr, timeout=10):
    """Check whether a proxy can fetch a test page and record it if so.

    Args:
        ipstr: A ``(protocol, ip, port)`` tuple as returned by ``getDL``.
        timeout: Seconds to wait for the proxied request before treating the
            proxy as unusable.

    On success the response body is printed and a ``"protocol ip port"`` line
    is appended to ``ok.txt`` in the current directory. On any failure the
    error is printed instead. The function always sleeps one second before
    returning.
    """
    proxy = urllib2.ProxyHandler({'http': "{}:{}".format(ipstr[1], ipstr[2])})
    # Keep the opener private. urllib2.install_opener() would replace the
    # process-wide opener, sending every later urlopen() call -- including
    # getDL's listing downloads -- through this (possibly dead) proxy.
    opener = urllib2.build_opener(proxy)

    try:
        testUrl = 'http://2017.ip138.com/ic.asp'
        res = opener.open(testUrl, timeout=timeout)
        try:
            body = res.read()
        finally:
            res.close()
        print("********************* √ {}    -- {}".format(ipstr, body))

        with open("ok.txt", "a") as f:
            f.write("{} {} {}\n".format(ipstr[0], ipstr[1], ipstr[2]))
    except Exception as e:
        # Best-effort scan: one bad proxy must not abort the whole run.
        print("******** ×, {} -- {}".format(ipstr, e))
    time.sleep(1)
41 
def startTask(pages=5):
    """Scan the first ``pages`` listing pages and test every proxy found.

    Args:
        pages: Number of listing pages to fetch, starting from page 1.
    """
    for page in range(1, pages + 1):
        # Named ``proxies`` rather than ``list`` to avoid shadowing the builtin.
        proxies = getDL(page)
        for item in proxies:
            testDL(item)
47 
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    startTask()

 

分类:

技术点:

相关文章: