# hangdali
import requests 
import re
import pymysql
import struct
import socket
from bs4 import BeautifulSoup
# Open the MySQL connection and cursor used by the rest of the script.
# Keyword arguments are required here: PyMySQL 1.0+ no longer accepts
# host/user/password/database as positional arguments.
db = pymysql.connect(
    host="localhost",
    user="root",
    password="oracle",
    database="xici_proxy",
    use_unicode=True,
    charset="utf8",
)
cursor = db.cursor()

# Every run starts from an empty table: drop any previous one, then recreate.
cursor.execute("DROP TABLE IF EXISTS IPLIST")

# NOTE(review): in MySQL the LONG type is a synonym for MEDIUMTEXT, so the
# numeric IPv4 value written to `ip` is stored as text. An unsigned integer
# column looks like the intent -- confirm with readers of this table before
# changing the schema.
sql = """create table iplist(
            id INT NOT NULL AUTO_INCREMENT,
            ip long,
            port int,
            address char(40),
            anony char(20),
            protocol char(20),
            speed char(40),
            time char(40),
            PRIMARY KEY ( id )
            );
        """
cursor.execute(sql)
# Scrape listing pages 1 and 2 and store one row per proxy in `iplist`.
# Relies on the module-level `db` connection and `cursor` created above.

# Browser-like User-Agent sent with every page request (loop-invariant).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
}

# Parameterized INSERT: the driver escapes every value, so scraped text that
# contains quotes can neither break the statement nor inject SQL.
insert_sql = (
    "insert into iplist "
    "(ip,port,address,anony,protocol,speed,time) "
    "values (%s,%s,%s,%s,%s,%s,%s)"
)

try:
    for page in range(1, 3):
        url = "http://www.xicidaili.com/nn/{}".format(page)
        # A timeout keeps an unresponsive server from hanging the script.
        data = requests.get(url=url, headers=headers, timeout=10).text
        soup = BeautifulSoup(data, "html.parser")

        table = soup.find("table", id="ip_list")
        if table is None:
            # Page came back without the listing table; nothing to parse.
            print("ip_list table not found:", url)
            continue

        # The first <tr> is skipped, as in the original slice trs[1:].
        for tr in table.find_all("tr")[1:]:
            tds = tr.find_all("td")
            if len(tds) < 10:
                # Cells up to index 9 are read below; skip shorter rows.
                continue

            ip = tds[1].text.strip()
            try:
                # inet_aton yields 4 bytes in network order; "!I" decodes
                # them as one big-endian unsigned 32-bit integer.
                ip_num = struct.unpack("!I", socket.inet_aton(ip))[0]
            except OSError:
                print("invalid ip:", ip)
                continue

            port = tds[2].text.strip()
            address = tds[3].text.strip()
            anony = tds[4].text.strip()
            protocol = tds[5].text.strip()
            speed = tds[6].find("div")["title"].strip()
            time_text = tds[9].text.strip()

            params = (ip_num, port, address, anony, protocol, speed, time_text)
            # Show the statement exactly as it will be sent to the server.
            print(cursor.mogrify(insert_sql, params))
            try:
                cursor.execute(insert_sql, params)
                db.commit()
            except pymysql.MySQLError as exc:
                # Only database errors are treated as a failed row; anything
                # else (e.g. KeyboardInterrupt) propagates.
                db.rollback()
                print("回滚", exc)
                print("ip:", ip_num, "port:", port, "address:", address, "anony:", anony, "protocol:", protocol, "speed:", speed, "time:", time_text)
finally:
    # Close the connection even when a request or parse step raises.
    db.close()

 

# 分类:

# 技术点:

# 相关文章: