contiune

python+requests+pyquery+threading 多线程测试代理可用不可用

import requests
from requests.exceptions import RequestException #请求出现异常的模块
from pyquery import PyQuery as pq #解析模块
import threading #多线程模块
from queue import Queue #队列模块
class ProXiesAD(threading.Thread): #创建一个线程类
    def __init__(self,set_queue):
        super(ProXiesAD, self).__init__()
        self.set_queue = set_queue
    def run(self):
        global KY_HOST
        while not PARSE_EXIT: #循环取队列一直将队列取空
            try:
                host = self.set_queue.get()
                proxies = {\'https\':host,\'http\':host}
                get_code = requests.get(url=\'https://www.baidu.com/\',proxies=proxies,timeout=5,headers=headers).status_code #返回请求百度的状态码
            except RequestException:
                print("不可用")
            else:
                if get_code == 200:
                    KY_HOST.append(host)
                    print(\'可用\'+host)
                else:
                    print(\'请求超时\')
        print(KY_HOST)
headers = {\'User-Agent\':\'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\'} #
HOST = []
KY_HOST = []
PARSE_EXIT = False
def url_text(url):
    try:
        texts = requests.get(url,headers=headers)
    except RequestException:
        print(\'请求错误\')
    else:
        if texts.status_code==200: #状态码为200说明请求成功
            return texts.text
def re_response(text):
    global HOST#全局变量声明
    doc = pq(text)
    for i in doc(\'tr\').items():
        ip = i(\'tr td:lt(3)\').text().replace(\' \',"",1).replace(\' \',\':\')#解析
        if ip.find("."):
            HOST.append(ip)#存到列表里面
    del HOST[0]
def main(ints):
    url = \'http://www.xicidaili.com/nn/\'+ints
    text = url_text(url)
    re_response(text)
    pagequeue = Queue()#创建队列
    threadkeep=[]
    for i in HOST:
        pagequeue.put(i)#将ip放到队列里面去
    keepList = ["保存线程一号","保存线程二号","保存线程三号"]
    for threadName in keepList: #创建三个线程
        thread = ProXiesAD(pagequeue)
        thread.start()
        threadkeep.append(thread)

    while not pagequeue.empty(): #等待队列为空
        pass
    global PARSE_EXIT
    PARSE_EXIT = True #队列为空循环结束
    for thread in threadkeep:#主线程等待子线程结束在结束
        thread.join()
if __name__ == \'__main__\':
    main("1")

 

python+requests+pyquery+threading 多线程测试代理可用不可用python+requests+pyquery+threading 多线程测试代理可用不可用python+requests+pyquery+threading 多线程测试代理可用不可用

分类:

技术点:

相关文章:

  • 2021-12-19
  • 2021-12-29
  • 2022-12-23
  • 2021-12-29
  • 2022-01-02
  • 2022-01-14
  • 2021-12-29
猜你喜欢
  • 2021-12-19
  • 2021-12-19
  • 2021-10-04
  • 2021-08-28
  • 2021-12-19
相关资源
相似解决方案