一)

import  requests

def download(url, num_tries=2, user_agent='wswp', proxies=None, timeout=None):
    """Download ``url`` and return the page body as text.

    The response is always decoded as UTF-8. A response with a 5xx status
    is retried up to ``num_tries`` more times, reusing the same user
    agent, proxies and timeout on every attempt.

    :param url: URL to request.
    :param num_tries: number of retries allowed after a 5xx response.
    :param user_agent: value sent in the ``User-Agent`` request header.
    :param proxies: proxy mapping passed to ``requests.get``
        (dict keyed by scheme, e.g. ``http`` / ``https``), or ``None``.
    :param timeout: timeout passed to ``requests.get``; ``None`` (the
        default) waits indefinitely, matching the previous behaviour.
    :return: the decoded page text, or ``None`` if the request raised a
        ``requests`` exception or the final status code was >= 400.
    """
    headers = {'User-Agent': user_agent}
    try:
        res = requests.get(url, headers=headers, proxies=proxies,
                           timeout=timeout)
    except requests.exceptions.RequestException as e:
        print('Download error:', e)
        return None

    res.encoding = 'utf-8'
    if res.status_code < 400:
        return res.text

    # Only server-side (5xx) errors are retried; a 4xx response will not
    # change on a repeat request. The ``> 0`` guard stops a negative
    # ``num_tries`` from recursing without bound.
    if num_tries and num_tries > 0 and 500 <= res.status_code < 600:
        # Forward every option so the retry is sent exactly like the
        # original request (previously user_agent/proxies were dropped).
        return download(url, num_tries - 1, user_agent=user_agent,
                        proxies=proxies, timeout=timeout)
    return None

# Demo: fetch the Baidu home page and print what download() returns
# (the page text, or None when the request fails or ends in an HTTP error).
print(download(url='http://www.baidu.com'))

 

相关文章:

  • 2022-01-08
  • 2022-12-23
  • 2021-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-12-18
  • 2021-10-05
  • 2021-08-04
猜你喜欢
  • 2022-03-08
  • 2021-11-05
  • 2021-07-18
  • 2021-09-02
  • 2022-02-10
  • 2021-08-03
相关资源
相似解决方案