#!/usr/local/bin/python3.7

import re
import urllib.request
import urllib.parse
import os
import time

"""
@File    :   qiushibaike.py
@Time    :   2020/04/06
@Author  :   Mozili

"""

"""
Crawl the images on a user-specified range of pages of Qiushibaike (糗事百科).

"""
def handler_request(url):
    """Fetch *url* and return the response body decoded as text.

    The request is sent with a desktop Safari ``User-Agent`` header instead
    of urllib's default one.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded with ``bytes.decode()``'s default codec
        (UTF-8).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15'
    }
    # Build the request object carrying the custom headers.
    req = urllib.request.Request(url=url, headers=headers)
    # Send the request; the context manager closes the response (and its
    # underlying connection) even if reading or decoding raises.
    with urllib.request.urlopen(req) as rep:
        # Read the whole body and decode it to text.
        return rep.read().decode()

def download_image(content):
    """Download every thumbnail image referenced in *content*.

    Image URLs are taken from ``<img src="...">`` tags that appear inside
    ``<div class="thumb">`` elements. Each image is saved under the relative
    directory ``Reptile/images`` using the last path segment of its URL as
    the file name. Nothing is created or downloaded when no image matches.

    Args:
        content: HTML text of a listing page.

    Returns:
        None.

    Raises:
        urllib.error.URLError: Propagated from ``urlretrieve`` when a
            download fails.
        OSError: If the target directory or file cannot be created.
    """
    pattern = re.compile(r'<div class="thumb">.*?<img src="(.*?)" .*?">.*?</div>', re.S)
    image_urls = pattern.findall(content)
    if not image_urls:
        return

    # Create the output directory once, including the missing parent
    # ("Reptile"); os.mkdir would fail when the parent does not exist.
    driname = 'Reptile/images'
    os.makedirs(driname, exist_ok=True)

    for image_url in image_urls:
        # Only protocol-relative URLs ("//host/path") need a scheme; a src
        # that already carries one must not be prefixed again.
        if image_url.startswith('//'):
            image_url = 'https:' + image_url
        # The file name is the last path segment of the image URL.
        image_name = image_url.split('/')[-1]
        image_path = driname + '/' + image_name
        print('图片{}开始下载....'.format(image_name))
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)
        # Save the response body straight to disk.
        urllib.request.urlretrieve(image_url, image_path)
def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input, then for each
    page (both ends inclusive) fetches the listing page, downloads its
    images, reports completion and waits one second before the next page.
    Nothing is fetched when the first page is greater than the last one.

    Raises:
        ValueError: If either answer is not a valid integer.
    """
    # Ask which pages to crawl.
    first_page = int(input('请输入起始页码:'))
    last_page = int(input('请输入结束页码:'))

    page_no = first_page
    while page_no <= last_page:
        page_url = 'https://www.qiushibaike.com/imgrank/page/' + str(page_no) + '/'
        # Fetch the listing page, then download the images it links to.
        download_image(handler_request(page_url))
        print('第%s页下载结束...'%page_no)
        time.sleep(1)
        # Two empty lines visually separate the output of consecutive pages.
        for _ in range(2):
            print()
        page_no += 1

# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

 

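# NOTE: everything below is leftover navigation text from the web page this
# script was copied from. It is not part of the program and is kept only as
# comments so that the file remains valid Python.
#
# Related articles:
#
#   • 2021-09-17
#   • 2021-09-07
#   • 2021-07-02
#   • 2022-02-04
#   • 2021-06-21
#   • 2021-05-22
#   • 2022-12-23
#   • 2021-07-14
# You may also like
#   • 2022-12-23
#   • 2021-06-25
#   • 2022-02-11
#   • 2021-11-18
#   • 2021-12-10
#   • 2021-04-18
#   • 2021-10-05
# Related resources
# Similar solutions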