import requests
import os
import re
from lxml import etree
from urllib import request


# Characters stripped from an image's alt text before it becomes a file name.
# Covers the punctuation the original filter removed (ASCII forms and,
# presumably intended, their full-width counterparts) plus characters that
# are path separators or invalid in file names on common platforms.
_UNSAFE_NAME_CHARS = re.compile(r'[?\uff1f.,\uff0c\u3002!\uff01\\/:*"<>|]')


def _build_filename(alt_text, img_url):
    """Return the local file name to use for one image.

    The name is the sanitized ``alt_text`` followed by the extension taken
    from the URL path; anything after a ``!`` in that extension is dropped.
    When the alt text is missing or empty after sanitizing, the URL's own
    base name is used instead so the result is never just an extension.
    """
    from urllib.parse import urlsplit

    # Look only at the path component so a query string cannot leak into
    # the suffix.
    base = os.path.basename(urlsplit(img_url).path)
    stem, ext = os.path.splitext(base)
    suffix = ext.split("!")[0]

    name = _UNSAFE_NAME_CHARS.sub("", alt_text or "").strip()
    if not name:
        name = _UNSAFE_NAME_CHARS.sub("", stem) or "image"
    return name + suffix


def get_detail(url):
    """Download the images listed on one page into the ``imgs`` directory.

    Fetches ``url``, selects every ``<img>`` inside the
    ``page-content text-center`` div whose ``class`` attribute is not
    ``gif``, and saves each one under a name derived from its ``alt`` text.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        Exceptions from ``requests.get`` and ``urllib.request.urlretrieve``
        are not caught and propagate to the caller.

    NOTE: images that share the same alt text still map to the same file
    name, so a later download overwrites an earlier one.
    """
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Safari/537.36"
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    rep = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(rep.text)
    if html is None:
        # NOTE(review): etree.HTML appears to return None for a body with no
        # parseable markup; nothing to download in that case.
        return
    imgs = html.xpath('//div[@class="page-content text-center"]//img[@class!="gif"]')

    # urlretrieve does not create missing directories.
    os.makedirs("imgs", exist_ok=True)

    for img in imgs:
        img_url = img.get("data-original")
        if not img_url:
            # No image address on this tag; skip it instead of crashing.
            continue
        # Resolve relative or protocol-relative addresses against the page.
        img_url = urljoin(url, img_url)
        filename = _build_filename(img.get("alt"), img_url)
        # Download to the local directory.
        request.urlretrieve(img_url, os.path.join("imgs", filename))


def main(first_page=1, last_page=100):
    """Scrape a range of listing pages, one ``get_detail`` call per page.

    Args:
        first_page: Number of the first listing page to fetch (default 1).
        last_page: Number of the last listing page to fetch, inclusive
            (default 100).
    """
    for page in range(first_page, last_page + 1):
        url = "http://www.doutula.com/photo/list/?page={}".format(page)
        get_detail(url)


# Start the scrape only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()
# View Code
#
# Related articles:
#
#   • 2022-01-03
#   • 2022-12-23
#   • 2021-07-01
#   • 2021-12-10
#   • 2021-10-05
#   • 2021-09-17
#   • 2021-11-14
#   • 2021-11-12
# You may also like
#   • 2021-11-23
#   • 2022-01-03
#   • 2021-08-10
#   • 2021-12-12
#   • 2021-11-11
#   • 2022-12-23
# Related resources
# Similar solutions