protogenoi

目标:豆瓣读书(https://book.douban.com/),下载页面上的书籍封面图片。

import urllib.request 
import re                   #使用正则表达式


def getJpg(date):
    """Find ``<img>`` tags that carry both a ``.jpg`` source and an ``alt`` text.

    The match is confined to a single tag (it never crosses ``<`` or ``>``),
    so an image without ``alt`` cannot borrow the ``alt`` of a later tag.

    Args:
        date: Decoded HTML text of the page. The parameter name is kept for
            backward compatibility; it holds page data, not a date.

    Returns:
        A list of 3-tuples of strings, one per matching tag:
        ``('img src="<url>"', <text between the two attributes>,
        'alt="<title>"')``. Empty when nothing matches.
    """
    # "\." makes the dot before "jpg" literal; "[^<>]*?\s" allows any (or no)
    # attributes between src and alt as long as they stay inside the tag.
    pattern = r'(img src="http[^"]*?\.jpg")([^<>]*?\s)(alt="[^"]+")'
    return re.findall(pattern, date)

def downLoad(jpgUrl, sTitle, n,
             saveDir='C:\\Users\\74172\\source\\repos\\Python\\spidertest1\\images\\book.douban'):
    """Download one image and save it as ``<sTitle>.jpg`` inside *saveDir*.

    Any failure is reported with ``print`` and not raised, so one broken
    image does not stop the caller's loop.

    Args:
        jpgUrl: URL of the image to fetch.
        sTitle: Title used as the file name (without extension).
        n: Sequence number, only used in the progress message.
        saveDir: Target directory; created when missing. Defaults to the
            directory the script originally hard-coded.
    """
    import os  # local import so the block does not rely on a file-level one

    # Characters that are illegal in Windows file names would make the save
    # fail, so they are replaced before the path is built.
    safeTitle = re.sub(r'[\\/:*?"<>|]', '_', sTitle)
    try:
        os.makedirs(saveDir, exist_ok=True)
        urllib.request.urlretrieve(
            jpgUrl, os.path.join(saveDir, '%s.jpg' % safeTitle))
    except Exception as e:
        print(e)
    finally:
        print('图片%s下载操作完成' % n)

def getTitle(date):
    """Collect every ``title="...">`` fragment found in the page text.

    Args:
        date: Decoded HTML text of the page. The parameter name is kept for
            backward compatibility; it holds page data, not a date.

    Returns:
        A list of the matched fragments, each still including the leading
        ``title="`` and the trailing ``">``. Empty when nothing matches.
    """
    # "[^\"]+" accepts a title of any length; a bare "." matched only
    # one-character titles.
    return re.findall(r'title="[^"]+">', date)


if __name__ == '__main__':
    # Fetch the Douban Books front page and download every cover found on it.
    url = 'https://book.douban.com/'
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as res:
        date = res.read().decode('utf-8')
    # Each match is a 3-tuple: the img-src fragment, the text in between,
    # and a fragment that ends with alt="<title>".
    for n, jpginfo in enumerate(getJpg(date), start=1):
        srcPart, _, altPart = jpginfo
        # Strip the surrounding 'img src="' ... '"' to get the bare URL.
        jpgUrl = srcPart[len('img src="'):-1]
        # The title is whatever follows the last 'alt="', minus the end quote.
        sTitle = altPart.rpartition('alt="')[2][:-1]
        print(n, '--- url -->', jpgUrl)
        downLoad(jpgUrl, sTitle, n)
        

 后来又做了一些修改,并将书名写入 txt 文件中:

import urllib.request 
import re                   #使用正则表达式


def getJpg(html):
    """Return the ``.jpg`` URL of every ``<img>`` tag that also has an ``alt``.

    Exactly one URL is produced per matching tag, taken from its ``src``
    attribute; other URLs inside the same tag are ignored.

    Args:
        html: Decoded HTML text of the page.

    Returns:
        A list of URL strings in document order. Empty when nothing matches.
    """
    # The single capture group makes findall return the URL directly. The
    # match never crosses "<" or ">", so it stays inside one tag.
    pattern = r'img src="(http[^"]*?\.jpg)"[^<>]*?\salt="[^"]+"'
    return re.findall(pattern, html)

def downLoad(jpgUrl, sTitle, n,
             saveDir='C:/Users/74172/source/repos/Python/spidertest1/images/book.douban'):
    """Download one image and save it as ``<sTitle>.jpg`` inside *saveDir*.

    Errors are not caught here: they propagate to the caller after the
    progress message has been printed.

    Args:
        jpgUrl: URL of the image to fetch.
        sTitle: Title used as the file name (without extension) and in the
            progress message.
        n: Sequence number; accepted for interface compatibility, unused.
        saveDir: Target directory; created when missing. Defaults to the
            directory the script originally hard-coded.
    """
    import os  # local import so the block does not rely on a file-level one

    # Characters that are illegal in Windows file names would make the save
    # fail, so they are replaced before the path is built.
    safeTitle = re.sub(r'[\\/:*?"<>|]', '_', sTitle)
    try:
        os.makedirs(saveDir, exist_ok=True)
        urllib.request.urlretrieve(
            jpgUrl, os.path.join(saveDir, '%s.jpg' % safeTitle))
    finally:
        print('图片---%s----下载操作完成' % sTitle)

def getTitle(html):
    """Return the ``alt`` text of every ``<img>`` tag with a ``.jpg`` source.

    Args:
        html: Decoded HTML text of the page.

    Returns:
        A list of titles in document order, each still wrapped in its
        double quotes (for example ``'"Book One"'``), so the caller strips
        them with ``[1:-1]``. Empty when nothing matches.
    """
    # The capture group includes the quotes to keep the return format. The
    # match never crosses "<" or ">", so each title comes from the same tag
    # as its image source.
    pattern = r'img src="http[^"]*?\.jpg"[^<>]*?\salt=("[^"]+")'
    return re.findall(pattern, html)

def writeTxt(imageTitle, fileName=None):
    """Append one book title as a line to a UTF-8 text file.

    Args:
        imageTitle: The title to write; a newline is added after it.
        fileName: Path of the text file. When omitted, the name is derived
            from the module-level ``url`` as ``url[8:-5] + '.txt'``, which
            for 'https://book.douban.com/' gives 'book.douban.txt'.

    Raises:
        NameError: If ``fileName`` is omitted and no global ``url`` exists.
        OSError: If the file cannot be opened or written.
    """
    if fileName is None:
        fileName = url[8:-5] + '.txt'
    # "with" closes the file on every path and, unlike a manual
    # try/finally, cannot hit an unbound handle when open() itself fails.
    with open(fileName, "a", encoding="utf-8") as f:
        f.write(imageTitle + '\n')

if __name__ == '__main__':
    # Fetch the Douban Books front page, download every cover found on it
    # and record each title in a text file.
    # "url" stays a module-level name because writeTxt reads it.
    url = 'https://book.douban.com/'
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as res:
        html = res.read().decode('utf-8')
    urlJpgs = getJpg(html)
    imageTitle = getTitle(html)
    # zip stops at the shorter list, so a length mismatch between URLs and
    # titles cannot raise IndexError.
    for n, (urlJpg, quotedTitle) in enumerate(zip(urlJpgs, imageTitle)):
        # Titles arrive wrapped in double quotes; strip them.
        sTitle = quotedTitle[1:-1]
        print(n, '--- url -->', urlJpg)
        downLoad(urlJpg, sTitle, n)
        writeTxt(sTitle)
   

 

分类:

技术点:

相关文章: