import urllib.request;
import re;
def getHtmlCode(url):
    """Fetch the page at ``url`` and return its body decoded as UTF-8.

    Args:
        url: Address of the page to download; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even when read() raises,
    # so the underlying connection is never leaked.
    with urllib.request.urlopen(url) as page:
        htmlCode = page.read()
    return htmlCode.decode('utf-8')
# htmlCon = getHtml("https://tieba.baidu.com/p/1753935195")
# htmlCon = htmlCon.decode(\'utf-8\')
# pageFile = open("xh.txt", \'w\')
# pageFile.write(htmlCon)
# pageFile.close()
def getImg(htmlCode):
    """Download every ``.jpg`` image referenced in ``htmlCode``.

    An image reference is any text matching ``src="<url>.jpg" width``.
    Each matched URL is fetched with ``urllib.request.urlretrieve`` and
    written to a relative path ``0.jpg``, ``1.jpg``, ... (i.e. into the
    current working directory), numbered in order of appearance.

    Args:
        htmlCode: Page markup as a ``str``.

    Returns:
        The local file names written, in download order. The list is
        empty when the markup contains no matching image reference.
    """
    regImg = re.compile(r'src="(.+?\.jpg)" width')
    saved = []
    # enumerate supplies the running index used as the local file name.
    for x, img in enumerate(regImg.findall(htmlCode)):
        fileName = '%s.jpg' % x
        urllib.request.urlretrieve(img, fileName)
        saved.append(fileName)
    return saved
# htmlCode = getHtmlCode("https://tieba.baidu.com/p/1753935195")
# htmlCode = htmlCode.decode(\'utf-8\')
# Interactive entry point: ask for a page URL, download that page, then
# save the images it references via getImg().
print('---------网页图片抓取------------')
print('请输入url:')
url = input()
if not url:
    # Empty input: fall back to the built-in sample thread.
    print('---------没有输入地址,使用默认地址。--------')
    url = "https://tieba.baidu.com/p/1753935195"
print('-------正在抓取网页----------')
htmlCode = getHtmlCode(url)
print('-------正在下载图片---------')
getImg(htmlCode)
print('-------下载图片完成-------')
# Keep the console window open until the user confirms.
input('Press Enter to exit')
print('hello world')
# Learning source: https://www.cnblogs.com/Axi8/p/5757270.html (Baidu Tieba image scraping)