import re import urllib.request def getHtml(url): page = urllib.request.urlopen(url) html = page.read() return html def getImg(html): html = html.decode('GBK') reg = r'src="(.*?\.jpg)" width'... 展开
我有更好的答案
import os
import re
import urllib.request
def Schedule(a, b, c):
    """Print download progress; usable as a ``urlretrieve`` report hook.

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size of the download in bytes. ``urlretrieve`` passes
            ``-1`` when the server does not report a size.

    Prints the completed percentage with two decimals, preceded by
    '完成!' once the transfer has reached the total size. When the total
    size is unknown or zero no percentage can be computed, so the number
    of bytes transferred so far is printed instead.
    """
    if c <= 0:
        # Unknown (-1) or empty (0) total: dividing would give a negative
        # percentage or raise ZeroDivisionError.
        print('%d bytes' % (a * b))
        return
    per = 100.0 * a * b / c
    if per >= 100:
        # a * b counts whole blocks and can overshoot c on the last,
        # partly filled block, so clamp to 100.
        per = 100
        print('完成!')
    print('%.2f%%' % per)
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The undecoded response body as ``bytes``.
    """
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(url) as page:
        return page.read()
def getImg(html, save_dir='D:\\test'):
    """Download every ``.jpg`` image referenced in a page into *save_dir*.

    Args:
        html: Page source as UTF-8 encoded ``bytes``.
        save_dir: Directory that receives the files; it is created when
            missing. Defaults to the previously hard-coded location, so
            existing callers are unaffected.

    Image addresses are taken from ``src="<url>.jpg" width`` attributes
    and saved as ``0.jpg``, ``1.jpg``, ... in the order they appear.
    Download progress is reported through ``Schedule``.
    """
    text = html.decode('utf-8')
    image_urls = re.findall(r'src="(.*?\.jpg)" width', text)
    if not image_urls:
        # Nothing to download; leave the file system untouched.
        return
    # urlretrieve does not create missing directories by itself.
    os.makedirs(save_dir, exist_ok=True)
    for index, image_url in enumerate(image_urls):
        target = os.path.join(save_dir, '%s.jpg' % index)
        urllib.request.urlretrieve(image_url, target, Schedule)
html = getHtml('http://tieba.baidu.com/p/741081023')
# getImg saves the images to disk and returns nothing, so printing its
# result would only emit a spurious "None".
getImg(html)
|
运行结果: