准备工具:
Python 3.4.3
BeautifulSoup 4(bs4,用于解析 HTML)
一个用于演示的图片网站
import re
import time
from bs4 import BeautifulSoup
from urllib.request import urlopen, urlretrieve
def unzip(data, charset='utf8'):
    """Decompress gzip-compressed bytes and decode them to text.

    Args:
        data: Bytes holding a complete gzip stream.
        charset: Name of the text encoding used to decode the
            decompressed bytes. Defaults to ``'utf8'``.

    Returns:
        The decompressed payload decoded as ``str``.
    """
    # Imported locally so this helper carries its own dependency.
    import gzip

    return gzip.decompress(data).decode(charset)
def getHtml(url, charset='utf8'):
    """Fetch ``url`` and return the response body as text.

    The URL is printed before the request is made. If the response
    carries a ``Content-Encoding: gzip`` header the body is decompressed
    via ``unzip``; otherwise it is decoded directly.

    Args:
        url: Address to request; passed unchanged to ``urlopen``.
        charset: Name of the text encoding used to decode the body.
            Defaults to ``'utf8'``.

    Returns:
        The response body decoded as ``str``.
    """
    print(url)
    # The context manager closes the connection even when reading fails,
    # so the response is never leaked.
    with urlopen(url) as resp:
        encoding = resp.info().get('Content-Encoding')
        body = resp.read()
    if encoding == 'gzip':
        return unzip(body, charset)
    return body.decode(charset)
def getImg(url):
    """Download every ``<img>`` on the page at ``url`` whose ``src`` contains ``.jpg``.

    The page is fetched with ``getHtml`` and parsed with BeautifulSoup's
    ``html.parser``. Each matching image URL is printed and then saved
    under a relative file name built from the current timestamp
    (``<time.time()>.jpg``).

    Args:
        url: Address of the page to scan for images.
    """
    # Local import: resolves relative ``src`` values against the page URL
    # without touching the file-level import block.
    from urllib.parse import urljoin

    html = getHtml(url)
    soup = BeautifulSoup(html, 'html.parser')
    # Raw string keeps ``\.`` a regex escape rather than an invalid string
    # escape; ``find_all`` is the current name of the legacy ``findAll``.
    imgs = soup.find_all(name="img", attrs={"src": re.compile(r"\.jpg")})
    for img in imgs:
        # Absolute URLs pass through; relative ones become fetchable.
        imgUrl = urljoin(url, img['src'])
        print(imgUrl)
        urlretrieve(imgUrl, '%s.jpg' % time.time())
def main(url='http://www.mmjpg.com/'):
    """Run the scraper against a single page.

    Args:
        url: Page whose ``.jpg`` images are downloaded via ``getImg``.
            Defaults to the site used in the original script.
    """
    getImg(url)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
贴个执行结果图:(逃ing...)
转载请注明来源【IT黑名单】