从"百度图片(http://image.baidu.com/)"的首页下载图片
# -*- coding: utf-8 -*-
"""Download the ``.jpg`` images referenced by ``<img src="...">`` tags on a page.

By default the page is the Baidu Images home page (``url`` below) and the
images are saved under ``./image/``.  The code is written to run unchanged on
both Python 2 and Python 3.
"""
import os
import re
import sys
import urllib

try:  # Python 3 layout of the urllib package
    from urllib.parse import urljoin as _urljoin
    from urllib.request import urlopen as _urlopen
    from urllib.request import urlretrieve as _urlretrieve
except ImportError:  # Python 2 layout
    from urlparse import urljoin as _urljoin
    _urlopen = urllib.urlopen
    _urlretrieve = urllib.urlretrieve

# Page that is scanned for images when get_image() is called without arguments.
url = 'http://image.baidu.com/'

# Captures the value of an ``img src="..."`` attribute up to its ".jpg" suffix.
# ``[^"]`` keeps the match inside one attribute value; a greedy ``.+`` would
# run on to the last ".jpg" of the line and merge several tags into one URL.
_IMAGE_PATTERN = re.compile(r'img src="([^"]+\.jpg)')


def get_html(url):
    """Fetch *url* and return the response body exactly as read.

    The value is whatever ``read()`` yields: a byte string (``str`` on
    Python 2, ``bytes`` on Python 3).  The connection is always closed.
    """
    page = _urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def get_image(page_url=None, save_dir='./image/'):
    """Download every ``.jpg`` image referenced on a page into *save_dir*.

    Args:
        page_url: Address of the page to scan.  ``None`` (the default) means
            the module-level ``url``.
        save_dir: Directory the images are written to; it is created when it
            does not exist yet.

    Returns:
        The number of images that were downloaded successfully.

    A failure on one image is reported on stdout and does not stop the
    remaining downloads.
    """
    if page_url is None:
        page_url = url

    html = get_html(page_url)
    if not isinstance(html, str):
        # Python 3 hands back bytes; the pattern is a text pattern.  Image
        # URLs are effectively ASCII, so undecodable bytes are just replaced.
        html = html.decode('utf-8', 'replace')
    image_list = _IMAGE_PATTERN.findall(html)

    # urlretrieve does not create missing directories.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    downloaded = 0
    for image in image_list:
        # Resolve relative and protocol-relative ("//host/x.jpg") references;
        # absolute URLs pass through unchanged.
        image = _urljoin(page_url, image)
        filename = os.path.join(save_dir, image.split('/')[-1])
        print('开始下载 %s' % image)
        try:
            _urlretrieve(image, filename, download)
        except Exception:
            # Best effort per image.  ``Exception`` (rather than a bare
            # ``except``) lets Ctrl-C / SystemExit still stop the script.
            print('Unexpected error: %s' % (sys.exc_info()[0],))
        else:
            # Only announce completion when the download really succeeded.
            downloaded += 1
            print('下载完成')

    print('总共下载: %d 张图片' % downloaded)
    return downloaded


def download(a, b, c):
    """Progress hook for ``urlretrieve``: print the percentage downloaded.

    Args:
        a: Number of blocks transferred so far.
        b: Block size in bytes.
        c: Total size in bytes.  ``urlretrieve`` passes -1 when the size is
            unknown, and 0 for an empty resource.

    Nothing is printed when *c* is not positive, because no meaningful
    percentage exists (and dividing by 0 would raise).
    """
    if c <= 0:
        return
    # The last block is usually only partly filled, so cap the value at 100.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)


if __name__ == '__main__':
    get_image()