因为无法直接使用遨游的图片保存工具,所以自己用python写了一个。保存文件和文件名生成参考了别人的代码,加上了用BeautifulSoup解析网页内容的部分。
#coding=utf-8
from BeautifulSoup import BeautifulSoup
import urllib
import urllib2
import httplib, urlparse
import re
import random
# Destination directory for downloaded images (Windows path, trailing backslash).
save_path=u"d:\\temp\\"
# Placeholder for a page-title-derived name; not referenced anywhere in this file.
save_title=u""
def gGetFileName(url):
    """Return the last path segment of *url* (the bare file name).

    Degenerate inputs are mirrored back: None -> None, "" -> "".
    A URL ending in "/" yields "".
    """
    if url is None:  # identity check, not ==, for None
        return None
    if url == "":
        return ""
    # rsplit with maxsplit=1 takes the last segment without building the full list.
    return url.rsplit("/", 1)[-1]
def gRandFilename(type):
    """Return a random file name of the form '<32-char stem>.<type>'.

    The stem alternates 16 uppercase letters (A-Z) with 16 digits (0-9).
    An empty or None *type* defaults to "jpg".  (Parameter keeps the
    original name `type` for caller compatibility despite shadowing the
    builtin.)
    """
    # Fixes the pasted \' escapes that made the original a syntax error,
    # and collapses the two separate ==""/==None checks.
    if not type:
        type = "jpg"
    parts = []
    for _ in range(16):
        parts.append(chr(random.randint(65, 90)))   # uppercase letter
        parts.append(chr(random.randint(48, 57)))   # digit
    # join instead of repeated string +, which is quadratic.
    return ''.join(parts) + '.' + type
def gDownload(url):
try:
urlopen=urllib.URLopener()
fp = urlopen.open(url)
data = fp.read()
fp.close()
file=open(save_path + gRandFilename("jpeg"),\'w+b\')
file.write(data)
file.close()
print \'download successfully:\'+ url
except IOError:
print "download error!"+ url
def geturl(url):
    """Open the page at *url*, parse it as GBK-encoded HTML, and download
    the image referenced by every <input type="image"> element.

    Fixes the pasted \\' escapes that made the original a syntax error.
    """
    page = urllib2.urlopen(url)
    soup = BeautifulSoup(page, fromEncoding="GBK")
    # The target pages embed their pictures as image-type form inputs,
    # so the image URL is each element's 'src' attribute.
    for ele in soup.findAll('input', type="image"):
        gDownload(ele['src'])
if __name__ == "__main__":
    # Entry point: scrape every image-input element from this fixed demo page.
    start_page = "http://www.baidu.com/htm_data/16/1011/600682.html"
    geturl(start_page)
from BeautifulSoup import BeautifulSoup
import urllib
import urllib2
import httplib, urlparse
import re
import random
# Destination directory for downloaded images (Windows path, trailing backslash).
save_path=u"d:\\temp\\"
# Placeholder for a page-title-derived name; not referenced anywhere in this file.
save_title=u""
def gGetFileName(url):
    """Return the last path segment of *url* (the bare file name).

    Degenerate inputs are mirrored back: None -> None, "" -> "".
    A URL ending in "/" yields "".
    """
    if url is None:  # identity check, not ==, for None
        return None
    if url == "":
        return ""
    # rsplit with maxsplit=1 takes the last segment without building the full list.
    return url.rsplit("/", 1)[-1]
def gRandFilename(type):
    """Return a random file name of the form '<32-char stem>.<type>'.

    The stem alternates 16 uppercase letters (A-Z) with 16 digits (0-9).
    An empty or None *type* defaults to "jpg".  (Parameter keeps the
    original name `type` for caller compatibility despite shadowing the
    builtin.)
    """
    # Fixes the pasted \' escapes that made the original a syntax error,
    # and collapses the two separate ==""/==None checks.
    if not type:
        type = "jpg"
    parts = []
    for _ in range(16):
        parts.append(chr(random.randint(65, 90)))   # uppercase letter
        parts.append(chr(random.randint(48, 57)))   # digit
    # join instead of repeated string +, which is quadratic.
    return ''.join(parts) + '.' + type
def gDownload(url):
try:
urlopen=urllib.URLopener()
fp = urlopen.open(url)
data = fp.read()
fp.close()
file=open(save_path + gRandFilename("jpeg"),\'w+b\')
file.write(data)
file.close()
print \'download successfully:\'+ url
except IOError:
print "download error!"+ url
def geturl(url):
    """Open the page at *url*, parse it as GBK-encoded HTML, and download
    the image referenced by every <input type="image"> element.

    Fixes the pasted \\' escapes that made the original a syntax error.
    """
    page = urllib2.urlopen(url)
    soup = BeautifulSoup(page, fromEncoding="GBK")
    # The target pages embed their pictures as image-type form inputs,
    # so the image URL is each element's 'src' attribute.
    for ele in soup.findAll('input', type="image"):
        gDownload(ele['src'])
if __name__ == "__main__":
    # Entry point: scrape every image-input element from this fixed demo page.
    start_page = "http://www.baidu.com/htm_data/16/1011/600682.html"
    geturl(start_page)