No-Host-Given

import urllib

import urllib2

import re

import time

# Directory prefix used for both the input text file and the downloaded
# images; it is concatenated directly with file names, so it ends with "/".
path = "G:/123/"

# Name of the text file (inside ``path``) that is scanned for image URLs.
path_file = "1.txt"

def geturllist(text):
    """Return all image addresses found in *text*.

    An address is a run of non-space characters that starts with
    ``https:`` and ends with ``hd.jpg``.

    Args:
        text: The text to scan. A byte string is decoded as UTF-8 first;
            already-decoded text is used as-is.

    Returns:
        A list of the matched address strings, in the order they occur
        (duplicates are kept).
    """
    # Decode only byte strings: calling .decode() on already-decoded text
    # forces an implicit ASCII encode on Python 2 and fails on Python 3.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    # The dot in "hd.jpg" is escaped so it only matches a literal ".";
    # the lookahead needs no quantifier because it is zero-width.
    # NOTE(review): "[^ ]+" is greedy and only stops at a space, so addresses
    # separated by newlines or punctuation are returned as one match --
    # confirm the input format before tightening this.
    return re.findall(r'(?=https:)[^ ]+(?<=hd\.jpg)', text)

# Read the whole input file. The ``with`` block closes the file on exit,
# so no explicit close() call is needed.
with open(path + path_file, "r") as f:
    textall = f.read()

urllist = geturllist(textall)

# Remove duplicate values from the list, keeping first-seen order so the
# numbered output files follow the order of the addresses in the input.
urllistonly = []
seen = set()
for url in urllist:
    if url not in seen:
        seen.add(url)
        urllistonly.append(url)

# Download every unique picture as <path><index>.jpg.
for i, pic in enumerate(urllistonly):
    pic = pic.replace('002', '')
    # Addresses in the input use backslashes ("https:\\host\dir\file.jpg");
    # urllib cannot find a host in that form ("no host given"), so rewrite
    # the scheme prefix first and then every remaining backslash.
    pic = pic.replace('https:\\\\', 'http://')
    pic = pic.replace('\\', '/')
    urllib.urlretrieve(pic, path + str(i) + ".jpg")

# Parenthesised form prints the same text on Python 2 and Python 3.
print('End!')

注意

https:\\pic2.com\50\vc9ddaa4c92da.jpg

这种用反斜杠书写的地址不是合法的 URL（没有 “//”，urllib 无法从中解析出主机名），所以会报错：no host given。

http://pic2.com/50/vc9ddaa4c92da.jpg

把地址改成这种以 http:// 开头、用正斜杠分隔的形式，就不会报错了。