第一个爬虫代码

# !/usr/bin/python
#coding=GBK
import urllib.request
import re


#file=open("F:/python_workspace/爬虫/图片/0.jpg","wb")
#url="http://desk.zol.com.cn/2560x1600/"
def gethtml(url, timeout=10):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header.

    Args:
        url: Address of the resource to download.
        timeout: Seconds to wait on blocking network operations before
            the request is abandoned. Defaults to 10.

    Returns:
        bytes: The undecoded response body; decoding is left to the caller.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            request cannot be completed.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0"}
    req = urllib.request.Request(url, headers=header)
    # The context manager closes the response (and its socket) even if
    # read() raises, so repeated calls do not leak connections.
    with urllib.request.urlopen(req, timeout=timeout) as res:
        return res.read()

def getcata(html,reg):
    """Return every match of the regular expression *reg* in *html*.

    Args:
        html: Text to search (already decoded to ``str``).
        reg: Regular-expression pattern to look for.

    Returns:
        list: The result of ``findall`` -- the captured group for each
        match when the pattern has one group, or a tuple of groups when
        it has several.
    """
    return re.findall(reg, html)

def geturl(url):
    """Return the image URLs listed in the page at *url*.

    The page is downloaded with ``gethtml``, decoded as GBK, and scanned
    with ``getcata`` for ``imgsrc`` entries whose value starts with
    ``http:`` and ends in ``.png`` or ``.jpg``.

    Args:
        url: Address of the page to scan.

    Returns:
        list: The captured URL strings exactly as they appear in the page
        source; no unescaping or placeholder substitution is done here.
    """
    html = gethtml(url)
    # Plain quote delimiters: the backslash-escaped quotes that used to
    # surround this pattern were a syntax error.
    return getcata(html.decode("GBK"), r'imgsrc":"(http:.*?\.png|http:.*?\.jpg)')

def getpicurl(picurl, num, save_dir='F:/python_workspace/爬虫/图片/'):
    """Download one image and save it as ``<num>.png`` inside *save_dir*.

    Before the request is made, every backslash character is removed from
    *picurl* and the ``##SIZE##`` placeholder is replaced by ``2560x1600``.

    Args:
        picurl: Image URL as scraped from the page; it may contain
            backslash escapes and the ``##SIZE##`` placeholder.
        num: Value used as the output file's base name.
        save_dir: Directory that receives the file. Defaults to the
            location the script has always written to.

    Returns:
        None. The image bytes are written to disk.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            download fails.
        OSError: If the output file cannot be written.
    """
    import os  # local import: the file's import header does not provide os

    # Both patterns are fixed literals, so str.replace does the same job
    # as the earlier regex substitutions. Backslashes go first, as before.
    clean_url = picurl.replace("\\", "").replace("##SIZE##", "2560x1600")

    # Download before opening the output file: a failed request then
    # neither leaks a file handle nor leaves an empty file behind.
    with urllib.request.urlopen(clean_url, timeout=10) as res:
        data = res.read()

    with open(os.path.join(save_dir, str(num) + '.png'), "wb") as out:
        out.write(data)





#html=gethtml("http://desk.zol.com.cn/2560x1600/").decode(\'GBK\')
#cata_list=getcata(html,r\'href="(/[a-z]+/.*?2560x1600/)\')
#for i in cata_list:
 #   geturl(i)
#ss="http:\/\/desk.fd.zol-img.com.cn\/t_s##SIZE##\/g5\/M00\/0D\/03\/ChMkJlmVBaOIK26rAAJ3foZd400AAfwAADpPesAAneW914.jpg"
#getpicurl(ss)
# Crawl listing pages 1-46 of the 2560x1600 wallpaper section, follow every
# wallpaper link found on each page, and download every image it lists.
domain="http://desk.zol.com.cn"
# Number used for the next saved file; it advances only after a successful
# download, so a failed image does not consume a number.
count=0
for urlcount in range(1,47):
    url=domain+'/2560x1600/'+str(urlcount)+'.html'
    try:
        html=gethtml(url).decode('GBK')
        cata_list=getcata(html,r'href="(/bizhi/.*?\.html)" target="_blank" hidefocus="true"><img width="208px"')
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still stop the crawl; a broken listing page is skipped.
        print ("gethtml method error!")
        continue

    for i in cata_list:
        # Links on the listing page are site-relative; make them absolute.
        i = domain + i
        try:
            picurllist=geturl(i)
        except Exception:
            print("picurllist method error!")
            continue

        for j in picurllist:
            try:
                getpicurl(j,count)
            except Exception:
                print("getpicurl method error!")
                continue
            count += 1
            print (j)