Soar-Pang

一、工具:VS2015 + Python 3.5

import urllib.request 
import urllib.error
import re


def getcontent(url, page):
    """Download one listing page and print the body of every post on it.

    The page is requested with a desktop-browser User-Agent header, decoded
    as UTF-8 and scanned for ``<div class="content">...</div>`` blocks. The
    inner text of each block is printed unchanged (nested markup is not
    stripped).

    Args:
        url: Address of the page to download.
        page: Page number that ``url`` refers to. Not used by the function;
            kept so existing callers keep working.

    Returns:
        None. Results are written to standard output.

    A ``urllib.error.URLError`` (which includes ``HTTPError``) is caught and
    its ``reason`` is printed instead of being propagated.
    """
    head = ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko")
    opener = urllib.request.build_opener()
    opener.addheaders = [head]
    # Kept for backward compatibility: the opener is also installed globally,
    # so later urllib.request.urlopen() calls send the same header.
    urllib.request.install_opener(opener)
    try:
        # The context manager closes the response even if read/decode fails.
        with opener.open(url) as response:
            data = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        print(e.reason)
        return
    # re.S lets "." match newlines, because post bodies span several lines.
    contentpat = r'<div class="content">(.*?)</div>'
    for cont in re.findall(contentpat, data, re.S):
        print(cont)


# Visit listing pages 1 through 28 in order and print the posts found on each.
base_url = "https://www.qiushibaike.com/8hr/page/"
for page_number in range(1, 29):
    getcontent(base_url + str(page_number), page_number)

 模仿浏览器访问,用正则表达式匹配内容,并打印结果

分类:

技术点:

相关文章: