.request import ssl import re import os #博客地址:https://blog.csdn.net/qq_36374896 def writeFile1Bytes(htmlBytes,toPath): with open(toPath,"wb") as f: f.write(htmlBytes) def writeFile1Str(htmlBytes,toPath): with open(toPath,"wb") as f: f.write(htmlBytes) def getHtmlBytes(url): headers = { "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36" } req = urllib.request.Request(url, headers=headers) context = ssl._create_unverified_context() response = urllib.request.urlopen(req,context=context) return response.read() def qqCrawler(url, toPath): htmlBytes = getHtmlBytes(url) # writeFile1Bytes(htmlBytes,r"C:\Users\admin\Desktop\360学习\爬虫\image\qq1.html") # writeFile1Str(htmlBytes,r"C:\Users\admin\Desktop\360学习\爬虫\image\qq2.txt") htmlStr = str(htmlBytes) pat = r"[1-9]\d{4,9}" re_qq= re.compile(pat) qqsList = re_qq.findall(htmlStr) qqsList = list(set(qqsList)) print(qqsList) print(len(qqsList)) url="http://tieba.baidu.com/p/5471533241?traceid=" toPath=r"C:\Users\admin\Desktop\360学习\爬虫\image\qq.txt" qqCrawler(url,toPath)

代码年代久远,注释当时没写,现在懒得写了

相关文章:

  • 2021-08-15
  • 2021-05-31
  • 2022-01-19
  • 2021-12-20
  • 2022-01-11
  • 2021-11-21
猜你喜欢
  • 2022-12-23
  • 2021-12-01
  • 2021-11-23
  • 2022-01-01
  • 2022-02-22
  • 2021-09-05
相关资源
相似解决方案