from urllib import request
import random

# Pool of desktop-browser User-Agent strings; one is chosen at random per request.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
)


def _output_file_name(url):
    """Return the local file name for *url*: ``"05_"`` plus its last path segment."""
    # Imported locally so this helper does not depend on the file's import block.
    from urllib.parse import urlsplit

    # Use only the path component so a query string or fragment never ends up
    # in the file name ("?" is not a legal file-name character on Windows).
    page = urlsplit(url).path.rsplit("/", 1)[-1]
    # A URL without a page segment (e.g. "http://host/") falls back to
    # "index.html" rather than producing the bare name "05_".
    return "05_" + (page or "index.html")


def spider(url, timeout=30):
    """Download *url* and save it as ``05_<page name>`` in the current directory.

    The request carries a randomly chosen browser User-Agent header, which is
    printed together with the URL. The response body is decoded as UTF-8 and
    written back out as UTF-8 text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on a blocking network operation before giving
            up, so that a stalled server cannot hang the crawl indefinitely.

    Raises:
        OSError: If the request fails or times out
            (``urllib.error.URLError`` is a subclass).
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    user_agent = random.choice(_USER_AGENTS)
    print(user_agent, url)

    req = request.Request(url, headers={"User-Agent": user_agent})
    # The context manager closes the response even if reading or decoding fails.
    with request.urlopen(req, timeout=timeout) as response:
        html = response.read().decode("utf-8")

    with open(_output_file_name(url), "w", encoding="utf-8") as f:
        f.write(html)


if __name__ == "__main__":
    # Pages to download; they are fetched one after another in this order.
    page_urls = (
        "http://www.langlang2017.com/index.html",
        "http://www.langlang2017.com/route.html",
        "http://www.langlang2017.com/FAQ.html",
    )
    for page_url in page_urls:
        spider(page_url)

 

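# Related articles (相关文章):
#
#   • 2021-10-20
#   • 2021-05-22
#   • 2022-03-07
#   • 2022-12-23
#   • 2022-12-23
#   • 2021-12-21
#   • 2022-12-23
# You may also like (猜你喜欢)
#   • 2021-06-21
#   • 2021-09-15
#   • 2021-07-12
#   • 2022-12-23
#   • 2022-02-05
#   • 2021-05-28
#   • 2021-12-04
# Related resources (相关资源)
# Similar solutions (相似解决方案)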