zhujunzoe

1
# -*- coding: utf-8 -*-
"""Scrape the guazi.com used-car listing with Selenium + PhantomJS.

For every listing slot on each result page the script reads the title, the
info block and the price, appends them as one GBK-encoded CSV row, and then
clicks the element with class ``next`` until that is no longer possible.

The code runs on both Python 2.7 and Python 3, which is why CSV quoting is
done by hand instead of through the ``csv`` module (whose text/bytes
requirements differ between the two).
"""
import io
import time

# Request header (User-Agent) that PhantomJS should send.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
)
URL = 'https://www.guazi.com/www/buy/i7/'
# Raw string: the backslashes are path separators, not escape sequences.
OUTPUT_PATH = r"C:\Users\Administrator\Desktop\guazi.csv"
ITEMS_PER_PAGE = 40       # listing slots probed on every page
PAGE_DELAY_SECONDS = 1.5  # pause after clicking "next" so the page can load


def item_xpaths(index):
    """Return the (title, info, price) XPaths for listing slot *index* (1-based)."""
    anchor = "/html/body/div[4]/ul/li[" + str(index) + "]/a"
    return (
        anchor + "/h2",
        anchor + "/div[@class='t-i']",
        anchor + "/div[2]/p",
    )


def format_csv_row(fields):
    """Join *fields* into one CSV line terminated by a newline.

    A field containing a comma, a double quote or a line break is wrapped in
    double quotes (with embedded quotes doubled) so it stays a single column.
    Fields without those characters are written unchanged.
    """
    cells = []
    for field in fields:
        if any(ch in field for ch in u',"\r\n'):
            field = u'"' + field.replace(u'"', u'""') + u'"'
        cells.append(field)
    return u','.join(cells) + u'\n'


def main():
    """Walk the result pages and append every listing to ``OUTPUT_PATH``."""
    # Imported here rather than at module level so the pure helpers above can
    # be imported and tested on a machine without Selenium installed.
    from selenium import webdriver
    from selenium.common.exceptions import (
        NoSuchElementException,
        WebDriverException,
    )
    from selenium.webdriver import DesiredCapabilities

    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap['phantomjs.page.settings.userAgent'] = USER_AGENT

    # The capabilities must be handed to the driver, otherwise the custom
    # User-Agent is never applied.
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        driver.get(URL)
        # Open the file once for the whole run; io.open encodes the text to
        # GBK on both Python 2 and Python 3.
        with io.open(OUTPUT_PATH, 'a', encoding='gbk') as out:
            while True:
                for index in range(1, ITEMS_PER_PAGE + 1):
                    try:
                        title, info, price = [
                            driver.find_element_by_xpath(xpath).text
                            for xpath in item_xpaths(index)
                        ]
                    except NoSuchElementException:
                        # Slot is absent or lacks one of the three parts
                        # (e.g. a page with fewer entries): skip it instead
                        # of aborting the whole run.
                        continue
                    print(u"正在保存数据 ------" + title)
                    out.write(format_csv_row((title, info, price)))
                try:
                    driver.find_element_by_class_name("next").click()
                except WebDriverException:
                    # No usable "next" element: treat as the last page.
                    break
                time.sleep(PAGE_DELAY_SECONDS)
    finally:
        # Always shut PhantomJS down, even when scraping fails midway.
        driver.quit()


if __name__ == "__main__":
    main()

 

直接用 requests 爬取时,返回的是 `<Response [203]>`(HTTP 状态码 203),于是改用 selenium + PhantomJS。

分类:

技术点:

相关文章:

  • 2021-11-04
  • 2021-07-18
  • 2022-12-23
  • 2021-11-04
  • 2021-12-12
  • 2022-12-23
  • 2021-05-31
猜你喜欢
  • 2021-11-04
  • 2021-12-15
  • 2021-11-18
  • 2021-11-04
  • 2021-11-04
  • 2021-05-06
相关资源
相似解决方案