【发布时间】:2018-07-17 09:39:10
【问题描述】:
如何从这个亚马逊商品页面 (https://www.amazon.com/Windsor-Glider-Ottoman-White-Cushion/dp/B017XRDV5S/ref=sr_1_1?s=home-garden&ie=UTF8&qid=1520265105&sr=1-1&keywords=-gggg&th=1) 中提取平均评分信息(例如"4.0 颗星,满分 5 颗星"),并将其写入 csv 文件?该信息位于页面左侧、商品标题的下方。我认为这个平均星级是通过 JavaScript 动态生成的。我的代码附在下面。感谢您的帮助。
import csv
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from lxml import html
import sys
# Product pages to visit; each entry is a complete product URL.
links = [
    "https://www.amazon.com/Windsor-Glider-Ottoman-White-Cushion/dp/B017XRDV5S/ref=sr_1_1?s=home-garden&ie=UTF8&qid=1520265105&sr=1-1&keywords=-gggg&th=1",
]
# Proxy address per URL scheme; the script entry point joins these
# "scheme=address" pairs into Chrome's --proxy-server argument.
proxies = dict(
    http="http://218.50.2.102:8080",
    https="http://185.93.3.123:8080",
)
def get_information(driver, urls):
    """Write the average-rating text of each product page to a CSV file.

    Each URL in *urls* is loaded with *driver*, and the text of the link
    inside the ``averageCustomerReviews`` container is written next to the
    URL in ``csv/sort_products.csv`` (header row ``Review``, ``Link``).
    When the lookup fails for a page, the placeholder ``'No review'`` is
    written instead and scraping continues with the next URL.

    Args:
        driver: Selenium WebDriver; only ``get`` and ``find_element`` are used.
        urls: Iterable of page URLs to visit.

    Raises:
        OSError: If the output directory or file cannot be created.
    """
    import os  # local import: only needed to prepare the output directory

    rating_xpath = '//div[@id="averageCustomerReviews"]/span/span/span/a'
    output_path = 'csv/sort_products.csv'

    # open() does not create missing parent directories.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(output_path, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Review', 'Link'])
        for url in urls:
            driver.get(url)
            try:
                # "xpath" is the value of By.XPATH. The generic find_element
                # call works on Selenium 3 and 4, whereas
                # find_element_by_xpath was removed in Selenium 4.
                review = driver.find_element("xpath", rating_xpath).text
            except Exception:
                # Best effort: a page without the rating element (or any
                # other lookup failure) must not abort the whole run.
                # Unlike a bare ``except:``, this still lets
                # KeyboardInterrupt and SystemExit propagate.
                review = 'No review'
                print('No review')
            writer.writerow([review, url])
            print(f'{url}\n')
if __name__ == '__main__':
    # Script entry point: start Chrome behind the configured proxies,
    # scrape every URL in `links`, and always shut the browser down.
    chrome_options = webdriver.ChromeOptions()
    # Builds "http=<addr>;https=<addr>" from the `proxies` mapping.
    # NOTE(review): the embedded double quotes are passed to Chrome
    # literally (no shell strips them) — confirm the proxy is applied.
    chrome_options.add_argument('--proxy-server="%s"' % ';'.join(['%s=%s' % (k, v) for k, v in proxies.items()]))
    # Raw string: the path previously relied on invalid escape sequences
    # (\A, \D, \w, \c) being passed through unchanged.
    # NOTE(review): `executable_path` is kept for older Selenium releases;
    # recent Selenium 4 releases expect a Service object instead.
    driver = webdriver.Chrome(
        executable_path=r"C:\Users\Andrei-PC\Downloads\webdriver\chromedriver.exe",
        options=chrome_options,  # `chrome_options=` is the deprecated spelling
    )
    try:
        get_information(driver, links)
    finally:
        # Close the browser even when scraping raises, so no Chrome or
        # chromedriver process is left behind.
        driver.quit()
【问题讨论】:
标签: python selenium selenium-webdriver web-scraping