【发布时间】:2023-04-07 12:35:01
【问题描述】:
我正在使用 python 中的 selenium 进行抓取项目,但是当我尝试打印从 XPath 元素获取的文本时遇到错误。错误说:
打印(AdditionalCerts.text) AttributeError: 'str' 对象没有属性 'text'
这是我的代码:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome("/Users/nzalle/Downloads/chromedriver")
driver.get("https://directory.bcsp.org/")
count = int(input("Number of Pages to Scrape: "))
body = driver.find_element_by_xpath("//body")
profile_count = driver.find_elements_by_xpath("//div[@align='right']/a")
while len(profile_count) < count: # Get links up to "count"
body.send_keys(Keys.END)
sleep(1)
profile_count = driver.find_elements_by_xpath("//div[@align='right']/a")
for link in profile_count: # Calling up links
temp = link.get_attribute('href') # temp for
driver.execute_script("window.open('');") # open new tab
driver.switch_to.window(driver.window_handles[1]) # focus new tab
driver.get(temp)
# Scraping Code
try:
Name = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[1]/div[2]/div')
IssuedBy = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[1]/td[1]/div[2]')
CertificationNumber = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[1]/td[3]/div[2]')
CertfiedSince = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[1]/div[2]')
RecertificationCycle = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[3]/div[2]')
Expires = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[5]/td[1]/div[2]')
AccreditedBy = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[5]/td[3]/div[2]/a')
AdditionalCerts = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[1]/div[8]/div[1]/a/div/div')
except NoSuchElementException:
Name = "N/A"
IssuedBy = "N/A"
CertificationNumber = "N/A"
CertfiedSince = "N/A"
RecertificationCycle = "N/A"
Expires = "N/A"
AccreditedBy = "N/A"
AdditionalCerts = "N/A"
print(Name.text + " ; " + IssuedBy.text + " : " + CertificationNumber.text + " : " + CertfiedSince.text + " : " + RecertificationCycle + " : " + Expires.text + " : " + AccreditedBy.text + " : " + AdditionalCerts.text)
driver.close()
driver.switch_to.window(driver.window_handles[0])
driver.close()
请告诉我如何查看我的姓名、颁发者、认证编号、分机中的文本。谢谢你:)
【问题讨论】:
标签: python selenium selenium-webdriver web-scraping