【发布时间】:2021-05-12 16:53:01
【问题描述】:
请参阅下面的代码。我认为除了最后一行之外,您几乎可以忽略所有内容。
from selenium import webdriver
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import xlsxwriter
from datetime import datetime
import time
# Launch Chrome through a local chromedriver binary and open the FINRA bond center.
chrome_driver = os.path.abspath('C:/Users/ross/Desktop/chromedriver.exe')
browser = webdriver.Chrome(chrome_driver)
browser.get('https://finra-markets.morningstar.com/BondCenter/Default.jsp')

# CSS prefixes shared by every lookup inside the search-result grid.
RESULT_GRID = (
    "#ms-finra-search-results > div > div.qs-resultData"
    " > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h"
)
SORT_HEADER = RESULT_GRID + " > div.rtq-grid-hd > div > div:nth-child(7) > div"
GRID_ROWS = RESULT_GRID + " > div.rtq-scrollpanel > div.rtq-grid-scroll"

# One waiter is enough: each until() call gets its own 60-second budget.
wait = WebDriverWait(browser, 60)


def _wait_clickable(css_selector):
    """Return the element matching ``css_selector`` once it is clickable.

    Raises selenium's TimeoutException if that does not happen within the
    60-second limit of ``wait``.
    """
    return wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)))


# Open the third tab of the bond center, search for "STWD", submit the form
# and confirm the agreement prompt.
_wait_clickable('#TabContainer > div > div.rtq-tab-wrap > div.rtq-tab-menus-wrap > ul > li:nth-child(3) > a > span').click()
_wait_clickable('#firscreener-cusip').send_keys("STWD")
_wait_clickable("#ms-finra-advanced-search-form > div.ms-finra-advanced-search-btn > input:nth-child(2)").click()
_wait_clickable("#ms-agreement > input").click()

# Click the seventh column header twice, pausing 2 seconds after each click.
# NOTE(review): presumably this flips the sort order and the pauses let the
# grid redraw - confirm against the live page.
for _ in range(2):
    _wait_clickable(SORT_HEADER).click()
    time.sleep(2)

# Full text of the result grid, and the number of row <div>s it contains.
whole_chart = _wait_clickable(GRID_ROWS).text
parent = browser.find_element(By.XPATH, '//*[@id="ms-finra-search-results"]/div/div[3]/div[1]/div[1]/div[2]/div[2]/div')
count_divs = len(parent.find_elements(By.XPATH, "./div"))

# Only the first row is processed for now; range(1) is a deliberate
# placeholder until the loop body is known to work.
for row_num in range(1):
    # nth-child() is 1-based, hence row_num + 1.
    row_css = "{} > div > div:nth-child({})".format(GRID_ROWS, row_num + 1)

    symbol = _wait_clickable(row_css + " > div:nth-child(3)").text
    maturity = _wait_clickable(row_css + " > div:nth-child(7)").text
    moody_rating = _wait_clickable(row_css + " > div:nth-child(8)").text
    sandp_rating = _wait_clickable(row_css + " > div:nth-child(9)").text
    bond_yield = _wait_clickable(row_css + " > div:nth-child(11)").text

    # Skip rows with any blank field, or with a "WR" / "NR" rating.
    fields = (symbol, maturity, moody_rating, sandp_rating, bond_yield)
    if all(field.strip() for field in fields) and moody_rating != "WR" and sandp_rating != "NR":
        handles_before = set(browser.window_handles)
        _wait_clickable(row_css + " > div:nth-child(2) > div > a").click()

        # Selenium keeps driving the window it was already focused on. If the
        # bond link opened the detail page in a new tab/window, move focus
        # there before looking for elements on that page.
        # NOTE(review): whether this site opens a new tab is unverified; when
        # no new handle appears this is a no-op.
        new_handles = [h for h in browser.window_handles if h not in handles_before]
        if new_handles:
            browser.switch_to.window(new_handles[-1])

        _wait_clickable("#tradeHistory_link").click()
基本上,脚本会打开这个页面:https://finra-markets.morningstar.com/BondCenter/BondDetail.jsp?ticker=C724231&symbol=STWD4571190,我希望 Selenium 单击页面上的“交易历史”(Trade History)链接,因此使用了选择器“#tradeHistory_link”。但不知为何,点击没有生效;尝试读取该元素的文本时,也什么都读不到。我还试过先按链接文本“交易历史”在页面上查找元素,再单击它,同样不行。最后,我怀疑页面可能还没有完全加载,于是在单击“#tradeHistory_link”之前加了 time.sleep(5),但仍然无济于事。
这是怎么回事?
【问题讨论】:
-
请注意,Finra 网站的使用条款明确禁止从其网站抓取、收集和/或创建您自己的数据库。见finra.org/terms-of-use
-
使用 `for row_num in range(1):` 的目的是什么?
-
@vitaliis 我计划遍历全部 count_divs 行,但想先确认 for 循环里的逻辑能正常工作,然后再扩展到所有行。
标签: python html selenium web-scraping clickable