【发布时间】:2020-09-07 08:18:10
【问题描述】:
示例:https://apps1.lavote.net/camp/comm.cfm?&cid=82
使用 Selenium,我单击第一个 497 表格。在我的浏览器中,将打开一个新的 pdf 选项卡。在 selenium 中,似乎什么都没有发生。
这是我的代码,有些部分已编辑。
def scrape(session_key=None):
options = Options()
options.headless = True
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.dir", os.path.join(base_dir, 'reports'))
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.helperApps.alwaysAsk.force", False);
profile.set_preference("browser.download.manager.showAlertOnComplete", False)
profile.set_preference("browser.download.manager.showWhenStarting", False);
profile.set_preference('browser.helperApps.neverAsk.saveToDisk','application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip,application/x-rar-compressed, application/octet-stream,application/msword,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.ms-excel,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/rtf,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-excel,application/vnd.ms-word.document.macroEnabled.12,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/xls,application/msword,text/csv,application/vnd.ms-excel.sheet.binary.macroEnabled.12,text/plain,text/csv/xls/xlsb,application/csv,application/download,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/octet-stream')
profile.set_preference("pdfjs.disabled", True)
profile.set_preference("plugin.disable_full_page_plugin_for_types", "application/pdf")
driver = webdriver.Firefox(firefox_profile=profile, options=options)
driver.get(magic_url)
committee_table = driver.find_elements_by_css_selector('table')[2]
links = [link.get_attribute('href') for link in committee_table.find_elements_by_tag_name('a')]
driver.get('https://apps1.lavote.net/camp/comm.cfm?&cid=82')
forms_table = driver.find_elements_by_css_selector('table')[1]
forms_table_trs = forms_table.find_elements_by_css_selector('tr')
for i, row in enumerate(forms_table_trs):
if i > 0:
cells = row.find_elements_by_css_selector('td')
print(1)
try:
link = cells[2].find_elements_by_tag_name('a')[0]
link.click()
pdfs = glob.glob(os.path.join(base_dir, 'scraper/*.pdf'))
latest_pdf_file = max(pdfs, key=os.path.getctime)
parse_funcs[form_type](latest_pdf_file)
except Exception as e:
print(e)
您可能已经猜到,没有 pdf。它们没有被下载。这就是我在这里的原因。我该怎么做?
【问题讨论】: