【发布时间】:2018-05-01 10:13:44
【问题描述】:
我是 python 新手,但需要修改其他人创建的代码。我无法发布完整的代码,但我在下面发布了大部分代码:
from bs4 import BeautifulSoup
import datetime
import getpass
from gmail import Gmail
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementNotVisibleException
from time import sleep
from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.support import expected_conditions as EC
def soupify(session, url):
"""
Makes parse-able HTML from any given URL.
:param session: requests.Session()
:param url: str
:return: BeautifulSoup object
"""
while True:
try:
r = session.get(url)
break
except Exception as e:
print(e)
return BeautifulSoup(r.content, 'html.parser')
def create_http_session():
"""
Quick little function for returning a requests.Session() instance
with a properly set User-Agent header.
:return: requests.Session()
"""
session = requests.Session()
session.headers.update({'User-Agent':
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
})
return session
def retrieve_hidden_input(soup, name):
return soup.find('input', attrs={
'type': 'hidden',
'name': name
}).get('value')
class AmericanHomesScraper:
def __init__(self, username, password, testing):
self.username = username
self.password = password
self.testing = testing
self.chrome_session = webdriver.PhantomJS()
self.chrome_session.maximize_window()
self.g = Gmail()
self.g.login(username, password)
self.index = 0
def check_for_new_emails_from_sender(self,
sender='info@mailer.netflix.com'):
self.index += 1
if self.index % 1000 == 0:
print('Checking emails - {0}.'.format(datetime.datetime.now()))
elif self.index == 1:
print('Checking emails - {0}.'.format(datetime.datetime.now()))
else:
pass
for s in sender:
messages = self.g.inbox().mail(sender=s,
unread=True)
for message in messages:
message.read()
print(
'Email from {0}: {1}.'.format(s, datetime.datetime.now()))
self.check_for_listings()
self.g.logout()
self.g = Gmail()
self.g.login(self.username, self.password)
def login(self, username, ahsPassword):
self.ahsPassword = ahsPassword
self.chrome_session.get('https://www.aaa.com/Login.aspx')
self.chrome_session.find_element_by_xpath(
'//*[@id="txtUsername"]'
).send_keys(username)
self.chrome_session.find_element_by_xpath(
'//*[@id="txtPassword"]'
).send_keys(ahsPassword)
self.chrome_session.find_element_by_xpath(
'//*[@id="btnSubmit"]'
).click()
def check_for_listings(self):
#code block
links = self.chrome_session.find_elements_by_class_name('link-record')
links = [(link.text, link.get_attribute('href').decode('utf-8'))
for link in links]
if len(links) == 0:
print("No work orders available at {0}".format(
datetime.datetime.now())
)
else:
for link_text, link_url in links:
print("Clicking work order {0} at {1}".format(link_text,datetime.datetime.now()))
self.chrome_session.get(link_url)
print("Attempting to accept at {0}".format(datetime.datetime.now()))
try:
self.chrome_session.find_element_by_xpath("//input[@value='Accept']").click()
try:
WebDriverWait(self.chrome_session, 1).until(EC.alert_is_present)
self.chrome_session.switch_to().alert().accept()
print("Accepted work order {0} at {1}.".format(link_text,datetime.datetime.now()))
except:
print "no alert"
except ElementNotVisibleException:
print("Accept input not found at {0}".format(datetime.datetime.now()))
self.chrome_session.back()
def main():
username = raw_input(
'Please enter the username of the GMail account you want to monitor.\n>')
password = getpass.getpass(
'Please enter the password of the GMail account you want to monitor.\n>')
ahsPassword = getpass.getpass('Please enter the password for ahs. \n>')
ahs = AmericanHomesScraper(username, password, testing)
ahs.login(username,ahsPassword)
print("Starting script")
while True:
ahs.check_for_new_emails_from_sender([
'crm@aaa.com',
'ds@aaa.com',
])
if __name__ == '__main__':
main()
这会正确找到并单击“接受”输入按钮。单击接受后,将打开一个 javascript 警报(确定/取消)以确认接受。但是,该脚本找不到生成的警报。或者,至少它不能接受它,因为调用了异常。
如您所见,我已尝试switch_to().alert(),但这不起作用。
我做错了什么?非常感谢您的帮助,我已经为此工作了好几个小时。
更新
此代码在虚拟服务器上运行,不使用 UI。我刚刚意识到驱动程序是 PhantomJS,而不是 Chrome。所以,这可能就是它失败的原因。我已经更新了问题以反映这一点。
我已经尝试过类似问题的建议,但它不起作用。第 3 方站点需要警报确认才能完成该过程,但我的脚本无法看到它,因为它是无头的。
【问题讨论】:
-
至少给个网址
-
@Bahrom,正如您从代码中看到的那样,我已尝试单击您文章中提到的警报。它不工作。
-
(这不是修辞,要求我自己学习)从.click()返回接受按钮时,警报是否保证立即存在?
标签: javascript python selenium-webdriver beautifulsoup phantomjs