【发布时间】:2020-08-24 03:41:26
【问题描述】:
我编写了以下代码，通常它能正常工作。但是，在下载记录（record()）时，有时会因为服务器无响应或网络断开而失败。为了避免程序中断，我使用了 try-except，但无法同时从 record() 及其外层函数中跳出。因此想寻求解决方案：第 197 行附近的代码并不能让程序回到外层 while 循环继续执行。代码如下：
import glob
import datetime
import cv2
import base64
from PIL import Image
from io import BytesIO
import time
import selenium
import self as self
from pytesseract import pytesseract
from selenium.webdriver.common.keys import Keys
import os
from selenium.webdriver.support import expected_conditions as EC, expected_conditions
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException, \
WebDriverException, ElementNotInteractableException, UnexpectedAlertPresentException
# Root folders: one for the saved result pages, one for the text logs.
main_Directory = r'/home/sangharshmanuski/Documents/e_courts/mha/downloads4'
log_Directory = r'/home/sangharshmanuski/Documents/e_courts/mha/log'

# Start Firefox and open the district-courts portal.
driver = selenium.webdriver.Firefox()
url = r'https://districts.ecourts.gov.in/'
driver.get(url)

# Two explicit waits: a long one (180 s) for page loads and a short one (3 s)
# for quick "is it there?" probes.
wait = WebDriverWait(driver, 180)
waitShort = WebDriverWait(driver, 3)

# Wait until the '#sateist' dropdown has its 22nd option, then pick the state.
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#sateist > option:nth-child(22)")))
# find_element(By.…, …) is used instead of find_element_by_css_selector:
# the latter was removed in Selenium 4.3, the former exists in Selenium 3 too.
select = Select(driver.find_element(By.CSS_SELECTOR, '#sateist'))
options = select.options
select.select_by_visible_text('Maharashtra')

# After the state is chosen the page exposes '.region' and a '#sateist'
# dropdown again; its options are the entries the main loop iterates over.
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.region')))
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
districtListDropdown = Select(driver.find_element(By.CSS_SELECTOR, "#sateist"))
distOptions = districtListDropdown.options
# Iterate over each district offered by the '#sateist' dropdown.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements below is not visible; the comments describe the steps in order.
# The loop starts at index 1, i.e. option 0 is never selected
# (presumably a placeholder entry - confirm).
i = 1
while i < len(distOptions):
# Re-locate the dropdown on every pass. The bare except retries the same
# index on ANY error, including KeyboardInterrupt.
try:
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
newDistDropDown = Select(driver.find_element_by_css_selector("#sateist"))
except:
continue
# Read the visible text of option i, print it and select that option.
newDistOptions = newDistDropDown.options
distName = newDistOptions[i].text
print(distName)
newDistDropDown.select_by_index(i)
# Create one download folder and one log folder per district.
# os.mkdir does not create missing parents, so main_Directory and
# log_Directory must already exist.
# NOTE(review): the log files written further down are opened directly under
# log_Directory, not under district_log_directory.
district_directory = os.path.join(
main_Directory, distName) # create new
if not os.path.exists(district_directory): # if not directory exists, create one
os.mkdir(district_directory)
district_log_directory = os.path.join(log_Directory, distName)
if not os.path.exists(district_log_directory): # if not directory exists, create one
os.mkdir(district_log_directory)
# Continue only when the '.heading' text equals the selected district name
# (case-insensitive); the matching else branch appears further down.
headingDist = driver.find_element_by_css_selector('.heading')
if headingDist.text.lower() == distName.lower():
# Click the accordion button, remember the first window handle, then click
# the sixth link of the panel, which opens a second window.
wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.accordion2:nth-child(2)'))).click()
current = driver.window_handles[0]
wait.until(EC.element_to_be_clickable(
(By.CSS_SELECTOR,
'div.panel:nth-child(3) > ul:nth-child(1) > li:nth-child(6) > a:nth-child(1)'))).click()
# Wait until exactly two windows exist.
wait.until(EC.number_of_windows_to_be(2))
# The new window is the handle that differs from `current`.
newWindow = [window for window in driver.window_handles if window != current][0]
# switch to the new tab. ref: https://stackoverflow.com/questions/41571217/python-3-5-selenium-how-to-handle-a-new-window-and-wait-until-it-is-fully-lo
driver.switch_to.window(newWindow)
# Wait until the court complex dropdown is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#court_complex_code')))
# Fixed 3 s pause after the dropdown is clickable
# (presumably to let its options finish loading - confirm).
time.sleep(3)
# Work through the court complexes of the page that is currently open.
# The helpers imgtotxt() and proceed() follow this header; record() rebinds
# courtComp with `nonlocal`, so they are presumably nested inside this
# function (indentation was lost in this paste - confirm).
def complex_and_act():
# Handle of the window this function runs in; proceed() switches back to it
# after accepting an alert.
this = driver.current_window_handle
def imgtotxt():
    """Solve the captcha with OCR, type the result and submit the form.

    Steps:
      1. Locate the element with id ``captcha_image`` and crop its bounding
         box out of a browser screenshot.
      2. Save the crop, binarise it with OpenCV (threshold 111) and save the
         binarised copy.
      3. Run Tesseract on the binarised image.
      4. Send the recognised text to the ``captcha`` input, click
         ``input.button:nth-child(1)`` and pause for one second.

    Returns:
        None. The outcome of the submission is inspected by the caller.
    """
    captcha_dir = r'/home/sangharshmanuski/Documents/e_courts/captcha'
    edited_dir = r"/home/sangharshmanuski/Documents/e_courts/editC"
    raw_path = os.path.join(captcha_dir, 'file_trial.png')
    clean_path = os.path.join(edited_dir, 'oneDisNoLoop.png')

    # Bounding box of the captcha image in page coordinates.
    elem = driver.find_element(By.ID, "captcha_image")
    loc = elem.location
    size = elem.size
    left, top = loc['x'], loc['y']
    box = (int(left), int(top), int(left + size['width']), int(top + size['height']))

    # Crop the captcha out of the screenshot; the context manager releases
    # the decoded screenshot as soon as the crop is on disk.
    screenshot = driver.get_screenshot_as_base64()
    with Image.open(BytesIO(base64.b64decode(screenshot))) as shot:
        shot.crop(box).save(raw_path, 'PNG')

    # Binarise the grayscale crop (flag 0 = grayscale) to help the OCR.
    gray = cv2.imread(raw_path, 0)
    _, thresh1 = cv2.threshold(gray, 111, 255, cv2.THRESH_BINARY)
    cv2.imwrite(clean_path, thresh1)

    # OCR output usually carries surrounding whitespace (a trailing newline or
    # form feed); strip it so send_keys types the characters only and a
    # newline is not delivered to the input as an Enter key press.
    with Image.open(clean_path) as cleaned:
        captchaText = pytesseract.image_to_string(cleaned).strip()

    captcha = driver.find_element(By.ID, 'captcha')
    captcha.send_keys(captchaText)
    driver.find_element(By.CSS_SELECTOR, 'input.button:nth-child(1)').click()
    # Short pause before the caller inspects the page.
    time.sleep(1)
# Evaluate the page after a captcha submission and keep retrying until the
# site either reports "Record Not Found" or shows results, which record()
# then saves to disk. Returns None on every path.
# NOTE(review): indentation was lost in this paste. In particular it is not
# visible whether the code after the waitmsg loop belongs to the `except:`
# branch or follows the whole try/except - confirm against the real file.
# NOTE(review): every log write opens the file again and never closes it
# explicitly.
def proceed():
# Repeat until one of the return statements below is reached.
while True:
try:
# If an alert shows up within 3 s (waitShort): accept it, switch back to the
# window stored in `this`, click the image link inside '#captcha_container_2'
# (presumably the captcha refresh button - confirm) and submit a new OCR
# attempt via imgtotxt().
waitShort.until(EC.alert_is_present())
driver.switch_to.alert.accept()
driver.switch_to.window(this)
driver.find_element_by_css_selector(
'#captcha_container_2 > div:nth-child('
'1) > div:nth-child(1) > span:nth-child(3) > a:nth-child(7) > img:nth-child(1)').click()
log_file = open(os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('alert was present' + '\n')
print('alert was present')
imgtotxt()
# Bare except: taken when no alert appeared, but also on any other error
# raised in the try body above (including KeyboardInterrupt).
except:
log_file = open(os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('no alert' + '\n')
print('no alert')
# While '#waitmsg' is displayed, poll once per second; give up after 7 polls.
waitmsg = 0
while driver.find_element_by_css_selector('#waitmsg').is_displayed():
if waitmsg < 7:
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('wait' + '\n')
print('waitmsg')
time.sleep(1)
waitmsg += 1
else:
log_file = open(os.path.join(
log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('waiting finished' + '\n')
print('waiting finished')
break
# Texts of the two error messages that are handled explicitly.
invalidCaptcha = "Invalid Captcha"
norecord = "Record Not Found"
try:
# Look for an error paragraph inside '#errSpan' for up to 3 s.
waitShort.until(
EC.presence_of_element_located((By.CSS_SELECTOR, '#errSpan > p:nth-child(1)')))
incorrect = driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').text
if incorrect == invalidCaptcha:
# Wrong captcha: log it, submit a new OCR attempt and restart the while loop.
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('Invalid Captcha' + '\n')
print('invalid captcha')
imgtotxt()
continue
else:
if incorrect == norecord:
# Nothing to download: log it and leave proceed(). print() returns None,
# so the function returns None.
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('Record not Found' + '\n')
return print('record not found')
# Bare except + pass: a missing error paragraph (timeout) and any other
# error are both ignored; execution continues with the code below.
except:
pass
# Save every result entry of the current court complex as an HTML file.
# On a timeout or a non-interactable element: refresh the page, log the
# failure, decrement the enclosing courtComp and return. The caller
# increments courtComp after proceed() returns, so the same complex is
# targeted again on its next pass.
def record():
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('Record Found' + '\n')
print('record fun started')
# Wait (up to 180 s) for the first 'a.someclass' link, then collect them all.
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.someclass')))
listAllView = driver.find_elements_by_css_selector(
'a.someclass')
# Target folder: <main_Directory>/<district>/<court complex>.
distDir2 = os.path.join(
main_Directory, distName, nameCourtComp)
if not os.path.exists(distDir2):
os.makedirs(distDir2)
# x counts the pages saved so far and numbers the output files.
x = 0
for view in listAllView:
try:
# Open the entry, wait for the element with id 'back_top', dump the page
# source to file_<x>.html, then click 'back_top' to return to the list.
view.click()
wait.until(EC.presence_of_element_located((By.ID, 'back_top')))
openFile = open(
os.path.join(distDir2, "file_" + str(x) + ".html"), "w")
openFile.write(driver.page_source)
openFile.close()
back = driver.find_element_by_id('back_top')
back.click()
x += 1
# Only these two exception types trigger the retry path; anything else
# (e.g. StaleElementReferenceException) propagates to the caller.
except (TimeoutException, ElementNotInteractableException):
driver.refresh()
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write(
'While Downloading record for '
+ nameCourtComp + ' error occured, retrying now...' + '\n')
# courtComp lives in an enclosing function scope; lowering it here cancels
# the increment the caller performs after proceed() returns.
nonlocal courtComp
courtComp -= 1
return print(
'While Downloading record for '
+ nameCourtComp + ' error occured, retrying now...')
# All entries were processed without hitting the retry path.
log_file = open(
os.path.join(log_Directory, nameCourtComp + '.txt'), 'a')
log_file.write('record completed, ' + str(x) + ' records found' + '\n')
print('record completed, ' + str(x) + ' records found')
return
# Run the download and leave proceed(); record()'s result is not inspected.
record()
return
# Walk through the options of the '#court_complex_code' dropdown, starting at
# index 1 (option 0 is never selected). record() may decrement courtComp via
# `nonlocal` to request a retry of the same complex.
courtComp = 1
courtComplexLen = len(
    Select(driver.find_element(By.CSS_SELECTOR, '#court_complex_code')).options)
while courtComp < courtComplexLen:
    # Re-locate the dropdown on every pass: record() reloads the page with
    # driver.refresh() before asking for a retry, and element references
    # obtained before a reload can no longer be used.
    courtComplexDownload = Select(wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '#court_complex_code'))))
    courtComplexDownloadList = courtComplexDownload.options
    nameCourtComp = courtComplexDownloadList[courtComp].text
    log_path = os.path.join(log_Directory, nameCourtComp + '.txt')

    # Start a fresh log for this complex. The handle is closed right away so
    # the header reaches the file before proceed()/record() append to it;
    # an unclosed 'w' handle would flush later, over the appended text.
    with open(log_path, 'w') as log_file:
        log_file.write(nameCourtComp + '\n' + '\n')
    print(nameCourtComp)
    courtComplexDownload.select_by_index(courtComp)

    # Give the '#actcode' list up to 10 s to fill. The options are re-read on
    # every poll; a list captured once would never change.
    acts = Select(driver.find_element(By.CSS_SELECTOR, '#actcode'))
    act = 0
    while len(acts.options) < 2 and act < 10:
        time.sleep(1)
        act += 1
    if len(acts.options) < 2:
        # If there is no list to populate, go to the next complex instead of
        # aborting the whole run.
        print('act list did not load')
        with open(log_path, 'a') as log_file:
            log_file.write('act list did not load' + '\n')
        courtComp += 1
        continue

    try:
        acts.select_by_value('33')
    except NoSuchElementException:
        # The act with value '33' is not offered for this complex.
        print('PoA not applicable')
        with open(log_path, 'a') as log_file:
            log_file.write('No PoA' + '\n')
        courtComp += 1
        continue

    # Submit the captcha, then evaluate the response and download the results.
    imgtotxt()
    proceed()
    courtComp += 1
# Process all court complexes in the window that was just opened, close that
# window, switch back to the window stored in `current` and navigate back.
complex_and_act()
driver.close()
print("all court complexes in " + distName + " completed")
driver.switch_to.window(current)
driver.back()
# Counterpart of the heading check: when '.heading' did not match the
# district name, pause 5 s and retry the same index (i is not incremented).
else:
time.sleep(5)
continue
# Advance to the next district.
i += 1
# Select the state again so that '#sateist' lists the districts once more.
# NOTE(review): indentation was lost in this paste; these lines presumably run
# inside the district loop after every completed district - confirm.
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#sateist > option:nth-child(22)")))
select = Select(driver.find_element_by_css_selector('#sateist'))
options = select.options
select.select_by_visible_text('Maharashtra')
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.region')))
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
【问题讨论】:
-
如果您能发布一段更精简的代码（smaller bit of code）来帮助我们了解问题，将会有所帮助。听起来您过早地处理了异常。您是否尝试过在需要 break/continue 的外层函数中处理异常？
标签: python python-3.x selenium