# -*- coding: utf-8 -*-
"""Log in to a web console (solving its image captcha with OCR) and create a policy.

The script drives headless Chrome through Selenium, reads the captcha with
pytesseract, uploads the package at ``PACKAGE_PATH`` and submits the
policy form.  Credentials and paths come from the local ``settings`` module.
"""
import re
import time

import pytesseract
import requests
from PIL import Image, ImageEnhance  # noqa: F401  (ImageEnhance kept for OCR experiments)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

from settings import USERNAME, PASSWORD, PACKAGE_PATH, CHROME_WEBDRIVER_PATH

# Page opened right after the browser starts.
LOGIN_URL = 'http://xxx.xxx.xxx/'

# Matches every character that is NOT a CJK ideograph, a digit or an ASCII
# letter; used to strip OCR noise from the recognised captcha text.
_NON_CAPTCHA_CHARS = re.compile(
    u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])"
)


class WebAuto(object):
    """Selenium session that logs in and creates a policy."""

    def __init__(self):
        self._start_browser()

    def _start_browser(self):
        """Start headless Chrome and open the login page.

        Sets ``self.wd`` (the driver) and ``self.ac`` (an ``ActionChains``
        bound to that driver).  Called again by ``login`` when an attempt
        fails and a fresh session is needed.
        """
        # These flags make Chrome run without a window.
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--headless')
        self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options)
        self.wd.implicitly_wait(20)
        self.ac = ActionChains(self.wd)
        self.wd.get(LOGIN_URL)

    def get_picture(self, scale=2):
        """Return the captcha image cropped out of a full-page screenshot.

        Args:
            scale: Factor applied to the element's location and size to
                obtain screenshot pixel coordinates.  The default of 2 is
                the value the script originally hard-coded (presumably for
                a HiDPI display -- confirm for the target machine).

        Returns:
            A ``PIL.Image.Image`` containing only the captcha.

        Side effects:
            Writes ``picture.png`` in the working directory and lowers the
            driver's implicit wait to 3 seconds for the rest of the session.
        """
        self.wd.implicitly_wait(3)
        time.sleep(3)
        self.wd.save_screenshot('picture.png')  # screenshot of the page
        page_snapshot = Image.open('picture.png')

        containers = self.wd.find_elements(By.XPATH, "/html/body/div/div/div/*")
        form_items = containers[1].find_elements(By.CLASS_NAME, 'el-form-item')
        captcha = form_items[2].find_element(By.CLASS_NAME, 'validcode')

        location = captcha.location
        size = captcha.size
        left = location['x'] * scale
        top = location['y'] * scale
        right = left + size['width'] * scale
        bottom = top + size['height'] * scale
        cropped = page_snapshot.crop((left, top, right, bottom))
        time.sleep(2)
        return cropped

    def get_picture2(self):
        """Return the captcha image downloaded from the element's ``src`` URL.

        Returns:
            A ``PIL.Image.Image`` opened from the downloaded file.

        Raises:
            requests.RequestException: if the download fails, times out or
                returns an HTTP error status.

        Side effects:
            Writes ``picture.png`` in the working directory.
        """
        captchas = self.wd.find_elements(By.CLASS_NAME, 'validcode')
        time.sleep(3)
        url = captchas[0].get_attribute('src')
        # A timeout keeps a stalled server from hanging the script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open('picture.png', 'wb') as f:
            f.write(response.content)
        return Image.open('picture.png')

    def processing_image(self):
        """Return the screenshot captcha converted to pure black and white.

        The image is converted to greyscale; pixels below the threshold
        (160) become black (0) and all others white (255).
        """
        threshold = 160
        grey = self.get_picture().convert("L")
        return grey.point(lambda value: 0 if value < threshold else 255)

    def delete_spot(self):
        """Return the binarised captcha with isolated dark pixels removed.

        For every non-border pixel whose value is below 50, the four direct
        neighbours (up, left, down, right) are inspected; if none of them is
        below 10 the pixel is set to white (255).
        """
        image = self.processing_image()
        data = image.getdata()
        w, h = image.size
        for x in range(1, w - 1):
            for y in range(1, h - 1):
                if data[w * y + x] >= 50:
                    continue
                neighbours = (
                    data[w * (y - 1) + x],  # up
                    data[w * y + (x - 1)],  # left
                    data[w * (y + 1) + x],  # down
                    data[w * y + (x + 1)],  # right
                )
                if not any(pixel < 10 for pixel in neighbours):
                    image.putpixel((x, y), 255)
        return image

    def image_str(self):
        """Run OCR on the cleaned captcha and return its first four characters.

        Everything except CJK ideographs, digits and ASCII letters is
        stripped from the OCR output before it is truncated.  The full
        cleaned text is printed.
        """
        image = self.delete_spot()
        # pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/4.1.1/tesseract'
        raw_text = pytesseract.image_to_string(image)
        cleaned = _NON_CAPTCHA_CHARS.sub("", raw_text)
        print('验证码:', cleaned)
        return cleaned[0:4]

    # NOTE: two alternative OCR pipelines (plain load/split, and an
    # ImageEnhance colour/brightness/contrast/sharpness chain on the
    # get_picture2 image) were tried and dropped by the original author
    # because their recognition rate was too low.

    def _attempt_login(self):
        """Fill in and submit the login form once; return True on success.

        Success is judged by the cookie count after submitting: exactly one
        cookie means the login failed, anything else is treated as success.
        """
        containers = self.wd.find_elements(By.XPATH, "/html/body/div/div/div/*")
        fields = containers[1].find_elements(By.CLASS_NAME, 'el-input__inner')
        # user name, password, captcha
        fields[0].send_keys(USERNAME)
        fields[1].send_keys(PASSWORD)
        fields[2].send_keys(self.image_str())
        time.sleep(5)

        form_children = containers[1].find_elements(By.XPATH, '//div/form/*')
        buttons = form_children[7].find_elements(By.XPATH, '//div/button')
        # Raises NoSuchElementException if the login form is not on the page.
        self.wd.find_element(By.ID, "loginFormId")
        buttons[0].click()
        time.sleep(5)

        return len(self.wd.get_cookies()) != 1

    def login(self, max_attempts=None):
        """Log in, restarting the browser and retrying until it succeeds.

        OCR of the captcha is unreliable, so a failed attempt is expected;
        each retry uses a fresh browser session.

        Args:
            max_attempts: Maximum number of attempts, or ``None`` (default)
                to retry without limit.

        Raises:
            RuntimeError: if ``max_attempts`` attempts all failed.  The
                browser is left open so the caller can quit it.
        """
        attempt = 0
        while True:
            attempt += 1
            if self._attempt_login():
                return
            if max_attempts is not None and attempt >= max_attempts:
                raise RuntimeError(
                    'login failed after %d attempt(s)' % attempt
                )
            self.wd.quit()
            self._start_browser()

    def create(self, upload_timeout=None):
        """Upload the package and create a policy named after the file.

        Args:
            upload_timeout: Seconds to wait for the upload to finish, or
                ``None`` (default) to wait indefinitely.

        Raises:
            TimeoutError: if the upload did not finish within
                ``upload_timeout`` seconds.
        """
        # Give the page time to navigate before looking for elements.
        time.sleep(3)
        print('登陆成功')

        # Open the drop-down menu.
        menus = self.wd.find_elements(By.CLASS_NAME, 'el-submenu__title')
        menu_icon = menus[1].find_elements(By.CLASS_NAME, "el-icon-ijiami-aab")[0]
        self.ac.click(menu_icon).perform()
        time.sleep(3)

        menu_items = self.wd.find_elements(By.CLASS_NAME, 'el-menu-item')
        menu_items[4].click()
        time.sleep(3)

        # "Create" button.
        create_buttons = self.wd.find_elements(By.CLASS_NAME, 'el-form-item__content')
        create_buttons[0].click()
        time.sleep(3)

        # Upload the package file.
        upload_inputs = self.wd.find_elements(By.CLASS_NAME, "el-upload__input")
        upload_input = upload_inputs[0]
        time.sleep(3)
        path = PACKAGE_PATH
        upload_input.send_keys(path)
        time.sleep(3)

        # The number of .el-dialog__footer elements signals completion:
        # 3 when the upload has finished, 2 before that.
        deadline = None
        if upload_timeout is not None:
            deadline = time.monotonic() + upload_timeout
        while True:
            footers = self.wd.find_elements(By.CLASS_NAME, 'el-dialog__footer')
            time.sleep(2)
            if len(footers) == 3:
                print('上传文件完成')
                break
            if deadline is not None and time.monotonic() > deadline:
                raise TimeoutError(
                    'upload did not finish within %s seconds' % upload_timeout
                )

        # The policy name is the file name of the package.
        name = path.split('/')[-1]
        print('策略名:', name)
        text_inputs = self.wd.find_elements(By.CLASS_NAME, 'el-input__inner')
        text_inputs[8].send_keys(name)

        # Policy options.
        radio_groups = self.wd.find_elements(By.CLASS_NAME, 'el-radio-group')
        radio_groups[10].click()  # encrypt everything
        radio_groups[13].click()  # hook-framework detection
        radio_groups[15].click()  # anti-trace analysis

        # Submit.
        primary_buttons = self.wd.find_elements(By.CLASS_NAME, 'el-button--primary')
        primary_buttons[4].click()
        print("创建策略成功")

    def main(self):
        """Log in, create the policy and always close the browser."""
        try:
            self.login()
            self.create()
        finally:
            # Quit even on failure so no headless Chrome process is left behind.
            self.wd.quit()


if __name__ == '__main__':
    webauto = WebAuto()
    webauto.main()
-- 本次的需求是自动化登陆网页并有一定的操作,包括上传文件等等,整个过程相对比较简单,但是有几个小点需要注意
-- webdriver是分版本的,它有macos/linux/windows三个版本,刚开始在macos上写代码,最后在linux上跑代码,需要切换webdriver
-- python3的PIL其实就是Pillow只不过python2叫PIL
-- 要让selenium在linux命令行运行,有两种方式,第一是虚拟化一个窗口,第二是设置无窗口的运行,本次采用的是第二种,在配置webdriver之前要加如下代码
# 加上以下代码就是无窗口化运行 chrome_options = Options() chrome_options.add_argument(\'--no-sandbox\') chrome_options.add_argument(\'--disable-dev-shm-usage\') chrome_options.add_argument(\'--headless\') # ######################## self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options)
-- 获取验证码尽量采取src属性的形式来获取,如果采用截图的方式来获取,出问题的概率比较大
-- tesseract-ocr是识别验证码转字符串的程序,python对应的库是pytesseract,但是这个python库是依赖于tesseract-ocr的,而且这个程序分版本,要先下载安装好
-- 判断页面跳转我用的是cookies,在此代码中登陆之前用户只有一条cookie,登陆之后用户有三条cookie,用此来判断用户是不是已经登陆成功了。对应的selenium是webdriver.get_cookies()方法,它的返回值是一个列表。
-- 在页面点击无效的情况下可以试试ActionChains的如下方法,我也不清楚这个click和原始的click有什么区别,但是这个确实实现了。
from selenium.webdriver.common.action_chains import ActionChains
self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options) self.wd.implicitly_wait(20) self.ac = ActionChains(self.wd) self.ac.click()
-- python中的input()返回的本身就是字符串,输入时如果加了引号,引号也会成为内容的一部分,防止被坑(如果input路径这种,直接输入就好了,不用加引号)
-- 拷贝文件到阿里云的时候,阿里云pwd出来的路径居然拷贝的时候报错了,最后在root前加了一个home (/home/root)
-- pytesseract识别图片验证码是有概率的,不是百分之百成功的。
-- 在用selenium寻找元素的时候,要小心frame,然后可以用chrome自带的开发者工具中的寻找方法先在浏览器中测试是否能根据class/id/css等找到,再在代码中测,效率比较高
-- chrome中ctrl+f寻找的时候比如输入.a1 出现了十个结果,那么你可以找到你要的结果比如是第十个,那在代码中它的索引就是第九个。很方便。
-- 还有写代码一定要用虚拟环境,写完一定要在项目中创建requirements.txt文件,该写的注释要写,变量名要见名知意。