gokublog
# -*- coding: utf-8 -*-
import time
import requests
import re
import pytesseract
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image, ImageEnhance
from settings import USERNAME, PASSWORD, PACKAGE_PATH, CHROME_WEBDRIVER_PATH
from selenium.webdriver.chrome.options import Options


class WebAuto(object):
    """Drive a headless Chrome session that logs in and creates a policy.

    The flow is: open the start page, fill in user name / password / OCR-read
    captcha, retry until the login succeeds, then upload a package and submit
    the policy form.

    NOTE(review): the locator calls use the Selenium 3 ``find_elements_by_*``
    API, which newer Selenium releases no longer provide — confirm the pinned
    Selenium version before upgrading.
    """

    # Page opened whenever a browser session is started.
    START_URL = 'http://xxx.xxx.xxx/'
    # File that screenshots / downloaded captcha images are written to.
    PICTURE_PATH = 'picture.png'
    # Matches every character that is not in the CJK range \u4e00-\u9fa5,
    # an ASCII digit, or an ASCII letter.
    _NON_CODE_CHARS = re.compile(
        u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])")

    def __init__(self):
        """Start the headless browser and open the start page."""
        self._start_browser()

    def _start_browser(self):
        """Create a fresh headless Chrome driver and load ``START_URL``."""
        # These flags make Chrome run without a window.
        chrome_options = Options()
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--headless')

        self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options)
        self.wd.implicitly_wait(20)
        self.ac = ActionChains(self.wd)
        self.wd.get(self.START_URL)

    def get_picture(self, scale=2):
        """Return the captcha image cropped out of a full-page screenshot.

        Args:
            scale: Factor applied to the captcha element's location and size
                to map them onto screenshot pixels. Defaults to 2, the value
                that used to be hard-coded.
                NOTE(review): presumably compensates for a HiDPI display
                where the screenshot is twice the CSS size — confirm, and
                pass ``scale=1`` where that does not hold.

        Returns:
            The cropped PIL image of the captcha.
        """
        self.wd.implicitly_wait(3)
        time.sleep(3)
        self.wd.save_screenshot(self.PICTURE_PATH)  # capture the whole page
        page_snapshot = Image.open(self.PICTURE_PATH)

        # Walk down to the third form item, which holds the captcha image.
        panels = self.wd.find_elements_by_xpath("/html/body/div/div/div/*")
        form_items = panels[1].find_elements_by_class_name('el-form-item')
        captcha = form_items[2].find_element_by_class_name('validcode')

        location = captcha.location
        size = captcha.size
        left = location['x'] * scale
        top = location['y'] * scale
        right = left + size['width'] * scale
        bottom = top + size['height'] * scale
        cropped = page_snapshot.crop((left, top, right, bottom))
        time.sleep(2)
        return cropped

    def get_picture2(self):
        """Return the captcha image downloaded from the element's ``src`` URL.

        The original author notes this approach is more accurate than
        cropping a screenshot.

        Raises:
            requests.HTTPError: If the image request returns an error status.
        """
        elements = self.wd.find_elements_by_class_name('validcode')
        time.sleep(3)
        url = elements[0].get_attribute('src')
        # A timeout keeps a stalled server from hanging the run forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(self.PICTURE_PATH, 'wb') as f:
            f.write(response.content)
        return Image.open(self.PICTURE_PATH)

    def processing_image(self):
        """Return the captcha as a pure black-and-white image."""
        gray = self.get_picture().convert("L")  # to grayscale
        threshold = 160
        # Pixels darker than the threshold become black, all others white.
        return gray.point(lambda value: 0 if value < threshold else 255)

    def delete_spot(self):
        """Return the binarised captcha with isolated black pixels removed.

        A dark pixel whose four direct neighbours are all non-black is
        treated as noise and painted white. Border pixels are left untouched.
        """
        image = self.processing_image()
        data = image.getdata()
        w, h = image.size
        for x in range(1, w - 1):
            for y in range(1, h - 1):
                if data[w * y + x] >= 50:
                    continue  # centre pixel is not dark, nothing to clean
                neighbours = (
                    data[w * (y - 1) + x],  # top
                    data[w * y + (x - 1)],  # left
                    data[w * (y + 1) + x],  # bottom
                    data[w * y + (x + 1)],  # right
                )
                if not any(pixel < 10 for pixel in neighbours):
                    image.putpixel((x, y), 255)
        return image

    @classmethod
    def _strip_noise(cls, text):
        """Return *text* with every character matched by ``_NON_CODE_CHARS`` removed."""
        return cls._NON_CODE_CHARS.sub("", text)

    def image_str(self):
        """Run OCR on the cleaned captcha and return its first four characters."""
        image = self.delete_spot()
        # pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/4.1.1/tesseract'
        raw = pytesseract.image_to_string(image)
        cleaned = self._strip_noise(raw)
        print('验证码:', cleaned)
        return cleaned[0:4]

    # Two alternative OCR variants (splitting the image, and boosting it with
    # ImageEnhance before OCR) were tried by the original author and dropped
    # because their recognition rate was too low.

    def _attempt_login(self):
        """Fill in and submit the login form once.

        Returns:
            True when the login is considered successful, i.e. the browser
            holds any number of cookies other than exactly one.
        """
        panels = self.wd.find_elements_by_xpath("/html/body/div/div/div/*")
        fields = panels[1].find_elements_by_class_name('el-input__inner')

        # User name, password, then the OCR-read captcha.
        fields[0].send_keys(USERNAME)
        fields[1].send_keys(PASSWORD)
        fields[2].send_keys(self.image_str())
        time.sleep(5)

        form_children = panels[1].find_elements_by_xpath('//div/form/*')
        buttons = form_children[7].find_elements_by_xpath('//div/button')
        buttons[0].click()
        time.sleep(5)

        # Per the original author: one cookie before login, three after.
        return len(self.wd.get_cookies()) != 1

    def login(self, max_attempts=None):
        """Log in, restarting the browser and retrying while the login fails.

        Args:
            max_attempts: Maximum number of login attempts, or ``None`` (the
                default) to retry until one succeeds.

        Raises:
            RuntimeError: If ``max_attempts`` attempts all failed.
        """
        attempt = 0
        while True:
            attempt += 1
            if self._attempt_login():
                return
            if max_attempts is not None and attempt >= max_attempts:
                raise RuntimeError(
                    'login failed after %d attempt(s)' % attempt)
            # Start over in a fresh browser; iterating instead of recursing
            # keeps the call stack flat however many retries are needed.
            self.wd.quit()
            self._start_browser()

    def create(self, upload_timeout=None):
        """Upload the package and create a policy named after the file.

        Args:
            upload_timeout: Seconds to wait for the upload to finish, or
                ``None`` (the default) to wait indefinitely.

        Raises:
            TimeoutError: If the upload did not finish within
                ``upload_timeout`` seconds.
        """
        # Give the page time to navigate before looking for elements.
        time.sleep(3)

        # Open the drop-down menu.
        print('登陆成功')
        menus = self.wd.find_elements_by_class_name('el-submenu__title')
        menu_icon = menus[1].find_elements_by_class_name("el-icon-ijiami-aab")[0]
        self.ac.click(menu_icon).perform()
        time.sleep(3)

        # Select the menu entry.
        menu_items = self.wd.find_elements_by_class_name('el-menu-item')
        menu_items[4].click()
        time.sleep(3)

        # Press the "create" button.
        create_buttons = self.wd.find_elements_by_class_name('el-form-item__content')
        create_buttons[0].click()
        time.sleep(3)

        # Upload the aab file through the hidden file input.
        upload_inputs = self.wd.find_elements_by_class_name("el-upload__input")
        upload_input = upload_inputs[0]
        time.sleep(3)

        path = PACKAGE_PATH
        upload_input.send_keys(path)
        time.sleep(3)

        # The number of .el-dialog__footer elements signals upload state:
        # three when the upload has finished, two before that.
        deadline = None
        if upload_timeout is not None:
            deadline = time.monotonic() + upload_timeout
        while True:
            footers = self.wd.find_elements_by_class_name('el-dialog__footer')
            time.sleep(2)
            if len(footers) == 3:
                print('上传文件完成')
                break
            if deadline is not None and time.monotonic() > deadline:
                raise TimeoutError(
                    'upload did not finish within %s seconds' % upload_timeout)

        # The policy name is the file name of the package.
        name = path.split('/')[-1]
        print('策略名:', name)
        text_inputs = self.wd.find_elements_by_class_name('el-input__inner')
        text_inputs[8].send_keys(name)

        # Configure the policy options.
        radio_groups = self.wd.find_elements_by_class_name('el-radio-group')
        radio_groups[10].click()  # encrypt everything
        radio_groups[13].click()  # hook-framework detection
        radio_groups[15].click()  # anti-trace analysis

        # Submit the form.
        submit_buttons = self.wd.find_elements_by_class_name('el-button--primary')
        submit_buttons[4].click()
        print("创建策略成功")

    def main(self):
        """Run the login and policy-creation flow, always closing the browser."""
        try:
            self.login()
            self.create()
        finally:
            # Quit even on failure so no headless Chrome process is left behind.
            self.wd.quit()


if __name__ == '__main__':
    # Run the whole login-and-create flow when executed as a script.
    webauto = WebAuto()
    webauto.main()

-- 本次的需求是自动化登陆网页并有一定的操作,包括上传文件等等,整个过程相对比较简单,但是有几个小点需要注意

  -- webdriver是分版本的,它有macos/linux/windows三个版本,刚开始在macos上写代码,最后在linux上跑代码,需要切换webdriver

  -- python3的PIL其实就是Pillow只不过python2叫PIL

  -- 要让selenium在linux命令行运行,有两种方式,第一是虚拟化一个窗口,第二是设置无窗口的运行,本次采用的是第二种,在配置webdriver之前要加如下代码  

# 加上以下代码就是无窗口化运行
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--headless')
# ########################

self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options)

  -- 获取验证码尽量采取src属性的形式来获取,如果采用截图的方式来获取,出问题的概率比较大

  -- tesseract-ocr是识别验证码转字符串的库,python对应的是pytesseract,但是这个python库是依赖于tesseract-ocr的,而且这个程序分版本,要先下载好

  -- 判断页面跳转我用的是cookies,在此代码中登陆之前用户只有一条cookie,登陆之后用户有三条cookie,用此来判断用户是不是已经登陆成功了。对应的selenium是webdriver.get_cookies()方法,它的返回值是一个列表。

  -- 在页面点击无效的情况下可以试试ActionChains如下方法,我也不清楚这个click和原始的click有什么区别,但是这个确实实现了。

from selenium.webdriver.common.action_chains import ActionChains
self.wd = webdriver.Chrome(CHROME_WEBDRIVER_PATH, options=chrome_options)
self.wd.implicitly_wait(20)
self.ac = ActionChains(self.wd)
self.ac.click(element).perform()  # element是要点击的元素;必须调用perform()才会真正执行点击

  -- python中的input输入后是带引号的,防止被坑(如果input路径这种,直接输入就好了,不用加引号)

  -- 拷贝文件到阿里云的时候,阿里云pwd出来的路径居然拷贝的时候报错了,最后在root前加了一个home (/home/root)

  -- pytesseract识别图片验证码是有概率的,不是百分之百成功的。

  -- 在用selenium寻找元素的时候,要小心frame,然后可以用chrome自带的开发者工具中的寻找方法先在浏览器中测试是否能根据class/id/css等找到,再在代码中测,效率比较高

  -- chrome中ctrl+f寻找的时候比如输入.a1 出现了十个结果,那么你可以找到你要的结果比如是第十个,那在代码中它的索引就是第九个。很方便。

  -- 还有写代码一定要用虚拟环境,写完一定要在代码中创建requirements.txt文件,该写的注释要写,变量名见名知意。

分类:

技术点:

相关文章: