# Erick-L
# coding=utf-8
import os
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import xlsxwriter, xlrd
import pandas as pd


def _start_chrome(chrome_options, driver_path='./chromedriver'):
    """Start Chrome in a way that works on both Selenium 3 and Selenium 4.

    Selenium 4 receives the driver binary through a ``Service`` object and the
    options through ``options=``. Older releases only accept the
    ``executable_path=`` / ``chrome_options=`` keywords and reject
    ``service=`` with a ``TypeError``, in which case the legacy call is used.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from selenium.webdriver.chrome.service import Service

    try:
        return webdriver.Chrome(service=Service(driver_path), options=chrome_options)
    except TypeError:
        # Legacy signature, exactly as this script originally called it.
        return webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)


def login(account='zhxxxm', password='LONxxxxx$'):
    """Sign in with a headless Chrome and return the logged-in driver.

    The function fills the login form, waits for the two-step verification
    page, asks the operator for the code on stdin, submits it and returns the
    driver for further navigation.

    Args:
        account: Value typed into the ``login-email`` field.
        password: Value typed into the ``login-pass`` field.

    Returns:
        The Selenium Chrome driver. The caller owns it and must call
        ``quit()`` when done.

    Raises:
        Any Selenium exception raised while driving the page. The browser is
        shut down before the exception propagates.
    """
    # NOTE(security): the defaults are the credentials that were hard-coded
    # in the original script; prefer passing them in from a secret store.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = _start_chrome(chrome_options)
    try:
        driver.set_window_size(1200, 741)
        driver.implicitly_wait(2)
        print('初始化中...')
        driver.get("http://xxxve")
        print('填写登录信息中...')
        # find_element(By.…) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        acc = driver.find_element(By.ID, 'login-email')
        pwd = driver.find_element(By.ID, 'login-pass')
        btn = driver.find_element(By.TAG_NAME, 'button')
        acc.send_keys(account)
        pwd.send_keys(password)
        btn.click()
        print('跳转到验证码页面中...')
        time.sleep(2)
        capta = driver.find_element(By.ID, 'code')
        capta_input = input('请输入两步验证码:')
        capta.send_keys(capta_input)
        btn1 = driver.find_element(By.TAG_NAME, 'button')
        btn1.click()
        time.sleep(2)
        print('跳转到创意编辑页面中...')
    except BaseException:
        # Includes Ctrl-C at the code prompt: never leave a headless Chrome
        # process behind when login does not complete.
        driver.quit()
        raise
    return driver


# URLs whose screenshot could not be produced. parse_img() and get_screen()
# append to it; the __main__ block prints it once the crawl is finished.
faild_url = []


def parse_img(driver, url):
    """Save a screenshot of the current page cropped to the creative element.

    The file is written to ``./screen_shot/<cid>.png`` where ``cid`` is the
    second-to-last ``/``-separated segment of *url*. Any failure while taking
    or cropping the screenshot is reported on stdout and *url* is appended to
    the module-level ``faild_url`` list; the exception is not re-raised.

    Args:
        driver: Selenium driver that already has the page loaded.
        url: URL of the loaded page; only used to derive the file name and
            for failure reporting.

    Raises:
        IndexError: If *url* has fewer than two ``/``-separated segments.
    """
    cid = url.split('/')[-2]
    shot_path = "./screen_shot/{}.png".format(cid)
    try:
        # A missing output directory used to make every URL fail silently.
        os.makedirs('./screen_shot', exist_ok=True)
        # get_screenshot_as_file reports I/O errors by returning False
        # instead of raising, so the result has to be checked.
        if not driver.get_screenshot_as_file(shot_path):
            raise IOError("could not write screenshot {}".format(shot_path))
        element = driver.find_element(By.CLASS_NAME, "AdvertViewer-item")

        # Each property access is a round trip to the browser: read once.
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']

        # NOTE(review): element coordinates are used directly as image pixel
        # coordinates; presumably the screenshot is taken at a device pixel
        # ratio of 1 - confirm for HiDPI setups.
        with Image.open(shot_path) as im:
            cropped = im.crop((left, top, right, bottom))
        cropped.save(shot_path)
        print("创意-->{}.png 已经保存".format(cid))
    except Exception as e:
        # Best-effort: record the failure and keep crawling, but say why.
        print(e, url)
        faild_url.append(url)


def get_screen(driver, url, tem, wait_seconds=10):
    """Open *url* in *driver* and capture its creative via ``parse_img``.

    Any exception raised while loading the page or capturing it is printed
    together with the URL, and the URL is appended to the module-level
    ``faild_url`` list; nothing is re-raised.

    Args:
        driver: Selenium driver to navigate with.
        url: Page to open.
        tem: Style value supplied by the caller; not used by this function.
        wait_seconds: Pause between loading the page and taking the
            screenshot. Defaults to the 10 seconds that were previously
            hard-coded.
    """
    try:
        driver.get(url)
        # Fixed pause before the screenshot; presumably gives the creative
        # time to finish rendering.
        time.sleep(wait_seconds)
        parse_img(driver, url)
    except Exception as e:
        print(e, url)
        faild_url.append(url)


def _screenshot_name(cell):
    """Return ``'<id>.png'`` for an ID cell, or None if it is not an integer."""
    try:
        return str(int(cell)) + '.png'
    except (TypeError, ValueError):
        return None


def new_xlsx(src_path='全xxx意.xlsx', dst_path='全行xxx意-fina.xlsx',
             shot_dir='./screen_shot/'):
    """Copy a workbook sheet by sheet, embedding screenshots in column H.

    Every sheet of *src_path* is recreated in *dst_path*. Non-empty cells are
    copied as they are. For a body row whose column G (index 6) holds an
    integer ID with a matching ``<id>.png`` in *shot_dir*, the row height is
    set to 0.8 times the image height, column H is widened, and the non-empty
    cell in column H (index 7) is replaced by the image.

    Args:
        src_path: Workbook to read.
        dst_path: Workbook to create.
        shot_dir: Directory holding the ``<id>.png`` screenshots. A missing
            directory is treated as "no screenshots".
    """
    data = xlrd.open_workbook(src_path)
    # List the screenshot directory once instead of once per cell.
    available = set(os.listdir(shot_dir)) if os.path.isdir(shot_dir) else set()

    # The context manager closes (i.e. writes) the workbook even on error.
    with xlsxwriter.Workbook(dst_path) as book:
        for table in data.sheets():
            worksheet = book.add_worksheet(table.name)
            nrows = table.nrows
            if nrows == 0:
                # Nothing to copy; row_values(0) would raise on an empty sheet.
                continue

            # Header row.
            for i, v in enumerate(table.row_values(0)):
                if v != '':
                    worksheet.write(0, i, v)

            # Body rows.
            # NOTE(review): the upper bound nrows - 1 leaves out the last row
            # of every sheet. Kept as in the original; confirm whether that
            # row is a footer or whether this is an off-by-one.
            for k in range(1, nrows - 1):
                rows = table.row_values(k)
                if all(v == '' for v in rows):
                    continue

                # An empty or non-numeric ID cell means "no screenshot"
                # rather than a crash.
                name = _screenshot_name(rows[6]) if len(rows) > 6 else None
                image_path = None
                if name in available:
                    image_path = os.path.join(shot_dir, name)
                    # Open the image once per row and release the handle.
                    with Image.open(image_path) as img:
                        image_height = img.size[1]
                    worksheet.set_column('H:H', width=58)
                    worksheet.set_row(k, height=image_height * 0.8)

                for i, v in enumerate(rows):
                    if v == '':
                        continue
                    # NOTE(review): the image is only inserted when the
                    # source cell in column H is non-empty, as in the
                    # original - confirm that this is intended.
                    if image_path is not None and i == 7:
                        worksheet.insert_image('H' + str(k + 1), image_path,
                                               {'x_offset': 6, 'y_offset': 3})
                    else:
                        worksheet.write(k, i, v)


if __name__ == '__main__':
    # Crawl every link listed in the input workbook, then build the final
    # workbook with the screenshots embedded.
    df = pd.read_excel('./全行xxxx.xlsx')
    # Read the columns before the interactive login so a malformed input
    # file fails before a browser is started.
    link_list = df['link'].tolist()
    tem_list = df['样式'].tolist()
    total = len(link_list)

    driver1 = login()
    try:
        # The counter used to be reset to 1 on every iteration, so the
        # "remaining" message never changed; enumerate keeps a real count.
        for done, (url, tem) in enumerate(zip(link_list, tem_list), start=1):
            get_screen(driver1, url, tem)
            print('还剩 %s 个' % str(total - done))
    finally:
        # Always shut the browser down, even if the loop is interrupted.
        driver1.quit()

    print('失败的url:', faild_url)
    print('所有抓取结束')
    new_xlsx()
    print('插入表格结束')

 

分类:

技术点:

相关文章:

  • 2021-08-07
  • 2021-12-29
  • 2021-07-05
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-05-19
猜你喜欢
  • 2021-05-05
  • 2022-12-23
  • 2021-08-21
  • 2021-06-16
  • 2021-06-05
  • 2022-12-23
  • 2021-12-15
相关资源
相似解决方案