# coding=utf-8
"""Screenshot pages listed in a spreadsheet and embed the crops in a workbook.

Workflow (see ``main``):
1. read the ``link`` and ``样式`` columns from the input spreadsheet,
2. log in with a headless Chrome session (two-step code typed on stdin),
3. visit every link, screenshot it and crop the ``AdvertViewer-item`` element,
4. copy the source workbook into a new one, placing each saved image in
   column H of the row whose 7th column holds the matching id.
"""
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import xlsxwriter
import xlrd
import pandas as pd

# Directory where the cropped screenshots are stored and later looked up.
SCREENSHOT_DIR = './screen_shot/'

# URLs whose screenshot could not be taken; reported at the end of the run.
faild_url = []


def login():
    """Start headless Chrome, sign in and return the authenticated driver.

    The two-step verification code is read interactively from stdin.

    Returns:
        The ``webdriver.Chrome`` instance, positioned after the second
        form submission.

    Raises:
        Any Selenium exception raised while driving the login pages; the
        browser is shut down before the exception propagates.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # NOTE(review): `executable_path` / `chrome_options` are legacy keyword
    # arguments; presumably removed in recent Selenium 4 releases -- confirm
    # against the installed Selenium version before upgrading.
    driver = webdriver.Chrome(executable_path='./chromedriver', chrome_options=chrome_options)
    try:
        driver.set_window_size(1200, 741)
        driver.implicitly_wait(2)
        print('初始化中...')
        driver.get("http://xxxve")

        print('填写登录信息中...')
        acc = driver.find_element(By.ID, 'login-email')
        pwd = driver.find_element(By.ID, 'login-pass')
        btn = driver.find_element(By.TAG_NAME, 'button')
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to environment variables or a secrets store.
        acc.send_keys('zhxxxm')
        pwd.send_keys('LONxxxxx$')
        btn.click()

        print('跳转到验证码页面中...')
        time.sleep(2)  # give the verification page time to load
        capta = driver.find_element(By.ID, 'code')
        capta_input = input('请输入两步验证码:')
        capta.send_keys(capta_input)
        btn1 = driver.find_element(By.TAG_NAME, 'button')
        btn1.click()
        time.sleep(2)
        print('跳转到创意编辑页面中...')
    except Exception:
        # Do not leave an orphaned browser process behind when login fails.
        driver.quit()
        raise
    return driver


def parse_img(driver, url):
    """Screenshot the current page and crop it to the ``AdvertViewer-item`` element.

    The image is saved as ``<SCREENSHOT_DIR>/<cid>.png`` where ``cid`` is the
    second-to-last ``/``-separated component of ``url``.  On any failure the
    error is printed and ``url`` is appended to ``faild_url``.

    Args:
        driver: Selenium driver that has already loaded ``url``.
        url: The page address; only used to derive the file name and for
            failure reporting.
    """
    cid = url.split('/')[-2]
    path = "./screen_shot/{}.png".format(cid)
    try:
        os.makedirs(SCREENSHOT_DIR, exist_ok=True)
        driver.get_screenshot_as_file(path)
        element = driver.find_element(By.CLASS_NAME, "AdvertViewer-item")
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']
        with Image.open(path) as im:
            cropped = im.crop((left, top, right, bottom))
            # Force the pixel data into memory before the source file is
            # closed, since the crop is written back to the same path.
            cropped.load()
        cropped.save(path)
        print("创意-->{}.png 已经保存".format(cid))
    except Exception as e:
        # Best effort: report and remember the failure, keep the run going.
        print(e, url)
        faild_url.append(url)


def get_screen(driver, url, tem):
    """Open ``url``, wait for it to render, then capture it via ``parse_img``.

    Args:
        driver: Logged-in Selenium driver.
        url: Page to capture.
        tem: Value of the ``样式`` column for this link; currently unused but
            kept so existing callers keep working.
    """
    try:
        driver.get(url)
        time.sleep(10)  # fixed wait for the page content to finish rendering
        parse_img(driver, url)
    except Exception as e:
        print(e, url)
        faild_url.append(url)


def _image_name_for_row(row):
    """Return ``'<id>.png'`` built from the row's 7th cell, or None if unusable."""
    try:
        return str(int(row[6])) + '.png'
    except (IndexError, ValueError, TypeError):
        # Short row, blank cell or non-numeric id: no image can match.
        return None


def new_xlsx():
    """Copy the source workbook into a new one with screenshots embedded.

    Every sheet is recreated under the same name.  Non-empty cells are copied
    as-is, except that for rows whose id (7th column) has a saved screenshot,
    the non-empty cell in column H is replaced by that image and the row
    height is scaled to the image height.
    """
    # NOTE(review): xlrd 2.x presumably no longer reads .xlsx files -- confirm
    # the pinned xlrd version (or switch reader) before upgrading.
    data = xlrd.open_workbook('全xxx意.xlsx')

    # List the screenshot directory once instead of once per cell.
    try:
        available = set(os.listdir('./screen_shot/'))
    except FileNotFoundError:
        available = set()

    with xlsxwriter.Workbook('全行xxx意-fina.xlsx') as book:
        for table in data.sheets():
            worksheet = book.add_worksheet(table.name)
            nrows = table.nrows
            if nrows == 0:
                continue  # empty sheet: nothing to copy

            # Header row.
            for i, v in enumerate(table.row_values(0)):
                if v != '':
                    worksheet.write(0, i, v)

            # Body rows.  The upper bound is `nrows` so the last data row is
            # copied too (it was previously skipped by `nrows - 1`).
            for k in range(1, nrows):
                rows = table.row_values(k)
                image_name = _image_name_for_row(rows)
                has_image = image_name is not None and image_name in available
                image_path = './screen_shot/' + image_name if has_image else None

                if has_image and any(v != '' for v in rows):
                    with Image.open(image_path) as img:
                        image_width, image_height = img.size
                    worksheet.set_column('H:H', width=58)
                    worksheet.set_row(k, height=image_height * 0.8)

                for i, v in enumerate(rows):
                    if v == '':
                        continue
                    if has_image and i == 7:
                        worksheet.insert_image('H' + str(k + 1), image_path,
                                               {'x_offset': 6, 'y_offset': 3})
                    else:
                        worksheet.write(k, i, v)


def main():
    """Run the full capture-and-export workflow."""
    df = pd.read_excel('./全行xxxx.xlsx')
    # Read the columns before logging in so a malformed spreadsheet fails
    # fast, before the interactive verification step.
    link_list = df['link'].tolist()
    tem_list = df['样式'].tolist()

    driver1 = login()
    try:
        total = len(link_list)
        for done, (url, tem) in enumerate(zip(link_list, tem_list), start=1):
            get_screen(driver1, url, tem)
            print('还剩 %s 个' % str(total - done))
    finally:
        # Always release the browser, even if the loop is interrupted.
        driver1.quit()

    print('失败的url:', faild_url)
    print('所有抓取结束')
    new_xlsx()
    print('插入表格结束')


if __name__ == '__main__':
    main()