import datetime
import logging
import os
import re

from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.utils import get_column_letter

logging.basicConfig(
    level=logging.INFO,  # minimum severity that gets recorded
    filename='food.log',
    # 'w' would rewrite the log on every run; 'a' appends to the existing
    # file (append is also what basicConfig uses when filemode is omitted).
    filemode='a',
    # log record layout
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)


def Insert2Excel(allinfo):
    """Write the collected rows to ``restaurantsinfo.xlsx``.

    A new workbook is created with a single sheet titled ``restaurants``,
    a ``name`` / ``time`` / ``score`` header row, and every header column
    set to a width of 15. Each item of *allinfo* is then appended as one row.

    Args:
        allinfo: iterable of rows; each row is a sequence that openpyxl can
            append to a worksheet (here: ``[name, time, score]``).

    Returns:
        ``'Insert Excel succcessfully!'`` when the workbook was saved,
        ``'Insert Excel failed!'`` when building or saving it raised.
        The failure is also written, with its traceback, to the log.
    """
    tableTitle = ['name', 'time', 'score']
    work_name = 'restaurantsinfo.xlsx'
    try:
        wb = Workbook()
        ws = wb.active
        ws.title = 'restaurants'
        ws.append(tableTitle)
        # Only the header row exists at this point, so max_column is the
        # number of header cells.
        for i in range(1, ws.max_column + 1):
            ws.column_dimensions[get_column_letter(i)].width = 15
        for info in allinfo:
            ws.append(info)
        wb.save(work_name)
    except Exception:
        # Keep the best-effort contract (return a status string instead of
        # raising) but record why it failed; KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        logging.exception('Could not write %s', work_name)
        return 'Insert Excel failed!'
    return 'Insert Excel succcessfully!'


def _first_number(text):
    """Return the first run of digits in *text*, or ``''`` if there is none."""
    match = re.search(r'\d+', text)
    return match.group() if match else ''


def _parse_restaurant(restaurant):
    """Extract ``[name, time, score]`` from one restaurant element.

    Returns ``None`` when the element has no name heading or no number
    cells, so the caller can skip it instead of crashing.
    """
    name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
    numbers = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
    if name_tag is None or not numbers:
        return None

    if len(numbers) == 2:
        # Two cells: the first is read as the score, the second as the time.
        score = numbers[0].get_text()
        time_text = numbers[1].get_text()
    else:
        # Otherwise only the first cell is read, as the time, and the
        # score falls back to '0'.
        score = '0'
        time_text = numbers[0].get_text()

    return [name_tag.get_text(), _first_number(time_text), score]


def main():
    """Parse ``food.txt`` and export the restaurants it lists to Excel."""
    start = datetime.datetime.now().replace(microsecond=0)
    print('Start: ', start)

    allinfo = []
    # The live page is not fetched here; food.txt is parsed instead
    # (presumably a saved copy of https://food.grab.com/sg/en/restaurants
    # -- confirm).
    with open('food.txt', 'rb') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
    if tag is None:
        logging.warning('Restaurant list container not found in food.txt')
        restaurants = []
    else:
        # Direct child elements only: iterating the tag itself would also
        # yield text nodes, which cannot be searched like elements.
        restaurants = tag.find_all(True, recursive=False)
    print(len(restaurants))

    for restaurant in restaurants:
        resinfo = _parse_restaurant(restaurant)
        if resinfo is None:
            logging.warning('Skipping an entry without a name or number cells')
            continue
        allinfo.append(resinfo)

    print(Insert2Excel(allinfo))
    end = datetime.datetime.now().replace(microsecond=0)
    print('End:', end)
    print('Running time: %s Seconds' % (end - start))


if __name__ == '__main__':
    main()