# ouzai
import os
import re
import logging
from bs4 import BeautifulSoup
from openpyxl import  Workbook
from openpyxl.utils import get_column_letter
import datetime

# Configure the root logger to write INFO-and-above records to a log file.
logging.basicConfig(
    level=logging.INFO,  # minimum severity that gets recorded
    filename='food.log',
    # 'a' appends to the existing log (also the default when omitted);
    # 'w' would overwrite the previous log on every run.
    filemode='a',
    # Layout of each log record.
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)

def Insert2Excel(allinfo, work_name='restaurantsinfo.xlsx'):
    """Write restaurant rows to a new Excel workbook.

    A workbook is created with one sheet titled ``restaurants`` whose first
    row is the header ``name, time, score``. Every item of *allinfo* is then
    appended as one row and the workbook is saved to *work_name*.

    Args:
        allinfo: Iterable of rows; each row is a sequence of cell values in
            ``[name, time, score]`` order.
        work_name: Path of the ``.xlsx`` file to write. Defaults to
            ``restaurantsinfo.xlsx``.

    Returns:
        ``'Insert Excel succcessfully!'`` once the workbook has been saved
        (spelling kept as-is for callers that compare the string), or
        ``'Insert Excel failed!'`` if any step raised; in that case the
        error and its traceback are written to the log.
    """
    table_title = ['name', 'time', 'score']
    try:
        wb = Workbook()
        ws = wb.active
        ws.title = 'restaurants'
        ws.append(table_title)
        # Give every header column a fixed width of 15.
        for i in range(1, ws.max_column + 1):
            ws.column_dimensions[get_column_letter(i)].width = 15
        for info in allinfo:
            ws.append(info)
        wb.save(work_name)
    except Exception:
        # Keep the best-effort contract (report failure via the return value)
        # but record what actually went wrong instead of hiding it.
        logging.exception('Insert Excel failed!')
        return 'Insert Excel failed!'
    return 'Insert Excel succcessfully!'
# Matches a run of decimal digits; used to pull the number out of a badge text.
NUMBER_RE = re.compile(r'\d+')


def parse_score_and_time(texts):
    """Derive the ``(time, score)`` pair from a restaurant's badge texts.

    Args:
        texts: Text contents of the restaurant's ``numbersChild`` elements,
            in document order.

    Returns:
        A ``(time, score)`` tuple of strings. With exactly two texts the
        first is taken verbatim as the score and the time is the first digit
        run of the second. Otherwise the score defaults to ``'0'`` and the
        time is the first digit run of the first text. The time is ``''``
        when there is no text or no digits to read it from.
    """
    if len(texts) == 2:
        score, time_text = texts
    else:
        score = '0'
        time_text = texts[0] if texts else ''
    match = NUMBER_RE.search(time_text)
    time_value = match.group() if match else ''
    return time_value, score


def main():
    """Parse the saved listing in ``food.txt`` and export it to Excel."""
    start = datetime.datetime.now().replace(microsecond=0)
    print('Start: ', start)
    allinfo = []
    # Per the original author's note, the page of interest is
    # https://food.grab.com/sg/en/restaurants; for now a local copy saved
    # as food.txt is parsed instead of fetching it.
    with open('food.txt', 'rb') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
    if tag is None:
        # find() returns None when nothing matches; stop with a clear message
        # rather than failing later with an unrelated TypeError.
        logging.error('Restaurant list container not found in food.txt')
        raise SystemExit('Restaurant list container not found in food.txt')

    # Only direct child elements are restaurant entries; iterating the tag
    # itself would also yield text nodes, which cannot be searched.
    restaurants = tag.find_all(True, recursive=False)
    print(len(restaurants))
    for restaurant in restaurants:
        name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
        if name_tag is None:
            logging.warning('Skipping list entry without a name element')
            continue
        badges = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
        time_value, score = parse_score_and_time([badge.get_text() for badge in badges])
        allinfo.append([name_tag.get_text(), time_value, score])

    print(Insert2Excel(allinfo))
    end = datetime.datetime.now().replace(microsecond=0)
    print('End:', end)
    # Report elapsed whole seconds so the value matches the "Seconds" label.
    print('Running time: %s Seconds' % int((end - start).total_seconds()))


if __name__ == '__main__':
    main()

 

# Category:

# Technical points:

# Related articles: