记得我们第三关的时候爬取了豆瓣TOP250的电影名/评分/推荐语/链接,现在呢,我们要把它们存储下来,记得用今天课上学的csv和excel,分别存储下来哦~
"""Scrape the Douban TOP250 movie list and save it as both CSV and Excel.

Columns: rank number, title, rating, tagline (推荐语), detail-page URL.

Fixes over the original script:
- Each results page is fetched only ONCE; the original scraped all ten
  pages twice (once for the CSV output, once again for the Excel output).
- A User-Agent header is sent — douban rejects the default requests UA,
  which made the original silently scrape nothing.
- The bare ``except:`` around the tagline lookup is replaced by an
  explicit ``None`` check: ``find()`` returns ``None`` for a missing tag
  rather than raising, so checking is clearer than catching.
"""
import csv

import openpyxl
import requests
from bs4 import BeautifulSoup

# Douban blocks the default python-requests User-Agent.
HEADERS = {'User-Agent': 'Mozilla/5.0'}
COLUMNS = ['编号', '电影名', '评分', '推荐语', '链接']


def scrape_top250():
    """Yield one [num, name, rate, inq, url] row per movie (250 total).

    Performs one GET per results page (10 pages of 25 movies).
    """
    for start in range(0, 250, 25):
        res = requests.get(
            'https://movie.douban.com/top250?start={}&filter='.format(start),
            headers=HEADERS,
        )
        soup = BeautifulSoup(res.text, 'html.parser')
        for item in soup.find(class_='grid_view').find_all('li'):
            num = item.find('em').text
            name = item.find('span').text
            rate = item.find(class_='rating_num').text
            # A handful of movies have no tagline; find() returns None then.
            inq_tag = item.find(class_='inq')
            inq = inq_tag.text if inq_tag is not None else ''
            url = item.find('a')['href']
            yield [num, name, rate, inq, url]


def main():
    # Fetch once, then write the same rows to both output formats.
    rows = list(scrape_top250())

    # --- CSV output ---
    with open('02.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(COLUMNS)
        writer.writerows(rows)

    # --- Excel output ---
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = 'TOP250'
    sheet.append(COLUMNS)  # header row, equivalent to filling A1..E1
    for row in rows:
        sheet.append(row)
    wb.save('02.xlsx')


if __name__ == '__main__':
    main()