# bevol platform crawler (bevol 平台爬取)
import requests
import time
import os
import openpyxl
class Base_params:
    """Request parameters shared by the bevol API spiders."""

    @property
    def headers(self):
        """Build the HTTP headers sent with each API request.

        :return: dict holding the ``User-Agent`` header
        """
        return {
            # Alternative value left by the original author: 'okhttp/3/10.0'
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; Android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def now_time_stamp(self):
        """Return the current Unix time in milliseconds as a digit string.

        The value is scaled to milliseconds *before* rounding. Formatting
        ``round(time.time(), 3)`` and deleting the dot drops trailing zeros
        (``1594864163.3`` would become ``15948641633``), which yields a
        timestamp with too few digits.

        :return: current timestamp in milliseconds, digits only
        """
        return str(round(time.time() * 1000))
# Home page ranking lists
class Spider_beovl(Base_params):
    """Crawl one page of a bevol ranking list and store it in an Excel sheet."""

    _URL = 'https://api.bevol.com/data_category/list/'
    # Page size sent to the API; also the maximum number of items read back.
    _PAGE_SIZE = 20
    # Seconds to wait for the server before giving up instead of hanging.
    _TIMEOUT = 10
    # Titles written to row 1 of the sheet, columns A..H.
    _COLUMN_TITLES = (
        '名称', '评分', '喜欢数', '不喜欢数', '收藏数', '评论数', '容量/价格', '图片链接',
    )
    # Response keys stored after the title, in column order B..H.
    _FIELDS = (
        'grade', 'likeNum', 'notLikeNum', 'collectionNum', 'commentNum',
        'standardPriceCapacity', 'imageSrc',
    )

    def __init__(self, r, pager, dataCategoryListsId, sheet):
        """
        :param r: row offset; the first product is written to row ``r + 1``
        :param pager: page number sent to the API as ``pager``
        :param dataCategoryListsId: value sent as ``dataCategoryListsId``
        :param sheet: name of the worksheet that receives the rows
        """
        self.pager = pager
        self.dataCategoryListsId = dataCategoryListsId
        self.sheet = sheet
        self.good_list = []
        self.excel_name = 'bevol.xlsx'
        self.r = r

    @property
    def payload(self):
        """Build the form data posted to the ranking endpoint.

        :return: dict of request parameters, with a fresh ``req_timestamp``
        """
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'pageSize': self._PAGE_SIZE,
            'req_timestamp': self.now_time_stamp,
            'pager': self.pager,
            'dataCategoryListsId': self.dataCategoryListsId
        }

    def synthesize_top_data(self):
        """Fetch one ranking page and append its products to ``good_list``.

        Each product becomes ``{title: [grade, likeNum, notLikeNum,
        collectionNum, commentNum, standardPriceCapacity, imageSrc]}`` with
        every value converted to ``str``. Reading stops at the first item
        that lacks an expected key. ``'已到底'`` is printed whenever fewer
        than a full page of products could be read.

        :return: ``self.good_list`` (accumulates across calls)
        """
        resp = requests.post(
            self._URL,
            headers=self.headers,
            data=self.payload,
            timeout=self._TIMEOUT,
        ).json()

        try:
            items = resp['result']['list'][:self._PAGE_SIZE]
        except (KeyError, TypeError):
            # Response without the expected structure: nothing to read.
            items = []

        stored = 0
        for item in items:
            try:
                title = item['title']
                values = [str(item[key]) for key in self._FIELDS]
            except (KeyError, TypeError):
                # Stop at the first malformed item rather than storing a
                # partial row.
                break
            self.good_list.append({str(title): values})
            stored += 1

        if stored < self._PAGE_SIZE:
            print('已到底')
        return self.good_list

    def _write_header(self, ws):
        """Write the column titles into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self._COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)

    def save_excel(self):
        """Write ``good_list`` to the target sheet and save the workbook.

        The workbook is created when the file is missing, and the sheet is
        created when the workbook exists without it, so several rankings
        can share one file. ``self.r`` is advanced by one for every stored
        product, so repeated calls keep writing below earlier rows.

        :return: None
        """
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for goods_info in self.good_list:
            self.r += 1
            for title, values in goods_info.items():
                for column, value in enumerate([title, *values], start=1):
                    ws.cell(row=self.r, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with a single sheet named ``self.sheet``.

        The sheet gets the column titles in row 1.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)
# Arguments: row offset, page number, categoryId, ranking (sheet) name
# run = Spider_beovl(41, 2, 26, \'新品榜\')
# run.synthesize_top_data()
# run.save_excel()
# Home page recommendations
class Home_recommend():
    """Crawl the bevol home-page recommendation feed into an Excel sheet."""

    _URL = 'https://api.bevol.com/auto/data2'
    # Upper bound on the number of articles read from one response.
    _MAX_ITEMS = 40
    # Seconds to wait for the server before giving up instead of hanging.
    _TIMEOUT = 10
    # Titles written to row 1, columns A..F. The sixth title repeats the
    # fourth in the original layout; no response field is stored under it.
    _COLUMN_TITLES = ('名称', '文章链接', '皮肤特色', '图片链接', '评论内容', '图片链接')

    def __init__(self, sheet, excel_name):
        """
        :param sheet: name of the worksheet that receives the rows
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    @property
    def headers(self):
        """Build the HTTP headers sent with the request.

        :return: dict holding the ``User-Agent`` header
        """
        return {
            "User-Agent": 'Dalvik/2.1.0 (Linux; U; Android 7.1.2; Redmi 2A MIUI/V9.6.2.0.NCKCNFD) Resolution/720*1280 Version/5.45.0 Build/5450095 Device/(Xiaomi;Redmi 2A',
        }

    @property
    def payload(self):
        """Build the fixed form data posted to the recommendation endpoint.

        :return: dict of request parameters
        """
        return {
            'uid': '10547754',
            'uuid': '866174733832030',
            'imei': '1cae65c555f22ad73561b62b3451ede8',
            'model': 'OPPO R11 Plus',
            'o': 'Android',
            'sys_v': '5.1.1',
            'v': '4.1.1',
            'channel': 'yingyongbao',
            'opentime': '1594864081',
            'req_timestamp': 1594864163324,
            'pager': '1',
            'pageNum': '1',
            'signature': '2f56f216eea8f12848d5373052737fb7'
        }

    def home_recommend_crawl(self, resp=None):
        """Collect recommended articles into ``good_list``.

        Each article becomes ``{title: [h5url, skinName, entityImage,
        comment text]}``, matching columns B..E written by ``save_excel``.
        An article is skipped, and the error printed, when any of those
        fields is missing.

        :param resp: optional already-decoded response dict; when omitted
            the endpoint is requested over HTTP
        :return: None
        """
        if resp is None:
            resp = requests.post(
                self._URL,
                headers=self.headers,
                data=self.payload,
                timeout=self._TIMEOUT,
            ).json()

        try:
            entity_map = resp['result']['entityMap']
            articles = entity_map['article'][:self._MAX_ITEMS]
            comments = entity_map['comment']
        except (KeyError, TypeError) as e:
            # Response without the expected structure: nothing to collect.
            print(e)
            return

        for i, article in enumerate(articles):
            try:
                title = article['title']
                values = [
                    str(article['h5url']),
                    str(article['skinName']),
                    str(article['entityImage']),
                    # Comments are paired with articles by position.
                    str(comments[i]['article']),
                ]
            except (KeyError, IndexError, TypeError) as e:
                print(e)
                continue
            self.good_list.append({str(title): values})

    def _write_header(self, ws):
        """Write the column titles into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self._COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)

    def save_excel(self):
        """Write ``good_list`` to the target sheet and save the workbook.

        The workbook is created when the file is missing, and the sheet is
        created when the workbook exists without it. Rows start at row 2
        and only the values each entry actually holds are written, so a
        short entry cannot raise ``IndexError``.

        :return: None
        """
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for row, goods_info in enumerate(self.good_list, start=2):
            for title, values in goods_info.items():
                for column, value in enumerate([title, *values], start=1):
                    ws.cell(row=row, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with a single sheet named ``self.sheet``.

        The sheet gets the column titles in row 1.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)
# home_rec = Home_recommend(\'主页文章\', \'主页推荐.xlsx\')
# home_rec.home_recommend_crawl()
# home_rec.save_excel()
# Discount / low-price centre
class Low_price_centre(Base_params):
    """Crawl the bevol discount centre listing into an Excel sheet."""

    # Fixed request URL copied from a captured app request.
    # NOTE(review): presumably the signature is tied to these exact query
    # parameters (including pager=3) -- confirm before changing any of them.
    _URL = 'https://api.bevol.com/cps/cpsWelfareList?uid=10547754&uuid=866174733832030&imei=1cae65c555f22ad73561b62b3451ede8&model=OPPO%20R11%20Plus&sys_v=5.1.1&v=4.1.1&o=Android&channel=yingyongbao&opentime=1594893672&req_timestamp=1594896096406&pager=3&pageSize=20&signature=1e9f9733dd0f9e92a19404c07b5d5773'
    # Maximum number of items read from one response.
    _PAGE_SIZE = 20
    # Seconds to wait for the server before giving up instead of hanging.
    _TIMEOUT = 10
    # Titles written to row 1 of the sheet, columns A..G.
    _COLUMN_TITLES = ('名称', '评分', '预计价格', '打折价格', '平台链接', '商品体积', '商品介绍')
    # Response keys stored after the title, in column order B..G.
    _FIELDS = (
        'grade', 'reservePrice', 'discountPrice', 'channelLink',
        'goodsVolume', 'content',
    )

    def __init__(self, sheet, excel_name):
        """
        :param sheet: name of the worksheet that receives the rows
        :param excel_name: path of the workbook file
        """
        self.sheet = sheet
        self.excel_name = excel_name
        self.good_list = []

    def low_price(self):
        """Fetch the discount listing and append its items to ``good_list``.

        Each item becomes ``{title: [grade, reservePrice, discountPrice,
        channelLink, goodsVolume, content]}`` with every value converted to
        ``str``. Items lacking an expected key are skipped and reported
        instead of being dropped silently.

        :return: None
        """
        resp = requests.post(
            self._URL, headers=self.headers, timeout=self._TIMEOUT
        ).json()

        try:
            items = resp['result'][:self._PAGE_SIZE]
        except (KeyError, TypeError):
            # Response without the expected structure: nothing to collect.
            items = []

        for item in items:
            try:
                title = item['title']
                values = [str(item[key]) for key in self._FIELDS]
            except (KeyError, TypeError) as e:
                print(f'skipped malformed item: {e!r}')
                continue
            self.good_list.append({str(title): values})
            print(title, values[0])

    def _write_header(self, ws):
        """Write the column titles into row 1 of worksheet ``ws``."""
        for column, title in enumerate(self._COLUMN_TITLES, start=1):
            ws.cell(row=1, column=column, value=title)

    def save_excel(self):
        """Write ``good_list`` to the target sheet and save the workbook.

        The workbook is created when the file is missing, and the sheet is
        created when the workbook exists without it. Rows start at row 2.
        Only the target sheet is touched; the workbook's active sheet is
        never retitled.

        :return: None
        """
        print(self.good_list)
        if not os.path.exists(self.excel_name):
            self.create_excel()
        workbook = openpyxl.load_workbook(self.excel_name)
        if self.sheet in workbook.sheetnames:
            ws = workbook[self.sheet]
        else:
            ws = workbook.create_sheet(self.sheet)
        self._write_header(ws)
        for row, goods_info in enumerate(self.good_list, start=2):
            for title, values in goods_info.items():
                for column, value in enumerate([title, *values], start=1):
                    ws.cell(row=row, column=column, value=value)
        workbook.save(self.excel_name)

    def create_excel(self):
        """Create the workbook file with a single sheet named ``self.sheet``.

        The sheet gets the column titles in row 1.

        :return: None
        """
        workbook = openpyxl.Workbook()
        ws = workbook.active
        ws.title = self.sheet
        self._write_header(ws)
        workbook.save(self.excel_name)
# Script entry: crawl the discount centre and store it in the '有' sheet of
# '优惠中心.xlsx'. NOTE: these statements sit at module level with no
# __main__ guard, so they also run when the module is imported.
run = Low_price_centre(\'有\', \'优惠中心.xlsx\')
# Request the listing and collect the parsed items in run.good_list.
run.low_price()
# Write the collected items to the workbook (created first if it is missing).
run.save_excel()