#!coding=utf-8
import json
import os
import re
import time
from urllib.parse import quote

import pandas as pd
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Requests below are sent with verify=False, so silence the
# InsecureRequestWarning that urllib3 would otherwise emit on every call.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


class tb(object):
    """Scrape Taobao's mobile search API and dump the results to a CSV file.

    The scraper walks the paginated JSON endpoint on ``s.m.taobao.com`` for a
    single search term and appends every page of items to ``out.csv`` inside
    the configured output directory.

    Attributes:
        path: Directory in which ``out.csv`` is written.
        seach: Raw (not URL-encoded) search term.
        s: ``requests`` session carrying the mobile-browser headers.
    """

    # Search endpoint; the two placeholders are the encoded query and the
    # page index, in that order.
    SEARCH_URL = (
        'https://s.m.taobao.com/search?event_submit_do_new_search_auction=1'
        '&_input_charset=utf-8&topSearch=1&atype=b&searchfrom=1'
        '&action=home%3Aredirect_app_action&from=1&q={}&sst=1&n=20'
        '&buying=buyitnow&m=api4h5&abtest=18&wlsort=18&style=list'
        '&closeModues=nav%2Cselecthot%2Conesearch&page={}'
    )
    MAX_PAGES = 100        # upper bound on the number of pages requested
    PAGE_DELAY = 1.25      # seconds slept before each request
    REQUEST_TIMEOUT = 30   # seconds before a stalled request is abandoned
    OUTPUT_NAME = 'out.csv'

    # (CSV column name, key in the item JSON, required?) in output order.
    # Optional keys fall back to '' when an item does not carry them.
    COLUMNS = (
        ('title_名称', 'title', True),                    # item title
        ('sold_月销量', 'sold', True),                    # monthly sales
        ('commentCount_评论量', 'commentCount', False),   # comment count
        ('item_id_商品ID', 'item_id', True),              # item id
        ('userId_商家ID', 'userId', True),                # seller id
        ('nick_商家名称', 'nick', True),                  # seller name
        ('location_商家地址', 'location', True),          # seller location
        ('pic_path_图片', 'pic_path', True),              # picture path
        ('itemNumId_商品NID', 'itemNumId', True),         # item numeric id
        ('originalPrice_原价', 'originalPrice', True),    # original price
        ('price_售价', 'price', True),                    # selling price
        ('category_类别ID', 'category', False),           # category id
        ('itemurl_商品链接', 'url', True),                # item link
    )

    def __init__(self, path, seach):
        """Store the output directory and search term and set up the session.

        Args:
            path: Directory where the CSV file is saved.
            seach: Search term, given as plain text (it is URL-encoded when
                the request is built).
        """
        self.path = path    # output directory
        self.seach = seach  # search term
        self.s = requests.session()
        headers = {
            'Host': 's.m.taobao.com',
            'Accept-Encoding': 'br, gzip, deflate',
            'Connection': 'keep-alive',
            'Accept': 'application/json',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/10.6b8836 Mobile/16A366 Safari/605.1.15',
            'Accept-Language': 'zh-cn',
            'X-Requested-With': 'XMLHttpRequest',
        }
        self.s.headers.update(headers)  # send these headers with every request

    def _build_frame(self, list_item):
        """Turn one page of item dicts into a DataFrame with the CSV columns.

        Raises:
            KeyError: If an item lacks one of the required keys.
        """
        data = {}
        for column, key, required in self.COLUMNS:
            if required:
                data[column] = [item[key] for item in list_item]
            else:
                data[column] = [item.get(key, '') for item in list_item]
        return pd.DataFrame(data)

    def seachdata(self):
        """Fetch result pages for the search term and write them to the CSV.

        Pages are requested one by one, up to ``MAX_PAGES``. The first page
        that yields items creates (or overwrites) the file and writes the
        header row; later pages are appended. Paging stops at the first
        response that has no items. A response that is not valid JSON is
        reported with ``err`` and skipped, so it can neither crash the run
        nor cause the previous page to be written a second time.

        Raises:
            requests.RequestException: On connection failure or timeout.
            KeyError: If an item lacks one of the required keys.
        """
        out_file = os.path.join(self.path, self.OUTPUT_NAME)
        # Encode the term so characters such as '&', '#' or '=' cannot
        # corrupt the query string.
        query = quote(str(self.seach), safe='')
        first_write = True

        for i in range(self.MAX_PAGES):
            time.sleep(self.PAGE_DELAY)
            url = self.SEARCH_URL.format(query, i)  # address of this page
            print(i)
            # NOTE: verify=False disables TLS certificate verification.
            req = self.s.get(
                url=url, verify=False, timeout=self.REQUEST_TIMEOUT
            ).text

            try:
                js = json.loads(req)
            except ValueError:  # json.JSONDecodeError is a ValueError
                print('err')
                continue
            print(js)

            listItem = js.get('listItem') if isinstance(js, dict) else None
            if not listItem:
                break

            df = self._build_frame(listItem)
            # The header is tied to the first successful write, not to page
            # 0, so the file still gets one when page 0 had to be skipped.
            df.to_csv(
                out_file,
                index=False,
                header=first_write,
                mode='w' if first_write else 'a',
                encoding="GB18030",
            )
            first_write = False


if __name__ == '__main__':
    t = tb(r'E:\taobao', '手机')
    t.seachdata()