-hao-

爬取京东手机搜索结果信息:商品名称、商品ID、总评数量、好评数量、中评数量、差评数量,并保存为 CSV 格式文件。

注意:当前脚本仅爬取搜索结果第一页的数据。

import requests
from bs4 import BeautifulSoup
import csv
import codecs
#import pandas as pd




# Scrape the first page of JD "手机" (mobile phone) search results:
# for each product, collect its name, SKU id, and total/good/neutral/poor
# review counts from the comment-summary API, print each row, and save
# everything to write.csv.

# Pretend to be a regular browser so JD does not reject the request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36',
          }

# Search results page for keyword "手机" (URL-encoded as %E6%89%8B%E6%9C%BA).
url = "https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&wq=%E6%89%8B%E6%9C%BA&pvid=5282edb11fad4ce1ad0214c0d89a0031"
html = requests.get(url=url, headers=header)
soup = BeautifulSoup(html.content, 'html.parser')
# Each search result is an <li class="gl-item"> element.
items = soup.select('li.gl-item')

results = []
for item in items:
    # 'data-sku' carries the product id that the comment-summary API expects.
    # (renamed from `id`/`list` in the original, which shadowed builtins)
    sku = item.find('div', class_='p-focus').find('a')['data-sku']
    name = item.find("div", class_="p-name p-name-type-2").find("em").text
    commit_url = ("https://sclub.jd.com/comment/productPageComments.action?productId="
                  + str(sku) + "&sortType=5&score=0&page=0&pageSize=10")
    # Fetch the comment summary ONCE and reuse it — the original issued four
    # identical HTTP requests per product (one per count field).
    summary = requests.get(commit_url, headers=header).json()["productCommentSummary"]
    results.append([name, sku, commit_url,
                    summary["commentCountStr"],    # total reviews
                    summary["goodCountStr"],       # good reviews
                    summary["generalCountStr"],    # neutral reviews
                    summary["poorCountStr"]])      # poor reviews

for result in results:
    print(result)

# 'w' overwrites any existing file; newline='' prevents csv from emitting
# blank lines on Windows.
with open('write.csv', 'w', newline='', encoding='utf8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows(results)
# No explicit close() needed: the `with` block already closed the file.

 

分类:

技术点:

相关文章: