qshhl

知识点:

  • requests 发送网络请求
  • parsel 解析数据
  • csv 保存数据

第三方库

  • requests >>> pip install requests
  • parsel >>> pip install parsel

开发环境:

  • 版本:anaconda5.2.0(python3.6.5)
  • 编辑器:pycharm

image

 

爬虫代码

导入模块

import requests
import parsel
import csv

 

发送请求

# Listing page for used cars, page 1; the sh_city_name query value is the
# URL-encoded form of "全国" (nationwide).
url = 'https://www.dongchedi.com/usedcar/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x?sh_city_name=%E5%85%A8%E5%9B%BD&page=1'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as listings.
response.raise_for_status()
html_data = response.text

 

解析数据 筛选数据

# Build a selector over the downloaded page and pick every <li> card of the
# listing grid.
selector = parsel.Selector(html_data)
lis = selector.css('#__next > div:nth-child(2) > div.new-main.new > div > div > div.wrap > ul > li')
for li in lis:
    title = li.css('dl dt p::text').get()
    info_list = li.css('dl dd')
    # The first <dd> holds one '|'-separated line; the code reads its parts
    # as car age, mileage and city, in that order.
    info = ''.join(info_list.css('dd:nth-child(2)::text').getall()).split('|')
    if len(info) < 3:
        # Skip cards whose info line is incomplete instead of raising
        # IndexError on info[1] / info[2].
        continue
    car_age = info[0]
    # Drop the unit suffix so only the number remains.
    mileage = info[1].replace('万公里', '')
    city = info[2]

 

做一个判断:根据 dd 标签的数量区分有无懂车帝认证

# NOTE(review): this fragment reads the per-card `info_list`, `title`,
# `car_age`, `mileage` and `city`, so it presumably belongs inside the
# `for li in lis:` loop body — confirm the intended indentation.
#
# A card with four <dd> elements carries an extra certification <span>, which
# shifts the price fields one position further down.
if len(info_list) == 4:
    dcd_auth = info_list.css('dd span::text').get()
    # default='' keeps the string handling below from failing on None when a
    # field is missing.
    price = info_list.css('dd:nth-child(4)::text').get(default='')
    original_price = info_list.css('dd:nth-child(5)::text').get(default='')
else:
    dcd_auth = '无认证'
    price = info_list.css('dd:nth-child(3)::text').get(default='')
    original_price = info_list.css('dd:nth-child(4)::text').get(default='')
# NOTE(review): the original also called .replace('', '') on both prices,
# which is a no-op; presumably the character to strip was lost — confirm.
original_price = original_price.replace('新车含税价: ', '')
print(title, car_age, mileage, city, dcd_auth, price, original_price)

 

保存数据

# Open the output file in append mode; newline='' lets the csv module control
# line endings itself.
# The handle is intentionally left open so rows can still be written through
# `csv_write`; call csv_dcd.close() once all rows are written.
csv_dcd = open('dcd.csv', mode='a', encoding='utf-8', newline='')
csv_write = csv.writer(csv_dcd)
# In append mode the stream starts at the end of the file, so position 0 means
# the file is empty. Writing the header only then avoids a duplicated header
# row on every re-run.
if csv_dcd.tell() == 0:
    csv_write.writerow(['品牌', '车龄', '里程(万公里)', '城市', '认证', '售价(万元)', '原价(万元)'])

 

image

数据可视化

导入模块

import pandas as pd
from pyecharts.charts import *
from pyecharts.commons.utils import JsCode
from pyecharts import options as opts

 

读取数据

# Load the scraped listings from dcd.csv into a DataFrame.
df = pd.read_csv('dcd.csv', encoding='utf-8')
# Preview the first rows; the result is only displayed when this is the last
# expression of a notebook cell.
df.head()

 

各省市二手车数量柱状图

# Bar chart: number of used-car listings per city.
# NOTE(review): `counts` is not defined in this snippet; presumably a pandas
# Series such as df['城市'].value_counts() — confirm where it is built.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(counts.index.tolist())
    .add_yaxis(
        '城市二手车数量',
        counts.values.tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Colour gradient for the bars, passed through as raw JavaScript.
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各个城市二手车数量柱状图'),
        # The x axis lists cities; the original label said '书籍名称'
        # (book name), a copy-paste leftover.
        xaxis_opts=opts.AxisOpts(
            name='城市名称',
            type_='category',
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        # NOTE(review): the y range is hard-coded to 0–1400; presumably chosen
        # for the original data set — confirm it still covers the largest count.
        yaxis_opts=opts.AxisOpts(
            name='数量',
            min_=0,
            max_=1400.0,
            splitline_opts=opts.SplitLineOpts(is_show=True, linestyle_opts=opts.LineStyleOpts(type_='dash'))
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross')
    )
    # Mark lines for the average, maximum and minimum of the series.
    .set_series_opts(
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='均值'),
                opts.MarkLineItem(type_='max', name='最大值'),
                opts.MarkLineItem(type_='min', name='最小值'),
            ]
        )
    )
)
bar.render_notebook()

 

image

各省市二手车平均价格柱状图

# Bar chart: average used-car price per city.
# NOTE(review): `means` is not defined in this snippet; presumably a pandas
# Series of per-city mean prices — confirm where it is built.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(means.index.tolist())
    .add_yaxis(
        '城市二手车平均价格',
        means.values.tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Colour gradient for the bars, passed through as raw JavaScript.
            color=JsCode("""new echarts.graphic.LinearGradient(
            0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}])
            """
            )
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各个城市二手车平均价格柱状图'),
        # xaxis_opts is an argument of set_global_opts, not of TitleOpts; the
        # layout here makes that explicit.
        xaxis_opts=opts.AxisOpts(
            name='城市名称',
            type_='category',
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        # NOTE(review): the y range is hard-coded to 0–40; presumably chosen
        # for the original data set — confirm it still covers the largest mean.
        yaxis_opts=opts.AxisOpts(
            name='平均价格',
            min_=0,
            max_=40.0,
            splitline_opts=opts.SplitLineOpts(is_show=True, linestyle_opts=opts.LineStyleOpts(type_='dash'))
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross')
    )
    # Mark lines for the average, maximum and minimum of the series.
    .set_series_opts(
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='均值'),
                opts.MarkLineItem(type_='max', name='最大值'),
                opts.MarkLineItem(type_='min', name='最小值'),
            ]
        )
    )
)
bar.render_notebook()

 

image

二手车品牌占比情况

# Ring chart with the title centred inside the ring.
# NOTE(review): `datas_pair_1` is not defined in this snippet; presumably a
# list of (label, value) pairs — confirm where it is built.
pie1 = (
    Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))
    # Inner and outer radius; the two values differ, so the pie is a ring.
    .add('', datas_pair_1, radius=['35%', '60%'])
    # Label each slice as "name:percentage%".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="懂车帝二手车\n\n数量占比区间",
            pos_left='center',
            pos_top='center',
            title_textstyle_opts=opts.TextStyleOpts(
                color='#F0F8FF',
                font_size=20,
                font_weight='bold'
            ),
        )
    )
)
pie1.render_notebook()

 

image

二手车里程区间

# Ring chart of mileage ranges, with the title centred inside the ring.
# NOTE(review): this reuses the name `datas_pair_1` from the brand chart and
# it is not defined in this snippet; presumably it is rebuilt with mileage
# buckets before this cell runs — confirm.
pie1 = (
    Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))
    # Inner and outer radius; the two values differ, so the pie is a ring.
    .add('', datas_pair_1, radius=['35%', '60%'])
    # Label each slice as "name:percentage%".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="懂车帝二手车\n\n里程占比区间",
            pos_left='center',
            pos_top='center',
            title_textstyle_opts=opts.TextStyleOpts(
                color='#F0F8FF',
                font_size=20,
                font_weight='bold'
            ),
        )
    )
)
pie1.render_notebook()

 

image

分类:

技术点:

相关文章: