知识点:
- requests 发送网络请求
- parsel 解析数据
- csv 保存数据
- requests >>> pip install requests
- parsel >>> pip install parsel
- 版本:anaconda5.2.0(python3.6.5)
- 编辑器:pycharm
import requests import parsel import csv
# Crawl page 1 of the dongchedi.com used-car listing and append every parsed
# car to dcd.csv (the file the analysis step reads back with pandas).
url = 'https://www.dongchedi.com/usedcar/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x?sh_city_name=%E5%85%A8%E5%9B%BD&page=1'

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as listings.
response = requests.get(url, timeout=10)
response.raise_for_status()
html_data = response.text

selector = parsel.Selector(html_data)
lis = selector.css('#__next > div:nth-child(2) > div.new-main.new > div > div > div.wrap > ul > li')

# The context manager guarantees the CSV handle is flushed and closed even if
# parsing raises part-way through the page.
with open('dcd.csv', mode='a', encoding='utf-8', newline='') as csv_dcd:
    csv_write = csv.writer(csv_dcd)

    # The file is opened in append mode, so only emit the header when the file
    # is still empty; otherwise every re-run would insert a header row in the
    # middle of the data.
    if csv_dcd.tell() == 0:
        csv_write.writerow(['品牌', '车龄', '里程(万公里)', '城市', '认证', '售价(万元)', '原价(万元)'])

    for li in lis:
        title = li.css('dl dt p::text').get()
        info_list = li.css('dl dd')

        # The first <dd> holds "age | mileage | city" separated by '|'.
        info = ''.join(info_list.css('dd:nth-child(2)::text').getall()).split('|')
        if len(info) < 3:
            # Card does not have the expected three fields; skip it instead of
            # failing the whole run with an IndexError.
            continue
        car_age = info[0]
        mileage = info[1].replace('万公里', '')
        city = info[2]

        # Four <dd> elements means an extra certification <dd> sits before the
        # price, shifting the price/original-price positions by one.
        if len(info_list) == 4:
            dcd_auth = info_list.css('dd span::text').get()
            price = info_list.css('dd:nth-child(4)::text').get()
            original_price = info_list.css('dd:nth-child(5)::text').get()
        else:
            dcd_auth = '无认证'
            price = info_list.css('dd:nth-child(3)::text').get()
            original_price = info_list.css('dd:nth-child(4)::text').get()

        if price is None or original_price is None:
            # .get() returns None when the selector matches nothing; calling
            # .replace on it would raise AttributeError.
            continue
        price = price.replace('万', '')
        original_price = original_price.replace('新车含税价: ', '').replace('万', '')

        print(title, car_age, mileage, city, dcd_auth, price, original_price)
        # Persist the row; previously the values were only printed.
        csv_write.writerow([title, car_age, mileage, city, dcd_auth, price, original_price])
import pandas as pd from pyecharts.charts import * from pyecharts.commons.utils import JsCode from pyecharts import options as opts
# Load the listings from dcd.csv into a DataFrame for the charts below.
df = pd.read_csv(
    'dcd.csv',
    encoding='utf-8',
)
# Bare expression: previews the first rows when run as a notebook cell.
df.head()
# Bar chart of the number of used cars per city.
# `counts` is not defined in this section; it is expected to be an
# index/values object (e.g. a pandas Series) built earlier -- TODO confirm.
bar = (
    Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))
    .add_xaxis(counts.index.tolist())
    .add_yaxis(
        '城市二手车数量',
        counts.values.tolist(),
        label_opts=opts.LabelOpts(is_show=True, position='top'),
        itemstyle_opts=opts.ItemStyleOpts(
            # Vertical colour gradient evaluated on the JavaScript side.
            color=JsCode("""new echarts.graphic.LinearGradient( 0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}]) """)
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各个城市二手车数量柱状图'),
        xaxis_opts=opts.AxisOpts(
            # Fixed: the axis was labelled '书籍名称' (book title), left over
            # from another chart; the categories here are cities.
            name='城市名称',
            type_='category',
            axislabel_opts=opts.LabelOpts(rotate=90),
        ),
        yaxis_opts=opts.AxisOpts(
            name='数量',
            min_=0,
            max_=1400.0,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dash'),
            ),
        ),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
    )
    .set_series_opts(
        # Reference lines for the average, maximum and minimum of the series.
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem(type_='average', name='均值'),
                opts.MarkLineItem(type_='max', name='最大值'),
                opts.MarkLineItem(type_='min', name='最小值'),
            ]
        )
    )
)
# Bare expression: renders the chart inline when run as a notebook cell.
bar.render_notebook()
# Bar chart of the average used-car price per city.
# `means` is not defined in this section; it is expected to be an
# index/values object (e.g. a pandas Series) built earlier -- TODO confirm.
bar = Bar(init_opts=opts.InitOpts(height='500px', width='1000px', theme='dark'))

# Categories on the x axis, one bar per category.
bar.add_xaxis(means.index.tolist())
bar.add_yaxis(
    '城市二手车平均价格',
    means.values.tolist(),
    label_opts=opts.LabelOpts(is_show=True, position='top'),
    itemstyle_opts=opts.ItemStyleOpts(
        # Vertical colour gradient evaluated on the JavaScript side.
        color=JsCode("""new echarts.graphic.LinearGradient( 0, 0, 0, 1,[{offset: 0,color: 'rgb(255,99,71)'}, {offset: 1,color: 'rgb(32,178,170)'}]) """)
    ),
)

# Chart-wide options: title, both axes and the hover tooltip.
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='各个城市二手车平均价格柱状图'),
    xaxis_opts=opts.AxisOpts(
        name='城市名称',
        type_='category',
        axislabel_opts=opts.LabelOpts(rotate=90),
    ),
    yaxis_opts=opts.AxisOpts(
        name='平均价格',
        min_=0,
        max_=40.0,
        splitline_opts=opts.SplitLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(type_='dash'),
        ),
    ),
    tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
)

# Reference lines for the average, maximum and minimum of the series.
bar.set_series_opts(
    markline_opts=opts.MarkLineOpts(
        data=[
            opts.MarkLineItem(type_='average', name='均值'),
            opts.MarkLineItem(type_='max', name='最大值'),
            opts.MarkLineItem(type_='min', name='最小值'),
        ]
    )
)

# Bare expression: renders the chart inline when run as a notebook cell.
bar.render_notebook()
# Ring (donut) chart titled as the share of listings per count bracket.
# `datas_pair_1` is not defined in this section; presumably a list of
# (label, value) pairs prepared earlier -- TODO confirm.
pie1 = Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))

# Inner/outer radius as percentages produce the hollow centre.
pie1.add('', datas_pair_1, radius=['35%', '60%'])

# Label each slice as "name:percent%".
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))

# The title is centred so it sits inside the hole of the ring.
pie1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="懂车帝二手车\n\n数量占比区间",
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='#F0F8FF',
            font_size=20,
            font_weight='bold',
        ),
    )
)

# Bare expression: renders the chart inline when run as a notebook cell.
pie1.render_notebook()
# Ring (donut) chart titled as the share of listings per mileage bracket.
# NOTE(review): this reuses `datas_pair_1`, the same name fed to the count
# chart; confirm it is rebound to the mileage buckets before this cell runs.
pie1 = Pie(init_opts=opts.InitOpts(theme='dark', width='1000px', height='600px'))

# Inner/outer radius as percentages produce the hollow centre.
pie1.add('', datas_pair_1, radius=['35%', '60%'])

# Label each slice as "name:percent%".
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))

# The title is centred so it sits inside the hole of the ring.
pie1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="懂车帝二手车\n\n里程占比区间",
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='#F0F8FF',
            font_size=20,
            font_weight='bold',
        ),
    )
)

# Bare expression: renders the chart inline when run as a notebook cell.
pie1.render_notebook()