hsh17

整体思路是:先从8684公交网抓取各城市的公交线路名列表,再通过高德地图的 API 接口抓取各线路的站点数据,字段包括 { UID、线路、站点名、站点x坐标、站点y坐标 }。

这里爬8684用了requests和BeautifulSoup,比较简单就不阐述了。最后的存储同时存了csv和xlsx两种格式。

import requests
from bs4 import BeautifulSoup
import pandas as pd
import xlrd

# Master list of cities to scrape. Each entry is substituted into the
# "https://{}.8684.cn/..." URLs and also used as the output file name.
citys = [
    "hefei",
    "wuhu",
    "bengbu",
    "huainan",
    "maanshan",
    "huaibei",
    "tongling",
    "anqing",
    "huangshan",
    "chuzhou",
    "fuyang",
    "suzhou2",
    "liuan",
    "bozhou",
    "chizhou",
    "xuancheng",
]

# Fetch the index labels (leading digits/letters) used to group a city's bus lines.
def getInitial(cityName):
    """Return the index labels listed on a city's 8684 bus page.

    Requests ``https://<cityName>.8684.cn/list1`` and collects the text of
    every ``<a>`` inside the fourth ``div.tooltip-inner`` element.

    Args:
        cityName: City sub-domain on 8684.cn, e.g. ``"hefei"``.

    Returns:
        A list of label strings in page order.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        IndexError: If the page has fewer than four ``tooltip-inner`` blocks.
    """
    url = 'https://{}.8684.cn/list1'.format(cityName)
    # NOTE: "自己的UA" is a placeholder ("your own UA"); substitute a real
    # browser User-Agent string before running.
    headers = {'User-Agent': "自己的UA"}
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    tooltips = soup.find_all('div', {'class': 'tooltip-inner'})
    # The label links live in the fourth tooltip block; fail with a clear
    # message if the page does not have one.
    if len(tooltips) < 4:
        raise IndexError(
            "expected at least 4 'tooltip-inner' blocks at {}, found {}".format(
                url, len(tooltips)
            )
        )
    return [link.get_text() for link in tooltips[3].find_all('a')]
# Example return value of getInitial:
# ['1', '2', '3', '4', '5', '6', '7', '8', '9',
#  'A', 'B', 'C', 'D', 'F', 'G', 'H', 'K', 'L', 'M', 'N', 'P',
#  'Q', 'S', 'T', 'W', 'X', 'Y', 'Z']


# Scrape the bus lines listed under one index label from getInitial().
def getLine(cityName, n):
    """Append the bus-line names from one index page to the global ``lines``.

    Requests ``https://<cityName>.8684.cn/list<n>`` and reads the text of
    every ``<a>`` inside the ``div`` with class ``list clearfix``.

    Args:
        cityName: City sub-domain on 8684.cn, e.g. ``"hefei"``.
        n: Index label appended to ``list`` in the URL, e.g. ``"1"`` or ``"A"``.

    Returns:
        None. Names are appended to the module-level list ``lines``, which
        must already exist when this function is called.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    url = 'https://{}.8684.cn/list{}'.format(cityName, n)
    # NOTE: "自己的UA" is a placeholder ("your own UA"); substitute a real
    # browser User-Agent string before running.
    headers = {'User-Agent': "自己的UA"}
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    container = soup.find('div', {'class': 'list clearfix'})
    if container is None:
        # No listing block on this page: nothing to add.
        return
    lines.extend(link.get_text() for link in container.find_all('a'))


# Storage helpers: write the collected lines to CSV, then convert the CSV to XLSX.
def storageCsv(listBus, cityName):
    """Write ``listBus`` to ``<cityName>.csv`` in the current directory.

    The values are wrapped in a DataFrame and saved without the index
    column; missing values are written as the text ``NULL``.
    """
    target = f"{cityName}.csv"
    frame = pd.DataFrame(listBus)
    frame.to_csv(target, na_rep="NULL", index=False)
def csv_to_xlsx_pd(cityName):
    """Convert ``<cityName>.csv`` to ``<cityName>.xlsx`` in the current directory.

    ``read_csv`` consumes the CSV's first row as the header, and the workbook
    is written with ``header=False``, so that row is not carried over.

    Args:
        cityName: Base name (without extension) of the CSV and XLSX files.
    """
    # Named `frame` rather than `csv` so it does not shadow the stdlib module name.
    frame = pd.read_csv('{}.csv'.format(cityName), encoding='utf-8')
    frame.to_excel('{}.xlsx'.format(cityName), header=False, index=False)

# Scrape every city in `citys` and save its bus-line names.
for city in citys:
    # getLine() appends into the module-level `lines`, so start each city
    # with a fresh, empty list.
    lines = []
    # Walk each index label of the city and collect the lines listed under it.
    for label in getInitial(city):
        getLine(city, label)
    # Save the collected names as CSV, then convert that CSV to XLSX.
    storageCsv(lines, city)
    csv_to_xlsx_pd(city)
    # Progress marker printed once per finished city.
    print(city, "中了")

 得到的csv如图:

 

分类:

技术点:

相关文章: