renoyuan

技术点:

selenium 爬数据

csv 转换数据格式

 

网易云没换反爬虫策略的话，这份代码应该是可以直接跑的。

爬虫入门试了一下还是很简单的，上手一天差不多就可以直接爬了。

案例代码:

from selenium import webdriver
import time
import json
import csv


class WangyiMusicSpider():
    """Scrape NetEase Cloud Music playlist cards with Selenium and export them.

    Workflow: ``main()`` scrapes the playlist page into ``wangyimisic.json``,
    then ``json_to_scv()`` converts that JSON file into ``wangyimusic.csv``.
    (File name "wangyimisic" keeps the original spelling so existing files
    still match.)
    """

    def __init__(self, url):
        """Start Chrome ('chromedriver' must be findable) and load *url*.

        The 2-second sleep gives the JS-rendered page time to finish loading.
        """
        self.browser = webdriver.Chrome('chromedriver')
        self.browser.get(url)
        time.sleep(2)

    def json_to_scv(self):
        """Convert wangyimisic.json into wangyimusic.csv.

        NOTE: the method name keeps the original 'scv' typo so existing
        callers keep working.
        """
        with open("wangyimisic.json", "r", encoding="utf-8") as src:
            results = json.load(src)
        # newline='' is required by the csv module to avoid blank rows on
        # Windows; 'with' guarantees the handle closes even if a write fails
        # (the original left the file open on error).
        with open('wangyimusic.csv', 'w', encoding='utf-8', newline='') as dst:
            writer = csv.writer(dst)
            # Guard: the original raised IndexError on an empty result list.
            if results:
                writer.writerow(results[0].keys())
                for record in results:
                    writer.writerow(record.values())

    def main(self):
        """Scrape every playlist card on the page, dump to JSON, then CSV."""
        # The playlist grid is rendered inside the 'g_iframe' iframe, so we
        # must switch into it before any XPath lookup can succeed.
        iframe_element = self.browser.find_element_by_id('g_iframe')
        self.browser.switch_to.frame(iframe_element)

        card_elements = self.browser.find_elements_by_xpath(
            './/ul[@id="m-pl-container"]/li')

        results = []
        for li in card_elements:
            item = {
                'photo': li.find_element_by_xpath('.//img').get_attribute('src'),
                'music_link': li.find_element_by_xpath('.//div/a').get_attribute('href'),
                'hot': li.find_element_by_xpath('.//span[@class="nb"]').text,
                'title': li.find_element_by_xpath('./p/a').text,
                'name': li.find_element_by_xpath('.//p[last()]/a').text,
            }
            results.append(item)

        # ensure_ascii=False keeps Chinese titles human-readable in the JSON
        # file instead of \uXXXX escapes.
        with open('wangyimisic.json', 'w', encoding='utf-8') as out:
            out.write(json.dumps(results, ensure_ascii=False))

        self.json_to_scv()

    def quit(self):
        """Close the browser window and end the WebDriver session."""
        self.browser.quit()


if __name__ == '__main__':
    url = 'https://music.163.com/#/discover/playlist'
    obj = WangyiMusicSpider(url)
    try:
        obj.main()
    finally:
        # Always close the browser — the original leaked the Chrome process
        # whenever main() raised (e.g. a stale selector / changed page layout).
        obj.quit()

  

分类:

技术点:

相关文章: