tsw123

Python 抓取百度音乐

# coding:utf-8

import urllib2
import re
import urllib
import chardet
from json import *

# Script: download every song listed on a Baidu Music tag page.
#
# Steps:
#   1. Fetch the tag page for `category` and pull the song ids out of the
#      HTML-escaped JSON embedded in each <li data-songitem=...> element.
#   2. POST the comma-joined ids to the songlink endpoint, which answers
#      with JSON holding a 'data' -> 'songList' array.
#   3. Download each entry's 'songLink' and save it as '<songName>.mp3'
#      in the current working directory.
#
# Network failures raised by urllib2 are reported (HTTP code and/or reason)
# instead of propagating.
category = '经典老歌'
# Percent-encode the tag: raw non-ASCII bytes are not valid in a URL path.
url = 'http://music.baidu.com/tag/' + urllib.quote(category)
url_songs = 'http://play.baidu.com/data/music/songlink'

# Captures the value that follows "sid" inside the data-songitem attribute.
patt_str = "<li data-songitem = '{&quot;songItem&quot;:{&quot;sid&quot;:(.*?),.*?</li>"
pattern = re.compile(patt_str, re.S)


def _fetch(target, data=None):
    """Return the whole response body of `target`, closing the connection.

    Passing `data` (an urlencoded string) turns the request into a POST.
    Raises urllib2.URLError (or its subclass HTTPError) on failure.
    """
    response = urllib2.urlopen(target, data)
    try:
        return response.read()
    finally:
        response.close()


try:
    content = _fetch(url)
    songIds = pattern.findall(content)

    song_entries = []
    if songIds:  # nothing matched -> no point in querying the link service
        formdata = {"songIds": ",".join(songIds)}
        songListJson = _fetch(url_songs, urllib.urlencode(formdata))
        song_dict = JSONDecoder().decode(songListJson)
        # Tolerate a missing/null 'data' or 'songList' instead of crashing.
        song_entries = (song_dict.get('data') or {}).get('songList') or []

    for song_data in song_entries:
        song_name = song_data.get('songName')
        song_link = song_data.get('songLink')
        if not song_name or not song_link:
            # Entry has no usable name or download link; skip it.
            continue

        # urllib2 (not urllib) so that HTTP errors raise and reach the
        # handler below rather than saving an error page as audio.
        music = _fetch(song_link)

        # The name comes from the remote service: keep it a plain file name.
        file_name = song_name.replace('/', '_').replace('\\', '_') + '.mp3'
        # Binary mode: text mode would corrupt the audio on some platforms.
        with open(file_name, 'wb') as out_file:
            out_file.write(music)

except urllib2.URLError as e:
    # HTTPError carries a status code; plain URLError only has a reason.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)

 

分类:

技术点:

相关文章: