由于抖音更新了系统,之前的版本已无法下载视频。下面是最新的修正版本:以前用 wget 下载,现在改为用 aria2c 下载。
# coding=utf-8
"""Download a Douyin video with headless Chrome and aria2c.

The share page is loaded in headless Chrome through Selenium, the video URL
and the author metadata are scraped from the resulting HTML with regular
expressions, and the file is fetched by the external ``aria2c`` program.

Usage::

    python <this script> <douyin share url>

The script runs on Python 2 and Python 3 (every ``print`` takes exactly one
argument so it behaves the same on both).
"""
import os
import re
import subprocess
import sys
import time

PY2 = sys.version_info[0] == 2
if PY2:
    # Legacy Python 2 workaround kept from the original script: makes
    # implicit str<->unicode conversions (e.g. printing scraped text to a
    # redirected stdout) use UTF-8 instead of ASCII.
    reload(sys)  # noqa: F821 -- builtin only on Python 2
    sys.setdefaultencoding("utf-8")

# iPhone Safari user agent, sent by both the headless browser and aria2c.
USER_AGENT = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) '
    'AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 '
    'Mobile/10A5376e Safari/8536.25'
)

# Location of chromedriver. The default is the path the script always used;
# set the CHROMEDRIVER_PATH environment variable to override it.
# A raw string keeps the Windows backslashes literal.
CHROME_DRIVER = os.environ.get(
    'CHROMEDRIVER_PATH',
    r'D:\soft\ChromePortable\84.0.4147.105\App\Chrome-bin\chromedriver.exe',
)

# Fixed pause (seconds) between navigating and reading the page source.
# NOTE(review): presumably gives page scripts time to insert the <video>
# element -- confirm that one second is enough on slow connections.
PAGE_LOAD_WAIT = 1

# Each "....." matches exactly five arbitrary characters after the class
# name prefix (presumably a generated CSS class suffix -- TODO confirm).
_VIDEO_SRC_RE = re.compile(
    r'<video class="video-player--..... hide--....." src="(.*?)"', re.S)
_TITLE_RE = re.compile(r'<p class="desc--.....">(.*?)</p>', re.S)
_AUTHOR_RE = re.compile(r'<p class="author-name--.....">@(.*?)</p>', re.S)
_UID_RE = re.compile(r'<p class="unique_id--.....">(.*?)</p>', re.S)

# Characters that are not allowed in Windows file names, plus control chars.
_ILLEGAL_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _joined_matches(pattern, text):
    """Return all captures of *pattern* in *text* concatenated ('' if none)."""
    return "".join(pattern.findall(text))


def _native_args(args):
    """Return *args* in the form ``subprocess`` needs on this interpreter.

    Python 3 passes text arguments through unchanged.  On Python 2 unicode
    arguments are encoded with the file-system encoding (the local ANSI code
    page on Windows) using ``replace``, so characters the code page cannot
    represent no longer raise ``UnicodeEncodeError`` as the fixed ``gb2312``
    encoding did.
    """
    if not PY2:
        return list(args)
    encoding = sys.getfilesystemencoding() or 'utf-8'
    return [
        arg.encode(encoding, 'replace')
        if isinstance(arg, unicode) else arg  # noqa: F821 -- Python 2 only
        for arg in args
    ]


def extract_video_info(page_source):
    """Scrape the download URL and author metadata from the page HTML.

    Each value found is printed; a "No match ..." line is printed for each
    value that is missing.

    Args:
        page_source: HTML text of the rendered share page.

    Returns:
        A dict with the keys ``'src'`` (video URL with ``playwm`` replaced by
        ``play``), ``'title'``, ``'name'`` and ``'uid'``.  A value that could
        not be found is an empty string, never a list, so callers can always
        concatenate the values.
    """
    raw_src = _joined_matches(_VIDEO_SRC_RE, page_source)
    download_url = ""
    if raw_src:
        print(raw_src)
        # NOTE(review): 'playwm' -> 'play' presumably selects the
        # watermark-free stream -- confirm against the current site.
        download_url = raw_src.replace('playwm', 'play')
        print(download_url)
    else:
        print("No match src")

    title = _joined_matches(_TITLE_RE, page_source)
    print(title if title else "No match user Title")

    name = _joined_matches(_AUTHOR_RE, page_source).replace('@', '')
    print(name if name else "No match name")

    uid = _joined_matches(_UID_RE, page_source)
    uid = uid.replace(u'抖音号:', '').replace(' ', '')
    print(uid if uid else "No match uid")

    return {'src': download_url, 'title': title, 'name': name, 'uid': uid}


def build_output_name(uid, name, title):
    """Return ``<uid>_<name>_<title>.mp4`` made safe for use as a file name.

    Characters that Windows forbids in file names (and control characters
    such as newlines) are replaced with ``_``; scraped titles routinely
    contain them and would otherwise make the download fail.
    """
    stem = uid + '_' + name + '_' + title
    return _ILLEGAL_FILENAME_CHARS_RE.sub('_', stem) + '.mp4'


def build_aria2c_command(src, filename):
    """Return the aria2c argument list that saves *src* as *filename*.

    An argument list (instead of a shell string) means quotes or shell
    metacharacters in the scraped values cannot break or inject into the
    command.  The user agent is passed without the stray ``user-agent=``
    prefix that the original command line put inside the header value.
    """
    return ['aria2c', '--user-agent=' + USER_AGENT, src, '-o', filename]


def fetch_page_source(url):
    """Load *url* in headless Chrome and return the page HTML.

    The browser is always shut down, even when loading the page fails.
    """
    # Imported here so the pure helpers above stay importable (and the usage
    # message still works) on machines without Selenium installed.
    from selenium import webdriver

    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument('log-level=3')
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_argument('user-agent=' + USER_AGENT)
    browser = webdriver.Chrome(executable_path=CHROME_DRIVER, options=options)
    try:
        print('Downloading page ...')
        browser.get(url)
        time.sleep(PAGE_LOAD_WAIT)
        return browser.page_source
    finally:
        browser.quit()


def main(url=None):
    """Download the video behind *url* and return a process exit status.

    Args:
        url: Share page URL.  When omitted, the first command-line argument
            is used.

    Returns:
        The aria2c exit status, or 1 when no URL was given, no video source
        was found on the page, or aria2c could not be started.
    """
    if url is None:
        if len(sys.argv) < 2:
            print('input url')
            return 1
        url = sys.argv[1]

    info = extract_video_info(fetch_page_source(url))
    if not info['src']:
        # Nothing to hand to aria2c; the original launched it with an empty URL.
        print("No video URL found, nothing to download")
        return 1

    print("downloading video aria2c")
    filename = build_output_name(info['uid'], info['name'], info['title'])
    command = build_aria2c_command(info['src'], filename)
    try:
        status = subprocess.call(_native_args(command))
    except OSError as exc:
        # Raised when the aria2c executable is missing or not runnable.
        print("Could not start aria2c: %s" % exc)
        return 1
    print(status)
    return status


if __name__ == '__main__':
    sys.exit(main())