"""Multi-threaded downloader for HLS (m3u8) video streams.

Workflow (see ``main``):

1. Resolve the media playlist behind the m3u8 link typed by the user
   (the link may be a master playlist or already a media playlist).
2. Save the media playlist as ``index1.m3u8`` and write ``index.m3u8``, a
   copy whose key and segment URIs point at the local files.
3. Download the AES key (if the stream has one) and every ``.ts`` segment
   with a pool of worker threads.
4. ``s.txt`` lists the segments in playback order for
   ``ffmpeg -f concat -safe 0 -i s.txt -c copy output.mp4``.

Author's notes: the worker threads receive their argument as a dict
(``kwargs``); both known m3u8 layouts are supported.
Observed download time on the author's machine: 0:00:47.
"""
import datetime
import os
import random
import re
import sys
import threading
from queue import Empty, Queue
from urllib.parse import urljoin, urlparse

# The third-party imports are guarded so that the pure parsing helpers below
# stay importable (and unit-testable) on a machine without the network stack.
try:
    import requests
except ImportError:
    requests = None

try:
    from fake_useragent import UserAgent
except ImportError:
    UserAgent = None

# Filled in by ``main`` from user input; the classes read them at call time.
xiazaidizhi = None      # m3u8 link entered by the user
wenjian = None          # name of the download directory (relative to cwd)
url_m3u8_hls = None     # resolved media-playlist URL, set by xiazai1.index()

# Seconds to wait for the server before a request is treated as failed.
_TIMEOUT = 30

# Number of parallel download workers started by ``main``.
_WORKER_COUNT = 30

# Static pool used when fake_useragent is missing or cannot produce a value.
_FALLBACK_USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
    'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
    'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
]

# The URI attribute of an #EXT-X-KEY tag is a quoted string.
_KEY_URI_RE = re.compile(r'URI="([^"]+)"')


def _random_user_agent():
    """Return a random browser User-Agent string."""
    if UserAgent is not None:
        try:
            return UserAgent(use_cache_server=False, verify_ssl=False).random
        except Exception:
            # Deliberate best effort: newer fake_useragent releases appear to
            # reject these keyword arguments, and the library can also fail
            # while loading its data. Either way the static pool is enough.
            pass
    return random.choice(_FALLBACK_USER_AGENTS)


def _http_get(url, headers):
    """GET *url* and return the response; raise on timeout or HTTP error."""
    if requests is None:
        raise RuntimeError('the "requests" package is required for downloading')
    response = requests.get(url, headers=headers, timeout=_TIMEOUT)
    # Without this a 404/403 error page would be saved as if it were data.
    response.raise_for_status()
    return response


def _local_path(name):
    """Return the path of *name* inside the download directory."""
    return os.path.join(wenjian, name)


def _playlist_lines(text):
    """Split playlist *text* into stripped, non-empty lines."""
    return [line.strip() for line in text.splitlines() if line.strip()]


def _resolve_media_playlist(playlist_url, text):
    """Return the URL of the media playlist reachable from *playlist_url*.

    *text* is the body fetched from *playlist_url*. When it already lists
    segments (it contains ``#EXTINF``) the URL itself is returned. Otherwise
    it is treated as a master playlist and its last variant URI is resolved
    against *playlist_url*, so relative paths, absolute paths and full URLs
    all work.
    """
    lines = _playlist_lines(text)
    uris = [line for line in lines if not line.startswith('#')]
    is_media_playlist = any(line.startswith('#EXTINF') for line in lines)
    if is_media_playlist or not uris:
        return playlist_url
    return urljoin(playlist_url, uris[-1])


def _is_segment(line):
    """Return True when the stripped playlist *line* is a ``.ts`` URI."""
    return bool(line) and not line.startswith('#') and '.ts' in line


def _segment_filename(uri):
    """Return the local file name for segment *uri* (no directory, no query)."""
    return urlparse(uri).path.rsplit('/', 1)[-1]


def _key_uri(lines):
    """Return the URI of the first ``#EXT-X-KEY`` tag in *lines*, or None."""
    for line in lines:
        if 'EXT-X-KEY' in line:
            match = _KEY_URI_RE.search(line)
            if match:
                return match.group(1)
    return None


def _localise_line(line, key_uri):
    """Rewrite one playlist *line* so that it references local files.

    The key URI becomes ``key.key`` (other attributes such as ``IV`` are
    kept) and a segment URI becomes its bare file name. Any other line is
    returned unchanged, including its line ending.
    """
    stripped = line.strip()
    if key_uri and 'EXT-X-KEY' in stripped and key_uri in line:
        return line.replace(key_uri, 'key.key')
    if _is_segment(stripped):
        return line.replace(stripped, _segment_filename(stripped))
    return line


class xiazai():
    """Resolve the media playlist behind the user-supplied m3u8 link."""

    def __init__(self):
        """Read the link from ``xiazaidizhi`` and create the download folder.

        The constructor no longer fetches the playlist itself: every caller
        invokes ``savefile()`` right away, so the extra request only
        downloaded the same document twice.
        """
        self.url = xiazaidizhi
        # Folder that receives the playlists, the key and the .ts segments.
        os.makedirs(os.path.join(os.getcwd(), wenjian), exist_ok=True)
        self.headers = {'User-Agent': _random_user_agent()}

    def savefile(self):
        """Fetch the link and return the URL of the media playlist.

        Also stores the directory part of the link in ``self.base_url``.
        Raises an ``OSError`` subclass (``requests.RequestException``) when
        the request fails.
        """
        response = _http_get(self.url, self.headers)
        self.base_url = urljoin(self.url, '.')
        url_m3u8_hls = _resolve_media_playlist(self.url, response.text)
        print(url_m3u8_hls)
        return url_m3u8_hls


class xiazai1():
    """Download a media playlist, its key and its segments."""

    def __init__(self):
        """Pick the User-Agent header used for every request."""
        self.headers = {'User-Agent': _random_user_agent()}

    def index(self):
        """Resolve the media playlist and save it as ``index1.m3u8``.

        The resolved URL is published through the module-level
        ``url_m3u8_hls`` because ``duqu`` reads it from there.
        """
        global url_m3u8_hls
        url_m3u8_hls = xiazai().savefile()
        try:
            text = _http_get(url_m3u8_hls, self.headers).text
            with open(_local_path('index1.m3u8'), 'w', encoding='utf-8') as f:
                f.write(text)
        except OSError:
            # requests' exceptions derive from OSError, so this covers both
            # network failures and a failed write.
            print('下载失败!')

    def tihuan(self):
        """Return the lines of ``index1.m3u8``, or ``[]`` if it is unreadable."""
        try:
            with open(_local_path('index1.m3u8'), 'r', encoding='utf-8') as f:
                return f.read().split('\n')
        except OSError:
            print('下载失败!')
            return []

    def url(self):
        """Return ``(file_names, uris)`` for every segment in the playlist."""
        stripped = (line.strip() for line in self.tihuan())
        uris = [line for line in stripped if _is_segment(line)]
        names = [_segment_filename(uri) for uri in uris]
        return (names, uris)

    def key(self):
        """Return the key URI exactly as written in the playlist, or None."""
        return _key_uri(self.tihuan())

    def index_shengcheng(self):
        """Write ``index.m3u8``, a copy of ``index1.m3u8`` using local files."""
        key_uri = self.key()
        try:
            with open(_local_path('index1.m3u8'), 'r', encoding='utf-8') as f:
                lines = f.readlines()
            with open(_local_path('index.m3u8'), 'w', encoding='utf-8') as f:
                f.writelines(_localise_line(line, key_uri) for line in lines)
        except OSError:
            print('错误!')

    def duqu(self):
        """Download the key, write ``s.txt`` and return the queue of segments.

        Returns a ``Queue`` holding the absolute URL of every segment in
        playback order. Prints a message and exits the program when the
        playlist or the key cannot be downloaded.
        """
        try:
            text = _http_get(url_m3u8_hls, self.headers).text
            lines = _playlist_lines(text)

            key_uri = _key_uri(lines)
            if key_uri:
                key = _http_get(urljoin(url_m3u8_hls, key_uri), self.headers)
                # The key is raw bytes; text mode would corrupt it.
                with open(_local_path('key.key'), 'wb') as f:
                    f.write(key.content)

            # Unbounded: the queue is filled completely before any worker
            # starts, so a size limit would block forever on long playlists.
            ts_queue = Queue()
            names = []
            for line in lines:
                if _is_segment(line):
                    ts_queue.put(urljoin(url_m3u8_hls, line))
                    names.append(_segment_filename(line))

            # Rewritten on every run so a retry does not duplicate entries.
            with open(_local_path('s.txt'), 'w', encoding='utf-8') as f:
                f.writelines("file %s\n" % name for name in names)
            return ts_queue
        except OSError:
            print('连接失败')
            sys.exit(1)

    def shijian(self, dm_time):
        """Print the video duration for *dm_time* seconds."""
        shichang_time = str(datetime.timedelta(seconds=dm_time))
        print('视频时长:%s' % shichang_time)

    def xiazai1(self, ts_queue):
        """Worker loop: download segments from *ts_queue* until it is empty.

        A segment that fails is recorded in ``shibai.txt`` and skipped.
        """
        while True:
            try:
                # empty() followed by get() can block forever when another
                # worker takes the last item in between.
                url = ts_queue.get_nowait()
            except Empty:
                return
            try:
                response = _http_get(url, self.headers)
                with open(_local_path(_segment_filename(url)), 'wb') as fp:
                    fp.write(response.content)
            except OSError:
                with open(_local_path('shibai.txt'), 'a') as fp:
                    fp.write(url + '\n')
                print(url + '下载失败')

    def hebing(self, name):
        """Write and launch ``2.bat``, which merges the segments with ffmpeg.

        *name* is the output file name without the ``.mp4`` extension.
        Windows only (``start`` and ``os.startfile``).
        """
        # The segments live under the current working directory, which is
        # where every other method writes them.
        target_dir = os.path.abspath(wenjian)
        command = 'start /d "%s" ffmpeg -f concat -safe 0 -i s.txt -c copy "%s.mp4"' % (
            target_dir, name)
        with open('2.bat', 'w') as f:
            f.write(command)
        os.startfile("2.bat")


def main():
    """Ask for the link and the folder name, then download everything."""
    global xiazaidizhi, wenjian
    # Example link:
    # http://iqiyi.cdn9-okzy.com/20201019/16908_b0f2428f/index.m3u8
    xiazaidizhi = input("请输入m3u8链接:")
    wenjian = input("请输入保存文件名:")
    start = datetime.datetime.now().replace(microsecond=0)

    # One instance is shared by all workers; they only read its headers.
    downloader = xiazai1()
    downloader.index()
    downloader.index_shengcheng()
    s = downloader.duqu()

    threads = [
        threading.Thread(target=downloader.xiazai1, name='th-' + str(i),
                         kwargs={'ts_queue': s})
        for i in range(_WORKER_COUNT)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    end = datetime.datetime.now().replace(microsecond=0)
    print('下载耗时:' + str(end - start))


if __name__ == '__main__':
    main()
# Observed download time on a later run: 0:01:23
# 下载完会出来
# 外加很多ts文件