aotumandaren
#多线程赋值用字典格式 试下第二种方法 顺便把for循环 用正则表达 看能不能快点
#适应两种 m3u8 读取的格式

import requests
import os
import datetime
import threading
import re
from queue import Queue
import random
import sys
from fake_useragent import UserAgent
#下载耗时:0:00:47
class xiazai():
    """Resolve the media-playlist URL that an m3u8 playlist points to.

    The playlist URL and the output directory name are read from the module
    globals ``xiazaidizhi`` and ``wenjian``; both must be assigned before an
    instance is created.
    """

    # Seconds to wait for the playlist server before the request is abandoned.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the output directory and resolve the playlist URL once."""
        self.url = xiazaidizhi
        # Directory used to store the .ts files.  makedirs() tolerates an
        # already existing directory and also creates missing parents.
        os.makedirs(os.path.join(os.getcwd(), wenjian), exist_ok=True)
        self.headers = {
            'User-Agent': UserAgent(use_cache_server=False, verify_ssl=False).random
        }
        # Filled in by savefile(); lets later calls skip the network request.
        self._resolved_url = None
        self.savefile()

    @staticmethod
    def _resolve_variant_url(playlist_url, playlist_text):
        """Return the absolute URL of the playlist that should be downloaded.

        Args:
            playlist_url: URL the playlist text was fetched from.
            playlist_text: Body of that playlist.

        Returns:
            The last URI line of the playlist resolved against
            ``playlist_url``.  ``playlist_url`` itself is returned when the
            text has no URI line, or when it contains ``#EXTINF`` tags, i.e.
            it already lists media segments rather than variant playlists.
        """
        from urllib.parse import urljoin

        # splitlines() + strip() copes with CRLF endings and with any number
        # of trailing blank lines.
        entries = [line.strip() for line in playlist_text.splitlines()]
        uris = [line for line in entries if line and not line.startswith('#')]
        if not uris or any(line.startswith('#EXTINF') for line in entries):
            return playlist_url
        # urljoin handles relative paths, absolute paths and full URLs alike.
        return urljoin(playlist_url, uris[-1])

    def savefile(self):
        """Fetch ``self.url`` and return the media-playlist URL it refers to.

        The playlist is requested only on the first call; the resolved URL is
        cached on the instance and returned directly afterwards.  The resolved
        URL is printed when it is first computed, and ``self.base_url`` is set
        to the directory part of ``self.url``.

        Returns:
            The absolute URL of the playlist to download.

        Raises:
            requests.RequestException: If the playlist cannot be fetched.
        """
        from urllib.parse import urljoin

        cached = getattr(self, '_resolved_url', None)
        if cached is not None:
            return cached

        r = requests.get(self.url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        url_m3u8_hls = self._resolve_variant_url(self.url, r.text)
        # Directory prefix of the source playlist, with a trailing slash.
        self.base_url = urljoin(self.url, '.')
        print(url_m3u8_hls)
        self._resolved_url = url_m3u8_hls
        return url_m3u8_hls

class xiazai1():
    """Download the segments of an HLS (m3u8) playlist into ``wenjian``.

    Relies on two module globals: ``wenjian`` (output directory name) and
    ``url_m3u8_hls`` (media-playlist URL), which :meth:`index` assigns and
    :meth:`duqu` reads.
    """

    # Seconds to wait for a server before a request is abandoned.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Pick a random User-Agent header for all requests of this instance."""
        self.headers = {
            'User-Agent': UserAgent(use_cache_server=False, verify_ssl=False).random
        }

    # ------------------------------------------------------------------
    # Pure helpers (no I/O)
    # ------------------------------------------------------------------
    @staticmethod
    def _local_path(filename):
        """Return the path of ``filename`` inside the output directory."""
        return os.path.join(wenjian, filename)

    @staticmethod
    def _segment_name(uri):
        """Return the local file name for a segment URI (path basename, no query)."""
        from urllib.parse import urlsplit

        return os.path.basename(urlsplit(uri.strip()).path)

    @staticmethod
    def _key_uri(line):
        """Return the URI attribute of an EXT-X-KEY line, or None if it has none."""
        found = re.search(r'URI="([^"]*)"', line) or re.search(r'URI=([^,\s]+)', line)
        if found and found.group(1):
            return found.group(1)
        return None

    @classmethod
    def _segment_jobs(cls, playlist_url, lines):
        """Return ``(absolute_url, local_name)`` for every segment line.

        A segment line is a non-comment line containing ``.ts``.  Each one is
        resolved against ``playlist_url``.
        """
        from urllib.parse import urljoin

        jobs = []
        for raw in lines:
            entry = raw.strip()
            if '.ts' not in entry or entry.startswith('#'):
                continue
            jobs.append((urljoin(playlist_url, entry), cls._segment_name(entry)))
        return jobs

    @classmethod
    def _localize_line(cls, line):
        """Rewrite one playlist line so that it refers to local files.

        The key URI becomes ``key.key`` and a segment URI becomes its local
        file name; every other line is returned unchanged.  Plain string
        replacement is used so URL characters are never read as regex syntax.
        """
        entry = line.strip()
        if 'EXT-X-KEY' in entry:
            key_uri = cls._key_uri(entry)
            return line.replace(key_uri, 'key.key') if key_uri else line
        if '.ts' in entry and not entry.startswith('#'):
            return line.replace(entry, cls._segment_name(entry))
        return line

    @staticmethod
    def _merge_command(directory, name):
        """Return the cmd line that runs ffmpeg's concat demuxer in ``directory``."""
        return 'start /d "%s" ffmpeg -f concat -safe 0 -i s.txt -c copy "%s.mp4"' % (directory, name)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def index(self):
        """Resolve the media playlist and save a copy as ``index1.m3u8``.

        Assigns the resolved URL to the module global ``url_m3u8_hls``.
        Prints a failure message if the download or the write fails.
        """
        global url_m3u8_hls
        url_m3u8_hls = xiazai().savefile()
        try:
            r = requests.get(url_m3u8_hls, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            r.raise_for_status()
            with open(self._local_path('index1.m3u8'), 'w', encoding='utf-8') as f:
                f.write(r.text)
        except (requests.RequestException, OSError):
            print('下载失败!')

    def tihuan(self):
        """Return the lines of the saved ``index1.m3u8``.

        Returns:
            The file content split on newlines, or an empty list (after
            printing a failure message) when the file cannot be read.
        """
        try:
            with open(self._local_path('index1.m3u8'), 'r', encoding='utf-8') as f:
                return f.read().split('\n')
        except OSError:
            print('下载失败!')
            return []

    def url(self):
        """Return ``(local_names, playlist_uris)`` for every segment line.

        Both lists are in playlist order; ``playlist_uris`` holds the lines as
        they appear in the playlist, ``local_names`` the matching file names.
        """
        dizhi_1 = []
        url_1 = []
        for miyao in self.tihuan():
            entry = miyao.strip()
            if '.ts' in entry and not entry.startswith('#'):
                dizhi_1.append(self._segment_name(entry))
                url_1.append(entry)
        return (dizhi_1, url_1)

    def key(self):
        """Return the key URI of the first EXT-X-KEY line that has one, else None."""
        for miyao in self.tihuan():
            if 'EXT-X-KEY' in miyao:
                key_uri = self._key_uri(miyao)
                if key_uri:
                    return key_uri
        return None

    def index_shengcheng(self):
        """Write ``index.m3u8``: a copy of ``index1.m3u8`` pointing at local files.

        Works for playlists without an EXT-X-KEY line as well.  Prints an
        error message if either file cannot be read or written.
        """
        try:
            with open(self._local_path('index1.m3u8'), 'r', encoding='utf-8') as src:
                lines = src.readlines()
            with open(self._local_path('index.m3u8'), 'w', encoding='utf-8') as dst:
                dst.writelines(self._localize_line(line) for line in lines)
        except OSError:
            print('错误!')

    def _save_key(self, playlist_url, lines):
        """Download the key named by each EXT-X-KEY line into ``key.key``."""
        from urllib.parse import urljoin

        for line in lines:
            if 'EXT-X-KEY' not in line:
                continue
            key_uri = self._key_uri(line)
            if not key_uri:
                continue
            r = requests.get(urljoin(playlist_url, key_uri), headers=self.headers,
                             timeout=self.REQUEST_TIMEOUT)
            r.raise_for_status()
            # Written as raw bytes: decoding the key as text could alter it.
            with open(self._local_path('key.key'), 'wb') as f:
                f.write(r.content)

    def duqu(self):
        """Fetch the media playlist and queue every segment URL for download.

        Also saves the decryption key (if the playlist names one) and writes
        ``s.txt``, the file list for ffmpeg's concat demuxer.

        Returns:
            A ``Queue`` holding the absolute URL of every segment.

        Exits the process after printing a message when the playlist URL is
        not available or a request / file operation fails.
        """
        try:
            playlist_url = url_m3u8_hls
        except NameError:
            # index() has not assigned the playlist URL yet.
            print('连接失败')
            sys.exit()
        try:
            r = requests.get(playlist_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            r.raise_for_status()
            text_bytes = r.text.splitlines()
            self._save_key(playlist_url, text_bytes)

            jobs = self._segment_jobs(playlist_url, text_bytes)
            # Unbounded: the queue is filled before any worker starts, so a
            # size limit would block here on long playlists.
            ts_queue = Queue()
            for segment_url, _ in jobs:
                ts_queue.put(segment_url)

            # Rewritten from scratch on every run so entries never duplicate.
            with open(self._local_path('s.txt'), 'w') as concat:
                concat.writelines("file %s\n" % filename for _, filename in jobs)
            return ts_queue
        except (requests.RequestException, OSError):
            print('连接失败')
            sys.exit()

    def shijian(self, dm_time):
        """Print the video duration given its length in seconds."""
        shichang_time = str(datetime.timedelta(seconds=dm_time))
        print('视频时长:%s' % shichang_time)

    def xiazai1(self, ts_queue):
        """Worker loop: download queued segment URLs until the queue is empty.

        Each segment is saved in the output directory under its local name.
        A URL that fails to download is appended to ``shibai.txt`` and
        reported on stdout.  To merge afterwards:
        ``ffmpeg -f concat -safe 0 -i s.txt -c copy output.mp4``.
        """
        from queue import Empty

        while True:
            # get_nowait() instead of empty()+get(): another worker may take
            # the last item in between, and a blocking get() would then hang.
            try:
                url = ts_queue.get_nowait()
            except Empty:
                return
            try:
                r = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
                r.raise_for_status()
                with open(self._local_path(self._segment_name(url)), 'wb') as fp:
                    fp.write(r.content)
            except (requests.RequestException, OSError):
                with open(self._local_path('shibai.txt'), 'a') as fp:
                    fp.write(url + '\n')
                print(url + '下载失败')

    def hebing(self, name):
        """Merge the downloaded segments into ``<name>.mp4`` with ffmpeg.

        Writes ``2.bat`` in the current directory and launches it.
        Windows-only: uses the cmd ``start`` command and ``os.startfile``.
        """
        # Same directory the segments were written to (relative to the
        # current working directory unless ``wenjian`` is absolute).
        target_dir = os.path.abspath(wenjian)
        with open('2.bat', 'w') as f:
            f.write(self._merge_command(target_dir, name))
        os.startfile("2.bat")

if __name__ == '__main__':
    # Example playlist URL:
    #   http://iqiyi.cdn9-okzy.com/20201019/16908_b0f2428f/index.m3u8
    #
    # NOTE: xiazaidizhi and wenjian are read as module globals by the xiazai
    # and xiazai1 classes, so they must stay module-level names and be
    # assigned before any instance is created.  Surrounding whitespace from a
    # pasted value is stripped.
    xiazaidizhi = input("请输入m3u8链接:").strip()
    wenjian = input("请输入保存文件名:").strip()
    start = datetime.datetime.now().replace(microsecond=0)

    # One downloader is shared by every step and every worker thread; the
    # worker method only reads the instance's request headers.
    downloader = xiazai1()
    downloader.index()
    downloader.index_shengcheng()
    s = downloader.duqu()

    # 30 worker threads drain the shared segment queue.
    threads = [
        threading.Thread(target=downloader.xiazai1, name='th-' + str(i), kwargs={'ts_queue': s})
        for i in range(30)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    end = datetime.datetime.now().replace(microsecond=0)
    print('下载耗时:' + str(end - start))


#下载耗时:0:01:23

下载完会出来

 

 外加很多ts文件

分类:

技术点:

相关文章:

  • 2021-08-07
  • 2021-07-05
  • 2021-05-28
  • 2022-12-23
  • 2022-12-23
  • 2021-12-09
  • 2022-01-07
  • 2021-11-17
猜你喜欢
  • 2021-11-20
  • 2021-11-20
  • 2021-12-07
  • 2022-01-02
  • 2022-12-23
  • 2022-12-23
  • 2021-09-16
相关资源
相似解决方案