#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2020/12/21 上午8:51
# @Author : SR
# @Email : srcoder@1163.com
# @File : spider.py
# @Software: PyCharm
import os
import requests
from multiprocessing.pool import ThreadPool
class SpiderMovieFromChenYu:
    """Download numbered ``.ts`` segments and merge them into one video file.

    Segments are fetched with :meth:`get_ts` (safe to call from a thread pool,
    one segment number per call), failed numbers are collected in
    ``fail_ts_list``, :meth:`check_ts` retries them and then calls
    :meth:`get_video` to concatenate every segment into a single file.
    """

    # printf-style template of a segment URL; ``%03d`` receives the number.
    TS_URL_TEMPLATE = 'https://sina.com-h-sina.com/20180815/9998_f9aa34bf/1000k/hls/c0cdc4673f4%03d.ts'
    # File name of the merged video inside ``save_movie_path``.
    MOVIE_FILE_NAME = '明日的我与昨日你的约会.mp4'
    # Seconds to wait for a single segment request.
    REQUEST_TIMEOUT = 60

    def __init__(self, save_ts_path, save_movie_path, fail_ts_list=None, ):
        """Store the target directories and the request headers.

        :param save_ts_path: directory the ``.ts`` segments are written to.
        :param save_movie_path: directory the merged video is written to.
        :param fail_ts_list: optional list of segment numbers that already
            failed; a fresh list is created per instance when omitted.
        """
        self.save_ts_path = save_ts_path
        self.save_movie_path = save_movie_path
        # A ``[]`` default would be shared by every instance of the class.
        self.fail_ts_list = [] if fail_ts_list is None else fail_ts_list
        self.headers = {
            'Referer': 'http://www.chenyutv.com/',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
        }

    def mkdir_directory(self):
        """Create the segment and video directories if they do not exist."""
        # makedirs handles nested paths and tolerates an existing directory.
        os.makedirs(self.save_ts_path, exist_ok=True)
        os.makedirs(self.save_movie_path, exist_ok=True)

    def _mark_failed(self, number):
        """Record ``number`` in the failure list (once)."""
        if number not in self.fail_ts_list:
            self.fail_ts_list.append(number)

    def _mark_succeeded(self, number):
        """Drop ``number`` from the failure list if it is present."""
        if number in self.fail_ts_list:
            self.fail_ts_list.remove(number)

    def get_ts(self, number, flag=False):
        """Download segment ``number`` unless it is already on disk.

        :param number: segment number substituted into ``TS_URL_TEMPLATE``.
        :param flag: ``True`` when this call is a retry of a previously
            failed segment; on success the number is then removed from
            ``fail_ts_list``.

        Any request/IO error or non-200 response records the number in
        ``fail_ts_list`` instead of raising, so one bad segment does not
        abort a whole thread-pool run.
        """
        play_url = self.TS_URL_TEMPLATE % number
        ts_name = play_url.split('/')[-1]  # file name of the segment
        ts_path = os.path.join(self.save_ts_path, ts_name)

        if os.path.exists(ts_path):
            # Already downloaded; a retry must still clear the failure entry,
            # otherwise check_ts would never finish.
            if flag:
                self._mark_succeeded(number)
            return

        try:
            # requests.get closes its internal session when the call returns.
            response = requests.get(play_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                self._mark_failed(number)
                return
            with open(ts_path, 'wb') as f:
                f.write(response.content)
        except Exception:
            # Deliberately broad: downloading is best-effort and the failure
            # is recorded so that check_ts can retry it.
            self._mark_failed(number)
            return

        if flag:
            self._mark_succeeded(number)

    def check_ts(self, max_rounds=5):
        """Retry failed segments, then merge everything into the video.

        :param max_rounds: maximum number of retry passes over the failure
            list; ``None`` retries until every segment succeeds.

        Segments that still fail after the last pass are reported and the
        merge is performed without them.
        """
        print("开始检查:")
        print(self.fail_ts_list)
        rounds = 0
        while self.fail_ts_list and (max_rounds is None or rounds < max_rounds):
            rounds += 1
            # Iterate over a snapshot: get_ts mutates fail_ts_list.
            for number in list(self.fail_ts_list):
                self.get_ts(number, True)
                if number not in self.fail_ts_list:
                    print("%s:下载完毕" % number)
            print(self.fail_ts_list)
        if self.fail_ts_list:
            print("以下 ts 文件仍未下载成功, 合并后的视频将缺少这些片段: %s" % self.fail_ts_list)
        else:
            print("ts 文件下载完成!")
        self.get_video()

    def get_video(self):
        """Concatenate all ``.ts`` segments, in numeric order, into the video.

        The output file is rewritten from scratch on every call.
        """
        # Only real segments: skips the merged video or other stray files.
        ts_names = [name for name in os.listdir(self.save_ts_path) if name.endswith('.ts')]
        # Names share one prefix and are zero-padded to three digits, so
        # (length, name) yields numeric order even past segment 999.
        ts_names.sort(key=lambda name: (len(name), name))
        movie_path = os.path.join(self.save_movie_path, self.MOVIE_FILE_NAME)
        # 'wb' rather than 'ab': a rerun must not append a second copy.
        with open(movie_path, 'wb') as movie:
            for ts in ts_names:
                with open(os.path.join(self.save_ts_path, ts), 'rb') as segment:
                    movie.write(segment.read())
                print("%s:写入完成" % ts)
if __name__ == '__main__':
    # Script entry point: ask for the inclusive segment range and the two
    # output directories, download all segments concurrently, then retry
    # failures and merge the result.
    min_number = int(input('请输入ts的起始数字>>:').strip())
    # +1 because range() excludes its upper bound and the user enters the
    # last segment number inclusively.
    max_number = int(input('请输入ts的结尾数字>>:').strip()) + 1
    save_ts_path = input('请输入ts保存文件路径>>:').strip()
    save_movie_path = input('请输入视频保存文件路径>>:').strip()
    spider = SpiderMovieFromChenYu(save_ts_path, save_movie_path)
    spider.mkdir_directory()
    pool = ThreadPool(100)
    pool.map(spider.get_ts, range(min_number, max_number))
    pool.close()
    pool.join()
    # get_ts() requires a segment number and would raise TypeError here;
    # check_ts() is the step that retries failures and builds the video.
    spider.check_ts()
# 相关文章:
# - python爬取视频网站m3u8视频,下载.ts后缀文件,合并成整视频 2021-12-29
# - FFmpeg合并多个ts视频文件 2021-07-18
# - Java 合并多个MP4视频文件 2022-12-23
# - 将ts文件合并为mp4命令 2021-09-05
# - ffmpeg合并mp4(视频)和m4a(音频)文件 2021-11-05
# - 如何下载网络直播视频,并将下载的.ts文件合并转化成.MP4文件 2021-12-13