================================
工具准备:
================================
下载与 Chrome 浏览器版本一致的 chromedriver, 以下是 chromedriver 的国内下载镜像:
https://npm.taobao.org/mirrors/chromedriver
将 chromedriver.exe 复制到 python 的scripts目录中, 比如 C:\Anaconda3\Scripts\
并将C:\Anaconda3\Scripts\加到Windows 环境变量PATH 中.
================================
安装 selenium python 包
================================
pip install selenium
================================
selenium 的更多信息
================================
selenium 不仅支持Python, 还支持Java/C#
https://www.selenium.dev/documentation/zh-cn/webdriver/driver_requirements/
https://www.selenium.dev/documentation/zh-cn/selenium_installation/installing_webdriver_binaries/
本文共有好多个下载脚本, 是一个不断完善的过程, 所以, 最后一个下载脚本是最通用, 最完美的.
================================
根据章节序号推算单章url地址, 然后下载
================================
from selenium import webdriver
from selenium.webdriver.common.by import By

# Chapter pages are addressed as <home_url><page number>.html.
home_url = "https://www.jingcaiyuedu6.com/novel/CW8MY3/"
chapter_start = 1
chapter_end = 39
# Offset between the chapter number and the page number used in the URL.
start_page_id = 0

web = webdriver.Chrome()
try:
    # Collect the pieces and join once at the end instead of repeatedly
    # concatenating an ever-growing string.
    parts = ["小说:穿越种田之将门妻", "\n" + "\n" + "\n"]
    for i in range(chapter_start, chapter_end + 1):
        url = home_url + str(i + start_page_id) + ".html"
        parts.append(
            "\n" + "\n" + "\n" + "======================" + "\n"
            + "第" + str(i) + "章" + "\n"
        )
        web.get(url)
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3; this form also works on Selenium 3.
        parts.append(web.find_element(By.ID, "content").text)
    print("".join(parts))
finally:
    # quit() ends the whole browser session (including chromedriver);
    # close() would only close the current window.
    web.quit()
================================
从列表页提取单章url, 然后下载单章文本
================================
# ========================================
# Approach 1: digit-by-digit transliteration.
# Known limitation: positional units are ignored, so 10 becomes "一零".
# ========================================
def num_to_char(num):
    """Return *num* with every decimal digit replaced by its Chinese numeral."""
    num_dict = {"0": "零", "1": "一", "2": "二", "3": "三", "4": "四",
                "5": "五", "6": "六", "7": "七", "8": "八", "9": "九"}
    return "".join(num_dict[digit] for digit in str(num))


# ========================================
# Approach 2: proper Chinese numerals
# (adapted from "num2chinese" by yunhgu, 2021/8/24).
# ========================================
_MAPPING = ('零', '一', '二', '三', '四', '五', '六', '七', '八', '九',)
_P0 = ('', '十', '百', '千',)
_S4, _S8, _S16 = 10 ** 4, 10 ** 8, 10 ** 16
_MIN, _MAX = 0, 9999999999999999


class NotIntegerError(Exception):
    """Raised when the value to convert is not made of decimal digits."""


class OutOfRangeError(Exception):
    """Raised when the value to convert is outside [_MIN, _MAX]."""


class Num2Chinese:
    """Convert non-negative integers (up to 16 digits) to Chinese numerals."""

    def convert(self, number: int):
        """Return the Chinese numeral for *number*.

        :param number: integer in the range [_MIN, _MAX]
        :return: Chinese numeral string, e.g. 110 -> "一百一十"
        :raises NotIntegerError: if ``str(number)`` is not all digits
        :raises OutOfRangeError: if *number* is outside [_MIN, _MAX]
        """
        return self._to_chinese(number)

    def _to_chinese(self, num):
        """Validate *num* and dispatch on its magnitude."""
        if not str(num).isdigit():
            raise NotIntegerError('%s is not a integer.' % num)
        if num < _MIN or num > _MAX:
            raise OutOfRangeError('%d out of range[%d, %d)' % (num, _MIN, _MAX))
        if num < _S4:
            return self._to_chinese4(num)
        if num < _S8:
            return self._to_chinese8(num)
        return self._to_chinese16(num)

    @staticmethod
    def _to_chinese4(num):
        """Convert 0 <= num < 10**4."""
        assert (0 <= num < _S4)
        if num < 10:
            return _MAPPING[num]

        # Least-significant digit first.
        digits = []
        while num >= 10:
            digits.append(num % 10)
            num = num // 10
        digits.append(num)
        count = len(digits)

        # The text is built back-to-front (unit before digit) and reversed below.
        result = ''
        for idx, val in enumerate(digits):
            if val != 0:
                result += _P0[idx] + _MAPPING[val]
                # One "零" stands in for a run of zeros above a non-zero digit.
                if idx < count - 1 and digits[idx + 1] == 0:
                    result += '零'
        result = result[::-1]

        # Only 10-19 drop the leading "一" ("十五"). The previous blanket
        # replace of "一十" also corrupted 110 -> "一百十" and 1010 -> "一千零十".
        if result.startswith('一十'):
            result = result[1:]
        return result

    def _to_chinese8(self, num):
        """Convert num < 10**8 by splitting at 万 (10**4)."""
        assert (num < _S8)
        to4 = self._to_chinese4
        if num < _S4:
            return to4(num)
        high, low = num // _S4, num % _S4
        if low == 0:
            return to4(high) + '万'
        # A low part below 1000 needs a filler "零" after 万.
        if low < _S4 // 10:
            return to4(high) + '万零' + to4(low)
        return to4(high) + '万' + to4(low)

    def _to_chinese16(self, num):
        """Convert num < 10**16 by splitting at 亿 (10**8)."""
        assert (num < _S16)
        to8 = self._to_chinese8
        high, low = num // _S8, num % _S8
        if low == 0:
            return to8(high) + '亿'
        # A low part below 10**7 needs a filler "零" after 亿.
        if low < _S8 // 10:
            return to8(high) + '亿零' + to8(low)
        return to8(high) + '亿' + to8(low)


# ========================================
# Read each chapter's URL from the list page, then download the chapter text.
# ========================================
list_url = "https://www.baihexs.com/0/54/"
chapter_start = 1
chapter_end = 306
# NOTE(review): this XPath literal is truncated in the original document
# (only '''//*[@> survives). It is reproduced as found; restore the real
# expression before running. The neighbouring commented-out attempts used
# the id "contents" -- confirm against the page.
content_xpath = '''//*[@>'''


def main():
    """Download chapters chapter_start..chapter_end and print them."""
    # Imported here so the converter above can be imported without Selenium.
    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    num2chinese = Num2Chinese()
    print("小说:掌家小娘子" + "\n" + "\n" + "\n")

    web = webdriver.Chrome()
    try:
        for i in range(chapter_start, chapter_end + 1):
            # Chapter links on the list page use Chinese numerals.
            chinese_chapter_name = "第" + num2chinese.convert(i) + "章"

            # The same browser also opens the chapter, so go back to the
            # list page before looking up the next link.
            web.get(list_url)
            try:
                url = web.find_element(
                    By.PARTIAL_LINK_TEXT, chinese_chapter_name
                ).get_attribute("href")
            except WebDriverException:
                # Best effort: a missing link just marks the chapter unavailable.
                url = ""

            if url:
                web.get(url)
                content = web.find_element(By.XPATH, content_xpath).text
            else:
                content = "不提供下载"

            chapter_text = ("\n" + "\n" + "\n" + "======================" + "\n"
                            + "第" + str(i) + "章" + "\n")
            print(chapter_text + content)
    finally:
        # quit() also terminates chromedriver; close() only closes the window.
        web.quit()


if __name__ == "__main__":
    main()
================================
每章支持多个分页
作了性能优化
自动输出到文件
增加番外篇下载
增加列表页面点击"显示全部页面"功能
代码逻辑优化
================================
import time
from contextlib import ExitStack
from datetime import datetime


# ========================================
# Approach 1: digit-by-digit transliteration.
# Known limitation: positional units are ignored, so 10 becomes "一零".
# ========================================
def num_to_char(num):
    """Return *num* with every decimal digit replaced by its Chinese numeral."""
    num_dict = {"0": "零", "1": "一", "2": "二", "3": "三", "4": "四",
                "5": "五", "6": "六", "7": "七", "8": "八", "9": "九"}
    return "".join(num_dict[digit] for digit in str(num))


# ========================================
# Approach 2: proper Chinese numerals
# (adapted from "num2chinese" by yunhgu, 2021/8/24).
# ========================================
_MAPPING = ('零', '一', '二', '三', '四', '五', '六', '七', '八', '九',)
_P0 = ('', '十', '百', '千',)
_S4, _S8, _S16 = 10 ** 4, 10 ** 8, 10 ** 16
_MIN, _MAX = 0, 9999999999999999


class NotIntegerError(Exception):
    """Raised when the value to convert is not made of decimal digits."""


class OutOfRangeError(Exception):
    """Raised when the value to convert is outside [_MIN, _MAX]."""


class Num2Chinese:
    """Convert non-negative integers (up to 16 digits) to Chinese numerals."""

    def convert(self, number: int):
        """Return the Chinese numeral for *number*.

        :param number: integer in the range [_MIN, _MAX]
        :return: Chinese numeral string, e.g. 110 -> "一百一十"
        :raises NotIntegerError: if ``str(number)`` is not all digits
        :raises OutOfRangeError: if *number* is outside [_MIN, _MAX]
        """
        return self._to_chinese(number)

    def _to_chinese(self, num):
        """Validate *num* and dispatch on its magnitude."""
        if not str(num).isdigit():
            raise NotIntegerError('%s is not a integer.' % num)
        if num < _MIN or num > _MAX:
            raise OutOfRangeError('%d out of range[%d, %d)' % (num, _MIN, _MAX))
        if num < _S4:
            return self._to_chinese4(num)
        if num < _S8:
            return self._to_chinese8(num)
        return self._to_chinese16(num)

    @staticmethod
    def _to_chinese4(num):
        """Convert 0 <= num < 10**4."""
        assert (0 <= num < _S4)
        if num < 10:
            return _MAPPING[num]

        # Least-significant digit first.
        digits = []
        while num >= 10:
            digits.append(num % 10)
            num = num // 10
        digits.append(num)
        count = len(digits)

        # The text is built back-to-front (unit before digit) and reversed below.
        result = ''
        for idx, val in enumerate(digits):
            if val != 0:
                result += _P0[idx] + _MAPPING[val]
                # One "零" stands in for a run of zeros above a non-zero digit.
                if idx < count - 1 and digits[idx + 1] == 0:
                    result += '零'
        result = result[::-1]

        # Only 10-19 drop the leading "一" ("十五"). The previous blanket
        # replace of "一十" also corrupted 110 -> "一百十" and 1010 -> "一千零十".
        if result.startswith('一十'):
            result = result[1:]
        return result

    def _to_chinese8(self, num):
        """Convert num < 10**8 by splitting at 万 (10**4)."""
        assert (num < _S8)
        to4 = self._to_chinese4
        if num < _S4:
            return to4(num)
        high, low = num // _S4, num % _S4
        if low == 0:
            return to4(high) + '万'
        # A low part below 1000 needs a filler "零" after 万.
        if low < _S4 // 10:
            return to4(high) + '万零' + to4(low)
        return to4(high) + '万' + to4(low)

    def _to_chinese16(self, num):
        """Convert num < 10**16 by splitting at 亿 (10**8)."""
        assert (num < _S16)
        to8 = self._to_chinese8
        high, low = num // _S8, num % _S8
        if low == 0:
            return to8(high) + '亿'
        # A low part below 10**7 needs a filler "零" after 亿.
        if low < _S8 // 10:
            return to8(high) + '亿零' + to8(low)
        return to8(high) + '亿' + to8(low)


# ========================================
# Configuration
# ========================================
file_path = r"D:\\"
story_name = "侯府小财迷_唐初八"
list_url = "https://www.x52dus.com/xuanhuan/121760/"
# True: chapter links use Chinese numerals; False: Arabic digits.
chinese_chapter_id_flag = False
# Number of pages each chapter is split into.
sub_page_count = 2
# Suffix of the first extra page, e.g. https://www.mht99.com/98886/82000964_1.html
first_sub_page_url_index = 1
chapter_start = 1
chapter_end = 666
# NOTE(review): this XPath literal is truncated in the original document
# (only '''//*[@> survives). It is reproduced as found; restore the real
# expression before running.
content_tag_xpath = '''//*[@>'''
# Whether the list page needs a click on its "show all" link first.
need_click_show_whole_list = False
show_whole_list_link_ele_id = """yc"""
# Chapter links are found by partial text match, so a chapter numbered 001
# also matches 1001. From this chapter number on, the second match is used.
choose_2nd_url_from_chapter_id = 1000
# URLs of the bonus (番外) chapters.
appendix_chapter_urls = [
]


def get_sub_page_url(chapter_url, sub_page_count, first_sub_page_url_index, sub_page_id):
    """Return the URL of one page of a chapter.

    :param chapter_url: URL of the chapter's first page
    :param sub_page_count: number of pages per chapter
    :param first_sub_page_url_index: first index that gets a "_<n>" suffix
    :param sub_page_id: index of the requested page
    :return: *chapter_url* itself for the first page, otherwise the URL with
        "_<sub_page_id>" inserted before ".html"
    """
    if sub_page_count == 0 or sub_page_id < first_sub_page_url_index:
        return chapter_url
    # https://www.mht99.com/98886/82000964.html
    # -> https://www.mht99.com/98886/82000964_1.html
    return chapter_url.replace(".html", "_" + str(sub_page_id) + ".html")


def output(text, file_name):
    """Print *text* and append it (plus a newline) to *file_name*.

    :param text: text to emit
    :param file_name: path of the UTF-8 output file
    """
    print(text)
    with open(file_name, 'a+', encoding='utf-8') as f:
        f.write(text + "\n")


def download_chapter(chapter_url, file_name, chapter_webdriver):
    """Download every page of one chapter and return the combined text.

    Reads the module-level ``sub_page_count``, ``first_sub_page_url_index``
    and ``content_tag_xpath`` settings.

    :param chapter_url: URL of the chapter's first page; empty if not found
    :param file_name: output file that receives per-page failure messages
    :param chapter_webdriver: Selenium driver used to load the pages
    :return: chapter text, or "不提供下载" when *chapter_url* is empty
    """
    if not chapter_url:
        return "不提供下载"

    # Imported here so the pure helpers in this snippet can be imported
    # without Selenium installed.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    pages = []
    for j in range(sub_page_count):
        sub_page_id = j + first_sub_page_url_index - 1
        sub_page_url = get_sub_page_url(chapter_url, sub_page_count,
                                        first_sub_page_url_index, sub_page_id)
        try:
            try:
                chapter_webdriver.get(sub_page_url)
            except WebDriverException:
                # Wait a minute and retry once.
                time.sleep(60)
                chapter_webdriver.get(sub_page_url)
            content_tag = chapter_webdriver.find_element(By.XPATH, content_tag_xpath)
            pages.append("\n" + content_tag.text)
        except Exception:
            # Best effort: record the failed page and carry on. Unlike a bare
            # "except:", this does not swallow KeyboardInterrupt.
            output("####第" + str(sub_page_id) + "页:" + "下载失败", file_name)
    return "".join(pages)


def _find_chapter_url(list_driver, chapter_name, chapter_no):
    """Return the href of the list-page link matching *chapter_name*, or ""."""
    from selenium.webdriver.common.by import By

    try:
        links = list_driver.find_elements(By.PARTIAL_LINK_TEXT, chapter_name)
        urls = [link.get_attribute("href") for link in links]
    except Exception as e:
        print(e)
        return ""
    if not urls:
        return ""
    if chapter_no < choose_2nd_url_from_chapter_id or len(urls) == 1:
        return urls[0]
    return urls[1]


def _write_chapter(chapter_name, chapter_url, file_name, chapter_webdriver):
    """Download one chapter and emit its title block followed by its text."""
    chapter_content = download_chapter(chapter_url, file_name, chapter_webdriver)
    chapter_title_text = ("\n" + "\n" + "\n" + "======================" + "\n"
                          + chapter_name + "\n")
    output(chapter_title_text + chapter_content, file_name)


def main():
    """Download the configured chapters and appendix into the output file."""
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    num2chinese = Num2Chinese()
    file_name = file_path + story_name + ".txt"
    output(story_name, file_name)
    start_time = datetime.now()
    output("下载开始时刻:" + start_time.strftime("%c"), file_name)

    with ExitStack() as stack:
        # quit() (not close()) so both chromedriver processes end, even on error.
        web = webdriver.Chrome()
        stack.callback(web.quit)
        web2 = webdriver.Chrome()
        stack.callback(web2.quit)
        # Implicit wait of 60 seconds, see https://www.cnblogs.com/mengyu/p/6972968.html
        web2.implicitly_wait(60)

        # "web" stays on the list page for URL lookups; "web2" loads chapters.
        web.get(list_url)
        if need_click_show_whole_list:
            click_tag = web.find_element(By.ID, show_whole_list_link_ele_id)
            # Relies on the page providing jQuery.
            web.execute_script("$(arguments[0]).click()", click_tag)

        for i in range(chapter_start, chapter_end + 1):
            # If the site zero-pads Arabic numbers (e.g. "001"), pad
            # chapter_id here, for instance with str(i).zfill(3).
            chapter_id = num2chinese.convert(i) if chinese_chapter_id_flag else str(i)
            chinese_chapter_name = "第" + chapter_id + "章"
            chapter_url = _find_chapter_url(web, chinese_chapter_name, i)
            _write_chapter(chinese_chapter_name, chapter_url, file_name, web2)

        # Bonus chapters.
        for idx, chapter_url in enumerate(appendix_chapter_urls, start=1):
            chinese_chapter_name = "番外:" + str(idx)
            print(chinese_chapter_name)
            _write_chapter(chinese_chapter_name, chapter_url, file_name, web2)

        output("\n" + "\n" + "\n" + "======================" + "\n", file_name)
        end_time = datetime.now()
        output("下载结束时刻:" + end_time.strftime("%c"), file_name)
        total_seconds = (end_time - start_time).total_seconds()
        output("下载耗时:" + str(total_seconds) + "秒", file_name)


if __name__ == "__main__":
    main()
================================
适配正文内容 xpath 不固定的网站
================================
import time
from contextlib import ExitStack
from datetime import datetime


# ========================================
# Approach 1: digit-by-digit transliteration.
# Known limitation: positional units are ignored, so 10 becomes "一零".
# ========================================
def num_to_char(num):
    """Return *num* with every decimal digit replaced by its Chinese numeral."""
    num_dict = {"0": "零", "1": "一", "2": "二", "3": "三", "4": "四",
                "5": "五", "6": "六", "7": "七", "8": "八", "9": "九"}
    return "".join(num_dict[digit] for digit in str(num))


# ========================================
# Approach 2: proper Chinese numerals
# (adapted from "num2chinese" by yunhgu, 2021/8/24).
# ========================================
_MAPPING = ('零', '一', '二', '三', '四', '五', '六', '七', '八', '九',)
_P0 = ('', '十', '百', '千',)
_S4, _S8, _S16 = 10 ** 4, 10 ** 8, 10 ** 16
_MIN, _MAX = 0, 9999999999999999


class NotIntegerError(Exception):
    """Raised when the value to convert is not made of decimal digits."""


class OutOfRangeError(Exception):
    """Raised when the value to convert is outside [_MIN, _MAX]."""


class Num2Chinese:
    """Convert non-negative integers (up to 16 digits) to Chinese numerals."""

    def convert(self, number: int):
        """Return the Chinese numeral for *number*.

        :param number: integer in the range [_MIN, _MAX]
        :return: Chinese numeral string, e.g. 110 -> "一百一十"
        :raises NotIntegerError: if ``str(number)`` is not all digits
        :raises OutOfRangeError: if *number* is outside [_MIN, _MAX]
        """
        return self._to_chinese(number)

    def _to_chinese(self, num):
        """Validate *num* and dispatch on its magnitude."""
        if not str(num).isdigit():
            raise NotIntegerError('%s is not a integer.' % num)
        if num < _MIN or num > _MAX:
            raise OutOfRangeError('%d out of range[%d, %d)' % (num, _MIN, _MAX))
        if num < _S4:
            return self._to_chinese4(num)
        if num < _S8:
            return self._to_chinese8(num)
        return self._to_chinese16(num)

    @staticmethod
    def _to_chinese4(num):
        """Convert 0 <= num < 10**4."""
        assert (0 <= num < _S4)
        if num < 10:
            return _MAPPING[num]

        # Least-significant digit first.
        digits = []
        while num >= 10:
            digits.append(num % 10)
            num = num // 10
        digits.append(num)
        count = len(digits)

        # The text is built back-to-front (unit before digit) and reversed below.
        result = ''
        for idx, val in enumerate(digits):
            if val != 0:
                result += _P0[idx] + _MAPPING[val]
                # One "零" stands in for a run of zeros above a non-zero digit.
                if idx < count - 1 and digits[idx + 1] == 0:
                    result += '零'
        result = result[::-1]

        # Only 10-19 drop the leading "一" ("十五"). The previous blanket
        # replace of "一十" also corrupted 110 -> "一百十" and 1010 -> "一千零十".
        if result.startswith('一十'):
            result = result[1:]
        return result

    def _to_chinese8(self, num):
        """Convert num < 10**8 by splitting at 万 (10**4)."""
        assert (num < _S8)
        to4 = self._to_chinese4
        if num < _S4:
            return to4(num)
        high, low = num // _S4, num % _S4
        if low == 0:
            return to4(high) + '万'
        # A low part below 1000 needs a filler "零" after 万.
        if low < _S4 // 10:
            return to4(high) + '万零' + to4(low)
        return to4(high) + '万' + to4(low)

    def _to_chinese16(self, num):
        """Convert num < 10**16 by splitting at 亿 (10**8)."""
        assert (num < _S16)
        to8 = self._to_chinese8
        high, low = num // _S8, num % _S8
        if low == 0:
            return to8(high) + '亿'
        # A low part below 10**7 needs a filler "零" after 亿.
        if low < _S8 // 10:
            return to8(high) + '亿零' + to8(low)
        return to8(high) + '亿' + to8(low)


# ========================================
# Configuration
# ========================================
file_path = r"D:\\"
story_name = "穿越之农家有女"
list_url = "https://www.kubiji.org/165779/"
# True: chapter links use Chinese numerals; False: Arabic digits.
chinese_chapter_id_flag = True
# Number of pages each chapter is split into.
sub_page_count = 1
# Suffix of the first extra page, e.g. https://www.mht99.com/98886/82000964_1.html
first_sub_page_url_index = 1
chapter_start = 1
chapter_end = 507
# NOTE(review): this XPath literal is truncated in the original document
# (only '''//*[@> survives) and is reproduced as found. In this version
# download_chapter() builds its own XPath and does not read this setting.
content_tag_xpath = '''//*[@>'''
# Whether the list page needs a click on its "show all" link first.
need_click_show_whole_list = False
show_whole_list_link_ele_id = """yc"""
# Chapter links are found by partial text match, so a chapter numbered 001
# also matches 1001. From this chapter number on, the second match is used.
choose_2nd_url_from_chapter_id = 1000
# URLs of the bonus (番外) chapters. When they are numbered consecutively
# they can also be generated in a loop, e.g.
# "https://m.xinqingdou.net/84964/" + str(373110 + i) + ".html".
appendix_chapter_urls = [
    """https://www.kubiji.org/165779/4564577.html""",
    """https://www.kubiji.org/165779/4565113.html""",
    """https://www.kubiji.org/165779/4565541.html""",
    """https://www.kubiji.org/165779/4566038.html""",
    """https://www.kubiji.org/165779/4566433.html""",
    """https://www.kubiji.org/165779/4566834.html""",
    """https://www.kubiji.org/165779/4567957.html""",
    """https://www.kubiji.org/165779/4568025.html""",
    """https://www.kubiji.org/165779/4568840.html""",
    """https://www.kubiji.org/165779/4569742.html""",
    """https://www.kubiji.org/165779/4570705.html""",
    """https://www.kubiji.org/165779/4571203.html""",
    """https://www.kubiji.org/165779/4571557.html""",
    """https://www.kubiji.org/165779/4571990.html""",
    """https://www.kubiji.org/165779/4572324.html""",
    """https://www.kubiji.org/165779/4572716.html""",
    """https://www.kubiji.org/165779/4573193.html""",
    """https://www.kubiji.org/165779/4574522.html""",
    """https://www.kubiji.org/165779/4575255.html""",
    """https://www.kubiji.org/165779/4622983.html""",
]


def get_sub_page_url(chapter_url, sub_page_count, first_sub_page_url_index, sub_page_id):
    """Return the URL of one page of a chapter.

    :param chapter_url: URL of the chapter's first page
    :param sub_page_count: number of pages per chapter
    :param first_sub_page_url_index: first index that gets a "_<n>" suffix
    :param sub_page_id: index of the requested page
    :return: *chapter_url* itself for the first page, otherwise the URL with
        "_<sub_page_id>" inserted before ".html"
    """
    if sub_page_count == 0 or sub_page_id < first_sub_page_url_index:
        return chapter_url
    # https://www.mht99.com/98886/82000964.html
    # -> https://www.mht99.com/98886/82000964_1.html
    return chapter_url.replace(".html", "_" + str(sub_page_id) + ".html")


def output(text, file_name):
    """Print *text* and append it (plus a newline) to *file_name*.

    :param text: text to emit
    :param file_name: path of the UTF-8 output file
    """
    print(text)
    with open(file_name, 'a+', encoding='utf-8') as f:
        f.write(text + "\n")


def download_chapter(chapter_url, file_name, chapter_webdriver, chinese_chapter_id=""):
    """Download every page of one chapter and return the combined text.

    Reads the module-level ``sub_page_count`` and
    ``first_sub_page_url_index`` settings.

    :param chapter_url: URL of the chapter's first page; empty if not found
    :param file_name: output file that receives per-page failure messages
    :param chapter_webdriver: Selenium driver used to load the pages
    :param chinese_chapter_id: chapter number text; not used by the code as
        it survives in this document
    :return: chapter text, or "不提供下载" when *chapter_url* is empty
    """
    if not chapter_url:
        return "不提供下载"

    # Imported here so the pure helpers in this snippet can be imported
    # without Selenium installed.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    # e.g. https://www.kubiji.org/255565/4076023.html -> "4076023"
    page_id = chapter_url.split("/")[-1].split(".")[0]
    # NOTE(review): this XPath literal is truncated in the original document
    # (only '''//*[@> survives) and is reproduced as found. Presumably it
    # interpolated page_id, since the content element differs per chapter on
    # this site -- restore the real expression before running.
    content_tag_xpath = '''//*[@>'''

    pages = []
    for j in range(sub_page_count):
        sub_page_id = j + first_sub_page_url_index - 1
        sub_page_url = get_sub_page_url(chapter_url, sub_page_count,
                                        first_sub_page_url_index, sub_page_id)
        try:
            try:
                chapter_webdriver.get(sub_page_url)
            except WebDriverException:
                # Wait a minute and retry once.
                time.sleep(60)
                chapter_webdriver.get(sub_page_url)
            content_tag = chapter_webdriver.find_element(By.XPATH, content_tag_xpath)
            pages.append("\n" + content_tag.text)
        except Exception:
            # Best effort: record the failed page and carry on. Unlike a bare
            # "except:", this does not swallow KeyboardInterrupt.
            output("####第" + str(sub_page_id) + "页:" + "下载失败", file_name)
    return "".join(pages)


def _find_chapter_url(list_driver, chapter_name, chapter_no):
    """Return the href of the list-page link matching *chapter_name*, or ""."""
    from selenium.webdriver.common.by import By

    try:
        links = list_driver.find_elements(By.PARTIAL_LINK_TEXT, chapter_name)
        urls = [link.get_attribute("href") for link in links]
    except Exception as e:
        print(e)
        return ""
    if not urls:
        return ""
    if chapter_no < choose_2nd_url_from_chapter_id or len(urls) == 1:
        return urls[0]
    return urls[1]


def _write_chapter(chapter_name, chapter_url, file_name, chapter_webdriver,
                   chinese_chapter_id=""):
    """Download one chapter and emit its title block followed by its text."""
    chapter_content = download_chapter(chapter_url, file_name, chapter_webdriver,
                                       chinese_chapter_id)
    chapter_title_text = ("\n" + "\n" + "\n" + "======================" + "\n"
                          + chapter_name + "\n")
    output(chapter_title_text + chapter_content, file_name)


def main():
    """Download the configured chapters and appendix into the output file."""
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    num2chinese = Num2Chinese()
    file_name = file_path + story_name + ".txt"
    output(story_name, file_name)
    start_time = datetime.now()
    output("下载开始时刻:" + start_time.strftime("%c"), file_name)

    with ExitStack() as stack:
        # quit() (not close()) so both chromedriver processes end, even on error.
        web = webdriver.Chrome()
        stack.callback(web.quit)
        web2 = webdriver.Chrome()
        stack.callback(web2.quit)
        # Implicit wait of 60 seconds, see https://www.cnblogs.com/mengyu/p/6972968.html
        web2.implicitly_wait(60)

        # "web" stays on the list page for URL lookups; "web2" loads chapters.
        web.get(list_url)
        if need_click_show_whole_list:
            click_tag = web.find_element(By.ID, show_whole_list_link_ele_id)
            # Relies on the page providing jQuery.
            web.execute_script("$(arguments[0]).click()", click_tag)

        for i in range(chapter_start, chapter_end + 1):
            # If the site zero-pads Arabic numbers (e.g. "001"), pad
            # chapter_id here, for instance with str(i).zfill(3).
            chapter_id = num2chinese.convert(i) if chinese_chapter_id_flag else str(i)
            chinese_chapter_name = "第" + chapter_id + "章"
            chapter_url = _find_chapter_url(web, chinese_chapter_name, i)
            _write_chapter(chinese_chapter_name, chapter_url, file_name, web2,
                           chapter_id)

        # Bonus chapters.
        for idx, chapter_url in enumerate(appendix_chapter_urls, start=1):
            chinese_chapter_name = "番外:" + str(idx)
            print(chinese_chapter_name)
            _write_chapter(chinese_chapter_name, chapter_url, file_name, web2)

        output("\n" + "\n" + "\n" + "======================" + "\n", file_name)
        end_time = datetime.now()
        output("下载结束时刻:" + end_time.strftime("%c"), file_name)
        total_seconds = (end_time - start_time).total_seconds()
        output("下载耗时:" + str(total_seconds) + "秒", file_name)


if __name__ == "__main__":
    main()