import os
import time
import requests
from selenium import webdriver
from lxml import etree
# Path to the local chromedriver executable (raw string so the backslashes are not treated as escapes)
path = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
# Browser-style User-Agent header so the requests are less likely to be rejected
head = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36"
}
novel_name = input("Enter the name of the novel you want to download: ")
# Open the site with Selenium, search for the novel, and grab the rendered result page
bro = webdriver.Chrome(executable_path=path)
bro.get('http://www.xbiquge.la/')
search_off = bro.find_element_by_id('wd')     # search input box
search_off.send_keys(novel_name)
click_search = bro.find_element_by_id('sss')  # search button
click_search.click()
time.sleep(5)  # crude wait for the result page to finish loading
page_text = bro.page_source
bro.quit()
tree = etree.HTML(page_text)
# The XPath expressions below were garbled in the original post; these selectors
# follow the usual structure of the xbiquge.la search-result and book pages at
# the time, so treat them as a best-effort reconstruction.
novel_name = tree.xpath('//*[@id="checkform"]/table//tr[2]/td[1]/a/text()')[0]  # novel name (first search hit)
url = tree.xpath('//*[@id="checkform"]/table//tr[2]/td[1]/a/@href')[0]          # link to the novel's index page
response = requests.get(url=url, headers=head)
response.raise_for_status()
response.encoding = response.apparent_encoding
tree = etree.HTML(response.text)
actor = tree.xpath('//*[@id="info"]/p[1]/text()')[0]  # author name
# Create the output directory tree if it does not exist yet
if not os.path.exists('./novels'):
    os.mkdir('./novels')
if not os.path.exists('./novels/' + str(novel_name)):
    os.mkdir('./novels/' + str(novel_name))
urls = []
zhangjies = tree.xpath('//*[@id="list"]/dl/dd')  # list of chapter entries (reconstructed XPath, see note above)
for i in zhangjies:
    zhangjie_name = i.xpath('./a/text()')[0] + '.txt'
    zhangjie_url = 'http://www.xbiquge.la' + i.xpath('./a/@href')[0]
    dic = {
        'name': zhangjie_name,
        'url': zhangjie_url
    }
    urls.append(dic)
def data_get_write(dic):
    url = dic['url']
    data_tree = None
    for i in range(20):  # retry up to 20 times; the server 503s easily
        try:
            data_get = requests.get(url=url, headers=head)
            data_get.raise_for_status()
            data_get.encoding = data_get.apparent_encoding
            data_tree = data_get.text
            break
        except requests.RequestException:
            print("Request failed, retrying")
    if data_tree is None:  # every attempt failed; skip this chapter
        print(dic['name'], 'could not be downloaded')
        return
    data_tree = etree.HTML(data_tree)
    data_write = data_tree.xpath('//*[@id="content"]/text()')  # chapter body (reconstructed XPath)
    with open('./novels/' + str(novel_name) + '/' + dic['name'], 'w', encoding='utf-8') as fp:
        for name in data_write:
            fp.write(name)
    print(dic['name'], 'downloaded successfully!!!')
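# Download chapters one at a time; the one-second pause keeps the fragile server
# from being hammered (it returns 503 under very little load, see the note below).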
for dic in urls:
    data_get_write(dic)
    time.sleep(1)
And with that, it's more or less done. There were plenty of problems along the way. For one, this site falls over far too easily: one careless moment and you get a 503, possibly because it has no anti-crawler mechanism and the server simply can't keep up. Many thanks to Guo for his help here; looping the request through repeated try attempts solved the problem, and the chapters are saved correctly.
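For reference, the same repeated-try idea can be factored into a small helper that backs off between attempts, which is gentler on a fragile server than retrying immediately. This is a minimal sketch, not the code used above: the 20 attempts mirror the loop in data_get_write, while the timeout and backoff delays are my own assumed values.

import time
import requests

def get_with_retry(url, headers, attempts=20, base_delay=1.0):
    # Fetch url, retrying on failure with exponential backoff.
    # attempts mirrors the 20-try loop above; base_delay, the timeout and
    # the backoff cap are assumptions, not values from the original code.
    for i in range(attempts):
        try:
            resp = requests.get(url=url, headers=headers, timeout=10)
            resp.raise_for_status()  # turns a 503 into an exception
            resp.encoding = resp.apparent_encoding
            return resp.text
        except requests.RequestException:
            time.sleep(base_delay * (2 ** min(i, 5)))  # wait longer each time, capped at 32s
    raise RuntimeError('failed to fetch ' + url + ' after ' + str(attempts) + ' attempts')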
I still feel I need a lot more practice. Practice makes perfect, and with experience problems get solved much faster. Algorithms matter too, of course, since they train your thinking. I'll keep practicing.