# coding: utf-8
"""Download a novel from biqugex.com: search for it by title with a headless
Chrome browser, then scrape every chapter with requests + BeautifulSoup."""
import os
import re

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


class downloader:

    def __init__(self):
        self.urls = []  # chapter links
        self.name = []  # chapter titles
        self.url = 'https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqugex.com&q='

    def Get_url(self):
        """Prompt for the novel's full title, search for it, and return the
        link of the first result."""
        # Configure Chrome to run in headless mode (no visible window).
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        # Create the headless Chrome driver.
        browser = webdriver.Chrome(options=chrome_options)
        browser.get(self.url)
        c = input('Enter the full title of the novel: ')
        # Type the title into the search box and submit the form.
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[3]').send_keys(c)
        browser.find_element(By.XPATH, '//*[@id="wrapper"]/div[1]/div[2]/form/input[4]').click()
        new_url = browser.current_url
        # Close the browser and shut down the chromedriver process.
        browser.quit()
        print('Browser closed')
        response = requests.get(new_url)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        # The first <a> inside a <span class="s2"> is the top search result.
        new_name = soup.find('span', class_='s2').find('a')
        self.href = new_name.attrs['href']
        print(self.href)
        return self.href

    def Response(self):
        """Fetch the novel's index page and collect every chapter link."""
        response = requests.get(self.href)
        response.encoding = 'gbk'  # the site serves GBK; set it to avoid mojibake
        self.soup = BeautifulSoup(response.text, 'lxml')  # parse the index page
        div = self.soup.find('div', class_='listmain')  # the chapter list container
        for i in div.find_all('a'):  # every <a> under listmain is a chapter
            self.name.append(i.string)  # the tag's text is the chapter title
            # hrefs are relative, so prepend the site's domain
            self.urls.append('https://www.biqugex.com%s' % i.get('href'))

    def file(self):
        """Find the novel's title, create a folder of the same name, and save
        each chapter to its own .txt file."""
        h2 = self.soup.select_one('body > div.book > div.info > h2')
        b = h2.string
        # A raw string keeps Python from treating \U... as an escape sequence.
        c = os.path.join(r'C:\Users\Administrator\Desktop', b)
        if not os.path.exists(c):
            os.mkdir(c)

        # Fetch every chapter page and extract the body text.
        i = 0
        while i < len(self.urls):
            response1 = requests.get(url=self.urls[i])
            response1.encoding = 'gbk'
            soup2 = BeautifulSoup(response1.text, 'lxml')
            content = soup2.find('div', id='content')
            # Strip characters Windows forbids in filenames from the title.
            src = re.sub(r'[\\/:*?"<>|]', '', self.name[i]) + '.txt'
            filename = os.path.join(c, src)
            print(filename)

            # Write the chapter text to its own file.
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content.text)
            i += 1

    def Main(self):
        # If the search fails (e.g. the title can't be found), report it
        # instead of crashing.
        try:
            self.Get_url()
        except Exception:
            print('Novel not found')
        else:
            self.Response()
            self.file()


if __name__ == '__main__':
    a = downloader()
    a.Main()