import encodings
import re
import time
from urllib.parse import urljoin

import requests as r
from lxml import etree
def pa(url, name):
    """Download one chapter page and append its title and text to a file.

    The page is fetched with a browser-like User-Agent, decoded as UTF-8,
    and parsed with lxml. The chapter title and the text nodes directly
    under the element with id "content" are appended to ``name``.

    Args:
        url: Absolute URL of the chapter page.
        name: Path of the text file to append to; created if it is missing.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        IndexError: If no chapter title is found at the expected XPath.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }
    # Bound the wait so a single stalled connection cannot hang the run.
    z = r.get(url, headers=headers, timeout=30)
    # Fail loudly instead of writing an error page into the book.
    z.raise_for_status()
    z.encoding = 'UTF-8'
    html = etree.HTML(z.text)

    # Locate the chapter title.
    titles = html.xpath('//*[@id="wrapper"]/div[3]/div/div[2]/h1/text()')
    if not titles:
        # Same exception type as the bare [0] lookup, but it names the page.
        raise IndexError("chapter title not found at %s" % url)
    zhangjie = titles[0]
    print(zhangjie)

    # Collect the chapter body text nodes via XPath.
    content = '\n'.join(html.xpath('//*[@id="content"]/text()'))

    with open(name, 'a', encoding="UTF-8") as txt:
        txt.write(zhangjie + "\n")
        # The trailing newline keeps the next chapter's title on its own
        # line instead of glued to the end of this chapter's last line.
        txt.write(content + "\n")
    print(zhangjie + ":\t写入成功")
if __name__ == '__main__':
    # Script entry point: read the book's index page, derive the output file
    # name from the page's <h1>, then download the chapters one at a time.
    mulu_url = 'http://www.yuetutu.com/cbook_22694/'
    # Send the same browser User-Agent that pa() uses. The original left this
    # value as a bare string expression, so it never reached the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    }
    s = r.get(mulu_url, headers=headers, timeout=30)
    s.raise_for_status()
    s.encoding = 'utf-8'
    text = s.text
    html = etree.HTML(text)

    # The text of the first <h1> on the index page becomes the file name.
    match = re.search('<h1>(.*?)</h1>', text)
    if match is None:
        raise SystemExit("no <h1> book title found at %s" % mulu_url)
    # The title comes from a remote page: replace characters that are path
    # separators or are not allowed in file names on common platforms.
    title = re.sub(r'[\\/:*?"<>|]', '_', match.group(1))
    name = "./%s.txt" % title

    mulu = html.xpath('//*[@id="list"]/dl/dd/a/@href')
    print(name)
    print(mulu)

    # The first eight links are skipped, as in the original counter logic.
    # NOTE(review): presumably they are a "latest chapters" box that repeats
    # entries from the full list - confirm against the live page.
    skipped_links = 8
    for href in mulu[skipped_links:]:
        # urljoin gives the same URL as prefixing the host for root-relative
        # hrefs, and also resolves relative or absolute ones correctly.
        pa(urljoin(mulu_url, href), name)
        # Pause between chapter requests to avoid hammering the server.
        time.sleep(1)