# demo
'''
早报
早报地址:https://www.163.com/dy/media/T1603594732083.html
'''
import requests
from lxml import etree
def main(url="https://www.163.com/dy/media/T1603594732083.html", timeout=10):
    """Print the text of the newest article linked from the index page.

    Downloads the index page at ``url``, follows the first link found under
    an ``<h2 class="media_article_title">`` heading, and prints every text
    node of the second ``<p>`` inside ``<div class="post_body">`` except the
    first one, one per line.

    Args:
        url: Index page that lists the articles.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the index page contains no article link.
    """
    from urllib.parse import urljoin

    # requests has no default timeout; without one a stalled server would
    # block the script forever.
    index_rsp = requests.get(url, timeout=timeout)
    index_rsp.raise_for_status()
    index_page = etree.HTML(index_rsp.text)

    article_links = index_page.xpath("//h2[@class='media_article_title']/a/@href")
    if not article_links:
        # Same exception type the bare ``[0]`` lookup produced, but with a
        # message that says what actually went wrong.
        raise IndexError(f"no article link found on {url}")

    # Resolve relative or protocol-relative hrefs against the index URL;
    # absolute hrefs pass through unchanged.
    today_url = urljoin(url, article_links[0])

    article_rsp = requests.get(today_url, timeout=timeout)
    article_rsp.raise_for_status()
    article_page = etree.HTML(article_rsp.text)

    news_list = article_page.xpath("//div[@class='post_body']/p[2]//text()")
    # The first text node is skipped (presumably a heading line rather than
    # a news item -- verify against the live page).
    for news in news_list[1:]:
        print(news)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Related articles (相关文章):