什么是BeautifulSoup?
BeautifulSoup支持的一些解析库
基本使用
# Basic BeautifulSoup usage: parse an HTML snippet, pretty-print the parsed
# tree, and read the text of the <title> tag.
from bs4 import BeautifulSoup

# Sample document. The closing </body> and </html> tags are deliberately
# missing here; they do appear in the prettified output, i.e. the parser
# completes the document while building the tree.
html = """
<html><head><title> The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse"> <b> The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters;and their names were
<a href="http://example.com/elsie" class="sister" id="link1"> <!--Elsie--></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">..</p>
"""

# The second argument names the parser backend to use ("lxml" here).
soup = BeautifulSoup(html, "lxml")

# .prettify() returns the parsed tree as a formatted (indented) string.
print(soup.prettify())

# .title.string is the text content of the <title> element.
print(soup.title.string)
<html> <head> <title> The Dormouse's story </title> </head> <body> <p class="title" name="dromouse"> <b> The Dormouse's story </b> </p> <p class="story"> Once upon a time there were three little sisters;and their names were <a class="sister" href="http://example.com/elsie" id="link1"> <!--Elsie--> </a> , <a class="sister" href="http://example.com/lacie" id="link2"> Lacie </a> and <a class="sister" href="http://example.com/tillie" id="link3"> Tillie </a> ; and they lived at the bottom of a well. </p> <p class="story"> .. </p> </body> </html> The Dormouse's story