from BeautifulSoup import BeautifulSoup
import re

# Sample HTML used throughout these notes.
# The id/align attributes on the two <p> tags are what the attribute-based
# lookups further down rely on (align="center", align matching '^b.*', and
# the ['id'] reads that expect 'firstpara' / 'secondpara').
# Note that the <p> and <body> tags are left unclosed in the markup.
doc = ['<html><head><title>Page title</title></head>',
       '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
       '<p id="secondpara" align="blah">This is paragraph <b>two</b>.',
       '</html>']
# The fragments are joined with an empty separator and parsed as one string.
soup = BeautifulSoup(''.join(doc))
# Print the parsed tree in its prettified (indented) form.
print(soup.prettify())

 运行结果为:(截图:BeautifulSoup学习笔记)

 

# First top-level node of the parsed document; print its tag name.
root = soup.contents[0]
print(root.name)
# Tag name of the root's first child.
print(root.contents[0].name)

# Visit the root's direct children in order and print each tag name.
for child in root.contents:
    print(child.name)

 (截图:BeautifulSoup学习笔记)

# The statements below are written interactive-session style: each bare
# expression is followed by a comment showing the value it is expected to
# produce. Run as a script, the bare expressions themselves print nothing.
#
# The attribute-based lookups require the <p> tags in `doc` to carry
# id="firstpara" align="center" and id="secondpara" with an align value that
# starts with "b". If the tags lack those attributes, the align filters match
# nothing and the [0] / ['id'] subscripts fail.

# Reach the <title> tag by chaining tag names as attributes.
titleTag = soup.html.head.title
titleTag
# <title>Page title</title>

# .string is the text contained in the tag.
titleTag.string
# u'Page title'

# Calling the soup with a tag name yields every matching tag (two <p> here).
len(soup('p'))
# 2

# Keyword arguments restrict the search to tags with that attribute value.
soup.findAll('p', align="center")
# [<p id="firstpara" align="center">This is paragraph <b>one</b>. </p>]

# find() takes the same arguments but gives a single tag rather than a list.
soup.find('p', align="center")
# <p id="firstpara" align="center">This is paragraph <b>one</b>. </p>

# Subscripting a tag with an attribute name reads that attribute's value.
soup('p', align="center")[0]['id']
# u'firstpara'

# A compiled regex can be used as the attribute filter: here, any align
# value beginning with "b".
soup.find('p', align=re.compile('^b.*'))['id']
# u'secondpara'

# Text of the <b> inside the first <p>.
soup.find('p').b.string
# u'one'

# Text of the <b> inside the second <p>.
soup('p')[1].b.string
# u'two'

 

相关文章:

  • 2021-10-11
  • 2022-01-06
  • 2021-08-29
  • 2022-12-23
  • 2021-05-28
  • 2021-06-25
  • 2021-06-09
  • 2022-12-23
猜你喜欢
  • 2022-02-20
  • 2021-09-28
  • 2021-07-13
  • 2021-07-27
  • 2021-07-19
相关资源
相似解决方案