官方文档:http://pyquery.readthedocs.io/
安装
|
1
|
pip install pyquery
|
初始化
字符串初始化
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
html = '''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
print(doc('li'))
|
URL初始化
|
1
2
3
|
from pyquery import PyQuery as pq
doc = pq(url='http://www.baidu.com')
print(doc('head'))
|
文件初始化
|
1
2
3
|
from pyquery import PyQuery as pq
doc = pq(filename='demo.html')
print(doc('li'))
|
基本CSS选择器
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
print(doc('#container .list li'))
|
查找元素
子元素
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
print(type(items))
print(items)
lis = items.find('li')
print(type(lis))
print(lis)
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
lis = items.children()
print(type(lis))
print(lis)
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
lis = items.children('.active')
print(lis)
|
父元素
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
html = '''
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
container = items.parent()
print(type(container))
print(container)
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
items = doc('.list')
parents = items.parents()
print(type(parents))
print(parents)
|
|
1
2
|
parent = items.parents('.wrap')
print(parent)
|
兄弟元素
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list .item-0.active')
print(li.siblings())
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list .item-0.active')
print(li.siblings('.active'))
|
遍历
单个元素
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc('li').items()
print(type(lis))
for li in lis:
    print(li)
|
获取信息
获取属性
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
a = doc('.item-0.active a')
print(a)
print(a.attr('href'))
print(a.attr.href)
|
获取文本
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
a = doc('.item-0.active a')
print(a)
print(a.text())
|
获取HTML
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
print(li.html())
|
DOM操作
addClass、removeClass
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
li.removeClass('active')
print(li)
li.addClass('active')
print(li)
|
attr、css
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
li.attr('name', 'link')
print(li)
li.css('font-size', '14px')
print(li)
|
remove
|
1
2
3
4
5
6
7
8
9
10
11
12
|
html = '''
<div class="wrap">
Hello, World
<p>This is a paragraph.</p>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
wrap = doc('.wrap')
print(wrap.text())
wrap.find('p').remove()
print(wrap.text())
|
其他DOM方法 http://pyquery.readthedocs.io/en/latest/api.html
伪类选择器
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
html = '''
<div class="wrap">
<div id="container">
<ul class="list">
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('li:first-child')
print(li)
li = doc('li:last-child')
print(li)
li = doc('li:nth-child(2)')
print(li)
li = doc('li:gt(2)')
print(li)
li = doc('li:nth-child(2n)')
print(li)
li = doc('li:contains(second)')
print(li)
|