yunlongaimeng

官方文档:http://pyquery.readthedocs.io/

安装

1
pip install pyquery

初始化

字符串初始化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
html = \'\'\'
<div>
    <ul>
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
print(doc(\'li\'))
 View Code

URL初始化

1
2
3
from pyquery import PyQuery as pq
doc = pq(url=\'http://www.baidu.com\')
print(doc(\'head\'))
 View Code

文件初始化

1
2
3
from pyquery import PyQuery as pq
doc = pq(filename=\'demo.html\')
print(doc(\'li\'))
 View Code

基本CSS选择器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
html = \'\'\'
<div id="container">
    <ul class="list">
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
print(doc(\'#container .list li\'))
 View Code

查找元素

子元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
html = \'\'\'
<div id="container">
    <ul class="list">
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
items = doc(\'.list\')
print(type(items))
print(items)
lis = items.find(\'li\')
print(type(lis))
print(lis)
 View Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
html = \'\'\'
<div id="container">
    <ul class="list">
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
items = doc(\'.list\')
lis = items.children()
print(type(lis))
print(lis)
 View Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
html = \'\'\'
<div id="container">
    <ul class="list">
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
items = doc(\'.list\')
lis = items.children(\'.active\')
print(lis)
 View Code

父元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
html = \'\'\'
<div id="container">
    <ul class="list">
         <li class="item-0">first item</li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
         <li class="item-1 active"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a></li>
     </ul>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
items = doc(\'.list\')
container = items.parent()
print(type(container))
print(container)
 View Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
items = doc(\'.list\')
parents = items.parents()
print(type(parents))
print(parents)
 View Code
1
2
parent = items.parents(\'.wrap\')
print(parent)
 View Code

兄弟元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.list .item-0.active\')
print(li.siblings())
 View Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.list .item-0.active\')
print(li.siblings(\'.active\'))
 View Code

遍历

单个元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.item-0.active\')
print(li)
 View Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc(\'li\').items()
print(type(lis))
for li in lis:
    print(li)
 View Code

获取信息

获取属性

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
a = doc(\'.item-0.active a\')
print(a)
print(a.attr(\'href\'))
print(a.attr.href)
 View Code

获取文本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
a = doc(\'.item-0.active a\')
print(a)
print(a.text())
 View Code

获取HTML

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.item-0.active\')
print(li)
print(li.html())
 View Code

DOM操作

addClass、removeClass

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.item-0.active\')
print(li)
li.removeClass(\'active\')
print(li)
li.addClass(\'active\')
print(li)
 View Code

attr、css

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'.item-0.active\')
print(li)
li.attr(\'name\', \'link\')
print(li)
li.css(\'font-size\', \'14px\')
print(li)
 View Code

remove

1
2
3
4
5
6
7
8
9
10
11
12
html = \'\'\'
<div class="wrap">
    Hello, World
    <p>This is a paragraph.</p>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
wrap = doc(\'.wrap\')
print(wrap.text())
wrap.find(\'p\').remove()
print(wrap.text())
 View Code

其他DOM方法 http://pyquery.readthedocs.io/en/latest/api.html

伪类选择器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
html = \'\'\'
<div class="wrap">
    <div id="container">
        <ul class="list">
             <li class="item-0">first item</li>
             <li class="item-1"><a href="link2.html">second item</a></li>
             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             <li class="item-0"><a href="link5.html">fifth item</a></li>
         </ul>
     </div>
 </div>
\'\'\'
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(\'li:first-child\')
print(li)
li = doc(\'li:last-child\')
print(li)
li = doc(\'li:nth-child(2)\')
print(li)
li = doc(\'li:gt(2)\')
print(li)
li = doc(\'li:nth-child(2n)\')
print(li)
li = doc(\'li:contains(second)\')
print(li)

分类:

技术点:

相关文章:

  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2018-10-31
  • 2021-06-08
  • 2021-10-02
  • 2021-08-25
  • 2022-12-23
猜你喜欢
  • 2019-07-26
  • 2021-09-17
  • 2021-05-30
  • 2021-08-07
  • 2021-10-08
  • 2022-01-26
  • 2022-12-23
相关资源
相似解决方案