官方示例源码
<html>
 <head>
  <base href='http://example.com/' />
  <title>Example website</title>
 </head>
 <body>
  <div id='images'>
   <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
   <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
   <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
   <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
   <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
  </div>
 </body>
</html>

# scrapy shell http://doc.scrapy.org/en/latest/_static/selectors-sample1.html

>>> response.xpath('//title/text()')
[<Selector (text) xpath=//title/text()>]

>>> response.css('title::text')
[<Selector (text) xpath=//title/text()>]

>>> response.css('title::text').extract()
[u'Example website']

>>> response.xpath('//title/text()').extract()
[u'Example website']

>>> response.xpath('//base/@href').extract()
[u'http://example.com/']

>>> response.css('base::attr(href)').extract()
[u'http://example.com/']


>>> response.xpath('//a[contains(@href, "image")]/@href').extract()
[u'image1.html',
 u'image2.html',
 u'image3.html',
 u'image4.html',
 u'image5.html']

>>> response.css('a[href*=image]::attr(href)').extract()
[u'image1.html',
 u'image2.html',
 u'image3.html',
 u'image4.html',
 u'image5.html']

>>> response.xpath('//a/@href')]').extract()
['image1.html',
 'image2.html',
 'image3.html',
 'image4.html',
 'image5.html']

>>> response.css('a::attr(href)').extract()
['image1.html',
 'image2.html',
 'image3.html',
 'image4.html',
 'image5.html']

>>> response.xpath('//div[@>'<a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>'

相关文章:

  • 2022-12-23
  • 2021-08-23
  • 2022-12-23
  • 2021-12-25
  • 2021-11-13
  • 2021-05-24
  • 2021-11-13
  • 2021-08-19
猜你喜欢
  • 2021-12-24
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-11-04
相关资源
相似解决方案