rules = [
        # One crawl rule: extract links whose URL matches the article-details
        # pattern, looking only inside the "next_article" list item, pass each
        # fetched page to parse_item, and keep following links from it.
        Rule(
            SgmlLinkExtractor(
                allow='/u012150179/article/details',
                restrict_xpaths='//li[@class="next_article"]',
            ),
            callback='parse_item',
            follow=True,
        ),
    ]
  
    def parse_item(self, response):
        """Build one item holding the title and URL of a crawled page.

        Args:
            response: the response object passed to this callback; its
                ``url`` attribute and ``xpath`` method are used.

        Returns:
            A ``CsdnblogcrawlspiderItem`` whose ``blog_name`` is the list of
            UTF-8 encoded strings matched by the title XPath and whose
            ``blog_url`` is the UTF-8 encoded response URL.
        """
        item = CsdnblogcrawlspiderItem()
        blog_url = str(response.url)

        # NOTE(review): the XPath literal was truncated in this file
        # ('//div[@>'), which left the method unparseable. The expression
        # below is a reconstruction of the article-title selector -- confirm
        # it against the target page markup before relying on it.
        blog_name = response.xpath(
            '//div[@id="article_details"]/div/h1/span/a/text()'
        ).extract()

        # extract() yields a list (possibly empty), so encode element-wise.
        item['blog_name'] = [n.encode('utf-8') for n in blog_name]
        item['blog_url'] = blog_url.encode('utf-8')

        return item

 

相关文章:

  • 2022-12-23
  • 2021-04-29
  • 2022-12-23
  • 2021-10-23
  • 2022-12-23
  • 2022-12-23
  • 2021-11-18
  • 2021-07-11
猜你喜欢
  • 2021-10-09
  • 2021-10-02
  • 2021-05-03
  • 2022-12-23
  • 2022-12-23
  • 2021-05-22
  • 2021-04-07
相关资源
相似解决方案