Spider:
# -*- coding: utf-8 -*-
"""Scrapy spider that scrapes listing rows from sh.lianjia.com /ershoufang/ search pages."""
import scrapy
from scrapy.http import Request

from scrapy_zhaopin.items import ScrapyHouseItem


class MySpider(scrapy.Spider):
    """Crawl one search-result page per keyword and yield a ScrapyHouseItem per listing row.

    The crawl starts from ``start_urls``; every parsed page additionally
    schedules the search pages for all ``search_keywords``.
    """

    name = "spiderhouse"
    allowed_domains = ["sh.lianjia.com"]
    start_urls = ["https://sh.lianjia.com/ershoufang/rs徐泾北城/"]

    # Keywords substituted into the /ershoufang/rs<keyword>/ search URL.
    search_keywords = [
        "徐盈路", "徐泾镇", "华新镇", "嘉定北", "中山公园", "汇金路", "青浦新城",
        "爱博家园", "九亭", "佘山", "泗泾", "洞泾", "赵巷",
    ]

    def parse(self, response):
        """Yield one item per listing row, then requests for every keyword search page.

        Item fields:
            title:      page <title> text up to the first "_", with "二手房房源" removed (str).
            name, address, house_info, price: lists of the matching text nodes.
            unit_price: first unit-price text with "单价" and "元/平米" removed (str).

        A missing <title> or unit-price node yields an empty string instead of
        raising IndexError, so one incomplete row does not abort the whole page.
        """
        # The page title is identical for every row, so look it up once.
        page_title = response.xpath('//title/text()').extract_first(default="")
        title = page_title.split("_")[0].replace("二手房房源", "")

        rows = response.xpath('//*[contains(@log-mod,"list")]//li[contains(@class,"clear")]')
        for row in rows:
            item = ScrapyHouseItem()
            item['title'] = title
            item['name'] = row.xpath('.//*[@class="title"]/a/text()').extract()
            item['address'] = row.xpath('.//*[@class="positionInfo"]/a/text()').extract()
            item['house_info'] = row.xpath('.//*[@class="houseInfo"]/text()').extract()
            item['price'] = row.xpath('.//*[@class="totalPrice"]//span/text()').extract()
            unit_price = row.xpath('.//*[@class="unitPrice"]//span/text()').extract_first(default="")
            item['unit_price'] = unit_price.replace("单价", "").replace("元/平米", "")
            yield item

        # Every response re-yields the same keyword requests.
        # NOTE(review): this presumably relies on Scrapy's default duplicate
        # filter to drop the repeats - confirm it is not disabled in settings.
        for keyword in self.search_keywords:
            address_url = f'https://sh.lianjia.com/ershoufang/rs{keyword}/'
            yield Request(address_url, callback=self.parse)

        # TODO: pagination is not followed; only the first result page of each
        # search is scraped. The earlier commented-out draft relied on
        # self.page / self.page_url, which this spider never defines.