用scrapy爬取链家全国以上房源分类的信息:
路径:
items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class LianItem(scrapy.Item):
    """Placeholder item from the project template; declares no fields."""

    # define the fields for your item here like:
    # name = scrapy.Field()


class ErShouFangItem(scrapy.Item):
    """Fields collected for an `ershoufang` (second-hand housing) listing."""

    province = scrapy.Field()  # province
    city = scrapy.Field()  # city
    total_price = scrapy.Field()  # total price
    single_price = scrapy.Field()  # unit price
    room_info = scrapy.Field()  # floor
    region = scrapy.Field()  # location of the residence
    direction = scrapy.Field()  # orientation and decoration status
    area = scrapy.Field()  # building area
    house_struct = scrapy.Field()  # building type
    huxing = scrapy.Field()  # layout of the home
    buy_time = scrapy.Field()  # purchase time
    ershou_detail_url = scrapy.Field()  # url


class NewHouseItem(scrapy.Item):
    """Fields collected for a new-house listing."""

    province = scrapy.Field()  # province
    city = scrapy.Field()  # city
    title = scrapy.Field()  # title
    region = scrapy.Field()  # location
    room_info = scrapy.Field()  # house information
    area = scrapy.Field()  # building area
    price = scrapy.Field()  # price
    newHouse_detail_url = scrapy.Field()  # detail page


class RentHouseItem(scrapy.Item):
    """Fields collected for a rental listing."""

    province = scrapy.Field()  # province
    city = scrapy.Field()  # city
    title = scrapy.Field()  # title
    price = scrapy.Field()  # price
    house_info = scrapy.Field()  # room info (layout, orientation, area, rental mode)
    pub_time = scrapy.Field()  # publication time
    in_time = scrapy.Field()  # move-in
    lease = scrapy.Field()  # lease term
    floor = scrapy.Field()  # floor
    lift = scrapy.Field()  # elevator
    carport = scrapy.Field()  # parking space
    use_water = scrapy.Field()  # water supply
    use_electricity = scrapy.Field()  # electricity supply
    use_gas = scrapy.Field()  # gas supply
    rent_detail_url = scrapy.Field()  # url


class OfficeHouseItem(scrapy.Item):
    """Fields collected for an office listing."""

    province = scrapy.Field()  # province
    city = scrapy.Field()  # city
    title = scrapy.Field()  # title
    price = scrapy.Field()  # price
    num = scrapy.Field()  # quantity
    area = scrapy.Field()  # area
    office_detail_url = scrapy.Field()  # url


class XiaoquHouseItem(scrapy.Item):
    """Fields collected for a `xiaoqu` (residential community) listing."""

    province = scrapy.Field()  # province
    city = scrapy.Field()  # city
    title = scrapy.Field()  # title
    region = scrapy.Field()  # district
    single_price = scrapy.Field()  # unit price
    build_time = scrapy.Field()  # construction era
    house_struct = scrapy.Field()  # building type
    service_fees = scrapy.Field()  # property management fee
    service_company = scrapy.Field()  # property management company
    build_company = scrapy.Field()  # developer
    building_nums = scrapy.Field()  # number of buildings
    house_nums = scrapy.Field()  # total number of homes
    xiaoqu_detail_url = scrapy.Field()  # url