百度招聘的数据都是通过 Ajax 返回的,直接用 Scrapy 爬取就很尴尬了。
先建模,items 文件如下:
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class BaiduItem(scrapy.Item):
    """Scrapy item describing a single job posting.

    Each attribute is declared as a plain ``scrapy.Field()`` with no
    metadata; the comments state what each field is meant to hold.
    """

    # Job title
    job_name = scrapy.Field()
    # Job category
    job_type = scrapy.Field()
    # Work location
    address = scrapy.Field()
    # Number of openings
    number = scrapy.Field()
    # Last-updated time of the posting
    pub_time = scrapy.Field()
    # Detail-page link, e.g. ref="#/jobDetail/2/1345536716"
    detail_link = scrapy.Field()
    # Job responsibilities
    duty = scrapy.Field()
    # Job requirements
    require = scrapy.Field()