【发布时间】:2017-11-01 04:07:59
【问题描述】:
我正在做抓取。
class MMA::School
attr_accessor :name, :location_info, :url
def self.today
self.schools
end
def self.schools
schools = []
schools << self.scrape_cbs
schools
end
def self.scrape_cbs
doc = Nokogiri::HTML(open("http://newyork.cbslocal.com/top-lists/5-best-mma-and-martial-arts-studios-in-new-york/"))
schools_1 = self.new
schools_1.name = doc.search("//div/p/strong/span").text.strip
schools_1.location_info = doc.search("//div/p")[4].text.strip
schools_1.url = doc.search("//div/p/a")[0].text.strip
schools_1
schools_2 = self.new
schools_2.name = doc.search("//div/p/span")[0].text.strip
schools_2.location_info = doc.search("//div/p")[7].text.strip
schools_2.url = doc.search("//div/p/a")[2].text.strip
schools_2
schools_3 = self.new
schools_3.name = doc.search("//div/p/span")[1].text.strip
schools_3.location_info = doc.search("//div/p")[9].text.strip
schools_3.url = doc.search("//div/p/a")[3].text.strip
schools_3
schools_4 = self.new
schools_4.name = doc.search("//div/p/span")[2].text.strip
schools_4.location_info = doc.search("//div/p")[12].text.strip
schools_4.url = doc.search("//div/p/a")[5].text.strip
schools_4
schools_5 = self.new
schools_5.name = doc.search("//div/p/span")[3].text.strip
schools_5.location_info = doc.search("//div/p")[14].text.strip
schools_5.url = doc.search("//div/p/a")[6].text.strip
schools_5
end
end
我在将抓取的数据放入空数组时遇到了一些问题。它只会将schools_1 等之一推送到schools 数组。
有人对如何解决这个问题有任何建议吗?
【问题讨论】:
标签: arrays ruby screen-scraping