今天在获取一个网页的具体内容时遇到了一些小麻烦,
源代码:
1 package com.ms.test; 2 3 import us.codecraft.webmagic.Page; 4 import us.codecraft.webmagic.Site; 5 import us.codecraft.webmagic.Spider; 6 import us.codecraft.webmagic.processor.PageProcessor; 7 8 public class TestWebmagic implements PageProcessor{ 9 10 Site site = Site.me(); 11 @Override 12 public Site getSite() { 13 // TODO Auto-generated method stub 14 return site; 15 } 16 17 @Override 18 public void process(Page page) { 19 // TODO Auto-generated method stub 20 page.putField("test", page.getHtml().xpath("//div[@class=p-2]/div[@class=o-border-bottom2]/div[@class=my-2]/strong")); 21 } 22 23 public static void main(String[] args) { 24 Spider.create(new TestWebmagic()) 25 .addUrl("http://www.beijing.gov.cn/hudong/hdjl/com.web.consult.consultDetail.flow?originalId=AH20011700001") 26 .run(); 27 } 28 }