采集数据+七牛云存储图片
/**
 * Crawler: scrapes list pages 1-3 of the news.ke.com encyclopedia section,
 * mirrors every cover image to Qiniu cloud storage and inserts the scraped
 * rows into the Article table.
 *
 * Each scraped row has the keys `img`, `title`, `desn` and `rul`. `img` is
 * the public URL of the uploaded image, or false when the image could not be
 * downloaded, saved locally or uploaded.
 */
public function reptile()
{
    require_once base_path() . "/public/static/QueryList/phpQuery.php";
    require_once base_path() . "/public/static/QueryList/QueryList.php";

    // Qiniu credentials. NOTE(review): placeholders kept from the original;
    // they should presumably come from configuration rather than source code.
    $accessKey = "***";
    $secretKey = "***";

    // The upload token and manager do not depend on the image, so build them
    // once instead of once per scraped item.
    $auth = new Auth($accessKey, $secretKey);
    $token = $auth->uploadToken('jmin');
    $uploadMgr = new UploadManager();

    // Callback applied by QueryList to every `data-original` attribute:
    // downloads the image, stores it locally, uploads it to Qiniu and returns
    // the resulting public URL (false on any failure).
    $storeImage = function ($item) use ($uploadMgr, $token) {
        if (empty($item)) {
            return false;
        }

        // Derive the extension from the URL path only, so that a query string
        // never ends up inside the file name.
        $urlPath = (string) parse_url($item, PHP_URL_PATH);
        $extension = pathinfo($urlPath, PATHINFO_EXTENSION);

        // uniqid() instead of rand(1, 999): the small random range collided
        // easily and silently overwrote previously saved images.
        $fileName = 'hy_' . uniqid() . '.' . $extension;
        $localPath = base_path() . '/public/uploads/img/' . $fileName;

        $contents = file_get_contents($item);
        if ($contents === false || file_put_contents($localPath, $contents) === false) {
            return false;
        }

        // Upload to Qiniu; the object key is the generated file name (the
        // original passed the byte count returned by file_put_contents()).
        list($ret, $error) = $uploadMgr->putFile($token, $fileName, $localPath);
        if ($error !== null) {
            return false;
        }

        // Public URL of the uploaded image, usable directly in views.
        return 'http://min.clouddn.com/' . $ret['key'];
    };

    foreach (range(1, 3) as $page) {
        $url = 'https://news.ke.com/bj/baike/0033/' . 'pg' . $page . '/';

        // "@" is intentional: the scrape is best-effort and warnings raised
        // while fetching/parsing a page (including inside the image callback)
        // must not abort the remaining pages.
        $data = @QueryList::Query($url, [
            'img'   => ['.item>a>img', 'data-original', '', $storeImage],
            'title' => ['.tit', 'html'],
            'desn'  => ['.text>.summary', 'text'],
            'rul'   => ['.text>a', 'href'],
        ])->data;

        // Store the scraped rows; skip pages that yielded nothing.
        if (!empty($data)) {
            Article::insert($data);
        }
    }
}
采集详情页
/**
 * Scrapes the detail page of every stored article (via its `rul` link) and
 * writes the author name and article body back to the matching row.
 *
 * Articles without a link, and detail pages that yield no data, are skipped
 * so that a single bad page does not abort the whole run.
 */
public function gather()
{
    require_once base_path() . "/public/static/QueryList/phpQuery.php";
    require_once base_path() . "/public/static/QueryList/QueryList.php";

    $articles = Article::get(['id', 'rul'])->toArray();

    foreach ($articles as $article) {
        if (empty($article['rul'])) {
            continue;
        }

        // "@" is intentional: fetching/parsing is best-effort and a broken
        // page must not stop the remaining articles from being processed.
        $scraped = @QueryList::Query($article['rul'], [
            'body' => ['.m-article', 'html'],
            'name' => ['.author', 'text'],
        ])->data;

        // Nothing matched (dead link, changed markup, ...): leave the row as is.
        if (empty($scraped[0]) || !is_array($scraped[0])) {
            continue;
        }

        // Keep only the author name / article body that were actually found,
        // instead of reading possibly missing keys.
        $fields = array_intersect_key($scraped[0], ['name' => true, 'body' => true]);
        if (empty($fields)) {
            continue;
        }

        Article::where('id', $article['id'])->update($fields);
    }
}