jerry-min

采集数据+七牛云存储图片

//爬虫
    /**
     * Crawl listing pages 1-3 of the ke.com encyclopedia section, mirror each
     * cover image to Qiniu and insert the scraped rows through Article::insert().
     *
     * Every scraped row carries the keys `img` (public Qiniu URL, or false when
     * the image could not be mirrored), `title`, `desn` and `rul` (the link to
     * the detail page, later consumed by gather()).
     *
     * @return void
     */
    public function reptile()
    {
        require_once base_path() . '/public/static/QueryList/phpQuery.php';
        require_once base_path() . '/public/static/QueryList/QueryList.php';

        // Qiniu credentials (placeholders); the upload token and manager do not
        // depend on the image, so they are created once instead of per image.
        $accessKey = "***";
        $secretKey = "***";
        $auth = new Auth($accessKey, $secretKey);
        $token = $auth->uploadToken('jmin');
        $uploadMgr = new UploadManager();
        $imgDir = base_path() . '/public/uploads/img/';

        // Downloads one remote image, stores it locally, uploads it to Qiniu and
        // returns its public URL, or false when any step fails.
        $storeImage = function ($item) use ($uploadMgr, $token, $imgDir) {
            if (empty($item)) {
                return false;
            }

            // Take the extension from the URL path only, so a query string
            // never ends up inside the file name.
            $ext = pathinfo((string) parse_url($item, PHP_URL_PATH), PATHINFO_EXTENSION);
            // uniqid() avoids the frequent collisions of rand(1, 999).
            $img = uniqid('hy_') . ($ext !== '' ? '.' . $ext : '');
            $lots = $imgDir . $img;

            // Best effort: a single broken image must not abort the whole crawl.
            $content = @file_get_contents($item);
            if ($content === false || file_put_contents($lots, $content) === false) {
                return false;
            }

            // putFile() expects (token, object key, local path); the key is the
            // file name, not the byte count returned by file_put_contents().
            list($ret, $error) = $uploadMgr->putFile($token, $img, $lots);
            if ($error !== null) {
                return false;
            }

            // Public URL of the uploaded image, ready to be rendered directly.
            return 'http://min.clouddn.com/' . $ret['key'];
        };

        foreach (range(1, 3) as $val) {
            $url = 'https://news.ke.com/bj/baike/0033/' . 'pg' . $val . '/';
            // '@' is kept on purpose: an unreachable page is skipped, not fatal.
            $data = @QueryList::Query($url, [
                'img' => ['.item>a>img', 'data-original', '', $storeImage],
                'title' => ['.tit', 'html'],
                'desn' => ['.text>.summary', 'text'],
                'rul' => ['.text>a', 'href'],
            ])->data;

            // Nothing scraped from this page (network error or markup change).
            if (empty($data)) {
                continue;
            }

            // Persist the scraped rows.
            Article::insert($data);
        }
    }

采集详情页

//根据详情链接采集详情页
    /**
     * Visit the detail page of every stored article (column `rul`) and save the
     * scraped author name and article body back onto that article.
     *
     * Articles without a link, and pages that cannot be fetched or parsed, are
     * skipped so that their existing `name`/`body` values are left untouched.
     *
     * @return void
     */
    public function gather()
    {
        require_once base_path() . '/public/static/QueryList/phpQuery.php';
        require_once base_path() . '/public/static/QueryList/QueryList.php';

        $articles = Article::get(['id', 'rul'])->toArray();

        foreach ($articles as $val) {
            if (empty($val['rul'])) {
                continue;
            }

            // A separate variable is used for the scrape result so the list being
            // iterated is never shadowed. '@' is kept on purpose: one unreachable
            // page must not abort the whole run.
            $detail = @QueryList::Query($val['rul'], [
                'body' => ['.m-article', 'html'],
                'name' => ['.author', 'text'],
            ])->data;

            if (empty($detail[0])) {
                continue;
            }

            // Only write the fields that were actually found on the page.
            $fields = [];
            if (isset($detail[0]['name'])) {
                // Author / publisher name.
                $fields['name'] = $detail[0]['name'];
            }
            if (isset($detail[0]['body'])) {
                // Article body HTML.
                $fields['body'] = $detail[0]['body'];
            }
            if (empty($fields)) {
                continue;
            }

            Article::where('id', $val['id'])->update($fields);
        }
    }

 

分类:

技术点:

相关文章: