php 扫描网站获取指定有用信息获取img标签获取a标签获取div标签的值或者属性值

研究一下扫描网站，获取指定有用信息

A.需求：

获取<IMG><A>标签中的内容

B.具体处理过程

1.获取目标网站的内容

2. 利用php7 自带的DOMDocument

C. 实现代码

WebGrab.php

<?php
namespace Application\test;

class WebGrab
{
    protected $content=false;
    // 获得内容， 利用DomDocument类
    public function getContent($url){
        if (!$this->content) {
            $url = \'http://\' . $url;

            $this->content = new \DOMDocument(\'1.0\', \'utf-8\');
            $this->content->preserveWhiteSpace = false;
            // @符号用于过滤掉配置错误的网页所生成的警告
            @$this->content->loadHTMLFile($url);
        }
        return $this->content;
    }
   // 获取指定的标签 以及标签的内容 
    public function getTags($url,$tag){
        $count=0;
        $result=array();
        $elements=$this->getContent($url)->getElementsByTagName($tag);
        foreach ($elements as $node){
            $result[$count][\'value\']=trim(preg_replace(\'/\s+/\',\' \',$node->nodeValue));
            if ($node->hasAttributes()){
                foreach ($node->attributes as $name=>$attr){
                    $result[$count][\'attributes\'][$name]=$attr->value;
                }
            }
            $count++;
        }
        return $result;
    }
   // 获取指定的属性
    public function getAttribute($url,$attr,$domain=null){
        $result=array();
        $element=$this->getContent($url)->getElementsByTagName(\'*\');
        foreach ($element as $node){
            if ($node->hasAttribute($attr)){
                $value=$node->getAttribute($attr);
                if ($domain){
                    if (stripos($value,$domain)!==false){
                        $result[]=trim($value);
                    }
                }else{
                    $result[]=trim($value);
                }
            }
        }
        return $result;

    }

}

执行代码：

web_grab.php

<?php
hen you read this code, good luck for you.
 */
define(\'DEFAULT_URL\',\'\');
define(\'DEFAULT_TAG\',\'img\');
// 自己封装的自动加载类，
require __DIR__.\'/../Autoload/Loader.php\';
Application\Autoload\Loader::init(__DIR__.\'/../..\');
$test=new Application\test\WebGrab();
$url=strip_tags($_GET[\'url\']??DEFAULT_URL);
$tag=strip_tags($_GET[\'tag\']??DEFAULT_TAG);

echo json_encode(
    [
        \'message\'=>\'获取成功\',
        \'attribute\'=>$test->getAttribute($url,\'src\'),
        \'tag\'=>$test->getTags($url,$tag)
    ]);

执行结果