【问题标题】:PHP Script Memory LeakPHP 脚本内存泄漏
【发布时间】:2011-04-06 17:57:11
【问题描述】:

我知道这不是最小的代码,我已经尽可能地减少它。该脚本只会消耗越来越多的内存,直到最终耗尽。我在可能的情况下使用了 unset() ,但它似乎没有任何效果。 MultiGet 函数似乎总是出错,但我不确定这是否是泄漏的地方。任何意见将不胜感激。

public function Test()
{
    $base = dirname(__FILE__) .'/';
    $prod_file = $base.'products.dbf';

    $this->dbf->load($prod_file);
    $num_rec=$ci->dbf->dbf_num_rec;

    $buffer = Array();
    for($i=0;$i<$num_rec;$i++):
        $row = $ci->dbf->getRowAssoc($i);

        $info = Array('part_number' => $row['PART_NUM'],
                      'td_group_id' => $row['GRP'],
                      'name' => 'DESCR');

        $this->db->where('td_group_id',$info['td_group_id']);
        $result = $this->db->get('tbl_categories')->row_array();
        if(isset($result['id'])):
            $info['category_id'] = $result['id'];
            $buffer[]  = $info;
        endif;

        if(count($buffer) == 100 || $i == $num_rec -1):
            $url_buffer = Array();
            foreach($buffer as $row):
                $url_buffer[] = $this->_product_url($row['part_number']);
            endforeach;

            $html_returns = $this->MultiCrawl($url_buffer);
            foreach($html_returns as $url_index=>$html):
                $more_info = $this->_extract_more_info($html);
                if($more_info):
                    $more_info['category_id'] = $buffer[$url_index]['category_id'];
                    $more_info['td_part_number'] = $buffer[$url_index]['part_number'];
                    $this->_parse_product($more_info);
                endif;
            endforeach;
            $buffer = Array();
        endif;

    endfor;



}


function MultiGet($all_urls)
{

    $useragent = $this->_useragent;
    $cookie_file = $this->_cookie_file;

    $url_index = $this->UrlIndex($all_urls);

    $return_buffer = Array();

    $mh = curl_multi_init();

    $ch = Array();
    $max_connections = 15;
    $index = 0;
    $open_connections = 0;
    $execReturnValue = true;
    $running = true;
    $max_index = count($all_urls)-1;
    $url_count = count($all_urls);
    $buffer_count = 0;

    while ($buffer_count < $url_count){

        if($open_connections < $max_connections && $index <= $max_index):
            for($i=$open_connections;$i<$max_connections && $index <= $max_index;$i++):
                $url = $all_urls[$index];
                $ch[$index] = curl_init($url);
                curl_setopt($ch[$index],CURLOPT_FOLLOWLOCATION, true);
                curl_setopt($ch[$index],CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch[$index],CURLOPT_COOKIESESSION, false);
                curl_setopt($ch[$index],CURLOPT_SSL_VERIFYHOST , false);
                curl_setopt($ch[$index],CURLOPT_SSL_VERIFYPEER , false);
                curl_setopt($ch[$index],CURLOPT_COOKIEJAR, $cookie_file);
                curl_setopt($ch[$index],CURLOPT_COOKIEFILE, $cookie_file);
                curl_setopt($ch[$index],CURLOPT_USERAGENT,$useragent);
                curl_multi_add_handle($mh, $ch[$index]);
                $open_connections++;
                $index++;
                $execReturnValue = curl_multi_exec($mh,$running);
                usleep(200);
            endfor;
        endif;

        $execReturnValue = curl_multi_exec($mh,$running);
        $ready=curl_multi_select($mh);


        while($info=curl_multi_info_read($mh)){
            $status=curl_getinfo($info['handle'],CURLINFO_HTTP_CODE);
            if($status==200){
                $successUrl=curl_getinfo($info['handle'],CURLINFO_EFFECTIVE_URL);
                $curl_index = $url_index[$successUrl];
                $return_buffer[$curl_index] = curl_multi_getcontent($ch[$curl_index]);
                $buffer_count = count($return_buffer);
                curl_multi_remove_handle($mh, $ch[$curl_index]);
                curl_close($ch[$curl_index]);
                unset($ch[$curl_index]);
                $open_connections--;
            }else{

                echo "ERROR: $status\n";
            }
        }
    } 

    curl_multi_close($mh);
    unset($mh);

    return $return_buffer;
}



private function _extract_more_info($html)
{

    $buffer = array();


    $query = "//img[@id='ctl00_cphMain_cntrlProductProfile_imgprodimage']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['td_img_url'] = $node?trim($node->getAttribute('src')):null;
    unset($result);


    $query = "//span[@class='priceLarge']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['price'] = $node?trim($node->nodeValue):null;
    if($buffer['price'] == 'Req. Auth.') return null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLtFinalPrice']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['msrp'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLTMRF']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['manf_part_number'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLblUPC']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    $buffer['upc_part_number'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//td[@class='black_text_WUL']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['manufacturer'] = $node?trim($node->nodeValue):null;
    unset($result);


    $query = "//td[@class='textt' and @colspan='3']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['short_description'] = $node?trim($node->nodeValue):null;
    unset($result);





    $query = "//div[@id='ctl00_cphMain_pnlMarketingDesc']//td[@class='textt']";
    $result = $this->_xquery($html,$query);
    $node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    if(!$node) return null;
    $buffer['long_description'] = $node?trim($node->nodeValue):null;
    unset($result);

    $query = "//table[@id='ctl00_cphMain_cntrlMainSpecs_dgSpecs']";
    $result = $this->_xquery($html,$query);
    $table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    unset($result);

    if(!$table) return null;
    $table_array = Array();
    $rows = $table->getElementsByTagName('tr');
    foreach($rows as $tr):
        $temp = Array();
        $columns = $tr->getElementsByTagName('td');
        $caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
        $value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;

        if ($caption) $table_array[$caption] = $value;
    endforeach;


    $buffer['main_specs']=$table_array;


    $query = "//table[@id='ctl00_cphMain_cntrlExtSpecs_tblData']";
    $result = $this->_xquery($html,$query);
    $table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
    unset($result);
    $buffer['additional_specs'] = null;
    if(!$table) return $buffer;


    $table_array = Array();
    $rows = $table->getElementsByTagName('tr');
    foreach($rows as $tr):
        $temp = Array();
        $columns = $tr->getElementsByTagName('td');
        $caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
        $value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;

        if ($caption) $table_array[$caption] = $value;
    endforeach;
    $buffer['additional_specs']=$table_array;;
    return $buffer;

}



private function _xquery($html,$query,$allnodes = false){
    $src = '';
    $dom = new DOMDocument();
    $node = null;
    if (@$dom->loadHTML($html)) {
        $xpath = new DOMXpath($dom);
        $nodeList = $xpath->query($query);
        if ($nodeList->length > 0) {
            $node = $allnodes==false?$nodeList->item(0):$nodeList;
        }
    }
    unset($xpath);
    unset($nodeList);
    unset($dom);
    return $node;
}

【问题讨论】:

  • 您确定这是泄漏,还是只是需要更多内存?
  • 在关闭 php 服务之前消耗了多少内存,您希望在任何给定时间存储的最大数据量是多少?
  • 我相信这是一个泄漏,因为我尽可能取消了任何变量。它的内存使用量高达 100mb+。没有一个页面那么大,我也没有在任何变量中存储那么多数据。
  • @65Fbef05:我正在再次运行脚本来为您获取准确的数字。我的 memory_limit 设置为 128M
  • 我不认为你“试图尽可能地减少它”哈哈

标签: php memory-leaks


【解决方案1】:

找出漏洞的策略?

  • 确保它泄漏(如果处理 1/100 的数据,内存还没有释放吗?1/1000?)
  • 考虑复杂性:如果foo 是O(n),bar 是O(n),bar 调用foo,结果可能会变成O(n*n)。
  • 实验:禁用部分程序,直到不再泄漏为止

乍一看,您正在抓取一系列网址。这些可能包含更多的 url,可以使用 MultiCrawl 方法进行抓取。你确定那里不可能有循环吗? (使用文件夹不止一次欺骗了我:浏览 '.' 作为子文件夹会产生无限循环)

【讨论】:

  • 它没有任何深度。我有一个产品编号/价格的 dbf。它使用产品编号去分销商网站获取一些附加信息。
猜你喜欢
  • 2023-03-16
  • 2011-02-05
  • 1970-01-01
  • 1970-01-01
  • 2013-01-27
  • 1970-01-01
  • 2016-02-10
  • 2023-03-17
  • 2012-04-17
相关资源
最近更新 更多