【发布时间】:2014-02-17 23:53:37
【问题描述】:
我正在尝试从网站获取链接。当我尝试通过终端连接时,我收到以下消息:“您必须在浏览器中打开 javascript 和 cookie 支持才能访问此站点”。我在 stackoverflow 和谷歌周围尝试了许多不同的代码。没有人按照我想要的方式工作。他们都没有从我试图从中获取数据的该网站获取任何数据。其他网站工作。
<?php
function get_url( $url, $javascript_loop = 0, $timeout = 5 )
{
$url = str_replace( "&", "&", urldecode(trim($url)) );
$cookie = tempnam ("/tmp", "CURLCOOKIE");
$ch = curl_init();
curl_setopt( $ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1" );
curl_setopt( $ch, CURLOPT_URL, $url );
curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );
curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
curl_setopt( $ch, CURLOPT_ENCODING, "" );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_AUTOREFERER, true );
curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false ); # required for https urls
curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, $timeout );
curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout );
curl_setopt( $ch, CURLOPT_MAXREDIRS, 10 );
$content = curl_exec( $ch );
$response = curl_getinfo( $ch );
if(curl_exec($ch) === false)
{
echo 'Curl error: ' . curl_error($ch);
}
curl_close ( $ch );
if ($response['http_code'] == 301 || $response['http_code'] == 302)
{
ini_set("user_agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1");
if ( $headers = get_headers($response['url']) )
{
foreach( $headers as $value )
{
if ( substr( strtolower($value), 0, 9 ) == "location:" )
return get_url( trim( substr( $value, 9, strlen($value) ) ) );
}
}
}
if ( ( preg_match("/>[[:space:]]+window\.location\.replace\('(.*)'\)/i", $content, $value) || preg_match("/>[[:space:]]+window\.location\=\"(.*)\"/i", $content, $value) ) &&
$javascript_loop < 5
)
{
return get_url( $value[1], $javascript_loop+1 );
}
else
{
return array( $content, $response );
}
}
$test = get_url('http://livefootball.ws');
print_r($test);
?>
如果我将 URL 切换到其他网站,我会得到结果,但在这个网站上它不起作用。任何帮助将不胜感激。
【问题讨论】:
标签: javascript php html curl