编程语言
首页 > 编程语言 > php使用服务器进行远程抓取百度网页内容

php使用服务器进行远程抓取百度网页内容

作者:互联网

   php使用服务器进行远程抓取百度网页内容

<?php
// Report everything except notices and warnings (same mask as E_ALL ^ E_NOTICE ^ E_WARNING,
// written in the conventional "all but these" form).
error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
// User-Agent sent by the visiting client; it is replayed on the outgoing request.
// Falls back to an empty string when the client sent no User-Agent header
// (or the script runs from the CLI), instead of reading an undefined index.
$useragent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';
/**
 * Get the client IP address for the current request.
 *
 * Prefers the X-Forwarded-For request header and falls back to REMOTE_ADDR.
 * A value is ignored when it is missing, empty, or the literal "unknown"
 * (case-insensitive). When the chosen value is a comma-separated list, only
 * the first entry is returned.
 *
 * NOTE: X-Forwarded-For is supplied by the client and can be forged; do not
 * use the result for access control.
 *
 * @return string|null The detected address, or null when neither source
 *                     provides a usable value.
 */
function getip() {
    $unknown = 'unknown';
    // Initialise explicitly so the "nothing usable" path returns null without
    // touching an undefined variable.
    $ip = null;

    if (!empty($_SERVER['HTTP_X_FORWARDED_FOR']) && strcasecmp($_SERVER['HTTP_X_FORWARDED_FOR'], $unknown) !== 0) {
        $ip = $_SERVER['HTTP_X_FORWARDED_FOR'];
    } elseif (!empty($_SERVER['REMOTE_ADDR']) && strcasecmp($_SERVER['REMOTE_ADDR'], $unknown) !== 0) {
        $ip = $_SERVER['REMOTE_ADDR'];
    }

    if ($ip === null) {
        return null;
    }

    if (strpos($ip, ',') !== false) {
        // Keep only the first entry of the list. explode() output is stored in
        // a variable because reset() takes its argument by reference.
        $parts = explode(',', $ip, 2);
        $ip = trim($parts[0]);
    }

    return $ip;
}

/**
 * Get the address of the directly connected client.
 *
 * Reads REMOTE_ADDR from $_SERVER first and from the process environment
 * second.
 *
 * @return string The address, or the literal "unknown" when neither source
 *                has a non-empty value.
 */
function get_client_ip(){
    // !empty() avoids an undefined-index warning when REMOTE_ADDR is absent
    // (for example when the script is run from the command line).
    if (!empty($_SERVER['REMOTE_ADDR'])) {
        return $_SERVER['REMOTE_ADDR'];
    }

    // getenv() returns false when the variable is not set; read it once.
    $fromEnv = getenv("REMOTE_ADDR");
    if ($fromEnv) {
        return $fromEnv;
    }

    return "unknown";
}

// Candidate search keywords; one is picked at random for each request.
$word = [
    '医院',
];
$arrword = $word[array_rand($word)];
$keyword = urlencode($arrword);
$url = "http://m.baidu.com/s?word=".$keyword;

// Build ONE header list. CURLOPT_HTTPHEADER replaces the whole list on every
// call, so setting it twice silently discards the first set of headers.
// No explicit Host header is sent: cURL derives it from the URL, and a
// hard-coded "www.baidu.com" would not match the m.baidu.com endpoint.
// No Content-Type either, because this is a GET request without a body.
$header = [
    "Connection: keep-alive",
    'Referer:http://m.baidu.com/',
];

// Pass the visitor's address along to the upstream server.
$forwardedFor = getip();
if ($forwardedFor !== null && $forwardedFor !== '') {
    $header[] = 'X-FORWARDED-FOR:'.$forwardedFor;
}
$header[] = 'CLIENT-IP:'.get_client_ip();

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
// Replay the visitor's User-Agent; the cast guards against a null value.
curl_setopt($ch, CURLOPT_USERAGENT, (string) $useragent);
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Bound the request so a stalled upstream cannot hang this script indefinitely.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);

// Execute the request. Only a strict false means failure; an empty string is
// a valid (empty) response body and must not be reported as an error.
$content = curl_exec($ch);
$curlError = ($content === false) ? curl_error($ch) : null;

// Release the handle (the error message has to be read before this point).
curl_close($ch);

// Output either the error message or the fetched page.
if ($curlError !== null) {
    echo "error:" . $curlError;
} else {
    echo $content;
}
?>

  

标签:抓取,服务器进行,网页内容,php,远程,百度
来源: https://www.cnblogs.com/68xi/p/13784816.html