php使用服务器进行远程抓取百度网页内容
作者:互联网
php使用服务器进行远程抓取百度网页内容
<?php
/**
 * Fetches a Baidu mobile search result page from the server side with cURL
 * and echoes the raw response body to the visitor.
 *
 * The visitor's User-Agent and IP-related values are forwarded to Baidu as
 * request headers (User-Agent, X-FORWARDED-FOR, CLIENT-IP).
 */

// Report everything except notices and warnings.
error_reporting(E_ALL ^ E_NOTICE ^ E_WARNING);

// Visitor's User-Agent, reused for the outgoing request; empty when the
// incoming request did not carry one.
$useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

/**
 * Get the client IP, preferring X-Forwarded-For over REMOTE_ADDR.
 *
 * When the chosen value is a comma-separated list, only its first entry is
 * returned. X-Forwarded-For is supplied by the client and is not validated
 * here, so the result must not be trusted for access control.
 *
 * @return string The client IP, or 'unknown' when neither source is usable.
 */
function getip()
{
    $unknown = 'unknown';
    // Default so the function never returns an undefined variable.
    $ip = $unknown;

    if (!empty($_SERVER['HTTP_X_FORWARDED_FOR']) && strcasecmp($_SERVER['HTTP_X_FORWARDED_FOR'], $unknown)) {
        $ip = $_SERVER['HTTP_X_FORWARDED_FOR'];
    } elseif (!empty($_SERVER['REMOTE_ADDR']) && strcasecmp($_SERVER['REMOTE_ADDR'], $unknown)) {
        $ip = $_SERVER['REMOTE_ADDR'];
    }

    if (false !== strpos($ip, ',')) {
        // Index the array directly: reset() takes its argument by reference
        // and must not be given a function result.
        $parts = explode(',', $ip);
        $ip = trim($parts[0]);
    }

    return $ip;
}

/**
 * Get the address of the directly connected peer.
 *
 * Reads $_SERVER['REMOTE_ADDR'] first and falls back to the REMOTE_ADDR
 * environment variable.
 *
 * @return string The peer address, or "unknown" when neither is set.
 */
function get_client_ip()
{
    if (!empty($_SERVER['REMOTE_ADDR'])) {
        return $_SERVER['REMOTE_ADDR'];
    }

    $fromEnv = getenv("REMOTE_ADDR");
    if ($fromEnv) {
        return $fromEnv;
    }

    return "unknown";
}

// Search keywords; one is picked at random for each request.
$word = [
    '医院',
];
$arrword = $word[mt_rand(0, count($word) - 1)];
$keyword = urlencode($arrword);
$url = "http://m.baidu.com/s?word=" . $keyword;

// Client-derived values end up inside header lines, so strip CR/LF to make
// sure they cannot introduce additional header lines.
$lineBreaks = array("\r", "\n");
$forwardedFor = str_replace($lineBreaks, '', getip());
$clientIp = str_replace($lineBreaks, '', get_client_ip());

// All request headers in ONE list: a second CURLOPT_HTTPHEADER call replaces
// the first instead of adding to it. No Host header is set manually because
// cURL derives it from the URL, and no Content-Type is sent because this is a
// GET request without a body.
$header = array(
    "Connection: keep-alive",
    'Referer:http://m.baidu.com/',
    'X-FORWARDED-FOR:' . $forwardedFor,
    'CLIENT-IP:' . $clientIp,
);

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
// Return the body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Bound the request so a stalled upstream cannot block this script forever.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);

// Execute the request.
$content = curl_exec($ch);

// Strict comparison: an empty response body is a valid result, not a failure.
if ($content === false) {
    echo "error:" . curl_error($ch);
    $content = '';
}

// Release the handle.
curl_close($ch);

// Output the fetched page.
echo $content;
?>
标签:抓取,服务器进行,网页内容,php,远程,百度 来源: https://www.cnblogs.com/68xi/p/13784816.html