下面是网上找到的一个类,据说可以实现 PHP 异步抓取数据。但是 PHP 一直是同步执行的,打印出的结果也是同步的;不过通过下面 rolling_curl 的方法,的确可以节省一定的时间。既然都是同步的,为什么会节约时间呢?希望各位大神能给出一些指导。
/**
 * A single queued HTTP request.
 *
 * Plain value holder: it stores the URL, method, body, headers and per-request
 * cURL options exactly as given (the method is upper-cased).
 */
class Curl_request {
    /** @var string Target URL. */
    public $url = '';

    /** @var string HTTP method, stored upper-cased by the constructor. */
    public $method = 'GET';

    /** @var mixed Request body for POST requests, or null. */
    public $post_data = null;

    /** @var array|null Request header lines, or null. */
    public $headers = null;

    /** @var array|null Per-request cURL options (CURLOPT_* => value), or null. */
    public $options = null;

    /**
     * @param string     $url       Target URL.
     * @param string     $method    HTTP method; upper-cased before being stored.
     * @param mixed      $post_data Request body, or null.
     * @param array|null $headers   Request header lines, or null.
     * @param array|null $options   Per-request cURL options, or null.
     * @return void
     */
    public function __construct($url, $method = 'GET', $post_data = null, $headers = null, $options = null) {
        $this->url = $url;
        $this->method = strtoupper($method);
        $this->post_data = $post_data;
        $this->headers = $headers;
        // Previously assigned from an undefined variable, so options were always lost.
        $this->options = $options;
    }

    /**
     * Drop all stored fields when the object is destroyed.
     *
     * @return void
     */
    public function __destruct() {
        unset($this->url, $this->method, $this->post_data, $this->headers, $this->options);
    }
}
/**
 * Batch HTTP client built on curl_multi: queues Curl_request objects and runs them.
 *
 * NOTE(review): the class header was missing from this listing. The name "Curl" is
 * taken from the trailing "END Curl Class" marker; confirm against the original file.
 */
class Curl {
/**
 * Number of requests started together by rolling_curl().
 *
 * @var int
 */
private $size = 1;

/**
 * Timeout, in seconds, passed to curl_multi_select() while waiting for activity.
 *
 * @var int
 */
private $timeout = 5;

/**
 * Callback invoked when a request completes.
 *
 * @var callable|null
 */
private $callback = null;

/**
 * Default cURL options applied to every request.
 *
 * NOTE(review): CURLOPT_SSL_VERIFYPEER => 0 disables TLS certificate verification.
 * It is kept for backward compatibility; enable it for untrusted networks.
 *
 * @var array
 */
private $options = array(
    CURLOPT_SSL_VERIFYPEER => 0,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CONNECTTIMEOUT => 30,
);

/**
 * Class-level request headers.
 *
 * @var array
 */
private $headers = array();

/**
 * Request queue.
 *
 * @var array
 */
private $requests = array();

/**
 * Index into the request queue: cURL handle key => position in $requests.
 *
 * @var array
 */
private $request_map = array();

/**
 * Collected error messages.
 *
 * @var array
 */
private $errors = array();
/**
 * Create the client and remember the completion callback.
 *
 * The callback is documented to take four arguments:
 *   $response  body returned by the URL
 *   $info      information about the cURL handle
 *   $error     error message
 *   $request   the request object
 *
 * @access public
 * @param callable|null $callback Completion callback, or null for none.
 */
public function __construct($callback = null) {
    $this->callback = $callback;
}
/**
 * Append an already-built request object to the queue.
 *
 * @access public
 * @param object $request Request object to enqueue.
 * @return boolean Always true.
 */
public function add($request) {
    array_push($this->requests, $request);

    return true;
}
/**
 * Build a request object and append it to the queue.
 *
 * @access public
 * @param string     $url       Target URL.
 * @param string     $method    HTTP method.
 * @param mixed      $post_data Request body, or null.
 * @param array|null $headers   Request header lines, or null.
 * @param array|null $options   Per-request cURL options, or null.
 * @return boolean Always true.
 */
public function request($url, $method = 'GET', $post_data = null, $headers = null, $options = null) {
    // The parameter used to be misspelled, so an undefined variable was forwarded instead.
    $this->requests[] = new Curl_request($url, $method, $post_data, $headers, $options);

    return true;
}
/**
 * Queue a GET request.
 *
 * @access public
 * @param string     $url     Target URL.
 * @param array|null $headers Request header lines, or null.
 * @param array|null $options Per-request cURL options, or null.
 * @return boolean Result of request(), which is always true.
 */
public function get($url, $headers = null, $options = null) {
    // Forward the caller's options; the misspelled parameter previously dropped them.
    return $this->request($url, 'GET', null, $headers, $options);
}
/**
 * Queue a POST request.
 *
 * @access public
 * @param string     $url       Target URL.
 * @param mixed      $post_data Request body, or null.
 * @param array|null $headers   Request header lines, or null.
 * @param array|null $options   Per-request cURL options, or null.
 * @return boolean Result of request(), which is always true.
 */
public function post($url, $post_data = null, $headers = null, $options = null) {
    // Forward the caller's options; the misspelled parameter previously dropped them.
    return $this->request($url, 'POST', $post_data, $headers, $options);
}
/**
 * Run the queued requests.
 *
 * Exactly one queued request goes through single_curl(); any other queue
 * length goes through rolling_curl().
 *
 * @access public
 * @param int|null $size Maximum number of connections, forwarded to rolling_curl().
 * @return mixed|boolean Whatever the chosen runner returns.
 */
public function execute($size = null) {
    if (count($this->requests) !== 1) {
        return $this->rolling_curl($size);
    }

    return $this->single_curl();
}
/**
 * Execute the single queued request.
 *
 * The request is removed from the queue, executed, and any cURL error message
 * is recorded through set_error(). A callback is optional for one-off requests.
 *
 * @access private
 * @return mixed|boolean The curl_exec() result when no callback is set; true when
 *                       a callback is set.
 */
private function single_curl() {
    $request = array_shift($this->requests);

    $ch = curl_init();
    curl_setopt_array($ch, $this->get_options($request));
    $output = curl_exec($ch);
    $info = curl_getinfo($ch);
    $error = curl_error($ch);

    // Release the handle; on PHP 8+ handles are objects and are freed automatically.
    if (is_resource($ch)) {
        curl_close($ch);
    }

    $this->set_error($error);

    if (!$this->callback) {
        return $output;
    }

    if (is_callable($this->callback)) {
        // Same four-argument contract as rolling_curl(): ($response, $info, $error, $request).
        call_user_func($this->callback, $output, $info, $error, $request);
    }

    return true;
}
/**
 * Execute the queued requests concurrently through curl_multi.
 *
 * At most $size transfers are in flight at once. Each time a transfer finishes,
 * its result is passed to the callback and the next queued request, if any, is
 * started, so every queued request is eventually executed.
 *
 * @access private
 * @param int|null $size Maximum number of connections; defaults to the queue length.
 * @return boolean Always true; failures are recorded through set_error().
 */
private function rolling_curl($size = null) {
    $total = count($this->requests);

    $this->size = $size ? $size : $total;
    if ($total < $this->size) {
        $this->size = $total;
    }
    if ($this->size < 2) {
        $this->set_error('size must be greater than 1');
    }

    $master = curl_multi_init();
    $handles = array(); // handle key => cURL handle, for everything still attached to $master
    $next = 0;          // index of the next request that has not been started yet

    // Fill the initial window.
    for (; $next < $this->size; $next++) {
        $ch = $this->start_request($master, $next);
        $handles[$this->handle_key($ch)] = $ch;
    }

    $active = null;
    do {
        do {
            $status = curl_multi_exec($master, $active);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        if ($status !== CURLM_OK) {
            $this->set_error('curl_multi_exec failed with code ' . $status);
            break;
        }

        // Handle every transfer that has finished since the last pass.
        while ($done = curl_multi_info_read($master)) {
            $ch = $done['handle'];
            $key = $this->handle_key($ch);

            $info = curl_getinfo($ch);
            $output = curl_multi_getcontent($ch);
            $error = curl_error($ch);
            $this->set_error($error);

            $request = isset($this->request_map[$key]) ? $this->requests[$this->request_map[$key]] : null;
            unset($this->request_map[$key], $handles[$key]);

            if (is_callable($this->callback)) {
                call_user_func($this->callback, $output, $info, $error, $request);
            }

            // Keep the window full: start the next queued request before dropping this one.
            if ($next < count($this->requests)) {
                $started = $this->start_request($master, $next);
                $handles[$this->handle_key($started)] = $started;
                $next++;
            }

            // Detach from the multi handle first, then release the easy handle.
            curl_multi_remove_handle($master, $ch);
            if (is_resource($ch)) {
                curl_close($ch);
            }
        }

        // Wait for activity on any connection instead of spinning.
        if ($active && curl_multi_select($master, $this->timeout) === -1) {
            usleep(1000); // select can fail immediately; back off briefly
        }
        // $active can be 0 right after a new handle was attached, so also check $handles.
    } while ($active || count($handles) > 0);

    // Clean up anything left attached after an early exit.
    foreach ($handles as $key => $ch) {
        curl_multi_remove_handle($master, $ch);
        if (is_resource($ch)) {
            curl_close($ch);
        }
        unset($this->request_map[$key]);
    }

    curl_multi_close($master);

    return true;
}

/**
 * Create a cURL handle for the queued request at $index and attach it to $master.
 *
 * @access private
 * @param mixed $master Multi handle returned by curl_multi_init().
 * @param int   $index  Position of the request in the queue.
 * @return mixed The new easy handle.
 */
private function start_request($master, $index) {
    $ch = curl_init();
    curl_setopt_array($ch, $this->get_options($this->requests[$index]));
    curl_multi_add_handle($master, $ch);
    $this->request_map[$this->handle_key($ch)] = $index;

    return $ch;
}

/**
 * Build a string key identifying a cURL handle.
 *
 * Handles are resources before PHP 8 and objects from PHP 8 on; objects cannot
 * be cast to string, so they are keyed by spl_object_hash() instead.
 *
 * @access private
 * @param mixed $ch cURL easy handle.
 * @return string
 */
private function handle_key($ch) {
    return is_object($ch) ? spl_object_hash($ch) : (string) $ch;
}
/**
 * Build the cURL option array for one request.
 *
 * Precedence, lowest to highest: class defaults and redirect settings, the
 * request's own options, then the URL, POST and header settings derived from
 * the request fields.
 *
 * @access private
 * @param object $request Request object exposing url, method, post_data, headers and options.
 * @return array cURL options (CURLOPT_* => value).
 */
private function get_options($request) {
    $options = $this->__get('options');
    if (!is_array($options)) {
        $options = array();
    }

    // Follow redirects only when safe_mode is off or absent.
    if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
        $options[CURLOPT_FOLLOWLOCATION] = 1;
        $options[CURLOPT_MAXREDIRS] = 5;
    }

    // Only the request's own headers are used; class-level headers are not merged here.
    $headers = $request->headers;

    if ($request->options) {
        // Array union: on duplicate keys the request's options win over the defaults.
        $options = $request->options + $options;
    }

    $options[CURLOPT_URL] = $request->url;

    if ($request->post_data && strtolower($request->method) === 'post') {
        $options[CURLOPT_POST] = 1;
        $options[CURLOPT_POSTFIELDS] = $request->post_data;
    }

    if ($headers) {
        $options[CURLOPT_HEADER] = 0;
        $options[CURLOPT_HTTPHEADER] = $headers;
    }

    return $options;
}
/**
 * Record an error message; empty messages are ignored.
 *
 * @access public
 * @param string $msg Error message.
 */
public function set_error($msg) {
    if (empty($msg)) {
        return;
    }

    $this->errors[] = $msg;
}
/**
 * Return the collected error messages as one string.
 *
 * NOTE(review): the body of this method was missing from the listing. It was
 * reconstructed from the documented contract (wrap each message in $open/$close
 * and return a string); confirm against the original file.
 * NOTE(review): the defaults are a single newline, as shown in the listing. They
 * may originally have been HTML tags that were stripped; confirm.
 *
 * @access public
 * @param string $open  Text placed before each message.
 * @param string $close Text placed after each message.
 * @return string
 */
public function display_errors($open = "\n", $close = "\n") {
    $out = '';
    foreach ($this->errors as $msg) {
        $out .= $open . $msg . $close;
    }

    return $out;
}

/**
 * Magic setter.
 *
 * For 'options' and 'headers' the new value is array-unioned on top of the
 * current value, so new keys win on conflict. Any other name is assigned directly.
 *
 * @access public
 * @param string $name  Property name.
 * @param mixed  $value New value; an array for 'options' and 'headers'.
 * @return boolean Always true.
 */
public function __set($name, $value) {
    if ($name == 'options' || $name == 'headers') {
        $this->{$name} = $value + $this->{$name};
    } else {
        $this->{$name} = $value;
    }

    return TRUE;
}
/**
 * Magic getter: return the named property, or null when it is unset or null.
 *
 * @access public
 * @param string $name Property name.
 * @return mixed
 */
public function __get($name) {
    if (isset($this->{$name})) {
        return $this->{$name};
    }

    return null;
}
/**
 * Drop all instance state when the object is destroyed.
 *
 * @access public
 * @return void
 */
public function __destruct() {
    $properties = array(
        'size',
        'timeout',
        'callback',
        'options',
        'headers',
        'requests',
        'request_map',
        'errors',
    );

    foreach ($properties as $property) {
        unset($this->{$property});
    }
}
}
// END Curl Class
/* End of file curl.class.php */
1 z5864703 2016-09-12 09:45:36 +08:00: 因为这里是同时发出多个请求,而不是一个发完、等到响应后再发下一个。所以你可以测试一下:单个请求时,用 curl 单发和并发的时间是一样的;多个请求时,并发就会快一些。
3 lincanbin 2016-09-12 11:32:51 +08:00: @william23 curl_multi 是等整个请求池里的所有请求都处理完毕、拿到所有响应报文之后,才会继续往下走的。虽然看起来像异步,但实际上是同步阻塞的。
4 donwa 2016-09-12 11:57:45 +08:00: curl_multi 是并发请求,所以比逐个请求快。然后代码判断了 $execrun != CURLM_OK,哪个请求先完成,就先处理哪个请求。如果你用回调(callback)打印的话,会发现不是按顺序输出,而是哪个请求先完成就先输出哪个。其实这个 rolling_curl 最大的好处是可以控制并发的总请求数。
5 Clarencep 2016-09-12 13:13:31 +08:00: 有多个请求的时候,使用 curl_multi 确实会比依次执行多个 curl 快一些。你甚至可以在等待的时候使用非阻塞方式进行等待,顺便干点其他事情。以前我也封装过一个: https://github.com/Clarence-pan/restful ,其中还特别针对“只需要发送请求、不需要应答结果”的场景优化出了一个 `CurlMulti::waitLeast`。
7 dawniii 2016-09-13 20:49:45 +08:00: 为什么不用 guzzle 呢?异步、同步都很方便: http://docs.guzzlephp.org/en/latest/quickstart.html#async-requests