Ejemplo n.º 1
0
 public function fetchAllOnce(array $urls, $isRedirect = false)
 {
     if (!$isRedirect) {
         $urls = array_unique($urls);
     }
     if (empty($urls)) {
         return;
     }
     //////////////////////////////////////////////////////
     // parallel (HttpRequestPool)
     if ($this->method == self::METHOD_REQUEST_POOL) {
         $this->debug('Starting parallel fetch (HttpRequestPool)');
         try {
             while (count($urls) > 0) {
                 $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
                 $subset = array_splice($urls, 0, $this->maxParallelRequests);
                 $pool = new HttpRequestPool();
                 foreach ($subset as $orig => $url) {
                     if (!$isRedirect) {
                         $orig = $url;
                     }
                     unset($this->redirectQueue[$orig]);
                     $this->debug("...{$url}");
                     if (!$isRedirect && isset($this->requests[$url])) {
                         $this->debug("......in memory");
                         /*
                         } elseif ($this->isCached($url)) {
                         	$this->debug("......is cached");
                         	if (!$this->minimiseMemoryUse) {
                         		$this->requests[$url] = $this->getCached($url);
                         	}
                         */
                     } else {
                         $this->debug("......adding to pool");
                         $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                         $req_url = $this->removeFragment($req_url);
                         $httpRequest = new HttpRequest($req_url, HttpRequest::METH_GET, $this->requestOptions);
                         // send cookies, if we have any
                         if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                             $this->debug("......sending cookies: {$cookies}");
                             $httpRequest->addHeaders(array('Cookie' => $cookies));
                         }
                         $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                         $this->requests[$orig]['original_url'] = $orig;
                         $pool->attach($httpRequest);
                     }
                 }
                 // did we get anything into the pool?
                 if (count($pool) > 0) {
                     $this->debug('Sending request...');
                     try {
                         $pool->send();
                     } catch (HttpRequestPoolException $e) {
                         // do nothing
                     }
                     $this->debug('Received responses');
                     foreach ($subset as $orig => $url) {
                         if (!$isRedirect) {
                             $orig = $url;
                         }
                         //if (!isset($this->requests[$url]['fromCache'])) {
                         $request = $this->requests[$orig]['httpRequest'];
                         //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
                         // getResponseHeader() doesn't return status line, so, for consistency...
                         $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
                         $this->requests[$orig]['body'] = $request->getResponseBody();
                         $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
                         $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
                         // is redirect?
                         if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
                             $redirectURL = $request->getResponseHeader('location');
                             if (!preg_match('!^https?://!i', $redirectURL)) {
                                 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                             }
                             if ($this->validateURL($redirectURL)) {
                                 $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                 // store any cookies
                                 $cookies = $request->getResponseHeader('set-cookie');
                                 if ($cookies && !is_array($cookies)) {
                                     $cookies = array($cookies);
                                 }
                                 if ($cookies) {
                                     $this->cookieJar->storeCookies($url, $cookies);
                                 }
                                 $this->redirectQueue[$orig] = $redirectURL;
                             } else {
                                 $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                             }
                         }
                         //die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                         $pool->detach($request);
                         unset($this->requests[$orig]['httpRequest'], $request);
                         /*
                         if ($this->minimiseMemoryUse) {
                         	if ($this->cache($url)) {
                         		unset($this->requests[$url]);
                         	}
                         }
                         */
                         //}
                     }
                 }
             }
         } catch (HttpException $e) {
             $this->debug($e);
             return false;
         }
     } elseif ($this->method == self::METHOD_CURL_MULTI) {
         $this->debug('Starting parallel fetch (curl_multi_*)');
         while (count($urls) > 0) {
             $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls)));
             $subset = array_splice($urls, 0, $this->maxParallelRequests);
             $pool = new RollingCurl(array($this, 'handleCurlResponse'));
             $pool->window_size = count($subset);
             foreach ($subset as $orig => $url) {
                 if (!$isRedirect) {
                     $orig = $url;
                 }
                 unset($this->redirectQueue[$orig]);
                 $this->debug("...{$url}");
                 if (!$isRedirect && isset($this->requests[$url])) {
                     $this->debug("......in memory");
                     /*
                     } elseif ($this->isCached($url)) {
                     	$this->debug("......is cached");
                     	if (!$this->minimiseMemoryUse) {
                     		$this->requests[$url] = $this->getCached($url);
                     	}
                     */
                 } else {
                     $this->debug("......adding to pool");
                     $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                     $req_url = $this->removeFragment($req_url);
                     $headers = array();
                     // send cookies, if we have any
                     if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                         $this->debug("......sending cookies: {$cookies}");
                         $headers[] = 'Cookie: ' . $cookies;
                     }
                     $httpRequest = new RollingCurlRequest($req_url, 'GET', null, $headers, array(CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], CURLOPT_TIMEOUT => $this->requestOptions['timeout']));
                     $httpRequest->set_original_url($orig);
                     $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest);
                     $this->requests[$orig]['original_url'] = $orig;
                     // TODO: is this needed anymore?
                     $pool->add($httpRequest);
                 }
             }
             // did we get anything into the pool?
             if (count($pool) > 0) {
                 $this->debug('Sending request...');
                 $pool->execute();
                 // this will call handleCurlResponse() and populate $this->requests[$orig]
                 $this->debug('Received responses');
                 foreach ($subset as $orig => $url) {
                     if (!$isRedirect) {
                         $orig = $url;
                     }
                     // $this->requests[$orig]['headers']
                     // $this->requests[$orig]['body']
                     // $this->requests[$orig]['effective_url']
                     $status_code = $this->requests[$orig]['status_code'];
                     if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                         $redirectURL = $this->requests[$orig]['location'];
                         if (!preg_match('!^https?://!i', $redirectURL)) {
                             $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                         }
                         if ($this->validateURL($redirectURL)) {
                             $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                             // store any cookies
                             $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                             if (!empty($cookies)) {
                                 $this->cookieJar->storeCookies($url, $cookies);
                             }
                             $this->redirectQueue[$orig] = $redirectURL;
                         } else {
                             $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                         }
                     }
                     // die($url.' -multi- '.$request->getResponseInfo('effective_url'));
                     unset($this->requests[$orig]['httpRequest']);
                 }
             }
         }
     } else {
         $this->debug('Starting sequential fetch (file_get_contents)');
         $this->debug('Processing set of ' . count($urls));
         foreach ($urls as $orig => $url) {
             if (!$isRedirect) {
                 $orig = $url;
             }
             unset($this->redirectQueue[$orig]);
             $this->debug("...{$url}");
             if (!$isRedirect && isset($this->requests[$url])) {
                 $this->debug("......in memory");
                 /*
                 } elseif ($this->isCached($url)) {
                 	$this->debug("......is cached");
                 	if (!$this->minimiseMemoryUse) {
                 		$this->requests[$url] = $this->getCached($url);
                 	}
                 */
             } else {
                 $this->debug("Sending request for {$url}");
                 $this->requests[$orig]['original_url'] = $orig;
                 $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url;
                 $req_url = $this->removeFragment($req_url);
                 // send cookies, if we have any
                 $httpContext = $this->httpContext;
                 if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
                     $this->debug("......sending cookies: {$cookies}");
                     $httpContext['http']['header'] .= 'Cookie: ' . $cookies . "\r\n";
                 }
                 if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
                     $this->debug('Received response');
                     // get status code
                     if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\\d+\\.\\d+\\s+(\\d+)!', trim($http_response_header[0]), $match)) {
                         $this->debug('Error: no status code found');
                         // TODO: handle error - no status code
                     } else {
                         $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
                         $this->requests[$orig]['body'] = $html;
                         $this->requests[$orig]['effective_url'] = $req_url;
                         $this->requests[$orig]['status_code'] = $status_code = (int) $match[1];
                         unset($match);
                         // handle redirect
                         if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
                             $this->requests[$orig]['location'] = trim($match[1]);
                         }
                         if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
                             $redirectURL = $this->requests[$orig]['location'];
                             if (!preg_match('!^https?://!i', $redirectURL)) {
                                 $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
                             }
                             if ($this->validateURL($redirectURL)) {
                                 $this->debug('Redirect detected. Valid URL: ' . $redirectURL);
                                 // store any cookies
                                 $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
                                 if (!empty($cookies)) {
                                     $this->cookieJar->storeCookies($url, $cookies);
                                 }
                                 $this->redirectQueue[$orig] = $redirectURL;
                             } else {
                                 $this->debug('Redirect detected. Invalid URL: ' . $redirectURL);
                             }
                         }
                     }
                 } else {
                     $this->debug('Error retrieving URL');
                     //print_r($req_url);
                     //print_r($http_response_header);
                     //print_r($html);
                     // TODO: handle error - failed to retrieve URL
                 }
             }
         }
     }
 }
Ejemplo n.º 2
0
 /**
  * @return void
  */
 public function __destruct()
 {
     unset($this->group);
     parent::__destruct();
 }
Ejemplo n.º 3
0
 /**
  * @param string $output received page body
  * @param array $info holds various information about response such as HTTP response code, content type, time taken to make request etc.
  * @param RollingCurlRequest $request request used
  * @return void
  */
 protected function process($output, $info, $request)
 {
     if ($request instanceof RollingCurlGroupRequest) {
         $request->process($output, $info);
     }
     if (is_callable($this->group_callback)) {
         call_user_func($this->group_callback, $output, $info, $request);
     }
 }