public function fetchAllOnce(array $urls, $isRedirect = false) { if (!$isRedirect) { $urls = array_unique($urls); } if (empty($urls)) { return; } ////////////////////////////////////////////////////// // parallel (HttpRequestPool) if ($this->method == self::METHOD_REQUEST_POOL) { $this->debug('Starting parallel fetch (HttpRequestPool)'); try { while (count($urls) > 0) { $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls))); $subset = array_splice($urls, 0, $this->maxParallelRequests); $pool = new HttpRequestPool(); foreach ($subset as $orig => $url) { if (!$isRedirect) { $orig = $url; } unset($this->redirectQueue[$orig]); $this->debug("...{$url}"); if (!$isRedirect && isset($this->requests[$url])) { $this->debug("......in memory"); /* } elseif ($this->isCached($url)) { $this->debug("......is cached"); if (!$this->minimiseMemoryUse) { $this->requests[$url] = $this->getCached($url); } */ } else { $this->debug("......adding to pool"); $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url; $req_url = $this->removeFragment($req_url); $httpRequest = new HttpRequest($req_url, HttpRequest::METH_GET, $this->requestOptions); // send cookies, if we have any if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { $this->debug("......sending cookies: {$cookies}"); $httpRequest->addHeaders(array('Cookie' => $cookies)); } $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest); $this->requests[$orig]['original_url'] = $orig; $pool->attach($httpRequest); } } // did we get anything into the pool? if (count($pool) > 0) { $this->debug('Sending request...'); try { $pool->send(); } catch (HttpRequestPoolException $e) { // do nothing } $this->debug('Received responses'); foreach ($subset as $orig => $url) { if (!$isRedirect) { $orig = $url; } //if (!isset($this->requests[$url]['fromCache'])) { $request = $this->requests[$orig]['httpRequest']; //$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader()); // getResponseHeader() doesn't return status line, so, for consistency... $this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size')); $this->requests[$orig]['body'] = $request->getResponseBody(); $this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url'); $this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode(); // is redirect? if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) { $redirectURL = $request->getResponseHeader('location'); if (!preg_match('!^https?://!i', $redirectURL)) { $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); } if ($this->validateURL($redirectURL)) { $this->debug('Redirect detected. Valid URL: ' . $redirectURL); // store any cookies $cookies = $request->getResponseHeader('set-cookie'); if ($cookies && !is_array($cookies)) { $cookies = array($cookies); } if ($cookies) { $this->cookieJar->storeCookies($url, $cookies); } $this->redirectQueue[$orig] = $redirectURL; } else { $this->debug('Redirect detected. Invalid URL: ' . $redirectURL); } } //die($url.' -multi- '.$request->getResponseInfo('effective_url')); $pool->detach($request); unset($this->requests[$orig]['httpRequest'], $request); /* if ($this->minimiseMemoryUse) { if ($this->cache($url)) { unset($this->requests[$url]); } } */ //} } } } } catch (HttpException $e) { $this->debug($e); return false; } } elseif ($this->method == self::METHOD_CURL_MULTI) { $this->debug('Starting parallel fetch (curl_multi_*)'); while (count($urls) > 0) { $this->debug('Processing set of ' . min($this->maxParallelRequests, count($urls))); $subset = array_splice($urls, 0, $this->maxParallelRequests); $pool = new RollingCurl(array($this, 'handleCurlResponse')); $pool->window_size = count($subset); foreach ($subset as $orig => $url) { if (!$isRedirect) { $orig = $url; } unset($this->redirectQueue[$orig]); $this->debug("...{$url}"); if (!$isRedirect && isset($this->requests[$url])) { $this->debug("......in memory"); /* } elseif ($this->isCached($url)) { $this->debug("......is cached"); if (!$this->minimiseMemoryUse) { $this->requests[$url] = $this->getCached($url); } */ } else { $this->debug("......adding to pool"); $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url; $req_url = $this->removeFragment($req_url); $headers = array(); // send cookies, if we have any if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { $this->debug("......sending cookies: {$cookies}"); $headers[] = 'Cookie: ' . $cookies; } $httpRequest = new RollingCurlRequest($req_url, 'GET', null, $headers, array(CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], CURLOPT_TIMEOUT => $this->requestOptions['timeout'])); $httpRequest->set_original_url($orig); $this->requests[$orig] = array('headers' => null, 'body' => null, 'httpRequest' => $httpRequest); $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? $pool->add($httpRequest); } } // did we get anything into the pool? if (count($pool) > 0) { $this->debug('Sending request...'); $pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig] $this->debug('Received responses'); foreach ($subset as $orig => $url) { if (!$isRedirect) { $orig = $url; } // $this->requests[$orig]['headers'] // $this->requests[$orig]['body'] // $this->requests[$orig]['effective_url'] $status_code = $this->requests[$orig]['status_code']; if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { $redirectURL = $this->requests[$orig]['location']; if (!preg_match('!^https?://!i', $redirectURL)) { $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); } if ($this->validateURL($redirectURL)) { $this->debug('Redirect detected. Valid URL: ' . $redirectURL); // store any cookies $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); if (!empty($cookies)) { $this->cookieJar->storeCookies($url, $cookies); } $this->redirectQueue[$orig] = $redirectURL; } else { $this->debug('Redirect detected. Invalid URL: ' . $redirectURL); } } // die($url.' -multi- '.$request->getResponseInfo('effective_url')); unset($this->requests[$orig]['httpRequest']); } } } } else { $this->debug('Starting sequential fetch (file_get_contents)'); $this->debug('Processing set of ' . count($urls)); foreach ($urls as $orig => $url) { if (!$isRedirect) { $orig = $url; } unset($this->redirectQueue[$orig]); $this->debug("...{$url}"); if (!$isRedirect && isset($this->requests[$url])) { $this->debug("......in memory"); /* } elseif ($this->isCached($url)) { $this->debug("......is cached"); if (!$this->minimiseMemoryUse) { $this->requests[$url] = $this->getCached($url); } */ } else { $this->debug("Sending request for {$url}"); $this->requests[$orig]['original_url'] = $orig; $req_url = $this->rewriteHashbangFragment ? $this->rewriteHashbangFragment($url) : $url; $req_url = $this->removeFragment($req_url); // send cookies, if we have any $httpContext = $this->httpContext; if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) { $this->debug("......sending cookies: {$cookies}"); $httpContext['http']['header'] .= 'Cookie: ' . $cookies . "\r\n"; } if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) { $this->debug('Received response'); // get status code if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\\d+\\.\\d+\\s+(\\d+)!', trim($http_response_header[0]), $match)) { $this->debug('Error: no status code found'); // TODO: handle error - no status code } else { $this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false); $this->requests[$orig]['body'] = $html; $this->requests[$orig]['effective_url'] = $req_url; $this->requests[$orig]['status_code'] = $status_code = (int) $match[1]; unset($match); // handle redirect if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) { $this->requests[$orig]['location'] = trim($match[1]); } if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) { $redirectURL = $this->requests[$orig]['location']; if (!preg_match('!^https?://!i', $redirectURL)) { $redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url); } if ($this->validateURL($redirectURL)) { $this->debug('Redirect detected. Valid URL: ' . $redirectURL); // store any cookies $cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']); if (!empty($cookies)) { $this->cookieJar->storeCookies($url, $cookies); } $this->redirectQueue[$orig] = $redirectURL; } else { $this->debug('Redirect detected. Invalid URL: ' . $redirectURL); } } } } else { $this->debug('Error retrieving URL'); //print_r($req_url); //print_r($http_response_header); //print_r($html); // TODO: handle error - failed to retrieve URL } } } } }
/** * @return void */ public function __destruct() { unset($this->group); parent::__destruct(); }
/** * @param string $output received page body * @param array $info holds various information about response such as HTTP response code, content type, time taken to make request etc. * @param RollingCurlRequest $request request used * @return void */ protected function process($output, $info, $request) { if ($request instanceof RollingCurlGroupRequest) { $request->process($output, $info); } if (is_callable($this->group_callback)) { call_user_func($this->group_callback, $output, $info, $request); } }