/**
 * Returns the next crawled response, fetching a new batch of pages whenever
 * the response cache is empty.
 *
 * URLs are popped from the page container in batches of
 * $this->parallelReqeusts, filtered through isFiltered() and requested in
 * parallel. For every "text/html" response the dependencies found in the
 * document are pushed back onto the page container, and the page on which
 * each URL was first seen is remembered in $this->comingFrom.
 *
 * @throws \RuntimeException when a request fails with an error other than
 *                           the "An error occurred when fetching the URI" case
 *
 * @return mixed the next response, or false when the page container is empty
 */
public function next()
{
    // Loop instead of recursing: long runs of filtered or failed batches
    // must not grow the call stack.
    while (empty($this->responseCache)) {
        $urls = $this->pageContainer->pop($this->parallelReqeusts);
        if (empty($urls)) {
            return false;
        }

        $requests = [];
        foreach ($urls as $url) {
            if (!$this->isFiltered($url)) {
                $requests[] = RequestFactory::getRequest($url, 'GET', 'php://memory', [], []);
            }
        }

        if (empty($requests)) {
            continue;
        }

        try {
            $this->responseCache = $this->httpClient->sendRequests($requests);
        } catch (MultiHttpAdapterException $e) {
            // The exception also carries the responses of the requests that
            // succeeded; keep them so one failing URL does not drop the batch.
            $this->responseCache = $e->getResponses();

            $errorMessages = '';
            foreach ($e->getExceptions() as $exception) {
                // @fixme this must be part of the http client
                $message = $exception->getMessage();

                if (strpos($message, 'An error occurred when fetching the URI') !== 0) {
                    $errorMessages .= $message . "\n";
                    continue;
                }

                // The failed URI is quoted in the message and starts at
                // offset 41. Guard the lookup: a short message or a missing
                // closing quote must not produce a bogus substring.
                $closingQuote = strlen($message) > 41 ? strpos($message, '"', 41) : false;
                if ($closingQuote === false) {
                    continue;
                }

                $failedUrl = substr($message, 41, $closingQuote - 41);

                // A path-only URI is re-queued as an absolute URL on the start host.
                if (strpos($failedUrl, '/') === 0) {
                    $this->pageContainer->push(
                        new Uri($this->startUri->getScheme() . '://' . $this->startUri->getHost() . $failedUrl)
                    );
                }
            }

            if ($errorMessages !== '') {
                throw new \RuntimeException($errorMessages);
            }
        }
    }

    $response = array_pop($this->responseCache);

    if ($response->hasHeader('Content-Type')) {
        $contentTypeElements = explode(';', $response->getHeader('Content-Type')[0]);
        // Media types are case-insensitive and may be padded with whitespace.
        $contentType = strtolower(trim($contentTypeElements[0]));

        if ($contentType === 'text/html') {
            $document = new Document((string) $response->getBody(), true);
            $elements = $document->getUnorderedDependencies($response->getUri());

            foreach ($elements as $element) {
                $urlString = $this->createCleanUriString($element);
                // Only the first page referencing a URL is recorded.
                if (!array_key_exists($urlString, $this->comingFrom)) {
                    $this->comingFrom[$urlString] = $response->getUri();
                }
                $this->pageContainer->push($element);
            }
        }
    }

    return $response;
}
/**
 * Removes the last entry from the systems list and requests its URL.
 *
 * @return mixed the first response returned by the client, or false when
 *               the systems list is empty
 */
public function next()
{
    if (empty($this->systems)) {
        return false;
    }

    $current = array_pop($this->systems);

    $target = new Uri($current['url']);
    $headers = [
        'Accept-Encoding' => 'gzip',
        'Connection' => 'keep-alive',
    ];
    $request = RequestFactory::getRequest($target, 'GET', 'php://memory', $headers);

    // The client works on batches, so the single request is wrapped in a list.
    $batch = $this->client->sendRequests([$request]);

    return $batch[0];
}
/**
 * Builds the HTTP client: a cURL adapter wrapped in an event-dispatching
 * adapter with redirect and retry subscribers attached.
 *
 * As a side effect, "Accept-Encoding: gzip" and "Connection: keep-alive" are
 * registered as standard headers on the RequestFactory.
 *
 * @throws \Ivory\HttpAdapter\HttpAdapterException
 *
 * @return \Ivory\HttpAdapter\HttpAdapterInterface
 */
protected function getHttpClient()
{
    $dispatcher = new EventDispatcher();
    $dispatcher->addSubscriber(new RedirectSubscriber());
    $dispatcher->addSubscriber(new RetrySubscriber());

    // The cURL adapter is chosen explicitly rather than guessed via HttpAdapterFactory.
    $curlAdapter = new CurlHttpAdapter();

    RequestFactory::addStandardHeader('Accept-Encoding', 'gzip');
    RequestFactory::addStandardHeader('Connection', 'keep-alive');

    $client = new EventDispatcherHttpAdapter($curlAdapter, $dispatcher);

    // No custom user agent is configured; only the timeout and message factory are set.
    $configuration = $client->getConfiguration();
    $configuration->setTimeout(30);
    $configuration->setMessageFactory(new MessageFactory());

    return $client;
}
/**
 * Pops URLs from the URL stack until one yields a response and returns it.
 *
 * When a request fails with an "An error occurred when fetching the URI"
 * message whose quoted URI is path-only, the URI is resolved against the
 * scheme and host of the failed request, recorded in $this->redirects and
 * $this->urls, and pushed back onto the stack so it is fetched next. Any
 * other failure of that kind is echoed and the URL is skipped.
 *
 * @throws \RuntimeException when the collected failures contain none of the
 *                           "An error occurred when fetching the URI" kind
 *
 * @return mixed the first response for the fetched URL, or false when the
 *               URL stack is empty
 */
public function next()
{
    // Loop instead of recursing so a long run of failing URLs cannot grow the call stack.
    while (!empty($this->urlStack)) {
        $url = array_pop($this->urlStack);
        $request = RequestFactory::getRequest(
            new Uri($url['url']),
            'GET',
            'php://memory',
            ['Accept-Encoding' => 'gzip', 'Connection' => 'keep-alive']
        );

        try {
            $responses = $this->httpClient->sendRequests([$request]);
        } catch (MultiHttpAdapterException $e) {
            $errorMessages = '';

            foreach ($e->getExceptions() as $exception) {
                // @fixme this must be part of the http client
                $message = $exception->getMessage();

                if (strpos($message, 'An error occurred when fetching the URI') !== 0) {
                    $errorMessages .= $message . "\n";
                    continue;
                }

                // The failed URI is quoted in the message and starts at
                // offset 41. Guard the lookup: a short message or a missing
                // closing quote yields an empty URI instead of a bogus substring.
                $closingQuote = strlen($message) > 41 ? strpos($message, '"', 41) : false;
                $corruptUrl = $closingQuote === false ? '' : substr($message, 41, $closingQuote - 41);

                if (strpos($corruptUrl, '/') === 0) {
                    /* @var \Ivory\HttpAdapter\HttpAdapterException $exception */
                    $mainUri = $request->getUri();
                    $absoluteUrl = $mainUri->getScheme() . '://' . $mainUri->getHost() . $corruptUrl;

                    $this->redirects[$absoluteUrl] = (string) $mainUri;
                    $this->urls[] = ['url' => $absoluteUrl, 'system' => $url['system']];
                    $this->urlStack[] = ['url' => $absoluteUrl, 'system' => $url['system']];

                    // Fetch the resolved URL on the next iteration.
                    continue 2;
                }

                // the error handling should be done within the calling class
                echo "\n " . $exception->getMessage() . "\n";

                continue 2;
            }

            if ($errorMessages !== '') {
                throw new \RuntimeException($errorMessages);
            }

            // The exception carried no details, so there is no response for
            // this URL; move on instead of reading an undefined $responses.
            continue;
        }

        return $responses[0];
    }

    return false;
}