Esempio n. 1
0
 /**
  * Returns the next crawled response, requesting a new batch of pages when the
  * local response cache is empty.
  *
  * URLs are popped from the page container, passed through isFiltered() and
  * sent as one batch. For every "text/html" response the dependencies reported
  * by the parsed document are pushed back into the page container, and the
  * first page on which each dependency was seen is recorded in $this->comingFrom.
  *
  * @throws \RuntimeException if the batch failed with errors other than the
  *                           "An error occurred when fetching the URI" message
  *
  * @return mixed the next response, or false once the page container yields no more URLs
  */
 public function next()
 {
     // Loop instead of recursing: long runs of filtered URLs or failed batches
     // must not grow the call stack.
     while (empty($this->responseCache)) {
         $urls = $this->pageContainer->pop($this->parallelReqeusts);
         if (empty($urls)) {
             return false;
         }

         $requests = [];
         foreach ($urls as $url) {
             if (!$this->isFiltered($url)) {
                 $requests[] = RequestFactory::getRequest($url, 'GET', 'php://memory', [], []);
             }
         }
         if (empty($requests)) {
             // Every URL of this batch was filtered out; try the next batch.
             continue;
         }

         try {
             $this->responseCache = $this->httpClient->sendRequests($requests);
         } catch (MultiHttpAdapterException $e) {
             // NOTE(review): responses of the batch that did succeed are not read
             // from the exception here and are therefore dropped - confirm whether
             // the exception exposes them.
             $errorMessages = '';
             foreach ($e->getExceptions() as $exception) {
                 // @fixme this must be part of the http client
                 $message = $exception->getMessage();
                 if (strpos($message, 'An error occurred when fetching the URI') === 0) {
                     // Assumes the message has the form `... the URI "<uri>"`, so the
                     // URI starts at byte 41. Guard the offset: strpos() rejects an
                     // offset beyond the end of the string.
                     $closingQuote = strlen($message) > 41 ? strpos($message, '"', 41) : false;
                     $failedUrl = $closingQuote === false ? '' : substr($message, 41, $closingQuote - 41);
                     if (strpos($failedUrl, '/') === 0) {
                         // Host-relative URL: re-queue it as an absolute URL on the start host.
                         $this->pageContainer->push(new Uri($this->startUri->getScheme() . '://' . $this->startUri->getHost() . $failedUrl));
                     }
                 } else {
                     $errorMessages .= $message . "\n";
                 }
             }
             if ($errorMessages !== '') {
                 throw new \RuntimeException($errorMessages);
             }
         }
     }

     $response = array_pop($this->responseCache);
     if ($response->hasHeader('Content-Type')) {
         $contentTypeElements = explode(';', $response->getHeader('Content-Type')[0]);
         // Media types are case-insensitive and may be surrounded by whitespace.
         $contentType = strtolower(trim(array_shift($contentTypeElements)));
         if ($contentType === 'text/html') {
             $document = new Document((string) $response->getBody(), true);
             $elements = $document->getUnorderedDependencies($response->getUri());
             foreach ($elements as $element) {
                 $urlString = $this->createCleanUriString($element);
                 // Remember only the first page on which a dependency was discovered.
                 if (!array_key_exists($urlString, $this->comingFrom)) {
                     $this->comingFrom[$urlString] = $response->getUri();
                 }
                 $this->pageContainer->push($element);
             }
         }
     }

     return $response;
 }
Esempio n. 2
0
 /**
  * Pops one entry from $this->systems, sends a GET request to its 'url' and
  * returns the first response of that request.
  *
  * @return mixed the first response, or false when $this->systems is empty
  */
 public function next()
 {
     if (!empty($this->systems)) {
         $current = array_pop($this->systems);
         $target = new Uri($current['url']);
         $headers = ['Accept-Encoding' => 'gzip', 'Connection' => 'keep-alive'];

         $request = RequestFactory::getRequest($target, 'GET', 'php://memory', $headers);
         $responses = $this->client->sendRequests([$request]);

         return $responses[0];
     }

     return false;
 }
Esempio n. 3
0
 /**
  * Builds and returns a configured HTTP client.
  *
  * A cURL adapter is wrapped in an event-dispatching adapter that has a
  * redirect and a retry subscriber attached. As a side effect the headers
  * "Accept-Encoding: gzip" and "Connection: keep-alive" are registered as
  * standard headers on the RequestFactory.
  *
  * @throws \Ivory\HttpAdapter\HttpAdapterException
  *
  * @return \Ivory\HttpAdapter\HttpAdapterInterface
  */
 protected function getHttpClient()
 {
     $dispatcher = new EventDispatcher();
     foreach ([new RedirectSubscriber(), new RetrySubscriber()] as $subscriber) {
         $dispatcher->addSubscriber($subscriber);
     }

     // The cURL adapter is used explicitly rather than guessing an adapter.
     $curlAdapter = new CurlHttpAdapter();

     RequestFactory::addStandardHeader('Accept-Encoding', 'gzip');
     RequestFactory::addStandardHeader('Connection', 'keep-alive');

     $client = new EventDispatcherHttpAdapter($curlAdapter, $dispatcher);

     // Timeout of 30 and a custom message factory; no user agent is set.
     $configuration = $client->getConfiguration();
     $configuration->setTimeout(30);
     $configuration->setMessageFactory(new MessageFactory());

     return $client;
 }
Esempio n. 4
0
 /**
  * Pops URLs from $this->urlStack until one can be fetched and returns the
  * first response for it.
  *
  * When the request fails with an "An error occurred when fetching the URI"
  * message whose quoted URI is host-relative (starts with "/"), the URI is
  * rebuilt as an absolute URL on the host of the failed request, recorded in
  * $this->redirects and queued again. Other fetch errors of that kind are
  * echoed and the URL is skipped.
  *
  * @throws \RuntimeException if the request failed with any other error message
  *
  * @return mixed the first response, or false when the URL stack is empty
  */
 public function next()
 {
     // Loop instead of recursing so that many consecutive failures cannot grow the call stack.
     while (!empty($this->urlStack)) {
         $url = array_pop($this->urlStack);
         $request = RequestFactory::getRequest(new Uri($url['url']), 'GET', 'php://memory', ['Accept-Encoding' => 'gzip', 'Connection' => 'keep-alive']);

         try {
             $responses = $this->httpClient->sendRequests([$request]);
         } catch (MultiHttpAdapterException $e) {
             $errorMessages = '';
             foreach ($e->getExceptions() as $exception) {
                 // @fixme this must be part of the http client
                 $message = $exception->getMessage();
                 if (strpos($message, 'An error occurred when fetching the URI') !== 0) {
                     $errorMessages .= $message . "\n";
                     continue;
                 }

                 // Assumes the message has the form `... the URI "<uri>"`, so the URI
                 // starts at byte 41. Guard the offset: strpos() rejects an offset
                 // beyond the end of the string.
                 $closingQuote = strlen($message) > 41 ? strpos($message, '"', 41) : false;
                 $corruptUrl = $closingQuote === false ? '' : substr($message, 41, $closingQuote - 41);

                 if (strpos($corruptUrl, '/') === 0) {
                     /* @var \Ivory\HttpAdapter\HttpAdapterException $exception */
                     $mainUri = $request->getUri();
                     $absoluteUrl = $mainUri->getScheme() . '://' . $mainUri->getHost() . $corruptUrl;
                     $this->redirects[$absoluteUrl] = (string) $mainUri;
                     $this->urls[] = ['url' => $absoluteUrl, 'system' => $url['system']];
                     $this->urlStack[] = ['url' => $absoluteUrl, 'system' => $url['system']];
                 } else {
                     // the error handling should be done within the calling class
                     echo "\n   " . $message . "\n";
                 }

                 // Either way this URL is done; move on to the next stack entry.
                 continue 2;
             }

             if ($errorMessages !== '') {
                 throw new \RuntimeException($errorMessages);
             }

             // The exception carried no individual errors, so there is no response
             // to return for this URL; try the next one.
             continue;
         }

         return $responses[0];
     }

     return false;
 }