Example #1
0
 /**
  * The RollingCurl callback function, run for a request that has finished.
  *
  * Removes the request's URL from the index's request cache and stores the
  * HTTP status code against that URL. When the status is 200 the response is
  * handed to the parser and any links it returns are queued via
  * addRequests(). Afterwards the result is logged and clearCompleted(),
  * prunePendingRequestQueue() and crawlUrls() are called, in that order.
  *
  * @param  Request     $request     The request object
  * @param  RollingCurl $rollingCurl The current RollingCurl object (not used in this method)
  * @return void
  */
 public function callback(Request $request, RollingCurl $rollingCurl)
 {
     $url = $request->getUrl();
     $this->getIndex()->cacheUnsetRequest($url);

     // false is passed as the fallback for a missing 'http_code' entry.
     $statusCode = array_get($request->getResponseInfo(), 'http_code', false);
     $this->getIndex()->add($url, ['last_http_code' => $statusCode]);

     // Only a 200 response is handed to the parser.
     $discovered = ($statusCode == 200) ? $this->getParser()->parseHtml($request) : null;
     if (is_array($discovered) && !empty($discovered)) {
         $this->addRequests($discovered);
     }

     // Logging
     $this->logCallback($request, $statusCode);

     // Drop the local references before the crawl continues.
     unset($request, $url, $statusCode, $discovered);

     $this->clearCompleted();
     $this->prunePendingRequestQueue();
     $this->crawlUrls();
 }
Example #2
0
 /**
  * Process the returned HTML with our parsers.
  *
  * The response is parsed only when its HTTP status code is in the 200-399
  * range and the response text is non-empty. In that case links are
  * extracted with parseNewLinks() and the e-mail parser is run over the same
  * HTML.
  *
  * @param  Request $request The request whose response is parsed
  * @return array   The result of parseNewLinks() (presumably the newly found
  *                 links - confirm against that method), or an empty array
  *                 when the response was not parsed
  */
 public function parseHtml(Request $request)
 {
     $html = $request->getResponseText();
     // false is passed as the fallback for a missing 'http_code' entry.
     $httpCode = array_get($request->getResponseInfo(), 'http_code', false);

     $isParseable = $httpCode >= 200 && $httpCode < 400 && !empty($html);
     if (!$isParseable) {
         return [];
     }

     // Parse - Links
     $newLinks = $this->parseNewLinks($html);
     // Parse - Emails (the return value of run() is not used here)
     $this->getEmails()->run($html);

     return $newLinks;
 }