Esempio n. 1
0
 /**
  * Get site (old or new) from request.
  *
  * Only the second header entry (index 1) of the request is inspected; it is
  * expected to contain a "Site: <name>" marker (matched case-insensitively).
  *
  * @param \RollingCurl\Request $request request whose headers carry the site marker
  * @return string the word captured after "Site: "
  * @throws \Exception when the second header is absent or holds no site marker
  */
 public function getSite(\RollingCurl\Request $request)
 {
     $headers = $request->getHeaders();
     // Guard the lookup: with fewer than two headers (or none at all) the
     // direct [1] access would raise an offset warning and hand null to
     // preg_match() instead of reaching the exception below.
     if (isset($headers[1]) && is_string($headers[1])
         && preg_match("/Site: ([\\w]+)/i", $headers[1], $out)
     ) {
         return $out[1];
     }
     throw new \Exception('Could not get site from response!');
 }
Esempio n. 2
0
 /**
  * Records whether the old and the new URL are allowed by their site's robots rules.
  *
  * For each request the scheme and host are recombined into a site root, the
  * Xbb_RobotsTxt instance for that root is fetched, and the result of its
  * allow() check is stored as an integer in the "<filter name>_old" /
  * "<filter name>_new" column.
  *
  * @param Request $old_request request issued against the old site
  * @param Request $new_request request issued against the new site
  * @return array the columns collected so far, including the two values set here
  */
 public function execute(Request $old_request, Request $new_request)
 {
     $oldParts = parse_url($old_request->getUrl());
     $newParts = parse_url($new_request->getUrl());

     // Site root = "<scheme>://<host>" of each URL.
     $oldRules = Xbb_RobotsTxt::getInstance("{$oldParts['scheme']}://{$oldParts['host']}");
     $newRules = Xbb_RobotsTxt::getInstance("{$newParts['scheme']}://{$newParts['host']}");

     $oldColumn = $this->_filter->getName() . '_old';
     $this->_columns[$oldColumn] = (int) $oldRules->allow($old_request->getUrl());

     $newColumn = $this->_filter->getName() . '_new';
     $this->_columns[$newColumn] = (int) $newRules->allow($new_request->getUrl());

     return $this->_columns;
 }
 /**
  * Builds the complete set of cURL options for a single request.
  *
  * Starts from the options configured on this object, layers on the URL,
  * method, POST payload and headers of the request, and finally lets the
  * request's own options override everything else.
  *
  * @param Request $request request whose options are being assembled
  * @return array
  */
 private function prepareRequestOptions(Request $request)
 {
     // Baseline: options that apply to this entire curl object.
     $curlOptions = $this->getOptions();

     $curlOptions[CURLOPT_URL] = $request->getUrl();
     $curlOptions[CURLOPT_CUSTOMREQUEST] = $request->getMethod();

     // Attach a body only when the request carries POST data.
     $postData = $request->getPostData();
     if ($postData) {
         $curlOptions[CURLOPT_POST] = 1;
         $curlOptions[CURLOPT_POSTFIELDS] = $postData;
     }

     // Headers set on the request win; otherwise fall back to the ones set on this object.
     $headers = $request->getHeaders();
     if (!$headers) {
         $headers = $this->getHeaders();
     }
     if ($headers) {
         $curlOptions[CURLOPT_HEADER] = 0;
         $curlOptions[CURLOPT_HTTPHEADER] = $headers;
     }

     // Array union keeps the left operand's values, so per-request options take precedence.
     $overrides = $request->getOptions();
     if (!$overrides) {
         return $curlOptions;
     }
     return $overrides + $curlOptions;
 }
 /**
  * Gets a single value from the response info of a request.
  *
  * Despite the method name, the lookup is performed on the array returned by
  * Request::getResponseInfo().
  *
  * @param Request $request curl request
  * @param string $key response info key.
  * @return mixed|null value stored under the key, or null when the key is absent.
  */
 public function responseHeader(Request $request, $key)
 {
     $info = $request->getResponseInfo();
     // Explicit existence check instead of "@" suppression, so unrelated
     // errors are no longer silenced along with the missing-key notice.
     return isset($info[$key]) ? $info[$key] : null;
 }
Esempio n. 5
0
 /**
  * Extracts the first capture group of the configured pattern from the response text.
  *
  * @param Request $data request whose response text is searched
  * @return string|null the captured text, or null when the pattern did not
  *                     match or the first group captured nothing
  */
 public function filter(Request $data)
 {
     preg_match($this->__regexp, $data->getResponseText(), $match);
     // Compare against '' explicitly: empty() would also throw away a
     // legitimate capture of "0".
     if (isset($match[1]) && $match[1] !== '') {
         return $match[1];
     }
     return null;
 }
Esempio n. 6
0
 /**
  * Stores the HTTP status code of the old and the new response.
  *
  * The codes are written to the "<filter name>_old" and "<filter name>_new"
  * columns respectively.
  *
  * @param Request $old_request request issued against the old site
  * @param Request $new_request request issued against the new site
  * @return array the columns collected so far, including the two values set here
  */
 public function execute(Request $old_request, Request $new_request)
 {
     $requestsBySuffix = ['_old' => $old_request, '_new' => $new_request];

     foreach ($requestsBySuffix as $suffix => $request) {
         $column = $this->_filter->getName() . $suffix;
         $info = $request->getResponseInfo();
         $this->_columns[$column] = $info['http_code'];
     }

     return $this->_columns;
 }
Esempio n. 7
0
 /**
  * The RollingCurl callback function
  *
  * Adds the requested URL to the web cache, hands responses with a 2xx/3xx
  * status and a non-empty body to the link parser, and then calls crawlUrls().
  *
  * @param  Request     $request     The request object
  * @param  RollingCurl $rollingCurl The current RollingCurl object
  * @return void
  */
 public function theCallback(Request $request, RollingCurl $rollingCurl)
 {
     $url = $request->getUrl();
     $html = $request->getResponseText();
     $httpCode = array_get($request->getResponseInfo(), 'http_code');

     // Add URL to index (or update count)
     $this->getWebCache()->add($url);

     // Only responses with a 2xx/3xx status and a body are parsed.
     if ($httpCode >= 200 && $httpCode < 400 && !empty($html)) {
         // Parse - Links (e-mail parsing is currently disabled)
         $this->getParser()->parseLinks($request, $rollingCurl);
     }

     $this->crawlUrls();
 }
Esempio n. 8
0
 /**
  * Sends rolling curl multirequest.
  *
  * Every URL is recorded in $this->visited. When $this->unique is set, a URL
  * that was already visited is skipped unless it carries non-empty post data.
  *
  * @param array $data curl request data as [url, url, ...] or [url=>['post'=>[...], 'files'=>[...]]]
  * @param \Closure $callback callback at response.
  */
 public function multiRequest($data, $callback)
 {
     $rollingCurl = new RollingCurl();
     foreach ($data as $url => $options) {
         if (is_string($options)) {
             // List form: the value is the URL itself and there are no
             // per-request options. Reset $options so the lookups below do
             // not index into the URL string: a string offset such as
             // ['post'] does not behave like a missing array key (older PHP
             // returns the first character, PHP 8 throws).
             $url = $options;
             $options = [];
         }

         $post = isset($options['post']) ? $options['post'] : null;
         $files = isset($options['files']) ? $options['files'] : null;

         // Skip URLs already requested, unless post data makes this a distinct request.
         if ($this->unique && in_array($url, $this->visited) && !$post) {
             continue;
         }
         $this->visited[] = $url;

         $method = $post !== null ? 'POST' : 'GET';
         $request = new Request($url, $method);
         $request->setOptions($this->getCurlOptions($url, $post, $files));
         $rollingCurl->add($request);
     }
     $rollingCurl->setCallback($callback)->execute();
 }