/**
 * Get site (old or new) from request.
 *
 * Looks at the entry with index 1 of the list returned by
 * $request->getHeaders() and extracts the word following "Site: "
 * (matched case-insensitively).
 *
 * @param \RollingCurl\Request $request
 * @return string the captured site name
 * @throws \Exception when the header entry is absent or does not contain a "Site: <name>" value
 */
public function getSite(\RollingCurl\Request $request)
{
    $headers = $request->getHeaders();

    // The header is expected at a fixed position; check that the entry exists
    // so a request with fewer headers ends in the documented exception
    // instead of an undefined-offset notice and a null subject for preg_match().
    $siteHeader = isset($headers[1]) ? $headers[1] : '';

    if (preg_match("/Site: ([\\w]+)/i", $siteHeader, $out)) {
        return $out[1];
    }

    throw new \Exception('Could not get site from response!');
}
/**
 * Records, for the old and the new request, whether the robots rules
 * of the request's host allow its URL.
 *
 * For each request the "scheme://host" origin of its URL is passed to
 * Xbb_RobotsTxt::getInstance(), and the result of allow() for the full URL
 * is stored as an integer in the "<filter name>_old" / "<filter name>_new"
 * column.
 *
 * @param Request $old_request request against the old site
 * @param Request $new_request request against the new site
 * @return array the columns collected so far
 */
public function execute(Request $old_request, Request $new_request)
{
    $oldParts = parse_url($old_request->getUrl());
    $newParts = parse_url($new_request->getUrl());

    // one rules object per origin
    $oldRules = Xbb_RobotsTxt::getInstance(sprintf('%s://%s', $oldParts["scheme"], $oldParts["host"]));
    $newRules = Xbb_RobotsTxt::getInstance(sprintf('%s://%s', $newParts["scheme"], $newParts["host"]));

    $column = $this->_filter->getName();

    $oldAllowed = (int) $oldRules->allow($old_request->getUrl());
    $this->_columns[$column . '_old'] = $oldAllowed;

    $newAllowed = (int) $newRules->allow($new_request->getUrl());
    $this->_columns[$column . '_new'] = $newAllowed;

    return $this->_columns;
}
/**
 * Builds the complete curl option array for one request.
 *
 * Layers, from lowest to highest precedence: the options of this curl
 * object, the values derived from the request (URL, method, post data,
 * headers) and finally the options set explicitly on the request.
 *
 * @param Request $request
 * @return array
 */
private function prepareRequestOptions(Request $request)
{
    // start from the options shared by this entire curl object
    $curlOpts = $this->getOptions();

    // URL and HTTP method always come from the request
    $curlOpts[CURLOPT_URL] = $request->getUrl();
    $curlOpts[CURLOPT_CUSTOMREQUEST] = $request->getMethod();

    // attach a body only when the request carries post data
    $payload = $request->getPostData();
    if ($payload) {
        $curlOpts[CURLOPT_POST] = 1;
        $curlOpts[CURLOPT_POSTFIELDS] = $payload;
    }

    // request headers win; the global headers are only a fallback
    $headerList = $request->getHeaders();
    if (!$headerList) {
        $headerList = $this->getHeaders();
    }
    if ($headerList) {
        $curlOpts[CURLOPT_HEADER] = 0;
        $curlOpts[CURLOPT_HTTPHEADER] = $headerList;
    }

    // the array union keeps the left operand's values, so options set on
    // the request override everything gathered above
    $overrides = $request->getOptions();
    if (!$overrides) {
        return $curlOpts;
    }

    return $overrides + $curlOpts;
}
/**
 * Gets response header value.
 *
 * The value is looked up in the array returned by
 * $request->getResponseInfo().
 *
 * @param Request $request curl request.
 * @param string $key header key.
 * @return mixed|null the stored value, or null when the key is not set.
 */
public function responseHeader(Request $request, $key)
{
    $info = $request->getResponseInfo();

    // Explicit presence check instead of the "@" operator: a missing key
    // still yields null, but unrelated errors are no longer silenced.
    return isset($info[$key]) ? $info[$key] : null;
}
/**
 * Runs the configured regular expression against the response text.
 *
 * @param Request $data request whose response text is searched
 * @return string|null the first capture group, or null when it is missing
 *                     or empty in the sense of PHP's empty()
 */
public function filter(Request $data)
{
    preg_match($this->__regexp, $data->getResponseText(), $captures);

    // no usable first capture group -> nothing to report
    if (empty($captures[1])) {
        return null;
    }

    return $captures[1];
}
/**
 * Stores the "http_code" entry of each request's response info in the
 * "<filter name>_old" and "<filter name>_new" columns.
 *
 * @param Request $old_request request against the old site
 * @param Request $new_request request against the new site
 * @return array the columns collected so far
 */
public function execute(Request $old_request, Request $new_request)
{
    // same handling for both requests; only the column suffix differs
    $requestsBySuffix = ['_old' => $old_request, '_new' => $new_request];

    foreach ($requestsBySuffix as $suffix => $current) {
        $info = $current->getResponseInfo();
        $this->_columns[$this->_filter->getName() . $suffix] = $info["http_code"];
    }

    return $this->_columns;
}
/**
 * The RollingCurl callback function.
 *
 * Adds the request URL to the web cache, passes responses with a 2xx/3xx
 * status and a non-empty body to the link parser, and then calls
 * crawlUrls().
 *
 * @param Request $request The request object
 * @param RollingCurl $rollingCurl The current RollingCurl object
 * @return void
 */
public function theCallback(Request $request, RollingCurl $rollingCurl)
{
    $url = $request->getUrl();
    $html = $request->getResponseText();
    $httpCode = array_get($request->getResponseInfo(), 'http_code');

    // Add URL to index (or update count)
    $this->getWebCache()->add($url);

    // Only responses with a status in [200, 400) and a non-empty body are parsed
    if ($httpCode >= 200 && $httpCode < 400 && !empty($html)) {
        // Parse - Links
        $this->getParser()->parseLinks($request, $rollingCurl);

        // Parse - Emails (currently disabled)
        // $this->parseEmails($html);

        // Drop the local copy of the body before continuing the crawl
        unset($html);
    }

    $this->crawlUrls();
}
/**
 * Sends rolling curl multirequest.
 *
 * Accepts two input shapes, which may be mixed:
 *  - list form:  [url, url, ...]
 *  - map form:   [url => ['post' => [...], 'files' => [...]], ...]
 *
 * When $this->unique is enabled, a URL that was already visited is skipped
 * unless the entry carries post data. Every URL that is not skipped is
 * appended to $this->visited.
 *
 * @param array $data curl request data as [url, url, ...] or [url=>['post'=>[...], 'files'=>[...]]]
 * @param \Closure $callback callback at response.
 */
public function multiRequest($data, $callback)
{
    $rollingCurl = new RollingCurl();

    foreach ($data as $url => $options) {
        if (is_string($options)) {
            // List form: the value is the URL itself and there are no
            // per-request options. Reset $options to an array so the lookups
            // below never index into the URL string ('post' would be an
            // illegal string offset: a bogus value behind a suppressed
            // warning on older PHP, a TypeError on PHP 8).
            $url = $options;
            $options = [];
        }

        $post = isset($options['post']) ? $options['post'] : null;
        $files = isset($options['files']) ? $options['files'] : null;

        // Requests with post data are always sent, even to a visited URL
        if ($this->unique && !$post && in_array($url, $this->visited, true)) {
            continue;
        }
        $this->visited[] = $url;

        $method = isset($options['post']) ? 'POST' : 'GET';
        $request = new Request($url, $method);
        $request->setOptions($this->getCurlOptions($url, $post, $files));
        $rollingCurl->add($request);
    }

    $rollingCurl->setCallback($callback)->execute();
}