/**
 * {@inheritdoc}
 *
 * Makes a proxy-specific session the browser's default session. Sessions are
 * named after the MD5 hash of the proxy URL; if one is already registered it
 * is reused, otherwise a new Goutte-driven session is registered.
 *
 * @param Proxy $proxy
 *
 * @return mixed|void
 * @throws \Exception when the current driver is not a GoutteDriver
 */
public function setProxy(Proxy $proxy)
{
    $sessionName = md5($proxy->getUrl());
    $this->browser->resetSessions();

    // A session for this proxy already exists: just switch to it.
    if ($this->browser->hasSession($sessionName)) {
        $this->browser->setDefaultSessionName($sessionName);

        return;
    }

    $driver = $this->browser->getSession()->getDriver();
    $driverClass = get_class($driver);

    // Only the Goutte driver is handled in this implementation.
    if ($driverClass !== 'Behat\\Mink\\Driver\\GoutteDriver') {
        throw new \Exception('Error : Proxy configuration is not implemented for class ' . $driverClass);
    }

    /* @var $driver GoutteDriver */
    $proxiedClient = new Client();
    $httpClient = $proxiedClient->getClient();
    $proxiedClient->setClient($httpClient);
    $httpClient->setDefaultOption('proxy', $proxy->getUrl());

    $proxiedSession = new Session(new GoutteDriver($proxiedClient));
    $this->browser->registerSession($sessionName, $proxiedSession);
    $this->browser->setDefaultSessionName($sessionName);
}
/**
 * Creates the client stored in $this->client.
 *
 * The client sends the configured User-Agent and an "en-gb" Accept-Language
 * header. Its Guzzle backend does not follow redirects, keeps cookies, skips
 * TLS certificate verification and routes HTTP traffic through $this->proxy.
 */
private function setUpClient()
{
    $scraper = $this->client = new Client();
    $scraper->setHeader('User-Agent', $this->user_agent);
    $scraper->setHeader('Accept-Language', 'en-gb');

    $guzzleOptions = [
        'allow_redirects' => false,
        'cookies' => true,
        'verify' => false,
        'proxy' => ['http' => $this->proxy],
    ];
    $scraper->setClient(new \GuzzleHttp\Client($guzzleOptions));
}
/**
 * Lazily creates $this->client; does nothing when a client already exists.
 *
 * @param string $clientClass     Class name of the outer client to instantiate
 * @param string $httpClientClass Class name of the HTTP client handed to the outer
 *                                client; it is constructed with certificate
 *                                verification off and an 1800 second timeout
 */
public function setupClient($clientClass, $httpClientClass)
{
    if (null !== $this->client) {
        return;
    }

    $httpOptions = [
        RequestOptions::VERIFY => false,
        RequestOptions::TIMEOUT => 1800,
    ];

    $this->client = new $clientClass();
    $this->client->setClient(new $httpClientClass($httpOptions));
}
/**
 * Stores the optional credentials and prepares the scraping client.
 *
 * Credentials (including the access-key flag) are only stored when both the
 * username and the password are truthy.
 *
 * @param string $username      Matricula
 * @param string $password      User's password
 * @param bool   $is_access_key Whether use access code
 */
public function __construct($username = null, $password = null, $is_access_key = false)
{
    $hasCredentials = $username && $password;
    if ($hasCredentials) {
        $this->username = $username;
        $this->password = $password;
        $this->is_access_key = $is_access_key;
    }

    // Goutte client backed by a Guzzle instance whose request timeout and
    // connect timeout are both 10 seconds.
    $this->client = new Client();
    $this->client->setClient(
        new \GuzzleHttp\Client(['timeout' => 10, 'connect_timeout' => 10])
    );
}
/**
 * Routes the given client through the outbound proxy when one is configured.
 *
 * The proxy is considered configured only if both the SS_OUTBOUND_PROXY and
 * SS_OUTBOUND_PROXY_PORT constants are defined; otherwise the client is left
 * untouched.
 *
 * @param Client $client Client whose underlying Guzzle client gets replaced
 */
private function useProxyIfAvailable(Client $client)
{
    if (!defined('SS_OUTBOUND_PROXY') || !defined('SS_OUTBOUND_PROXY_PORT')) {
        return;
    }

    $proxyUri = 'tcp://' . SS_OUTBOUND_PROXY . ':' . SS_OUTBOUND_PROXY_PORT;
    $guzzleConfig = array(
        'request.options' => array('proxy' => $proxyUri),
    );

    $client->setClient(new GuzzleClient('', $guzzleConfig));
}
/**
 * (Re)creates $this->httpClient from the configured class names.
 *
 * The outer client is instantiated from $this->clientClass and receives an
 * inner HTTP client built from $this->httpClientClass with certificate
 * verification disabled and an 1800 second timeout.
 */
public function setupClient()
{
    $innerOptions = [
        RequestOptions::VERIFY => false,
        RequestOptions::TIMEOUT => 1800,
    ];

    $outerClass = $this->clientClass;
    $this->httpClient = new $outerClass();

    $innerClass = $this->httpClientClass;
    $this->httpClient->setClient(new $innerClass($innerOptions));
}
/**
 * Returns the shared client, creating it on first use.
 *
 * A newly created client is given a Guzzle client that has certificate
 * verification turned off.
 *
 * @return GouteClient
 */
public function getClient()
{
    if ($this->client) {
        return $this->client;
    }

    $this->client = new GouteClient();
    $insecureGuzzle = new GuzzleClient(['verify' => false]);
    $this->client->setClient($insecureGuzzle);

    return $this->client;
}
/**
 * The image reported for the SoundCloud track fixture is the expected thumbnail.
 */
public function testImage()
{
    $client = new Client();
    $fixture = require __DIR__ . '/response/SoundCloud.php';
    $client->setClient($this->getGuzzle($fixture));

    $ripple = new Ripple(self::URL_TRACK);
    $ripple->request($client);

    $actualImage = $ripple->image();
    $this->assertSame('soundcloud_thumbnail.jpg', $actualImage);
}
/**
 * The embed value produced for a URL matches the expectation, using the
 * canned response stored in the named fixture file.
 *
 * @param string $file  Base name of the fixture under the response directory
 * @param string $url   URL handed to Ripple
 * @param string $embed Expected result of Ripple::embed()
 * @dataProvider embedProvider
 */
public function testEmbed($file, $url, $embed)
{
    $client = new Client();
    $fixture = require __DIR__ . "/response/{$file}.php";
    $client->setClient($this->getGuzzle($fixture));

    $ripple = new Ripple($url);
    $ripple->request($client);

    $actualEmbed = $ripple->embed();
    $this->assertSame($embed, $actualEmbed);
}
/**
 * The image reported for the Vimeo track fixture is the expected thumbnail.
 */
public function testImage()
{
    $client = new Client();
    $fixture = require __DIR__ . '/response/Vimeo.php';
    $client->setClient($this->getGuzzle($fixture));

    $trackUrl = self::URL_TRACK . static::id();
    $ripple = new Ripple($trackUrl);
    $ripple->request($client);

    $actualImage = $ripple->image();
    $this->assertSame('vimeo_thumbnail.jpg', $actualImage);
}
/**
 * Walks the stored links in batches of five, scrapes each page (plus any
 * sub-links its head section reports) and saves the results.
 *
 * Processing stops at the first batch that comes back empty.
 *
 * @return int always 0
 */
public function actionIndex()
{
    /** @var $entity RivegaucheLink */
    $entity = new RivegaucheLink();

    for ($offset = 0; ; $offset += 5) {
        $links = $entity->getLinks($offset, 5);
        if (empty($links)) {
            break;
        }

        foreach ($links as $link) {
            $pageUrl = $link['link'];
            \Yii::info(sprintf('Обрабатываем: %s ', $pageUrl), 'cron');

            // Fresh client per link, sent through a fixed proxy with a
            // 10 second connect timeout.
            $client = new Client();
            $guzzle = $client->getClient();
            $guzzle->setDefaultOption('config/curl/' . CURLOPT_PROXY, 'http://141.101.118.147:80');
            $guzzle->setDefaultOption('config/curl/' . CURLOPT_CONNECTTIMEOUT, 10);
            $client->setClient($guzzle);

            $crawler = $client->request('GET', $pageUrl);
            \Yii::info(sprintf('Извлекаем тело: %s ', $pageUrl), 'cron');
            $head = $this->getHtml($crawler, true);
            \Yii::info(sprintf('HEAD тело: %s ', $pageUrl), 'cron');

            if (!empty($head['links'])) {
                foreach ($head['links'] as $subUrl) {
                    // NOTE(review): $crawler is overwritten here, so the promo
                    // fallbacks below parse the last sub-page rather than the
                    // main page whenever sub-links exist - confirm this is intended.
                    $crawler = $client->request('GET', $subUrl);
                    $subHead = $this->getHtml($crawler, false);
                    $subHead['link'] = $subUrl;
                    $this->saveResult($subHead, $link);
                }
            }

            // Fall back to the promo parsers while no title has been found.
            if (empty($head['title'])) {
                $head = $this->getPromoHTML($crawler, true);
            }
            if (empty($head['title'])) {
                $head = $this->getPromo2HTML($crawler, true);
            }

            $head['link'] = $pageUrl;
            $this->saveResult($head, $link);
            unset($head);
        }

        unset($links, $client);
    }

    return 0;
}
/**
 * Debug helper: POSTs to a fixed movie page with SSL peer verification
 * disabled and dumps the returned HTML via dd().
 */
public function cinemex()
{
    $client = new Client();

    $curlOptions = array(CURLOPT_SSL_VERIFYPEER => false);
    $client->setClient(new \GuzzleHttp\Client(array('curl' => $curlOptions)));

    $page = $client->request('POST', 'http://www.cinepolis.com/pelicula/deadpool');

    dd($page->html());
}
/**
 * Builds the Api under test on top of a client whose Guzzle backend has the
 * mock plugin subscribed, so tests can queue responses through
 * $this->guzzleMockPlugin.
 */
public function setUp()
{
    $this->guzzleMockPlugin = new MockPlugin();

    $mockedGuzzle = new GuzzleClient();
    $mockedGuzzle->addSubscriber($this->guzzleMockPlugin);

    $scraper = new Client();
    $scraper->setClient($mockedGuzzle);

    $this->api = new Api($scraper);
}
/**
 * Fetches $this->url, inspects every <link> element and stores an Icon for
 * each one whose rel attribute is one of the known icon types.
 *
 * Links that declare a "sizes" attribute are additionally downloaded and
 * embedded as a base64 data URI. The collected icons replace $this->icons.
 */
private function scrapeIcons()
{
    $httpClient = new \GuzzleHttp\Client(array('timeout' => $this->timeout, 'verify' => false));
    $scraper = new Client();
    $scraper->setClient($httpClient);
    $page = $scraper->request('GET', $this->url);

    $linkTags = $page->filter("link");
    $collected = array();
    $urlParts =& $this->url_array;

    $linkTags->each(function (Crawler $linkTag) use (&$collected, &$urlParts) {
        $element = $linkTag->getNode(0);

        // Only links carrying both rel and href can describe an icon.
        if (!$element->hasAttribute("rel") || !$element->hasAttribute("href")) {
            return;
        }

        $rel = $element->getAttribute("rel");
        $rawHref = $element->getAttribute("href");

        $iconRels = array(
            IconType::APPLE_TOUCH,
            IconType::SHORTCUT_ICON,
            IconType::FAVICON,
            IconType::APPLE_TOUCH_PRECOMPOSED,
            IconType::ANDROID_TOUCH,
        );
        // Loose comparison on purpose: mirrors switch/case matching.
        if (!in_array($rel, $iconRels)) {
            return;
        }

        // Absolute and protocol-relative ("//host/...") hrefs are kept as-is;
        // anything else is prefixed with the page's scheme and host.
        $isProtocolRelative = strncmp($rawHref, "//", 2) === 0;
        if (filter_var($rawHref, FILTER_VALIDATE_URL) || $isProtocolRelative) {
            $href = $rawHref;
        } else {
            $href = $urlParts["scheme"] . "://" . $urlParts["host"] . $rawHref;
        }

        if (!$element->hasAttribute("sizes")) {
            array_push($collected, new Icon($rel, $href));

            return;
        }

        // "sizes" is expected to look like "<width>x<height>"; any other
        // shape leaves both dimensions as false.
        $sizes = strtolower($element->getAttribute("sizes"));
        $dimensions = explode("x", $sizes);
        $width = false;
        $height = false;
        if (count($dimensions) === 2) {
            $width = intval($dimensions[0]);
            $height = intval($dimensions[1]);
        }

        // Download the image (protocol-relative URLs are fetched over http)
        // and embed it as a data URI with a sniffed MIME type.
        $imageData = file_get_contents($isProtocolRelative ? "http:" . $href : $href);
        $finfo = finfo_open();
        $mimeType = finfo_buffer($finfo, $imageData, FILEINFO_MIME_TYPE);
        $dataUri = "data:{$mimeType};base64," . base64_encode($imageData);

        array_push($collected, new Icon($rel, $href, $sizes, $width, $height, $dataUri));
    });

    $this->icons = $collected;
}
/**
 * Scraper::getProductDetails() extracts size, title, unit price and
 * description from the canned Sainsbury's product page.
 */
public function testGetProductDetails()
{
    $expected = [
        "size" => "37.25kb",
        "title" => "Sainsbury's Avocado Ripe & Ready XL Loose 300g",
        "unit_price" => '1.50',
        "description" => "Avocados",
    ];

    $cannedResponses = [new GuzzleResponse(200, [], $this->htmlSainsburyProduct)];
    $guzzle = $this->getGuzzle($cannedResponses);

    $client = new Client();
    $client->setClient($guzzle);

    $scraper = new Scraper($client);
    $actual = $scraper->getProductDetails('www.dummmy.com');

    $this->assertEquals($expected, $actual);
}
/**
 * Debug helper: fetches a fixed URL through a local SOCKS5 proxy
 * (127.0.0.1:9050, the address the original comment describes as Tor) and
 * dumps the returned HTML via dd().
 */
public function getGutte()
{
    $client = new Client();

    // cURL-level proxy settings
    $socksProxy = [
        CURLOPT_PROXY => '127.0.0.1:9050',
        CURLOPT_PROXYTYPE => CURLPROXY_SOCKS5,
    ];
    $client->setClient(new \GuzzleHttp\Client(['curl' => $socksProxy]));

    $page = $client->request('GET', 'http://188.166.243.11');

    dd($page->html());
}
/**
 * An HTTP_USER_AGENT server parameter passed to the constructor is sent as
 * the User-Agent header of the outgoing request.
 */
public function testCustomUserAgentConstructor()
{
    $guzzle = $this->getGuzzle();

    $serverParameters = [
        'HTTP_HOST' => '1.2.3.4',
        'HTTP_USER_AGENT' => 'SomeHost',
    ];
    $client = new Client($serverParameters);
    $client->setClient($guzzle);
    $client->request('GET', 'http://www.example.com/');

    // The most recent history entry holds the request that was just sent.
    $lastTransaction = end($this->history);
    $this->assertEquals('SomeHost', $lastTransaction['request']->getHeaderLine('User-Agent'));
}
/**
 * Requests a URL and returns the resulting crawler.
 *
 * Returns null when the URL is empty, when robots.txt handling is enabled
 * and the loaded robots.txt content disallows the URL, or when the request
 * yields a falsy result.
 *
 * @param string $url                Any valid URL
 * @param string $actionType         HTTP method: "GET", "POST", any other...
 * @param bool   $listenRobotsDotTxt Whether to honour $this->robotsTxtContent
 * @return null|\Symfony\Component\DomCrawler\Crawler
 */
protected function getContentOfUrl($url, $actionType = 'GET', $listenRobotsDotTxt = true)
{
    if (!$url) {
        return null;
    }

    // Respect robots.txt only when asked to and when its content is available.
    $mustCheckRobots = $listenRobotsDotTxt && $this->robotsTxtContent;
    if ($mustCheckRobots) {
        $robots = new \RobotsTxtParser($this->robotsTxtContent);
        if ($robots->isDisallowed($url)) {
            return null;
        }
    }

    $goutteClient = new GoutteClient();
    $curlTimeouts = array(
        CURLOPT_TIMEOUT => $this::CURLOPT_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => $this::CURLOPT_CONNECTTIMEOUT,
    );
    $goutteClient->setClient(new GuzzleClient(array('curl' => $curlTimeouts)));

    $crawler = $goutteClient->request($actionType, $url);

    return $crawler ?: null;
}
/**
 * Replaces the HTTP client used by the wrapped client.
 *
 * @param ClientInterface $client HTTP client forwarded to $this->client
 */
public function setHttpClient(ClientInterface $client)
{
    $wrapped = $this->client;
    $wrapped->setClient($client);
}
/**
 * Creates the Goutte client used for scraping.
 *
 * The client follows redirects and its Guzzle backend has both SSL host and
 * peer verification disabled.
 *
 * @return GoutteClient
 */
protected function getScrapClient()
{
    $scraper = new GoutteClient();
    $scraper->followRedirects();

    $curlOptions = array(
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
    );
    $scraper->setClient(new \GuzzleHttp\Client(array('curl' => $curlOptions)));

    return $scraper;
}
/**
 * {@inheritdoc}
 *
 * Makes a proxy-specific session the browser's default session. Sessions are
 * named after the MD5 hash of the proxy URL; if one is already registered it
 * is reused. Otherwise a new session is registered: for Goutte with the proxy
 * URL as Guzzle default option, for Selenium2 with a PAC-based proxy
 * capability.
 *
 * @param Proxy $proxy
 *
 * @return mixed|void
 * @throws \Exception when the driver is unsupported, or when a Selenium2
 *                    driver is used without a PAC file/url on the proxy
 */
public function setProxy(Proxy $proxy)
{
    $sessionName = md5($proxy->getUrl());
    $this->browser->resetSessions();

    // A session for this proxy already exists: just switch to it.
    if ($this->browser->hasSession($sessionName)) {
        $this->browser->setDefaultSessionName($sessionName);

        return;
    }

    $driver = $this->browser->getSession()->getDriver();
    $driverClass = get_class($driver);

    if ($driverClass === 'Behat\\Mink\\Driver\\GoutteDriver') {
        /* @var $driver GoutteDriver */
        $proxiedClient = new Client();
        $httpClient = $proxiedClient->getClient();
        $httpClient->setDefaultOption('proxy', $proxy->getUrl());
        $proxiedClient->setClient($httpClient);

        $proxiedSession = new Session(new GoutteDriver($proxiedClient));
        $this->browser->registerSession($sessionName, $proxiedSession);
        $this->browser->setDefaultSessionName($sessionName);
    } elseif ($driverClass === 'Behat\\Mink\\Driver\\Selenium2Driver') {
        /* @var $driver Selenium2Driver */
        // Todo : use other files than pac file
        // Currently only a pac file is supported
        if (empty($proxy->pacFile)) {
            throw new \Exception('Pac file/url is required.');
        }

        // @see https://code.google.com/p/selenium/wiki/JsonWireProtocol#Proxy_JSON_Object
        $proxyCapability = array(
            "proxyType" => "pac",
            "proxyAutoconfigUrl" => $proxy->pacFile,
        );
        $driver->setDesiredCapabilities(["proxy" => $proxyCapability]);

        // Running sessions are stopped before the reconfigured driver is registered.
        $this->browser->stopSessions();
        $this->browser->registerSession($sessionName, new Session($driver));
        $this->browser->setDefaultSessionName($sessionName);
    } else {
        throw new \Exception('Error : Proxy configuration is not implemented for class ' . $driverClass);
    }
}
/**
 * Initialises $this->browser.
 *
 * When a session is injected it becomes the only ("custom") session.
 * Otherwise a Goutte session (with the cache subscriber attached to its
 * Guzzle client) and a Selenium2 Firefox session are registered, and the
 * default is chosen from $this->javaScriptRequired.
 *
 * @param Session $driver Optional pre-built session to use instead
 */
private function setBrowser(Session $driver = null)
{
    if ($driver !== null) {
        $this->browser = new Mink(['custom' => $driver]);
        $this->browser->setDefaultSessionName('custom');

        return;
    }

    $goutteClient = new Client();
    $guzzle = $goutteClient->getClient();
    CacheSubscriber::attach($guzzle, []);
    $goutteClient->setClient($guzzle);

    // init Mink and register sessions
    $sessions = [
        'goutte' => new Session(new GoutteDriver($goutteClient)),
        'selenium2' => new Session(new Selenium2Driver('firefox', ["permissions.default.image" => 2])),
    ];
    $this->browser = new Mink($sessions);

    // JavaScript-capable pages need Selenium; everything else uses Goutte.
    $defaultSession = $this->javaScriptRequired ? 'selenium2' : 'goutte';
    $this->browser->setDefaultSessionName($defaultSession);
}
/**
 * Display a listing of the resource.
 *
 * Runs a fixed Google search, collects the text of every "div.kv cite"
 * element on the result page and returns the list as JSON. TLS certificates
 * are verified against the bundled resources/cacert.pem.
 *
 * @return Response
 */
public function index()
{
    $client = new Client();

    $caBundle = base_path() . '/resources/cacert.pem';
    $client->setClient(new \GuzzleHttp\Client([
        \GuzzleHttp\RequestOptions::VERIFY => $caBundle,
    ]));

    $resultPage = $client->request('GET', 'http://www.google.pl/search?q=nepras.pl');

    $linki = $resultPage->filter('div.kv cite')->each(function (Crawler $node) {
        return $node->text();
    });

    return response()->json($linki);
}
/**
 * A 301 response is followed: the crawler holds the final page and exactly
 * two requests are recorded.
 */
public function testHandlesRedirectsCorrectly()
{
    $guzzle = $this->getGuzzle();

    $redirect = new GuzzleResponse(301, array('Location' => 'http://www.example.com/'));
    $finalPage = new GuzzleResponse(200, null, '<html><body><p>Test</p></body></html>');

    $this->mockPlugin->clearQueue();
    $this->mockPlugin->addResponse($redirect);
    $this->mockPlugin->addResponse($finalPage);

    $client = new Client();
    $client->setClient($guzzle);
    $crawler = $client->request('GET', 'http://www.example.com/');

    $paragraphText = $crawler->filter('p')->text();
    $this->assertEquals('Test', $paragraphText);

    // Ensure that two requests were sent
    $this->assertEquals(2, count($this->historyPlugin));
}
/**
 * Creates the Goutte client used for scraping.
 *
 * The client follows redirects. Its Guzzle backend disables SSL host and peer
 * verification, goes through the kuzh.polytechnique.fr:8080 proxy, and uses a
 * 7 second total timeout with a 5 second connect timeout.
 *
 * @return GoutteClient
 */
protected function getScrapClient()
{
    $scraper = new GoutteClient();
    $scraper->followRedirects();

    $curlOptions = array(
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_PROXY => 'kuzh.polytechnique.fr:8080',
        CURLOPT_TIMEOUT => 7,
        CURLOPT_CONNECTTIMEOUT => 5,
    );
    $scraper->setClient(new \GuzzleHttp\Client(array('curl' => $curlOptions)));

    return $scraper;
}
/**
 * Builds a new client whose Guzzle backend uses a 60 second cURL timeout and
 * has SSL peer verification disabled.
 *
 * Despite the name, nothing is stored on the object; the client is returned.
 *
 * @return Client
 */
private function setClient()
{
    $scraper = new Client();

    $curlOptions = array(
        CURLOPT_TIMEOUT => 60,
        CURLOPT_SSL_VERIFYPEER => false,
    );
    $scraper->setClient(new \GuzzleHttp\Client(array('curl' => $curlOptions)));

    return $scraper;
}
/**
 * Header objects on a Guzzle response are exposed as plain arrays by the
 * client's response. Skipped when the Guzzle Header class is unavailable.
 */
public function testConvertsGuzzleHeadersToArrays()
{
    if (!class_exists("Guzzle\\Http\\Message\\Header")) {
        $this->markTestSkipped("Guzzle ~3.6 required");
    }

    $guzzle = $this->getGuzzle();

    $dateHeader = new GuzzleHeader('Date', 'Tue, 04 Jun 2013 13:22:41 GMT');
    $this->mockPlugin->clearQueue();
    $this->mockPlugin->addResponse(new GuzzleResponse(200, array($dateHeader)));

    $client = new Client();
    $client->setClient($guzzle);
    $client->request('GET', 'http://www.example.com/');

    $headers = $client->getResponse()->getHeaders();
    $firstHeader = array_shift($headers);

    $this->assertInternalType(
        "array",
        $firstHeader,
        "Header not converted from Guzzle\\Http\\Message\\Header to array"
    );
}
/**
 * Validates a Safair ticket via web scraper.
 *
 * Submits the booking reference and the authenticated user's last name to the
 * airline's "Manage booking" form and inspects the resulting page.
 *
 * @param array $ticketDetails Must contain the booking reference under 'ticketref'
 * @return Boolean true when the result page does NOT contain the
 *                 "booking could not be found" message, false when it does
 */
protected function verifySafair($ticketDetails)
{
    $client = new Client();

    // Do not verify SSL for this host as we get SSL errors...
    $guzzleClient = new \GuzzleHttp\Client(array('curl' => array(CURLOPT_SSL_VERIFYPEER => false)));
    $client->setClient($guzzleClient);

    $crawler = $client->request('GET', 'https://www.flysafair.co.za/manage/Manage-booking');

    $form = $crawler->selectButton('Retrieve booking')->form();
    $form['PNR'] = $ticketDetails['ticketref'];
    $form['lastName'] = Auth::user()->lastname;
    $crawler = $client->submit($form);

    // NOTE(review): the match is whitespace-sensitive; confirm the surrounding
    // whitespace still matches what the site renders.
    $notFoundMessage = "\n Your booking could not be found. Please check the spelling and try again.\n ";

    // strpos() returns false only when the message is absent. The strict
    // comparison also avoids treating a match at offset 0 as "absent". The
    // previous code returned true exactly when the error message WAS present,
    // i.e. the result was inverted.
    return strpos($crawler->text(), $notFoundMessage) === false;
}
/**
 * Fetches a URL and returns its crawler, or null when the request throws.
 *
 * Certificate verification is disabled; the cURL request timeout is 10
 * seconds and the connect timeout 15 seconds. Failures are logged to the
 * 'cron' category.
 *
 * @param string $url
 *
 * @return null|\Symfony\Component\DomCrawler\Crawler
 */
private function getData($url)
{
    $client = new Client();
    $guzzle = $client->getClient();

    // Proxy usage is disabled for now.
    $guzzle->setDefaultOption('verify', false);
    // Maximum number of seconds the request may take
    $guzzle->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 10);
    // Maximum wait while connecting
    $guzzle->setDefaultOption('config/curl/' . CURLOPT_CONNECTTIMEOUT, 15);
    $client->setClient($guzzle);

    try {
        return $client->request('GET', $url);
    } catch (\Exception $e) {
        \Yii::error(sprintf('Ошибка обработки: %s %s ', $e->getMessage(), $url), 'cron');

        return null;
    }
}