getClient() public method

public getClient ( )
 /**
  * {@inheritdoc}
  *
  * Makes the session dedicated to the given proxy the default browser
  * session. The session is named after the MD5 hash of the proxy URL and is
  * created and registered first when the browser does not know it yet.
  *
  * @param Proxy $proxy
  *
  * @return mixed|void
  * @throws \Exception when the current driver is not a GoutteDriver
  */
 public function setProxy(Proxy $proxy)
 {
     $name = md5($proxy->getUrl());
     $this->browser->resetSessions();

     if (!$this->browser->hasSession($name)) {
         $currentDriver = $this->browser->getSession()->getDriver();
         // Only the Goutte driver can be configured with a proxy here
         if (get_class($currentDriver) !== 'Behat\\Mink\\Driver\\GoutteDriver') {
             throw new \Exception('Error : Proxy configuration is not implemented for class ' . get_class($currentDriver));
         }

         $goutte = new Client();
         $http = $goutte->getClient();
         $goutte->setClient($http);
         $http->setDefaultOption('proxy', $proxy->getUrl());

         $proxiedSession = new Session(new GoutteDriver($goutte));
         $this->browser->registerSession($name, $proxiedSession);
     }

     $this->browser->setDefaultSessionName($name);
 }
Example #2
0
 /**
  * Keeps the Pixie instance, creates the HTTP client and picks up the
  * payment test service from Pixie.
  *
  * The client's underlying HTTP client gets two default options:
  * 'verify' set to false and 'timeout' set to 120.
  *
  * @param Pixie $pixie
  */
 function __construct(Pixie $pixie)
 {
     $this->pixie = $pixie;

     // Create client and apply its default request options
     $this->client = new Client();
     $defaults = array('verify' => false, 'timeout' => 120);
     foreach ($defaults as $option => $value) {
         $this->client->getClient()->setDefaultOption($option, $value);
     }

     $this->service = $this->pixie->paymentTest;
 }
 /**
  * Crawls all stored Rivegauche links in batches of five and saves the parsed data.
  *
  * Each link is requested through a fixed HTTP proxy and parsed with
  * getHtml(). Every sub-link listed under the parsed 'links' key is
  * requested, parsed and saved as well. If the main page produced no title,
  * getPromoHTML() and then getPromo2HTML() are tried on the main page.
  *
  * @return int always 0
  */
 public function actionIndex()
 {
     /** @var $entity RivegaucheLink */
     $entity = new RivegaucheLink();
     $batchSize = 5;
     $offset = 0;

     while (true) {
         $links = $entity->getLinks($offset, $batchSize);
         if (empty($links)) {
             // No more links to process
             break;
         }

         foreach ($links as $link) {
             \Yii::info(sprintf('Обрабатываем: %s ', $link['link']), 'cron');

             $client = new Client();
             $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_PROXY, 'http://141.101.118.147:80');
             $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_CONNECTTIMEOUT, 10);

             $pageCrawler = $client->request('GET', $link['link']);
             \Yii::info(sprintf('Извлекаем тело: %s ', $link['link']), 'cron');
             $head = $this->getHtml($pageCrawler, true);
             \Yii::info(sprintf('HEAD тело: %s ', $link['link']), 'cron');

             if (!empty($head['links'])) {
                 foreach ($head['links'] as $subLink) {
                     // Keep the sub-page crawler separate: the promo fallbacks
                     // below must still see the main page, not the last sub-page.
                     $subCrawler = $client->request('GET', $subLink);
                     $subHead = $this->getHtml($subCrawler, false);
                     $subHead['link'] = $subLink;
                     $this->saveResult($subHead, $link);
                 }
             }

             // Fall back to the promo layouts when the regular parser found no title
             if (empty($head['title'])) {
                 $head = $this->getPromoHTML($pageCrawler, true);
             }
             if (empty($head['title'])) {
                 $head = $this->getPromo2HTML($pageCrawler, true);
             }

             $head['link'] = $link['link'];
             $this->saveResult($head, $link);
         }

         $offset += $batchSize;
     }

     return 0;
 }
Example #4
0
 /**
  * Requests the get_video_info data for the song's video and returns it
  * parsed into an array.
  *
  * @param Song $song
  * @return array key/value pairs decoded from the response body with parse_str()
  * @throws \Exception with code 697 when the response reports status "fail"
  */
 private function getInfo(Song $song)
 {
     // Encode the id so characters such as "&" or "#" cannot break the query string
     $videoId = rawurlencode($song->videoId());
     $url = 'http://youtube.com/get_video_info?video_id=' . $videoId;
     /** @var Response $response */
     $response = $this->client->getClient()->get($url);
     $body = $response->getBody(true);
     parse_str($body, $arr);
     if (isset($arr['status']) && $arr['status'] === 'fail') {
         // A failure response is not guaranteed to carry a usable reason
         $reason = isset($arr['reason']) && is_string($arr['reason']) ? $arr['reason'] : 'Unknown error';
         throw new \Exception($reason, 697);
     }
     return $arr;
 }
Example #5
0
 /**
  * Requests the configured URL, downloads what the page links to and,
  * optionally, keeps following the "next" link until there is none.
  *
  * A transfer error prints the error message and exits with status 1; a
  * non-200 response prints "site not available".
  *
  * @param bool $allPages whether to walk through the following pages too
  */
 public function run($allPages)
 {
     $browser = new Client();
     $browser->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 30);
     $browser->setHeader('User-Agent', $this->config['user_agent']);

     try {
         $page = $browser->request('GET', $this->config['url']);
     } catch (TransferException $error) {
         echo $error->getMessage(), PHP_EOL;
         exit(1);
     }

     if ($browser->getResponse()->getStatus() != 200) {
         echo "site not available\n";
         return;
     }

     $this->getUrlsAndDownload($page);
     if (!$allPages) {
         return;
     }

     // Follow the pagination for as long as a next link is found
     for ($next = $this->getNextLink($page); $next; $next = $this->getNextLink($page)) {
         $page = $browser->click($next);
         $this->getUrlsAndDownload($page);
     }
 }
Example #6
0
 /**
  * Returns the {@see \Goutte\Client} instance held by this object.
  *
  * On first use a new instance is created and
  * {@see \Diggin\Bridge\Guzzle\AutoCharsetEncodingPlugin\AutoCharsetEncodingPlugin}
  * is added as a subscriber to its underlying HTTP client.
  *
  * @return GoutteClient
  */
 public function getClient()
 {
     if ($this->client !== null) {
         return $this->client;
     }

     $goutte = new GoutteClient();
     $this->client = $goutte;
     $goutte->getClient()->addSubscriber(new AutoCharsetEncodingPlugin());

     return $goutte;
 }
Example #7
0
 /**
  * Returns the shared client, creating it on first use with the cURL
  * CURLOPT_TIMEOUT option set to 30.
  *
  * @return \Goutte\Client
  */
 public static function getClient()
 {
     if (static::$client) {
         return static::$client;
     }

     $timeoutOption = 'config/curl/' . CURLOPT_TIMEOUT;
     static::$client = new Client();
     static::$client->getClient()->setDefaultOption($timeoutOption, 30);

     return static::$client;
 }
Example #8
0
 /**
  * Execute load request.
  *
  * Opens the webload page, fills in the form behind the "SEND LOAD" button
  * with this object's values and passes the HTML of the resulting page to
  * response().
  *
  * @return array
  */
 public function execute()
 {
     $browser = new Client();
     $browser->getClient()->setDefaultOption('verify', false);

     $landing = $browser->request('GET', 'https://loadxtreme.ph/cgi-bin/webload.cgi?state=webload');
     $loadForm = $landing->selectButton('SEND LOAD')->form();

     $fields = array(
         'state' => 'webload',
         'step' => '1',
         'webtype' => '',
         'uid' => $this->uid,
         'pik' => $this->pik,
         'pc' => $this->pc,
         'cellno' => $this->cellno,
         'email' => $this->email,
     );
     $resultPage = $browser->submit($loadForm, $fields);

     return $this->response($resultPage->html());
 }
 /**
  * Collects the price rows of every product page on brickwat.ch and hands
  * them to writeResults() together with a dated "prijzen-YYYY-MM-DD.csv" file.
  *
  * $prices and $companys start as empty arrays so that writeResults() always
  * receives arrays, even when no product page could be read.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     // The greeting is still built from the input, but printing it is disabled
     $name = $input->getArgument('name');
     if ($name) {
         $text = 'Hello ' . $name;
     } else {
         $text = 'Hello';
     }
     if ($input->getOption('yell')) {
         $text = strtoupper($text);
     }
     if (!($file = fopen("prijzen-" . date('Y-m-d') . ".csv", "w"))) {
         $output->writeln("can not open file");
         die;
     }
     $client = new Client();
     $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 60);
     $products = $this->getProducts();
     // product => (company => price)
     $prices = array();
     // Set of all company names seen, keyed by name
     $companys = array();
     foreach ($products as $product) {
         $crawler = $client->request('GET', 'http://brickwat.ch/' . $product);
         $status_code = $client->getResponse()->getStatus();
         if ($status_code == 200) {
             $result = $crawler->filter('#prices')->filter('tr.row-collapse');
             $rowCount = $result->count();
             $output->writeln("Sites found for product " . $product . ": " . $rowCount);
             $prices[$product] = array();
             for ($i = 0; $i < $rowCount; $i++) {
                 $values = $result->eq($i)->filter('td');
                 // Get company Name: from the logo's title when there is one,
                 // otherwise from the cell text
                 if ($values->eq(0)->filter('a img')->count()) {
                     $company = $values->eq(0)->filter('a img')->attr('title');
                     $price = $values->eq(2)->filter('a')->text();
                 } else {
                     $company = $values->eq(0)->text();
                     $price = $values->eq(2)->text();
                 }
                 $prices[$product][$company] = $price;
                 $companys[$company] = $company;
             }
         }
         // Pause half a second between requests
         usleep(500000);
     }
     $this->writeResults($output, $prices, $companys, $file);
     fclose($file);
 }
 /**
  * Runs the company lookup against the Sintegra service.
  *
  * @param  string $cnpj CNPJ
  * @param  string $ie IE - not tested
  * @param  string $paramBot value sent along for the captcha validation
  * @param  string $captcha captcha text
  * @param  string $stringCookie cookie string; only the part before the first ";" is sent
  * @throws Exception
  * @return array  company data
  */
 public static function consulta($cnpj, $ie, $paramBot, $captcha, $stringCookie)
 {
     $cookieParts = explode(';', $stringCookie);
     if (!Utils::isCnpj($cnpj)) {
         throw new Exception('O CNPJ informado não é válido.');
     }

     $client = new Client();

     // cURL options applied to every request of this client
     $curlOptions = array(
         CURLOPT_TIMEOUT => 0,
         CURLOPT_TIMEOUT_MS => 0,
         CURLOPT_CONNECTTIMEOUT => 0,
         CURLOPT_RETURNTRANSFER => true,
     );
     foreach ($curlOptions as $option => $value) {
         $client->getClient()->setDefaultOption('config/curl/' . $option, $value);
     }

     $headers = array(
         'Host' => 'pfeserv1.fazenda.sp.gov.br',
         'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0',
         'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9, */* ;q=0.8',
         'Accept-Language' => 'pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3',
         'Accept-Encoding' => 'gzip, deflate',
         'Referer' => 'http://www.sintegra.gov.br/new_bv.html',
         'Cookie' => $cookieParts[0],
         'Connection' => 'keep-alive',
     );
     foreach ($headers as $headerName => $headerValue) {
         $client->setHeader($headerName, $headerValue);
     }

     // Search by CNPJ whenever one was given, otherwise by IE
     if (strlen($cnpj) > 0) {
         $servico = 'cnpj';
         $consultaPor = 'Consulta por CNPJ';
     } else {
         $servico = 'ie';
         $consultaPor = 'Consulta por IE';
     }

     $formData = array(
         'hidFlag' => '0',
         'cnpj' => Utils::unmask($cnpj),
         'ie' => Utils::unmask($ie),
         'paramBot' => $paramBot,
         'Key' => $captcha,
         'servico' => $servico,
         'botao' => $consultaPor,
     );
     $page = $client->request('POST', 'http://pfeserv1.fazenda.sp.gov.br/sintegrapfe/sintegra', $formData);

     // The second <center> element carries the captcha error text when the captcha was rejected
     $imageError = 'O valor da imagem esta incorreto ou expirou. Verifique novamente a imagem e digite exatamente os 5 caracteres exibidos.';
     $secondCenter = $page->filter('body > center')->eq(1);
     if ($secondCenter->count()) {
         if ($imageError == trim($secondCenter->text())) {
             throw new Exception($imageError, 99);
         }
     }

     // No <center> element at all is reported as the service being unavailable
     if (count($page->filter('body > center')) == 0) {
         throw new Exception('Serviço indisponível!. Tente novamente.', 99);
     }

     $html = self::parseContent($client->getResponse()->__toString());

     return self::parseSelectors(new \Symfony\Component\DomCrawler\Crawler($html));
 }
 /**
  * Looks up a subtitle for the given episode file in the configured language
  * and optionally downloads it.
  *
  * Progress and every reason for giving up are printed to standard output.
  *
  * @param $episodeFilename string show file name
  * @param $download boolean download the file?
  *
  * @return null|string subtitle contents when downloaded, null otherwise
  */
 public function findSubtitle($episodeFilename, $download)
 {
     $language = $this->config->getSubtitleLanguage();
     if (!isset($this->languages[$language])) {
         printf("Missing language [%s].\n", $language);
         return null;
     }

     $episode = new Episode($episodeFilename);
     $showKey = $episode->sanitizedShowName;
     if (!isset($this->shows[$showKey])) {
         printf("Missing show [%s].\n", $episode->showName);
         return null;
     }

     $languageId = $this->languages[$language];
     $showId = $this->shows[$showKey];
     $listingUrl = $this->builder->getAddictedShowAjaxUrl($showId, $episode->season, $languageId);
     printf("Trying to get subtitles from [%s].\n", $listingUrl);

     $rows = $this->client->request('GET', $listingUrl)->filter('div#season > table > tbody > tr.epeven');
     $candidates = $rows->reduce(function (Crawler $row) use($episode) {
         $cells = $row->children();
         $number = $cells->getNode(1)->nodeValue;
         $releaseGroup = strtolower($cells->getNode(4)->nodeValue);
         $progress = strtolower($cells->getNode(5)->nodeValue);

         if ((int) $number !== (int) $episode->ep) {
             return false;
         }
         if (!$episode->inGroups($releaseGroup)) {
             return false;
         }
         // Rows whose status column contains a percent sign are rejected
         return strpos($progress, '%') === false;
     });

     if ($candidates->count() == 0) {
         printf("Missing subtitles for show [%s] season [%s] episode [%s] \n  and groups [%s].\n", $episode->showName, $episode->season, $episode->ep, implode(', ', $episode->groups));
         return null;
     }

     // Take the first matching row; its tenth cell holds the download link
     $linkCell = $candidates->first()->children()->getNode(9);
     $subtitleUrl = $this->builder->getSubtitleUrl($linkCell->firstChild->getAttribute('href'));

     if ($download === false) {
         printf("Chosen subtitle [%s].\n", $subtitleUrl);
         return null;
     }

     printf("Downloading subtitle [%s].\n", $subtitleUrl);
     $requestHeaders = $this->builder->getRequestHeaders($showId);
     $response = $this->client->getClient()->get($subtitleUrl, ['headers' => $requestHeaders]);

     return $response->getBody()->getContents();
 }
Example #12
0
 /**
  * Fetches every stored URL whose refurl is news.liputan6.com and saves each
  * article paragraph found on the page as a Scraping record.
  *
  * A separator is echoed after every URL, preceded by a message when the
  * response status is not 200.
  */
 public function get_scrap()
 {
     $client = new Client();
     $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 60000);

     $crawled = "news.liputan6.com";
     // Title selector is defined but not used below
     $title = "article.hentry > header.entry-header > h1";
     $article = "article.hentry > div.entry-content > div.text-detail > p";

     $rows = DB::collection('crawling')->where('refurl', $crawled)->get();
     foreach ($rows as $row) {
         $page = $client->request('GET', $row['url']);
         if ($client->getResponse()->getStatus() != 200) {
             echo "we F*****G LOST DUDE !";
         } else {
             // One Scraping record per matched paragraph
             $page->filter($article)->each(function ($paragraph) {
                 $record = new Scraping();
                 $record->article = $paragraph->text();
                 $record->save();
             });
         }
         echo "<hr>";
     }
 }
Example #13
0
 /**
  * Requests every URL listed in the site's /sitemap.xml and reports the ones
  * that do not answer with status 200. With the "spider" option, the URLs
  * linked from the fetched pages are checked with HEAD requests as well.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  *
  * @return int 0 when no bad URL was found, 1 otherwise
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $base_url = $input->getArgument('baseurl');
     $c = $this->container;
     $sitemap_url = new UrlBuilder('/sitemap.xml', $base_url);
     $output->writeln('Crawling: ' . $sitemap_url);
     $sitemap = new SitemapCrawler($sitemap_url, $this->logger);
     $failed = [];
     $progress = new ProgressBar($output, count($sitemap));
     $progress->start();
     $client = new Client();
     $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, $input->getOption('timeout'));

     // Walk through every sitemap URL. Pages that answer with 200 are, in
     // spider mode, scanned for linked URLs. Those are collected as array
     // keys, which rules out duplicates; the values are unused placeholders.
     $linked = array();
     foreach ($sitemap as $page_url) {
         $page = $client->request('GET', $page_url);
         if ($client->getResponse()->getStatus() != 200) {
             $failed[] = $page_url;
         } elseif ($input->getOption('spider')) {
             $page_links = new HtmlCrawler($page, new UrlBuilder('', $base_url));
             foreach ($page_links as $found_url => $unused) {
                 $linked[$found_url] = null;
             }
         }
         $progress->advance();
     }
     $progress->finish();

     if ($input->getOption('spider')) {
         $linked_urls = [];
         $output->writeln('');
         $output->writeln('Spidering links...');
         $progress = new ProgressBar($output, count($linked));
         $progress->start();
         // Verify all linked URLs; a failing request counts as a bad URL too.
         foreach (array_keys($linked) as $resource_url) {
             try {
                 $client->request('HEAD', $resource_url);
                 if ($client->getResponse()->getStatus() < 400) {
                     $linked_urls[$resource_url] = $client->getResponse()->getStatus();
                 } else {
                     $failed[] = $resource_url;
                 }
             } catch (\Exception $e) {
                 $failed[] = $resource_url;
             }
             $progress->advance();
         }
         $progress->finish();
     }

     $output->writeln('');
     // @todo: Determine better exit code.
     if (empty($failed)) {
         $output->writeln('No errors for any page in ' . $sitemap_url);
         $exit_code = 0;
     } else {
         foreach ($failed as $bad_url) {
             $output->writeln($bad_url);
         }
         $exit_code = 1;
     }
     $output->writeln('');
     $output->writeln('<info>Done.</info>');

     return $exit_code;
 }
Example #14
0
 /**
  * {@inheritdoc}
  *
  * Makes the session dedicated to the given proxy the default browser
  * session, creating it first when needed. The session name is the MD5 hash
  * of the proxy URL. Goutte sessions get the proxy URL as a client option;
  * Selenium2 sessions get the proxy's pac file as a desired capability.
  *
  * @param Proxy $proxy
  *
  * @return mixed|void
  * @throws \Exception when the driver is not supported, or when a Selenium2
  *                    driver is used without a pac file
  */
 public function setProxy(Proxy $proxy)
 {
     $sessionName = md5($proxy->getUrl());
     $this->browser->resetSessions();

     if (!$this->browser->hasSession($sessionName)) {
         $driver = $this->browser->getSession()->getDriver();
         $driverClass = get_class($driver);

         if ($driverClass === 'Behat\\Mink\\Driver\\GoutteDriver') {
             /* @var $driver  GoutteDriver */
             $client = new Client();
             $guzzle = $client->getClient();
             $guzzle->setDefaultOption('proxy', $proxy->getUrl());
             $client->setClient($guzzle);
             $sessionDriver = new GoutteDriver($client);
         } elseif ($driverClass === 'Behat\\Mink\\Driver\\Selenium2Driver') {
             /* @var $driver Selenium2Driver */
             // Todo : use other files than pac file
             // Currently it does only support pac file
             if (empty($proxy->pacFile)) {
                 throw new \Exception('Pac file/url is required.');
             }
             // @see https://code.google.com/p/selenium/wiki/JsonWireProtocol#Proxy_JSON_Object
             $capabilities = ["proxy" => array("proxyType" => "pac", "proxyAutoconfigUrl" => $proxy->pacFile)];
             $driver->setDesiredCapabilities($capabilities);
             $this->browser->stopSessions();
             $sessionDriver = $driver;
         } else {
             throw new \Exception('Error : Proxy configuration is not implemented for class ' . get_class($driver));
         }

         $this->browser->registerSession($sessionName, new Session($sessionDriver));
     }

     $this->browser->setDefaultSessionName($sessionName);
 }
Example #15
0
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, 1);
        // $output contains the output string
        $this->result = curl_exec($this->ch);
        if ($this->result == false) {
            return false;
        }
        return true;
    }
    /**
     * Returns the value currently stored in $this->result.
     *
     * @return mixed
     */
    public function getResult()
    {
        return $this->result;
    }
}
use Goutte\Client;
// Request the Symfony blog with a 60 second cURL timeout and, on a 200
// response, print the page title twice: once via XPath, once via CSS selector.
$client = new Client();
$timeoutOption = 'config/curl/' . CURLOPT_TIMEOUT;
$client->getClient()->setDefaultOption($timeoutOption, 60);

$crawler = $client->request('GET', 'http://www.symfony.com/blog/');
$status_code = $client->getResponse()->getStatus();

if ($status_code == 200) {
    echo $crawler->filterXPath('html/head/title')->text(), $crawler->filter('title')->text();
}
/*

$pageloader = new pageloader();
$doc = new DOMDocument();

$result = $pageloader->loadurl("http://www.brickwatch.net/nl/set/7280/Straight-Crossroad-Plates.html");        
if ($result) {
  $doc->loadHTML($pageloader->getResult());
 /**
  * Requests the given URL and returns the crawler for the response.
  *
  * @param string $url
  *
  * @return null|\Symfony\Component\DomCrawler\Crawler null when the request
  *         throws; the error is then logged to the 'cron' category
  */
 private function getData($url)
 {
     $client = new Client();
     $guzzle = $client->getClient();

     // Proxy usage is disabled for now
     //$client->getClient()->setDefaultOption('config/curl/'.CURLOPT_PROXY, 'http://141.101.118.147:80');
     $options = array(
         'verify' => false,
         // Maximum number of seconds a request may take
         'config/curl/' . CURLOPT_TIMEOUT => 10,
         // Time to wait for the connection to be established
         'config/curl/' . CURLOPT_CONNECTTIMEOUT => 15,
     );
     foreach ($options as $option => $value) {
         $client->getClient()->setDefaultOption($option, $value);
     }
     $client->setClient($guzzle);

     try {
         return $client->request('GET', $url);
     } catch (\Exception $e) {
         \Yii::error(sprintf('Ошибка обработки: %s %s ', $e->getMessage(), $url), 'cron');
     }

     return null;
 }
Example #17
0
 /**
  * A freshly constructed client must hand out a Guzzle client on its own.
  */
 public function testCreatesDefaultClient()
 {
     $goutte = new Client();
     $guzzle = $goutte->getClient();

     $this->assertInstanceOf('GuzzleHttp\\ClientInterface', $guzzle);
 }
Example #18
0
 /**
  * Sets up the Mink browser.
  *
  * An injected session becomes the only, default session ('custom').
  * Without one, a Goutte session (with the cache subscriber attached to its
  * HTTP client) and a Selenium2 Firefox session are registered; Selenium2 is
  * the default when JavaScript is required, Goutte otherwise.
  *
  * @param Session $driver
  */
 private function setBrowser(Session $driver = null)
 {
     if ($driver == null) {
         $client = new Client();
         $guzzle = $client->getClient();
         CacheSubscriber::attach($guzzle, []);
         $client->setClient($guzzle);

         // init Mink and register sessions
         $goutteSession = new Session(new GoutteDriver($client));
         $seleniumSession = new Session(new Selenium2Driver('firefox', ["permissions.default.image" => 2]));
         $this->browser = new Mink(['goutte' => $goutteSession, 'selenium2' => $seleniumSession]);

         $this->browser->setDefaultSessionName($this->javaScriptRequired ? 'selenium2' : 'goutte');
         return;
     }

     $this->browser = new Mink(['custom' => $driver]);
     $this->browser->setDefaultSessionName('custom');
 }