setHeader() public method

public setHeader ( $name, $value )
 /**
  * Create the scraping client and store it on $this->client.
  *
  * The client sends the configured user agent and a British-English
  * Accept-Language header, and is backed by a Guzzle client that does not
  * follow redirects itself, keeps cookies and routes HTTP traffic through
  * $this->proxy.
  *
  * NOTE(review): 'verify' => false turns off TLS certificate verification in
  * Guzzle — confirm this is intentional.
  */
 private function setUpClient()
 {
     $browser = new Client();
     $this->client = $browser;

     $browser->setHeader('User-Agent', $this->user_agent);
     $browser->setHeader('Accept-Language', 'en-gb');

     $guzzleOptions = [
         'allow_redirects' => false,
         'cookies' => true,
         'verify' => false,
         'proxy' => ['http' => $this->proxy],
     ];
     $browser->setClient(new \GuzzleHttp\Client($guzzleOptions));
 }
Example #2
0
 /**
  * Fetch the configured URL and download everything found on it, optionally
  * continuing through the following pages.
  *
  * If the first request raises a TransferException, its message is printed
  * and the process terminates with exit code 1. If the response status is
  * not 200, "site not available" is printed and nothing is downloaded.
  *
  * @param boolean $allPages when truthy, keep following getNextLink() until it returns a falsy value
  */
 public function run($allPages)
 {
     $browser = new Client();
     $browser->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 30);
     $browser->setHeader('User-Agent', $this->config['user_agent']);

     try {
         $page = $browser->request('GET', $this->config['url']);
     } catch (TransferException $failure) {
         echo $failure->getMessage() . PHP_EOL;
         exit(1);
     }

     if ($browser->getResponse()->getStatus() != 200) {
         echo "site not available\n";
         return;
     }

     $this->getUrlsAndDownload($page);
     if (!$allPages) {
         return;
     }

     // Walk the pagination: each iteration opens the next page and processes it.
     for ($next = $this->getNextLink($page); $next; $next = $this->getNextLink($page)) {
         $page = $browser->click($next);
         $this->getUrlsAndDownload($page);
     }
 }
 /**
  * POST a client-credentials grant to the authentication endpoint and return
  * the JSON response decoded into PHP values.
  *
  * @param $basic value sent after "Basic " in the Authorization header
  * @return mixed the response content as decoded by json_decode(..., true)
  * @throws \Exception when json_decode() reports an error for the response content
  */
 public function postOauth2Token($basic)
 {
     $requestBody = 'grant_type=client_credentials';

     $http = $this->client;
     $http->setHeader('Authorization', 'Basic ' . $basic);
     $http->setHeader('Content-Type', 'application/x-www-form-urlencoded;charset=UTF-8');

     $endpoint = $this->getApiBaseUrl() . '/' . $this->authenticationUri;
     $http->request('POST', $endpoint, [], [], [], $requestBody);

     /**
      * @var $response Response
      */
     $response = $http->getResponse();
     $payload = json_decode($response->getContent(), true);

     $errorCode = json_last_error();
     if ($errorCode === JSON_ERROR_NONE) {
         return $payload;
     }

     throw new \Exception('An error occurred when decoding the response (Error code: ' . $errorCode . ')');
 }
Example #4
0
 /**
  * A User-Agent set through setHeader() must be the one found on the last
  * request recorded in the history.
  */
 public function testCustomUserAgent()
 {
     $transport = $this->getGuzzle();

     $browser = new Client();
     $browser->setClient($transport);
     $browser->setHeader('User-Agent', 'foo');
     $browser->request('GET', 'http://www.example.com/');

     $sentUserAgent = $this->history->getLastRequest()->getHeader('User-Agent');
     $this->assertEquals('foo', $sentUserAgent);
 }
Example #5
0
 /**
  * Validate the parameters, scrape the table cells from the entry point and
  * rebuild the route data from them.
  *
  * Cells are visited in document order. Every cell whose index is a multiple
  * of 7 only advances $this->row; every other cell is cleaned up and appended
  * to $this->dataFetched under the current row and its direction (indexes up
  * to 49 are DIRECTION_EXTERIOR, the rest DIRECTION_INTERIOR).
  *
  * NOTE(review): the header is registered under the name 'HTTP_USER_AGENT';
  * confirm the client really sends this as the User-Agent header.
  *
  * @param array $parameters passed to validParameters()
  */
 public function setParameters($parameters = [])
 {
     $this->validParameters($parameters);
     $this->client->setHeader('HTTP_USER_AGENT', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:45.0) Gecko/20100101 Firefox/45.0');

     $page = $this->client->request('GET', $this->entryPoint);
     $cells = $page->filter('.tps_parcours.BP .secteurTable tbody tr td');
     $cells->each(function (Crawler $cell, $index) {
         $content = $cell->text();

         // First cell of each group of seven: start a new row, store nothing.
         if ($index % 7 === 0) {
             $this->row++;
             return;
         }

         if ($index <= 49) {
             $direction = self::DIRECTION_EXTERIOR;
         } else {
             $direction = self::DIRECTION_INTERIOR;
         }

         // "é" becomes "e"; the remaining search entries are removed.
         $content = str_replace(["é", "\n", "\r", "\t", " "], ['e', ''], $content);
         $this->dataFetched[$this->row][$direction][] = $content;
     });

     $this->sanitizeContent();
     $this->calculateRoute();
 }
Example #6
0
 /**
  * A custom User-Agent set through setHeader() must appear, after the
  * 'Symfony2 BrowserKit' value, in the User-Agent header line of the last
  * recorded request.
  */
 public function testCustomUserAgent()
 {
     $transport = $this->getGuzzle();

     $browser = new Client();
     $browser->setClient($transport);
     $browser->setHeader('User-Agent', 'foo');
     $browser->request('GET', 'http://www.example.com/');

     $lastTransaction = end($this->history);
     $sentUserAgent = $lastTransaction['request']->getHeaderLine('User-Agent');
     $this->assertEquals('Symfony2 BrowserKit, foo', $sentUserAgent);
 }
Example #7
0
 /**
  * Return the contents of a URL, using the shared static cache when possible.
  *
  * With $as_html the page is requested through $this->client and the crawler's
  * html() result is returned; the given headers are set on the client for that
  * single request and removed again afterwards, even when the request throws.
  * Without $as_html the URL is read with file_get_contents() and $headers is
  * not used.
  *
  * A failed file_get_contents() read (false) is returned to the caller but is
  * not stored in the cache, so a later call can retry.
  *
  * @param string $url     address to fetch; also the input of buildCacheKey()
  * @param array  $headers header name => value pairs applied only when $as_html is truthy
  * @param bool   $as_html whether to fetch through the client and return html()
  * @return mixed the cached value, the fetched contents, or false when file_get_contents() fails
  */
 protected function getContents($url, $headers = [], $as_html = false)
 {
     $key = $this->buildCacheKey($url);
     if (self::$cache->has($key)) {
         return self::$cache->get($key);
     }
     if ($as_html) {
         foreach ($headers as $k => $v) {
             $this->client->setHeader($k, $v);
         }
         try {
             $contents = $this->client->request('GET', $url)->html();
         } finally {
             // The client is shared: never let per-call headers leak into
             // later requests, including when the request above fails.
             foreach ($headers as $k => $v) {
                 $this->client->removeHeader($k);
             }
         }
     } else {
         $contents = file_get_contents($url);
         if ($contents === false) {
             // Do not cache a failed read.
             return $contents;
         }
     }
     self::$cache->set($key, $contents);
     return $contents;
 }
 /**
  * Performs the lookup.
  *
  * @param  string $cnpj CNPJ
  * @param  string $ie IE - not tested
  * @param  string $paramBot ParamBot, parameter sent for the captcha validation
  * @param  string $captcha CAPTCHA
  * @param  string $stringCookie COOKIE; only the part before the first ';' is sent
  * @throws Exception when the CNPJ is invalid, when the page reports a wrong or
  *                   expired captcha image (code 99), or when the response has
  *                   no "body > center" element (code 99)
  * @return array  Company data, as produced by parseSelectors()
  */
 public static function consulta($cnpj, $ie, $paramBot, $captcha, $stringCookie)
 {
     $arrayCookie = explode(';', $stringCookie);
     if (!Utils::isCnpj($cnpj)) {
         throw new Exception('O CNPJ informado não é válido.');
     }

     $client = new Client();
     #$client->getClient()->setDefaultOption('timeout', 120);
     $curlDefaults = [
         CURLOPT_TIMEOUT => 0,
         CURLOPT_TIMEOUT_MS => 0,
         CURLOPT_CONNECTTIMEOUT => 0,
         CURLOPT_RETURNTRANSFER => true,
     ];
     foreach ($curlDefaults as $option => $setting) {
         $client->getClient()->setDefaultOption('config/curl/' . $option, $setting);
     }

     $requestHeaders = [
         'Host' => 'pfeserv1.fazenda.sp.gov.br',
         'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0',
         'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9, */* ;q=0.8',
         'Accept-Language' => 'pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3',
         'Accept-Encoding' => 'gzip, deflate',
         'Referer' => 'http://www.sintegra.gov.br/new_bv.html',
         'Cookie' => $arrayCookie[0],
         'Connection' => 'keep-alive',
     ];
     foreach ($requestHeaders as $headerName => $headerValue) {
         $client->setHeader($headerName, $headerValue);
     }

     // NOTE(review): an invalid CNPJ already threw above, so the 'ie' variants
     // are only reachable if Utils::isCnpj() accepts an empty string — confirm.
     $byCnpj = strlen($cnpj) > 0;
     $servico = $byCnpj ? 'cnpj' : 'ie';
     $consultaPor = $byCnpj ? 'Consulta por CNPJ' : 'Consulta por IE';

     $param = array(
         'hidFlag' => '0',
         'cnpj' => Utils::unmask($cnpj),
         'ie' => Utils::unmask($ie),
         'paramBot' => $paramBot,
         'Key' => $captcha,
         'servico' => $servico,
         'botao' => $consultaPor,
     );
     $crawler = $client->request('POST', 'http://pfeserv1.fazenda.sp.gov.br/sintegrapfe/sintegra', $param);

     $imageError = 'O valor da imagem esta incorreto ou expirou. Verifique novamente a imagem e digite exatamente os 5 caracteres exibidos.';
     $centers = $crawler->filter('body > center');
     $secondCenter = $centers->eq(1);
     // The second <center> element carries the captcha error text when present.
     if ($secondCenter->count() && $imageError == trim($secondCenter->text())) {
         throw new Exception($imageError, 99);
     }
     if (count($centers) == 0) {
         throw new Exception('Serviço indisponível!. Tente novamente.', 99);
     }

     //self::saveFile($client);
     $html = self::parseContent($client->getResponse()->__toString());
     $parsed = new \Symfony\Component\DomCrawler\Crawler($html);

     return self::parseSelectors($parsed);
 }
Example #9
0
 /**
  * Performs the lookup.
  *
  * Any failure (invalid CPF, transport error, unexpected page structure) is
  * reported with the same generic message; the original exception stays
  * available through getPrevious() for diagnostics.
  *
  * @param  string $cpf CPF
  * @param  string $captcha CAPTCHA
  * @param  string $stringCookie COOKIE; only the part before the first ';' is sent
  * @throws Exception when the CPF is invalid or anything goes wrong during the lookup
  * @return array  Person data: keys 'cpf', 'nome', 'situacao_cadastral', 'digito_verificador'
  */
 public static function consulta($cpf, $captcha, $stringCookie)
 {
     try {
         $arrayCookie = explode(';', $stringCookie);
         if (!Utils::isCpf($cpf)) {
             throw new Exception();
         }
         $client = new Client();
         $client->setHeader('Host', 'www.receita.fazenda.gov.br');
         $client->setHeader('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0');
         $client->setHeader('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
         $client->setHeader('Accept-Language', 'pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3');
         $client->setHeader('Accept-Encoding', 'gzip, deflate');
         $client->setHeader('Referer', 'http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublica.asp');
         $client->setHeader('Cookie', $arrayCookie[0]);
         $client->setHeader('Connection', 'keep-alive');
         $param = array('txtCPF' => Utils::unmask($cpf), 'txtTexto_captcha_serpro_gov_br' => $captcha, 'Enviar' => 'Consultar');
         $crawler = $client->request('POST', 'http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublicaExibir.asp', $param);
         // The result page exposes each field in a span.clConteudoDados,
         // prefixed with its label; the label is stripped below.
         $clConteudoDados = $crawler->filter('span.clConteudoDados');
         return array(
             'cpf' => Utils::unmask($cpf),
             'nome' => trim(str_replace('Nome da Pessoa Física: ', '', $clConteudoDados->eq(1)->html())),
             'situacao_cadastral' => str_replace('Situação Cadastral: ', '', $clConteudoDados->eq(2)->html()),
             'digito_verificador' => str_replace('Digito Verificador: ', '', $clConteudoDados->eq(3)->html()),
         );
     } catch (Exception $e) {
         // Same message and code as before, but chain the cause instead of discarding it.
         throw new Exception('Aconteceu um erro ao fazer a consulta. Envie os dados novamente.', 0, $e);
     }
 }
Example #10
0
 /**
  * Send an AMF call as a JSON POST to "/amf" and return the resulting crawler.
  *
  * The request body is whatever prepareAmfPayload() builds from the arguments.
  *
  * @param string $service
  * @param string $method
  * @param array $parameters
  * @return \Symfony\Component\DomCrawler\Crawler
  */
 protected function requestJsonAmf($service, $method, array $parameters = [])
 {
     $body = $this->prepareAmfPayload($service, $method, $parameters);

     $http = $this->client;
     $http->setHeader('Content-Type', 'application/json');
     $crawler = $http->request('POST', '/amf', [], [], [], $body);

     return $crawler;
 }
Example #11
0
 /**
  * Create a new client that sends a fixed Firefox 12 / FirePHP User-Agent.
  *
  * @return Client
  */
 protected function getHttpClient()
 {
     $userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:12.0) Gecko/20100101 Firefox/12.0 FirePHP/0.7.1';

     $browser = new Client();
     $browser->setHeader('User-Agent', $userAgent);

     return $browser;
 }
Example #12
0
 /**
  * Try every username/password combination against a CSRF-protected target.
  *
  * For each pair a fresh client loads the target URL, reads the CSRF token
  * from the input named by $uri_target->getCsrf(), submits the parameters
  * together with that token, and reports a success when the response text
  * matches $uri_target->getMatchSuccess().
  *
  * @param FuzzingUri $uri_target target description (URI, HTTP method, CSRF field name, success pattern)
  */
 public function attackFuzzedWithCsrf(FuzzingUri $uri_target)
 {
     $fuzz_target = $uri_target->getFuzzTarget();
     $url = $this->_guzzle->getBaseUrl() . $uri_target->getUri();
     $http_params = $fuzz_target->getParameters();
     // NOTE(review): $userAndPassRefs is only written to in this method and never
     // read; presumably its entries are references into $http_params — confirm,
     // otherwise the credentials below never reach the submitted request.
     $userAndPassRefs = $this->getUserAndPassRefs($http_params);
     $usernames = $this->_easycredentials->getUsernames($this->NB_CREDENTIALS);
     $passwords = $this->_easycredentials->getPasswords($this->NB_CREDENTIALS);
     foreach ($usernames as $one_username) {
         $userAndPassRefs['username'] = $one_username;
         foreach ($passwords as $one_password) {
             /**
              * todo add to DB with array of values and entity->persist() ?
              * todo clariss: verify connection + file verification
              * todo choice of API: guzzle && goutte == wtf ?
              */
             $userAndPassRefs['password'] = $one_password;
             //create a client
             $client = new Client();
             //set options
             $client->setHeader('User-Agent', "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36");
             $client->followRedirects(true);
             //request csrf
             $crawler_csrf = $client->request('get', $url);
             // Value of the first input whose name is the CSRF field name.
             // NOTE(review): index [0] is read unconditionally — presumably the
             // field is always present on the page; confirm.
             $csrf_token = $crawler_csrf->filter('input[name="' . $uri_target->getCsrf() . '"]')->first()->extract('value')[0];
             // The token is merged into $http_params itself, so it is carried
             // over (and overwritten) on every following attempt.
             $http_params = array_merge($http_params, array($uri_target->getCsrf() => $csrf_token));
             $client->followRedirects(true);
             $crawler_login = $client->request($uri_target->getHttpMethod(), $url, $http_params);
             if (preg_match($uri_target->getMatchSuccess(), $crawler_login->text())) {
                 $this->report(true, "Bruteforced " . $url . " with user " . $one_username . " and password " . $one_password);
             }
         }
     }
 }
Example #13
0
 /**
  * restart() must clear the custom headers and the authentication settings
  * held in the client's 'headers' and 'auth' properties.
  */
 public function testRestart()
 {
     $client = new Client();
     $client->setHeader('X-Test', 'test');
     $client->setAuth('foo', 'bar');

     // Reads a non-public property of the client through reflection.
     $peek = function ($property) use ($client) {
         $reflection = new \ReflectionProperty('Goutte\\Client', $property);
         $reflection->setAccessible(true);

         return $reflection->getValue($client);
     };

     $this->assertEquals(array('X-Test' => 'test'), $peek('headers'));
     $this->assertEquals(array('foo', 'bar', 'basic'), $peek('auth'));

     $client->restart();

     $this->assertEquals([], $peek('headers'));
     $this->assertNull($peek('auth'));
 }
 /**
  * Performs the lookup.
  *
  * Submits the CNPJ and captcha, then walks the table cells of the result
  * page. A cell whose first <font> holds a known label contributes the text
  * of its "font > b" elements under the matching result key: a plain string
  * when the cell has exactly one such element, otherwise a list.
  *
  * @param  string $cnpj CNPJ
  * @param  string $captcha CAPTCHA
  * @param  string $stringCookie COOKIE; only the part before the first ';' is sent
  * @throws Exception when the CNPJ is invalid, or (code 99) when the result
  *                   page contains the element used for the "not found" notice
  * @return array  Company data
  */
 public static function consulta($cnpj, $captcha, $stringCookie)
 {
     $result = array();
     $arrayCookie = explode(';', $stringCookie);
     if (!Utils::isCnpj($cnpj)) {
         throw new Exception('O CNPJ informado não é válido');
     }

     $client = new Client();
     $requestHeaders = [
         'Host' => 'www.receita.fazenda.gov.br',
         'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0',
         'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9, */* ;q=0.8',
         'Accept-Language' => 'pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3',
         'Accept-Encoding' => 'gzip, deflate',
         'Referer' => 'http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/valida.asp',
         'Cookie' => $arrayCookie[0],
         'Connection' => 'keep-alive',
     ];
     foreach ($requestHeaders as $headerName => $headerValue) {
         $client->setHeader($headerName, $headerValue);
     }

     $param = array(
         'origem' => 'comprovante',
         'cnpj' => Utils::unmask($cnpj),
         'txtTexto_captcha_serpro_gov_br' => $captcha,
         'submit1' => 'Consultar',
         'search_type' => 'cnpj',
     );
     $crawler = $client->request('POST', 'http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/valida.asp', $param);
     if ($crawler->filter('body > table:nth-child(3) > tr:nth-child(2) > td > b > font')->count() > 0) {
         throw new Exception('Erro ao consultar. O CNPJ informado não existe no cadastro.', 99);
     }

     // Label shown on the page => key used in the returned array.
     $labelToKey = [
         'NOME EMPRESARIAL' => 'razao_social',
         'TÍTULO DO ESTABELECIMENTO (NOME DE FANTASIA)' => 'nome_fantasia',
         'CÓDIGO E DESCRIÇÃO DA ATIVIDADE ECONÔMICA PRINCIPAL' => 'cnae_principal',
         'CÓDIGO E DESCRIÇÃO DAS ATIVIDADES ECONÔMICAS SECUNDÁRIAS' => 'cnaes_secundario',
         'CÓDIGO E DESCRIÇÃO DA NATUREZA JURÍDICA' => 'natureza_juridica',
         'LOGRADOURO' => 'logradouro',
         'NÚMERO' => 'numero',
         'COMPLEMENTO' => 'complemento',
         'CEP' => 'cep',
         'BAIRRO/DISTRITO' => 'bairro',
         'MUNICÍPIO' => 'cidade',
         'UF' => 'uf',
         'SITUAÇÃO CADASTRAL' => 'situacao_cadastral',
         'DATA DA SITUAÇÃO CADASTRAL' => 'situacao_cadastral_data',
         'MOTIVO DE SITUAÇÃO CADASTRAL' => 'motivo_situacao_cadastral',
         'SITUAÇÃO ESPECIAL' => 'situacao_especial',
         'DATA DA SITUAÇÃO ESPECIAL' => 'situacao_especial_data',
         'TELEFONE' => 'telefone',
         'ENDEREÇO ELETRÔNICO' => 'email',
         'ENTE FEDERATIVO RESPONSÁVEL (EFR)' => 'ente_federativo_responsavel',
     ];

     $cells = $crawler->filter('body > table:nth-child(3) > tr > td')->filter('td');
     foreach ($cells as $cellNode) {
         $cell = new Crawler($cellNode);

         $labelFont = $cell->filter('font:nth-child(1)');
         if ($labelFont->count() == 0) {
             continue;
         }

         // Collapse whitespace runs so the label can be matched exactly.
         $label = trim(preg_replace('/\\s+/', ' ', $labelFont->html()));
         if (!isset($labelToKey[$label])) {
             continue;
         }
         $key = $labelToKey[$label];

         $values = $cell->filter('font > b');
         $isSingleValue = $values->count() == 1;
         foreach ($values as $valueNode) {
             $value = new Crawler($valueNode);
             $text = trim(preg_replace('/\\s+/', ' ', $value->html()));
             $attach = htmlspecialchars_decode($text);
             if ($isSingleValue) {
                 $result[$key] = $attach;
             } else {
                 $result[$key][] = $attach;
             }
         }
     }

     return $result;
 }
 /**
  * Scrape the "reply" page behind the first stored Link and persist its
  * title, phone and e-mail as a Scrap record.
  *
  * Requests go through a SOCKS5 proxy on 127.0.0.1:9050. Whenever the listing
  * page reports being blocked, or the reply page shows a captcha, torNew() is
  * called and the listing page is loaded again before retrying.
  *
  * Does nothing when there is no Link to process.
  *
  * @return void the scraped data is echoed and stored, not returned
  */
 public function getData()
 {
     $link = Link::first();
     if ($link === null) {
         return;
     }

     $client = new Client();
     $client->setHeader('User-Agent', "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36");
     //Set proxy using tor
     $guzzleClient = new \GuzzleHttp\Client(['curl' => [CURLOPT_PROXY => '127.0.0.1:9050', CURLOPT_PROXYTYPE => CURLPROXY_SOCKS5]]);
     $client->setClient($guzzleClient);

     $crawler = $client->request('GET', $link->url);
     $isBlock = $crawler->filter('p')->text();

     while (true) {
         // Strict comparison: "blocked" at offset 0 is a match too.
         if (strpos($isBlock, 'blocked') !== false) {
             $this->torNew();
             $crawler = $client->request('GET', $link->url);
             $isBlock = $crawler->filter('p')->text();
             continue;
         }

         $lnk = $crawler->selectLink('reply')->link();
         $replyPage = $client->click($lnk);

         if ($replyPage->filterXpath("//div[@class='captcha']")->count()) {
             // Reload the listing after the identity change; the captcha
             // page has no "reply" link to follow on the next attempt.
             $this->torNew();
             $crawler = $client->request('GET', $link->url);
             $isBlock = $crawler->filter('p')->text();
             continue;
         }

         var_dump($replyPage->html());
         $title = $replyPage->filter('title')->text();
         $mobile = $replyPage->filter('.mobile-only')->first()->text();
         $email = $replyPage->filter('.mailapp')->first()->text();
         echo $link->url . ' ' . $title . ' ' . $mobile . ' ' . $email;
         Scrap::create(['url' => $link->url, 'title' => $title, 'email' => $email, 'phone' => $mobile]);
         break;
     }
 }
Example #16
0
<?php

require __DIR__ . '/vendor/autoload.php';
use Goutte\Client;
use WebmasterHacks\Pornhub\Video;
use WebmasterHacks\Pornhub\Pornstar;
use WebmasterHacks\Pornhub\Category;
use WebmasterHacks\Pornhub\Tag;
// Browser-like client shared by everything the Video object fetches.
$client = new Client();
$client->setHeader(
    'User-Agent',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36'
);
$video = new Video(
    $client,
    'http://www.pornhub.com/view_video.php?viewkey=ph55f4113f77d67'
);

// Print one URL per line for each group of related entities.
echo 'Pornstars:', PHP_EOL;
$video->pornstars()->each(function (Pornstar $pornstar) {
    echo $pornstar->url(), PHP_EOL;
});

echo 'Categories:', PHP_EOL;
$video->categories()->each(function (Category $category) {
    echo $category->url(), PHP_EOL;
});

echo 'Tags:', PHP_EOL;
$video->tags()->each(function (Tag $tag) {
    echo $tag->url(), PHP_EOL;
});
Example #17
-1
 /**
  * Crawl a single URL and, for HTML pages, continue with the links found on it.
  *
  * The URL is skipped when it is empty or already marked as visited. The
  * response status is logged and counted; a request that throws is recorded
  * as status 400. Only responses below 400 with a text/html content type are
  * handed to the HTML provider. When extraction fails, the page is flagged
  * 'dont_index' and its links are not followed.
  *
  * @param string    $url   address to crawl
  * @param int|false $depth remaining depth for internal links; false is passed on unchanged
  */
 protected function crawlPages($url, $depth)
 {
     if (!$url || !empty($this->pages[$url]['visited'])) {
         return;
     }
     $client = new Client();
     $client->setHeader('User-Agent', $this->parameters['user_agent']);
     try {
         $crawler = $client->request('GET', $url);
         $statusCode = $client->getResponse()->getStatus();
         $this->log(sprintf("%s: %s", $statusCode, $url));
     } catch (\Exception $e) {
         $statusCode = 400;
         $this->log(sprintf("%s: %s", $statusCode, $url));
         $this->log(sprintf("Error page retrieving (%s)", $e->getMessage()));
     }
     $this->setPageStatusStats($statusCode);
     if ($statusCode >= 400) {
         return;
     }
     if (!isset($this->pages[$url])) {
         $this->pages[$url] = array();
     }
     $this->pages[$url]['status_code'] = $statusCode;

     // A missing header becomes an empty string; parameters such as
     // "; charset=..." are dropped and the media type is compared
     // case-insensitively.
     $contentType = (string) $client->getResponse()->getHeader('Content-Type');
     $separator = strpos($contentType, ';');
     if ($separator !== false) {
         $contentType = substr($contentType, 0, $separator);
     }
     $contentType = strtolower(trim($contentType));
     if ($contentType !== 'text/html') {
         return;
     }

     $isExternal = isset($this->pages[$url]['external_link']) ? $this->pages[$url]['external_link'] : false;
     $provider = $this->container->get('symbio_fulltext_search.provider.html');
     // Defined up front so a failed extraction leaves a known, falsy value.
     $pageInfo = null;
     try {
         $pageInfo = $provider->extract(array(
             HtmlProvider::CONFIG_CRAWLER_PARAMETERS_HANDLER => $this->parameters,
             HtmlProvider::CONFIG_CRAWLER_HANDLER => $crawler,
             HtmlProvider::CONFIG_IS_EXTERNAL_LINK_HANDLER => $isExternal,
         ));
     } catch (\Exception $e) {
         error_log('Error retrieving data from link: ' . $url . ' (' . $e->getMessage() . ') ');
         $this->pages[$url]['dont_index'] = true;
     }
     if (!$pageInfo) {
         return;
     }

     $this->pages[$url] = array_merge($this->pages[$url], $pageInfo);
     // mark current url as visited
     $this->pages[$url]['visited'] = true;

     if (!isset($this->pages[$url]['external_link']) || !$this->pages[$url]['external_link']) {
         // for internal uris, get all links inside
         $links = $this->extractLinks($crawler, $url);
         if (count($links)) {
             $this->crawlChildLinks($links, $depth !== false ? $depth - 1 : false);
         }
     } elseif ($this->parameters[self::CRAWL_EXTERNAL_LINKS] && $this->parameters[self::EXTERNAL_LINKS_DEPTH] > 0) {
         // external pages are followed only when enabled, with their own depth budget
         $links = $this->extractLinks($crawler, $url);
         if (count($links)) {
             $this->crawlChildLinks($links, $this->parameters[self::EXTERNAL_LINKS_DEPTH]);
         }
     }
 }