Exemple #1
1
 /**
  * Build one product record from a listing node and store it.
  *
  * Reads the title and unit price from the listing node, requests the page
  * behind the title link to measure the response size and collect the text
  * following its "Description" heading, then adds the unit price to the
  * running total and appends the record to the results.
  *
  * @param Crawler $node Listing node holding an "h3 a" link and a ".pricePerUnit" element
  */
 private function addProduct(Crawler $node)
 {
     /** @var Crawler $titleLink */
     $titleLink = $node->filter('h3 a');

     $product = new \stdClass();
     $product->title = trim($titleLink->text());
     $product->unit_price = $this->getPrice($node->filter('.pricePerUnit'));

     // Follow the title link; the length of the response body is passed to bytesToKb().
     $productCrawler = $this->client->request('GET', $titleLink->attr('href'));
     $product->size = $this->bytesToKb(strlen($this->client->getResponse()->getContent()));

     $product->description = '';
     $heading = $productCrawler->filterXPath('//h3[.="Description"]');
     if ($heading->count() > 0) {
         foreach ($heading->siblings() as $sibling) {
             // Product pages have different structures: stop at the first h3 sibling.
             if ($sibling->tagName == 'h3') {
                 break;
             }
             // Paragraphs are joined with a line break once the description has content.
             $separator = $product->description != "" ? "\n" : '';
             // Collapse runs of whitespace into one space, but keep new lines.
             // @TODO address formatting issues - breaks to new lines
             $text = trim(preg_replace("/[^\\S\r\n]+/", " ", $sibling->nodeValue));
             $product->description .= $separator . $text;
         }
     }

     // Increment the total and keep the record.
     $this->total += $product->unit_price;
     $this->results[] = $product;
 }
Exemple #2
1
 /**
  * The lists page answers with status 200 and contains exactly one h1
  * heading with the text "Lists".
  */
 public function testUserSeesPageTitle()
 {
     $browser = new Client();
     $page = $browser->request('GET', 'http://localhost:8000/lists');

     $status = $browser->getResponse()->getStatus();
     $this->assertEquals(200, $status);

     $headings = $page->filter('h1:contains("Lists")');
     $this->assertCount(1, $headings);
 }
 /**
  * Sign in through the login form and then open a second page.
  *
  * The trait method is deliberately not used here: the test spans several
  * page requests and needs the session to persist on the remote server.
  */
 public function testLoggedIn()
 {
     // Create a web client and hit the login page; it should answer 200.
     $browser = new Client();
     $loginPage = $browser->request('GET', "http://localhost/login");
     $this->assertEquals(200, $browser->getResponse()->getStatus());

     // Select the form on the page and populate its values. With Goutte\Client
     // there is no need to parse the HTML to find the csrf _token.
     $loginForm = $loginPage->selectButton('Sign in')->form();
     $loginForm->setValues(['email' => '*****@*****.**', 'password' => 'verysecret']);

     // Submit the form and make sure an HTML page is displayed (response code 200).
     $browser->submit($loginForm);
     $this->assertEquals(200, $browser->getResponse()->getStatus());

     // Make sure the testimonial page can be reached.
     $browser->request('GET', 'http://localhost/add-testimonial');
     $this->assertEquals(200, $browser->getResponse()->getStatus());
 }
 /**
  * Send the vending machine load data to the sync URL and expect a 200
  * response whose body is the string "null".
  */
 public function testSyncVendingMachineLoads()
 {
     // The connection path is "<serial>?<authentication string>".
     $path = sprintf("%s?%s", $this->getVendingMachineSerial(), $this->getVendingMachineAuthentificationString());
     $endpoint = $this->getVendingMachineConnectionUrl($path);

     $browser = new Client();
     $browser->request(
         VendingMachineLoad::getSyncMethod(),
         $endpoint,
         [],
         [],
         ['CONTENT_TYPE' => 'application/json'],
         VendingMachineLoad::getData()
     );

     $this->assertEquals(200, $browser->getResponse()->getStatus());
     $this->assertEquals('null', $browser->getResponse()->getContent());
 }
Exemple #5
0
 /**
  * Return the client's current response, or null when no client is set.
  *
  * @return null|Response
  */
 protected function getResponse()
 {
     return $this->client ? $this->client->getResponse() : null;
 }
 /**
  * Request the account's Twitter page and record what it reveals about the account.
  *
  * Sets the "not found" flag when the response status is not 200, the
  * "suspended" flag when the body contains the word "suspended", and the
  * "no recent posts" flag when the newest timestamp found on the page is more
  * than 24 hours old or when no timestamp is found at all.
  *
  * @return false|null false when the account is not found or suspended, null otherwise
  */
 private function fetchDetails()
 {
     $url = 'https://www.twitter.com/' . $this->getAccountName();
     $client = new Client();
     $client->followRedirects();
     $crawler = $client->request('GET', $url);
     /** @var Response $response */
     $response = $client->getResponse();
     if ($response->getStatus() != '200') {
         $this->setIsNotFound(true);
         return false;
     }
     // stripos() returns 0 for a match at the very start of the body, so the
     // result has to be compared against false instead of being used as a boolean.
     if (stripos($response->getContent(), 'suspended') !== false) {
         $this->setIsSuspended(true);
         return false;
     }
     $post_times = $crawler->filter('#stream-items-id li ._timestamp')->each(function (Crawler $node) {
         return $node->attr('data-time');
     });
     // Keep only usable timestamps; nodes without a numeric data-time attribute are ignored.
     $post_times = array_filter($post_times, 'is_numeric');
     if (!$post_times) {
         // No timestamp on the page means there is no recent post to speak of.
         $this->setDoesntHaveRecentPosts(true);
         return null;
     }
     $last_post_time = max(array_map('intval', $post_times));
     $hour_difference = round((time() - $last_post_time) / 60 / 60, 2);
     if ($hour_difference > 24) {
         // The last post is older than 24 hours.
         $this->setDoesntHaveRecentPosts(true);
     }
     return null;
 }
Exemple #7
0
 /**
  * Download from the configured start page and, on request, from every
  * following page reachable through the "next" link.
  *
  * Prints the transfer error and exits with status 1 when the first request
  * fails; prints "site not available" when the response status is not 200.
  *
  * @param boolean $allPages When true, keep following the next link until there is none
  */
 public function run($allPages)
 {
     $client = new Client();
     $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 30);
     $client->setHeader('User-Agent', $this->config['user_agent']);

     try {
         $page = $client->request('GET', $this->config['url']);
     } catch (TransferException $e) {
         echo $e->getMessage() . PHP_EOL;
         exit(1);
     }

     if ($client->getResponse()->getStatus() != 200) {
         echo "site not available\n";
         return;
     }

     $this->getUrlsAndDownload($page);
     if (!$allPages) {
         return;
     }

     // Click through the pagination until getNextLink() yields nothing.
     for ($next = $this->getNextLink($page); $next; $next = $this->getNextLink($page)) {
         $page = $client->click($next);
         $this->getUrlsAndDownload($page);
     }
 }
 /**
  * Call the Orange portal and submit the configured credentials.
  *
  * The form submission is forged as a POST request because the page has no
  * button or input to submit. The login counts as successful when the client
  * ends up on the Orange home page after following the redirect. A connection
  * error is reported and ends the script with status 1.
  *
  * @return int|null 1 when the login failed, null when it succeeded
  */
 protected function loginToOrange()
 {
     $config = $this->getHelperSet()->get('config');
     $this->outputMessage('Login to orange wifi ...');

     $parameters = array(
         'username' => $config['login'],
         'password' => $config['pass'],
         'isCgu' => 'true',
         'code' => 0,
         'lang' => 'fr',
         'auth' => 1,
         'restrictedCode' => '',
         'restrictedProfile' => 0,
         'restrictedRealm' => '',
         'originForm' => 'true',
         'tab' => '1',
     );

     try {
         $client = new Client();
         $crawler = $client->request('POST', 'https://hautdebitmobile.orange.fr:8443/home/wassup', $parameters);
     } catch (\Exception $e) {
         $this->outputError('Connection error : ' . $e->getMessage(), true);
         exit(1);
     }

     // A successful login ends on the Orange home page.
     if ($client->getRequest()->getUri() == 'http://www.orange.fr') {
         $this->outputMessage('Login success !');
         return;
     }

     // Append the portal's own error text when the page carries exactly one error box.
     $message = 'Login failed';
     $errorBox = $crawler->filterXPath("//div[@id='loginFormWassupErrorMessage']");
     if ($errorBox->count() == 1) {
         $message .= ' : ' . trim($errorBox->text());
     }
     $this->outputError($message);

     // Output the raw response when running with -vv.
     if ($this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE) {
         echo $client->getResponse();
     }

     return 1;
 }
 /**
  * Request a URL and return the status of the response.
  *
  * @param $url URL to request with GET
  * @return mixed Status reported by the client's response
  */
 function crawl($url)
 {
     $browser = new Client();
     $browser->request('GET', $url);

     return $browser->getResponse()->getStatus();
 }
Exemple #10
0
 /**
  * The home page answers with status 200 and shows exactly one h1 heading
  * with the welcome message.
  *
  * The method name starts with a lowercase "test" because PHPUnit matches
  * that prefix case-sensitively when collecting test methods. PHP resolves
  * method names case-insensitively, so existing calls keep working.
  */
 public function testUserSeesWelcomeMessage()
 {
     $client = new Client();
     $crawler = $client->request('GET', 'http://homestead.app/');
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertCount(1, $crawler->filter('h1:contains("Welcome to TODOParrot")'));
 }
Exemple #11
0
 /**
  * Crawl a single URL and record the outcome in the links table.
  *
  * The status code is stored under the key returned by getPathFromUrl(). For
  * a 200 response with an HTML content type the title information is
  * extracted, child links are collected when the entry is flagged as not
  * external, the entry is marked visited and the children are traversed with
  * the depth reduced by one. Any exception is recorded under the raw URL with
  * status code '404' plus the exception's code and message.
  *
  * @param string $url
  * @param int    $depth
  */
 protected function traverseSingle($url, $depth)
 {
     try {
         $client = new Client();
         $client->followRedirects();
         $crawler = $client->request('GET', $url);

         $statusCode = $client->getResponse()->getStatus();
         $hash = $this->getPathFromUrl($url);
         $this->links[$hash]['status_code'] = $statusCode;
         if ($statusCode !== 200) {
             return;
         }

         // Children are traversed only when the response is an HTML document.
         $contentType = $client->getResponse()->getHeader('Content-Type');
         if (strpos($contentType, 'text/html') === false) {
             return;
         }

         $this->extractTitleInfo($crawler, $hash);

         $isInternal = isset($this->links[$hash]['external_link'])
             && $this->links[$hash]['external_link'] === false;
         $childLinks = $isInternal ? $this->extractLinksInfo($crawler, $hash) : array();

         $this->links[$hash]['visited'] = true;
         $this->traverseChildren($childLinks, $depth - 1);
     } catch (CurlException $e) {
         $this->links[$url]['status_code'] = '404';
         $this->links[$url]['error_code'] = $e->getCode();
         $this->links[$url]['error_message'] = $e->getMessage();
     } catch (\Exception $e) {
         $this->links[$url]['status_code'] = '404';
         $this->links[$url]['error_code'] = $e->getCode();
         $this->links[$url]['error_message'] = $e->getMessage();
     }
 }
 /**
  * Post a client-credentials grant request to the authentication URI and
  * return the decoded JSON response.
  *
  * @param $basic Value sent after "Basic " in the Authorization header
  * @return mixed Response body decoded with json_decode(..., true)
  * @throws \Exception When the response body cannot be decoded as JSON
  */
 public function postOauth2Token($basic)
 {
     $this->client->setHeader('Authorization', 'Basic ' . $basic);
     $this->client->setHeader('Content-Type', 'application/x-www-form-urlencoded;charset=UTF-8');

     $endpoint = $this->getApiBaseUrl() . '/' . $this->authenticationUri;
     $this->client->request('POST', $endpoint, [], [], [], 'grant_type=client_credentials');

     /** @var Response $response */
     $response = $this->client->getResponse();
     $payload = json_decode($response->getContent(), true);

     $jsonError = json_last_error();
     if ($jsonError === JSON_ERROR_NONE) {
         return $payload;
     }

     throw new \Exception('An error occurred when decoding the response (Error code: ' . $jsonError . ')');
 }
 /**
  * Detect the type of forum that is being requested.
  *
  * Requests the configured URI with the configured method and visits the
  * page's meta tags. The per-tag callback is still empty, so nothing is
  * detected or returned yet.
  */
 public function detect()
 {
     $client = new Client();
     $crawler = $client->request($this->method, $this->uri);
     // Filter by meta tags.
     $metas = 'meta';
     $crawler->filter($metas)->each(function (Crawler $node, $i) {
         // TODO: inspect the meta tag to identify the forum type.
     });
 }
Exemple #14
0
 /**
  * Request a URL and return the content of the response.
  *
  * A URL that starts with "/" is prefixed with "http://" and the current
  * server name taken from $_SERVER['SERVER_NAME'].
  *
  * @param string $url        Absolute URL, or a path starting with "/"
  * @param string $method     HTTP method
  * @param array  $parameters Request parameters passed to the client
  * @return mixed Content of the client's response
  */
 public function request($url, $method = 'GET', $parameters = array())
 {
     $startsWithSlash = strpos($url, '/') === 0;
     $target = $startsWithSlash ? 'http://' . $_SERVER['SERVER_NAME'] . $url : $url;

     $browser = new Client();
     $browser->request($method, $target, $parameters);

     return $browser->getResponse()->getContent();
 }
 /**
  * Submitting the form behind the "OK" button answers with status 200 and
  * shows the success text in the ".message" element.
  */
 public function testSubmitFormOk()
 {
     $browser = new Client();
     $startPage = $browser->request('GET', 'http://localhost');

     $okForm = $startPage->selectButton('OK')->form();
     $resultPage = $browser->submit($okForm);

     $this->assertEquals(200, $browser->getResponse()->getStatus());
     $this->assertEquals('Data successfully submitted', $resultPage->filter('.message')->text());
 }
 /**
  * Request a URL and return the crawler for the response.
  *
  * @param string $url The url to crawl
  *
  * @return mixed The crawler returned by the client's GET request
  *
  * @throws EntityClientException When the response status is not 200
  */
 protected function getBaseCrawler($url)
 {
     $client = new GoutteClient();
     $crawler = $client->request('GET', $url);

     $status = $client->getResponse()->getStatus();
     if (200 == $status) {
         return $crawler;
     }

     throw new EntityClientException(sprintf('Request for url: %s, returned with status code: %s', $url, $status));
 }
 /**
  * Collect the prices listed on brickwat.ch for every product and hand them
  * to writeResults() together with a freshly opened CSV file.
  *
  * Opens "prijzen-<Y-m-d>.csv" for writing, requests each product page with
  * a half-second pause between requests and reads company and price from
  * every "tr.row-collapse" row inside "#prices". The script ends with exit
  * status 1 when the file cannot be opened.
  *
  * @param InputInterface  $input  Not read by this command
  * @param OutputInterface $output Receives progress and error messages
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $file = fopen("prijzen-" . date('Y-m-d') . ".csv", "w");
     if (!$file) {
         $output->writeln("can not open file");
         // Signal the failure to the calling shell instead of exiting with status 0.
         exit(1);
     }

     // Start with empty collections so writeResults() always receives arrays,
     // even when no product page answers with 200.
     $prices = array();
     $companys = array();

     $client = new Client();
     $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 60);

     foreach ($this->getProducts() as $product) {
         $crawler = $client->request('GET', 'http://brickwat.ch/' . $product);
         if ($client->getResponse()->getStatus() == 200) {
             $rows = $crawler->filter('#prices')->filter('tr.row-collapse');
             $rowCount = $rows->count();
             $output->writeln("Sites found for product " . $product . ": " . $rowCount);
             $prices[$product] = array();
             for ($i = 0; $i < $rowCount; $i++) {
                 $cells = $rows->eq($i)->filter('td');
                 // The first cell holds either a linked logo (name in the image
                 // title, price inside a link) or plain text.
                 if ($cells->eq(0)->filter('a img')->count()) {
                     $company = $cells->eq(0)->filter('a img')->attr('title');
                     $price = $cells->eq(2)->filter('a')->text();
                 } else {
                     $company = $cells->eq(0)->text();
                     $price = $cells->eq(2)->text();
                 }
                 $prices[$product][$company] = $price;
                 $companys[$company] = $company;
             }
         }
         // Pause for half a second between product requests.
         usleep(500000);
     }

     $this->writeResults($output, $prices, $companys, $file);
     fclose($file);
 }
Exemple #18
0
 /**
  * Load the content for this URL through the given client.
  *
  * Does nothing when no provider is set. When the provider class declares a
  * static $endpoint, the endpoint is requested with the raw-url-encoded URL
  * appended and the JSON-decoded response body is stored; otherwise the URL
  * itself is requested and the value returned by the client is stored.
  *
  * @param Client $client
  */
 public function request(Client $client)
 {
     if (!isset($this->provider)) {
         return;
     }

     $providerClass = static::$providers[$this->provider];
     if (!isset($providerClass::$endpoint)) {
         $this->content = $client->request('GET', $this->url);
         return;
     }

     $client->request('GET', $providerClass::$endpoint . rawurlencode($this->url));
     $this->content = json_decode($client->getResponse()->getContent());
 }
Exemple #19
0
 /**
  * Search Twitter for a query and add every bundle found in the tweets' links.
  *
  * Each distinct URL found in a tweet is handled once: a GitHub repository
  * URL yields its "owner/name" directly, any other URL is downloaded and the
  * name is read from a GitHub page title. Valid names are stored in $repos
  * under their lower-cased name.
  *
  * @param string $query Search terms
  * @param array  $repos Bundles found so far, keyed by lower-cased name
  * @param mixed  $limit Not used by this method
  * @return array $repos including the newly found bundles
  */
 protected function searchBundlesOnTwitter($query, array $repos, $limit)
 {
     $this->output->write(sprintf('Search "%s" on Twitter', $query));
     $url = sprintf('http://search.twitter.com/search.json?q=%s&rpp=%d', urlencode($query), 100);
     $this->browser->request('GET', $url);
     $data = json_decode($this->browser->getResponse()->getContent(), true);

     // A response without a "results" list (for example an error payload) is
     // treated like an empty result set.
     $results = is_array($data) && isset($data['results']) && is_array($data['results'])
         ? $data['results']
         : array();

     $alreadyFound = array();
     foreach ($results as $result) {
         if (!isset($result['text'])) {
             continue;
         }
         // Search urls in the tweet
         if (!preg_match_all("#https?://([-\\w\\.]+)+(:\\d+)?(/([\\w/_\\.]*(\\?\\S+)?)?)?#i", $result['text'], $urlMatches)) {
             continue;
         }
         foreach ($urlMatches[0] as $tweetUrl) {
             $tweetUrl = rtrim($tweetUrl, '.');
             if (isset($alreadyFound[$tweetUrl])) {
                 continue;
             }
             $alreadyFound[$tweetUrl] = true;

             if (preg_match('#^https?://github.com/([^/]+/[^/]+)(/.*)?#', $tweetUrl, $m)) {
                 // The url is directly a github url
                 $name = $m[1];
             } else {
                 // Or a redirect/multi-redirect link => parse the resulting github page.
                 // The catch only triggers when an error handler turns warnings into
                 // ErrorException; otherwise a failed download returns false.
                 try {
                     $html = file_get_contents($tweetUrl);
                 } catch (\ErrorException $e) {
                     continue;
                 }
                 if ($html === false) {
                     continue;
                 }
                 if (!preg_match('#<title>([a-z0-9-_]+/[^\'"/ ]+) - GitHub</title>#i', $html, $m)) {
                     continue;
                 }
                 $name = $m[1];
             }

             if (!$this->isValidBundleName($name)) {
                 continue;
             }
             $repos[strtolower($name)] = new Bundle($name);
         }
     }

     $this->output->writeln('... DONE');
     return $repos;
 }
 /**
  * Fetch a random funny gif from Giphy and store it for today's article.
  *
  * When the fetched image URL is already stored for some article, one more
  * random gif is fetched and used instead. The gif row of today's article is
  * updated when it exists and created otherwise. Progress messages and the
  * image tag are echoed.
  */
 public function getThisWeeksGifs()
 {
     $url = 'http://api.giphy.com/v1/gifs/random?api_key=' . env('GIPHY_API_KEY') . '&tag=funny&fmt=html';

     $client = new Client();
     $crawler = $client->request('GET', $url);
     if ($client->getResponse()->getStatus() == 200) {
         echo '200 OK<br>';
     }

     $date = substr(Carbon::today(), 0, 10);
     $imgUrl = $crawler->filter('img')->attr('src');
     $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

     $articleHasGif = Gifs::where('article_id', $id)->exists();
     $imageAlreadyUsed = Gifs::where('imgUrl', $imgUrl)->exists();

     if ($imageAlreadyUsed) {
         // The image is already stored: ask for one more random gif.
         $retryClient = new Client();
         $imgUrl = $retryClient->request('GET', $url)->filter('img')->attr('src');
     }

     if ($articleHasGif) {
         Gifs::where('article_id', $id)->update(['imgUrl' => $imgUrl]);
     } else {
         // No row exists for this article yet, so an update would change
         // nothing; create the row instead.
         $gif = new Gifs();
         $gif->article_id = $id;
         $gif->imgUrl = $imgUrl;
         $gif->save();
     }

     echo $imageAlreadyUsed ? "found new image" : "Gif for " . $date . " stored!";
     if ($articleHasGif) {
         echo $imageAlreadyUsed ? "stage 1" : "stage 2";
     } else {
         echo $imageAlreadyUsed ? "stage 3" : "stage 4";
     }

     // The URL comes from a remote page, so escape it before placing it in an attribute.
     echo '<img src="' . htmlspecialchars($imgUrl, ENT_QUOTES, 'UTF-8') . '">';
 }
 /**
  * Log out from the Orange wifi portal by requesting its disconnect URL.
  *
  * Runs the parent implementation first. A connection error is reported and
  * ends the script with status 1.
  *
  * @param InputInterface  $input
  * @param OutputInterface $output
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     parent::execute($input, $output);
     $this->outputMessage('Logout from orange wifi ...');

     $client = new Client();
     try {
         $client->request('GET', 'https://hautdebitmobile.orange.fr:8443/home/disconnect');

         // Output the raw response when running with -vv.
         $isVeryVerbose = $this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE;
         if ($isVeryVerbose) {
             echo $client->getResponse();
         }
     } catch (\Exception $e) {
         $this->outputError('Connection error : ' . $e->getMessage(), true);
         exit(1);
     }

     $this->outputMessage('done');
 }
 /**
  * Fetch the session cookie and captcha image needed to submit a CNPJ query.
  *
  * Requests the query form to obtain the first Set-Cookie header, then
  * downloads the captcha image with curl while sending that cookie.
  *
  * @throws Exception When the downloaded captcha cannot be read as an image
  * @return array 'cookie' with the session cookie and 'captchaBase64' with the image as a data URI
  */
 public static function getParams()
 {
     $client = new Client();
     $client->request('GET', 'http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/Cnpjreva_Solicitacao2.asp');
     $responseHeaders = $client->getResponse()->getHeaders();
     $cookie = $responseHeaders['Set-Cookie'][0];

     $requestHeaders = array(
         "Pragma: no-cache",
         "Origin: http://www.receita.fazenda.gov.br",
         "Host: www.receita.fazenda.gov.br",
         "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0",
         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
         "Accept-Encoding: gzip, deflate",
         "Referer: http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/cnpjreva_solicitacao2.asp",
         "Cookie: flag=1; {$cookie}",
         "Connection: keep-alive",
     );

     $captchaRequest = curl_init("http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/captcha/gerarCaptcha.asp");
     curl_setopt_array($captchaRequest, array(
         CURLOPT_COOKIEJAR => 'cookiejar',
         CURLOPT_HTTPHEADER => $requestHeaders,
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => 1,
         CURLOPT_BINARYTRANSFER => TRUE,
     ));
     $img = curl_exec($captchaRequest);
     curl_close($captchaRequest);

     // The download only counts when the bytes can be read as an image.
     if (@imagecreatefromstring($img) == false) {
         throw new Exception('Não foi possível capturar o captcha');
     }

     return array(
         'cookie' => $cookie,
         'captchaBase64' => 'data:image/png;base64,' . base64_encode($img),
     );
 }
 /**
  * Walk through the hosted payment flow screen by screen: bank login,
  * validation, TAN entry, success page and the redirect back to the merchant.
  *
  * Response text is obtained with a string cast, which works both when the
  * response content is a plain string and when it is an object that can be
  * converted to a string.
  *
  * @group liveTest
  */
 public function testRequestHostedEndpointLive()
 {
     $requestDTO = $this->generatePaymentRequestDTO();
     $response = $this->giropayHostedService->requestHostedEndpoint($requestDTO);
     $this->assertInstanceOf("PHPCommerce\\Payment\\Dto\\PaymentResponseDTO", $response);
     $redirectUrl = $response->getResponseMap()[GiropayConstants::HOSTED_REDIRECT_URL];
     $client = new Client();
     $crawler = $client->request('GET', $redirectUrl);
     // bank login screen, login with test data
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertContains("Online-Banking: Anmelden", (string) $client->getResponse());
     $form = $crawler->selectButton('Sicher anmelden')->form();
     $crawler = $client->submit($form, array('account/addition[@name=benutzerkennung]' => 'sepatest1', 'ticket/pin' => '12345'));
     // validation screen, just click next button
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertContains("Bitte w&auml;hlen Sie eine Mobilfunknummer f&uuml;r den smsTAN-Versand", (string) $client->getResponse());
     $form = $crawler->selectButton('weiterButton')->form();
     $crawler = $client->submit($form, array());
     // tan screen, enter tan
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertContains("Bitte kontrollieren Sie vor der Eingabe der TAN die per SMS versandten Auftragsdaten", (string) $client->getResponse());
     $form = $crawler->selectButton('absendenButton')->form();
     $crawler = $client->submit($form, array('ticket/tan' => '123456'));
     // success screen
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertContains("Der Auftrag wurde entgegengenommen", (string) $client->getResponse()->getContent());
     $form = $crawler->selectButton('back2MerchantButton')->form();
     $crawler = $client->submit($form, array());
     // redirect screen
     $this->assertEquals(200, $client->getResponse()->getStatus());
     $this->assertContains("Die R&uuml;cksprungadresse zum H&auml;ndler wird ermittelt", (string) $client->getResponse()->getContent());
     // wait until the redirect address is populated
     sleep(3);
     $client->followRedirects(false);
     $form = $crawler->selectButton('go')->form();
     $crawler = $client->submit($form, array());
     $redirectUrl = $client->getResponse()->getHeader('Location');
 }
Exemple #24
0
 /**
  * Request the configured URL and return the crawler for its HTML document.
  *
  * Only the request itself is guarded by the try block, so each failure
  * reaches the caller with its own message instead of being replaced by
  * "Invalid Url.".
  *
  * @return \Symfony\Component\DomCrawler\Crawler
  * @throws Exception 'Invalid Url.' when the request fails,
  *                   'Could not get content from the url.' when the status is not 200,
  *                   'Content is not html.' when the content type is not text/html
  */
 private function scrapeData()
 {
     try {
         $crawler = $this->client->request('GET', $this->url);
     } catch (Exception $ex) {
         // Keep the original failure attached as the previous exception.
         throw new Exception('Invalid Url.', 0, $ex);
     }

     $response = $this->client->getResponse();
     if ($response->getStatus() != 200) {
         throw new Exception('Could not get content from the url.');
     }

     $content_type = (string) $response->getHeader('Content-Type');
     if (strpos($content_type, 'text/html') === false) {
         throw new Exception('Content is not html.');
     }

     return $crawler;
 }
Exemple #25
0
 /**
  * Fetch the session cookie, captcha image and captcha audio needed to
  * submit a CPF query.
  *
  * Requests the public query page to obtain the first Set-Cookie header,
  * then downloads the captcha image and the captcha sound with curl, sending
  * that cookie with both requests.
  *
  * @return array 'cookie' with the session cookie, 'audio' with the raw sound
  *               download and 'captchaBase64' with the image as a data URI
  */
 public static function getParams()
 {
     $client = new Client();
     $client->request('GET', 'http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublica.asp');
     $responseHeaders = $client->getResponse()->getHeaders();
     $cookie = $responseHeaders['Set-Cookie'][0];

     $requestHeaders = array(
         "Pragma: no-cache",
         "Origin: http://www.receita.fazenda.gov.br",
         "Host: www.receita.fazenda.gov.br",
         "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0",
         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
         "Accept-Encoding: gzip, deflate",
         "Referer: http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublica.asp",
         "Cookie: {$cookie}",
         "Connection: keep-alive",
     );
     // The same curl options are used for the image and the sound download.
     $options = array(
         CURLOPT_COOKIEJAR => 'cookiejar',
         CURLOPT_HTTPHEADER => $requestHeaders,
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_FOLLOWLOCATION => 1,
         CURLOPT_BINARYTRANSFER => TRUE,
     );

     $imageRequest = curl_init("http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/captcha/gerarCaptcha.asp");
     curl_setopt_array($imageRequest, $options);
     $img = curl_exec($imageRequest);
     curl_close($imageRequest);

     $audioRequest = curl_init('http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/captcha/gerarSom.asp');
     curl_setopt_array($audioRequest, $options);
     $audio = curl_exec($audioRequest);
     curl_close($audioRequest);

     return array(
         'cookie' => $cookie,
         'audio' => $audio,
         'captchaBase64' => 'data:image/png;base64,' . base64_encode($img),
     );
 }
 /**
  * Load a sitemap and pass the URLs it lists to the parent constructor.
  *
  * A response status above 399 is logged as an emergency; the document is
  * still read afterwards.
  *
  * @param UrlBuilder      $u      Converted to a string to obtain the sitemap URL
  * @param LoggerInterface $logger Receives the emergency message
  */
 public function __construct(UrlBuilder $u, LoggerInterface $logger)
 {
     // Disable `HTML` extension of CssSelector.
     CssSelector::disableHtmlExtension();

     $client = new Client();
     $crawler = $client->request('GET', (string) $u);

     $status = $client->getResponse()->getStatus();
     if ($status > 399) {
         $logger->emergency('Status ' . $status . ' getting ' . (string) $u);
     }

     // Index the pages by their own URL.
     $pages = array();
     foreach ($crawler->filter('urlset > url > loc') as $locNode) {
         $location = $locNode->nodeValue;
         $pages[$location] = $location;
     }

     parent::__construct($pages);
 }
Exemple #27
0
 /**
  * Request the release feed and register the build it announces as a source.
  *
  * Reads the build and the linux download link of the first "PS" entry from
  * the JSON response. Nothing is added when the response is not the expected
  * structure or when either value is missing or empty.
  *
  * @param OutputInterface $output Not used by this method
  * @param Client          $client Client used for the request
  */
 public function fillSources(OutputInterface $output, Client $client)
 {
     $client->request('GET', $this->url);
     /** @var Response $response */
     $response = $client->getResponse();
     $data = json_decode($response->getContent(), true, 64);

     // isset() walks the nested keys without notices and without failing on
     // unexpected shapes such as a scalar or a string where an array is expected.
     if (!is_array($data) || !isset($data['PS'][0]['build'], $data['PS'][0]['downloads']['linux']['link'])) {
         return;
     }

     $version = $data['PS'][0]['build'];
     $link = $data['PS'][0]['downloads']['linux']['link'];
     if (!is_scalar($version) || !is_scalar($link)) {
         return;
     }
     if ((string) $version === '' || (string) $link === '') {
         return;
     }

     $this->sources[] = new HttpSource($this->name, $version, $link);
 }
 /**
  * Search Google for GitHub repositories and append the ones not yet known.
  *
  * Requests up to 5 batches of 5 result pages, pausing half a second after
  * each page and two seconds after each batch. The search stops at the first
  * page without result links. Progress markers are written to the output.
  *
  * NOTE(review): the running page counter itself is sent as the `start`
  * parameter; confirm whether a result offset was intended.
  *
  * @param array $repos Repositories found so far
  * @param mixed $limit Not used by this method
  * @return array $repos including the newly found repositories
  */
 protected function searchReposOnGoogle(array $repos, $limit)
 {
     $this->output->write('Search on Google');

     $maxBatch = 5;
     $maxPage = 5;
     $pageNumber = 1;

     for ($batch = 1; $batch <= $maxBatch; $batch++) {
         for ($page = 1; $page <= $maxPage; $page++) {
             $searchUrl = sprintf('http://www.google.com/search?q=%s&start=%d', urlencode('site:github.com Symfony2 Bundle'), 1 === $pageNumber ? '' : $pageNumber);
             $crawler = $this->browser->request('GET', $searchUrl);
             $links = $crawler->filter('#center_col ol li h3 a');

             // Give up entirely on the first page without result links.
             if (0 == $links->count()) {
                 $this->output->write(sprintf(' - No link - [%s]', $this->browser->getResponse()->getStatus()));
                 break 2;
             }
             $this->output->write('.');

             foreach ($links->extract('href') as $href) {
                 if (!preg_match('#^http://github.com/([\\w-]+/[\\w-]+).*$#', $href, $match)) {
                     continue;
                 }
                 $repo = Repo::create($match[1]);

                 // Skip repositories whose name is already in the list.
                 foreach ($repos as $knownRepo) {
                     if ($repo->getName() == $knownRepo->getName()) {
                         continue 2;
                     }
                 }

                 $repos[] = $repo;
                 $this->output->write(sprintf('!'));
             }

             $pageNumber++;
             usleep(500 * 1000);
         }

         $this->output->write(sprintf('%d/%d', 10 * ($pageNumber - 1), $maxBatch * $maxPage * 10));
         sleep(2);
     }

     $this->output->writeLn(' DONE');
     return $repos;
 }
 /**
  * Scrape the menu page and store every food item for today's article.
  *
  * Walks the rows of the last table on the page. Within a row, the position
  * of each "td ul" list decides the meal (first breakfast, second lunch, any
  * later one dinner) and the id of the list's grandparent element, minus its
  * last five characters, names the dining hall. Items that are already
  * stored for the same meal, article and store are skipped. Progress is
  * echoed for every item.
  */
 public function scrape5CMenu()
 {
     $client = new Client();
     $crawler = $client->request('GET', 'https://aspc.pomona.edu/menu/');
     if ($client->getResponse()->getStatus() == 200) {
         echo '200 OK<br>';
     }

     $date = substr(Carbon::today(), 0, 10);
     // Today's article is the same for every item, so look it up once.
     $articleId = DB::table('email_articles')->where('post_date', $date)->value('article_id');

     $crawler->filter('table')->last()->filter('tr')->each(function ($row) use ($date, $articleId) {
         $row->filter('td ul')->each(function ($list, $position) use ($date, $articleId) {
             $diningHall = substr($list->parents()->parents()->attr('id'), 0, -5);

             if ($position == 0) {
                 $meal = 'breakfast';
             } elseif ($position == 1) {
                 $meal = 'lunch';
             } else {
                 $meal = 'dinner';
             }

             // The store belongs to the dining hall, not to the single item.
             $storeId = DB::table('stores')->where('sh_name', $diningHall)->value('store_id');

             $list->filter('li')->each(function ($item) use ($meal, $diningHall, $date, $articleId, $storeId) {
                 $food = $item->text();
                 echo "For " . $meal . " we have " . $food . " at " . $diningHall;
                 echo "<br>";

                 $alreadyStored = Menu::where('foodName', $food)
                     ->where('meal', '=', $meal)
                     ->where('article_id', $articleId)
                     ->where('store_id', $storeId)
                     ->exists();
                 if ($alreadyStored) {
                     echo $food . " already exists for " . $meal . " on" . $date;
                     return;
                 }

                 $entry = new Menu();
                 $entry->article_id = $articleId;
                 $entry->store_id = $storeId;
                 $entry->foodName = $food;
                 $entry->meal = $meal;
                 $entry->save();
                 echo $food . " saved for " . $diningHall . " id " . $storeId;
             });
         });
     });
 }
 /**
  * Open the cafe page, follow its hours link and read today's table cell.
  *
  * Echoes "200 OK" for each of the two requests that answers with status 200.
  * The text that is read is not stored or returned.
  */
 public function scrape()
 {
     $client = new Client();
     $crawler = $client->request('GET', 'http://collins-cmc.cafebonappetit.com/cafe/collins/');
     if ($client->getResponse()->getStatus() == 200) {
         echo '200 OK<br>';
     }

     $link = $crawler->filter('div.cafe-hours span a')->link()->getUri();

     $client2 = new Client();
     $crawler2 = $client2->request('GET', "{$link}");
     // Check the status of the second request itself, not the first one again.
     if ($client2->getResponse()->getStatus() == 200) {
         echo '200 OK<br>';
     }

     // Day of the week; the cell id ends with this integer after the "-".
     $dt = Carbon::today()->dayOfWeek;
     $snack = $crawler2->filter("table td#td-2051-{$dt}")->text();
 }