/**
 * Collect the data for one product listing and append it to the results.
 *
 * Reads title and unit price from the listing node, requests the linked
 * product page to measure its size and extract its description, then adds
 * the unit price to the running total.
 *
 * @param Crawler $node listing node of a single product
 */
private function addProduct(Crawler $node)
{
    /** @var Crawler $titleLink */
    $titleLink = $node->filter('h3 a');

    $item = new \stdClass();
    $item->title = trim($titleLink->text());
    $item->unit_price = $this->getPrice($node->filter('.pricePerUnit'));

    $detailPage = $this->client->request('GET', $titleLink->attr('href'));
    $item->size = $this->bytesToKb(strlen($this->client->getResponse()->getContent()));
    $item->description = '';

    $heading = $detailPage->filterXPath('//h3[.="Description"]');
    $candidates = $heading->count() > 0 ? $heading->siblings() : array();

    foreach ($candidates as $sibling) {
        // product pages have different structures: stop at the next heading
        if ($sibling->tagName == 'h3') {
            break;
        }

        if ($item->description != "") {
            $item->description .= "\n";
        }

        // @TODO address formatting issues - breaks to new lines
        // collapse runs of whitespace into one space, but keep new lines
        $item->description .= trim(preg_replace("/[^\\S\r\n]+/", " ", $sibling->nodeValue));
    }

    $this->total += $item->unit_price; // increment total
    $this->results[] = $item;
}
/**
 * The lists page answers with HTTP 200 and shows exactly one "Lists" heading.
 */
public function testUserSeesPageTitle()
{
    $browser = new Client();
    $page = $browser->request('GET', 'http://localhost:8000/lists');

    $status = $browser->getResponse()->getStatus();
    $this->assertEquals(200, $status);

    $headings = $page->filter('h1:contains("Lists")');
    $this->assertCount(1, $headings);
}
/**
 * Log in through the sign-in form, then request a page that needs the session.
 *
 * The trait helper is deliberately not used: the test spans several page
 * requests and the session has to persist on the remote server.
 */
public function testLoggedIn()
{
    $browser = new Client();

    // Step 1: the login page itself must load.
    $loginPage = $browser->request('GET', "http://localhost/login");
    $this->assertEquals(200, $browser->getResponse()->getStatus());

    // Step 2: fill in the form. Goutte\Client builds the form from the page,
    // so the csrf _token does not have to be parsed out of the HTML by hand.
    $signIn = $loginPage->selectButton('Sign in')->form();
    $signIn->setValues(['email' => '*****@*****.**', 'password' => 'verysecret']);

    // Step 3: submit and expect a rendered page (response code 200).
    $browser->submit($signIn);
    $this->assertEquals(200, $browser->getResponse()->getStatus());

    // Step 4: the testimonial page must now be reachable.
    $browser->request('GET', 'http://localhost/add-testimonial');
    $this->assertEquals(200, $browser->getResponse()->getStatus());
}
/**
 * Sending the load data as JSON to the vending machine endpoint yields
 * HTTP 200 with the literal body "null".
 */
public function testSyncVendingMachineLoads()
{
    $serial = $this->getVendingMachineSerial();
    $auth = $this->getVendingMachineAuthentificationString();
    $endpoint = $this->getVendingMachineConnectionUrl(sprintf("%s?%s", $serial, $auth));

    $browser = new Client();
    $browser->request(
        VendingMachineLoad::getSyncMethod(),
        $endpoint,
        [],
        [],
        ['CONTENT_TYPE' => 'application/json'],
        VendingMachineLoad::getData()
    );

    $reply = $browser->getResponse();
    $this->assertEquals(200, $reply->getStatus());
    $this->assertEquals('null', $browser->getResponse()->getContent());
}
/**
 * Return the client's current response, or null when no client is set.
 *
 * @return null|Response
 */
protected function getResponse()
{
    return $this->client ? $this->client->getResponse() : null;
}
/**
 * Request the account's public Twitter page and record the state it is in.
 *
 * - non-200 response: the account is flagged as not found;
 * - page content mentions "suspended": the account is flagged as suspended;
 * - newest post older than 24 hours, or no posts found: the account is
 *   flagged as having no recent posts.
 *
 * @return false|null false when the account is not found or suspended,
 *                    null otherwise
 */
private function fetchDetails()
{
    $url = 'https://www.twitter.com/' . $this->getAccountName();

    $client = new Client();
    $client->followRedirects();
    $crawler = $client->request('GET', $url);

    /** @var Response $response */
    $response = $client->getResponse();

    if ($response->getStatus() != 200) {
        $this->setIsNotFound(true);

        return false;
    }

    // stripos() returns 0 for a match at the very start of the content, so
    // the result must be compared against false rather than tested as a bool.
    if (stripos($response->getContent(), 'suspended') !== false) {
        $this->setIsSuspended(true);

        return false;
    }

    $postTimes = $crawler->filter('#stream-items-id li ._timestamp')->each(function (Crawler $node) {
        return $node->attr('data-time');
    });

    // No timestamps on the page: there is no recent post by definition.
    // (Previously this case read an undefined offset and reached the same
    // outcome only by accident.)
    if (empty($postTimes)) {
        $this->setDoesntHaveRecentPosts(true);

        return null;
    }

    rsort($postTimes);
    $lastPostTime = (int) $postTimes[0];
    $hoursSinceLastPost = round((time() - $lastPostTime) / 60 / 60, 2);

    if ($hoursSinceLastPost > 24) {
        $this->setDoesntHaveRecentPosts(true);
    }

    return null;
}
/**
 * Download from the configured start page and, on request, from every
 * page reachable through "next" links.
 *
 * A transfer error on the first request is printed and ends the process
 * with exit code 1.
 *
 * @param boolean $allPages follow the next links after the first page
 */
public function run($allPages)
{
    $browser = new Client();
    $browser->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 30);
    $browser->setHeader('User-Agent', $this->config['user_agent']);

    try {
        $page = $browser->request('GET', $this->config['url']);
    } catch (TransferException $e) {
        echo $e->getMessage() . PHP_EOL;
        exit(1);
    }

    if ($browser->getResponse()->getStatus() != 200) {
        echo "site not available\n";

        return;
    }

    $this->getUrlsAndDownload($page);

    if (!$allPages) {
        return;
    }

    for ($next = $this->getNextLink($page); $next; $next = $this->getNextLink($page)) {
        $page = $browser->click($next);
        $this->getUrlsAndDownload($page);
    }
}
/**
 * Post the configured credentials to the Orange wifi portal.
 *
 * Success is recognised by the final request URI being the Orange home
 * page. A connection error is reported and ends the process with exit
 * code 1.
 *
 * @return int|null 1 when the login failed, nothing on success
 */
protected function loginToOrange()
{
    $config = $this->getHelperSet()->get('config');
    $this->outputMessage('Login to orange wifi ...');

    // The portal page has no button or input to drive, so the form
    // submission is forged by hand.
    $fields = array(
        'username' => $config['login'],
        'password' => $config['pass'],
        'isCgu' => 'true',
        'code' => 0,
        'lang' => 'fr',
        'auth' => 1,
        'restrictedCode' => '',
        'restrictedProfile' => 0,
        'restrictedRealm' => '',
        'originForm' => 'true',
        'tab' => '1',
    );

    try {
        $browser = new Client();
        $page = $browser->request('POST', 'https://hautdebitmobile.orange.fr:8443/home/wassup', $fields);
    } catch (\Exception $e) {
        $this->outputError('Connection error : ' . $e->getMessage(), true);
        exit(1);
    }

    // A successful login ends on the orange home page after the redirect.
    if ($browser->getRequest()->getUri() == 'http://www.orange.fr') {
        $this->outputMessage('Login success !');

        return;
    }

    $message = 'Login failed';
    $errorBox = $page->filterXPath("//div[@id='loginFormWassupErrorMessage']");
    if ($errorBox->count() == 1) {
        $message .= ' : ' . trim($errorBox->text());
    }
    $this->outputError($message);

    // Dump the raw response when running with -vv.
    $veryVerbose = $this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE;
    if ($veryVerbose) {
        echo $browser->getResponse();
    }

    return 1;
}
/**
 * Request a URL with GET and report the status of the response.
 *
 * @param $url address to request
 * @return mixed status code reported by the client's response
 */
function crawl($url)
{
    $browser = new Client();
    $browser->request('GET', $url);

    return $browser->getResponse()->getStatus();
}
/**
 * The home page answers with HTTP 200 and shows exactly one welcome heading.
 *
 * The method name starts with a lower-case "test": PHPUnit discovers test
 * methods by a case-sensitive "test" prefix, so the former
 * "TestUserSeesWelcomeMessage" spelling was never executed. PHP method
 * names are case-insensitive, so existing direct callers are unaffected.
 */
public function testUserSeesWelcomeMessage()
{
    $client = new Client();
    $crawler = $client->request('GET', 'http://homestead.app/');

    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertCount(1, $crawler->filter('h1:contains("Welcome to TODOParrot")'));
}
/**
 * Crawl one URL, record its status and, for HTML pages, descend into its links.
 *
 * The status code is stored under the key returned by getPathFromUrl().
 * Child links are only extracted for entries explicitly marked as not
 * external. Any exception is recorded under the raw URL as a 404 together
 * with the error code and message.
 *
 * @param string $url   address to crawl
 * @param int    $depth remaining depth; children are traversed with $depth - 1
 */
protected function traverseSingle($url, $depth)
{
    try {
        $browser = new Client();
        $browser->followRedirects();
        $page = $browser->request('GET', $url);

        $status = $browser->getResponse()->getStatus();
        $key = $this->getPathFromUrl($url);
        $this->links[$key]['status_code'] = $status;

        if ($status !== 200) {
            return;
        }

        // Only HTML documents are inspected any further.
        $contentType = $browser->getResponse()->getHeader('Content-Type');
        if (strpos($contentType, 'text/html') === false) {
            return;
        }

        $this->extractTitleInfo($page, $key);

        $children = array();
        $isInternal = isset($this->links[$key]['external_link'])
            && $this->links[$key]['external_link'] === false;
        if ($isInternal) {
            $children = $this->extractLinksInfo($page, $key);
        }

        $this->links[$key]['visited'] = true;
        $this->traverseChildren($children, $depth - 1);
    } catch (CurlException $failure) {
        $this->links[$url]['status_code'] = '404';
        $this->links[$url]['error_code'] = $failure->getCode();
        $this->links[$url]['error_message'] = $failure->getMessage();
    } catch (\Exception $failure) {
        $this->links[$url]['status_code'] = '404';
        $this->links[$url]['error_code'] = $failure->getCode();
        $this->links[$url]['error_message'] = $failure->getMessage();
    }
}
/**
 * Request a client-credentials token from the authentication endpoint.
 *
 * @param $basic value placed after "Basic " in the Authorization header
 * @return mixed the JSON response decoded into associative arrays
 * @throws \Exception when the response body cannot be decoded as JSON
 */
public function postOauth2Token($basic)
{
    $this->client->setHeader('Authorization', 'Basic ' . $basic);
    $this->client->setHeader('Content-Type', 'application/x-www-form-urlencoded;charset=UTF-8');

    $endpoint = $this->getApiBaseUrl() . '/' . $this->authenticationUri;
    $payload = 'grant_type=client_credentials';
    $this->client->request('POST', $endpoint, [], [], [], $payload);

    /** @var $reply Response */
    $reply = $this->client->getResponse();
    $decoded = json_decode($reply->getContent(), true);

    $jsonError = json_last_error();
    if (JSON_ERROR_NONE !== $jsonError) {
        throw new \Exception('An error occurred when decoding the response (Error code: ' . $jsonError . ')');
    }

    return $decoded;
}
/**
 * Detect which type of forum is being requested.
 *
 * Requests the configured URI with the configured HTTP method and walks
 * over the page's meta tags. The per-tag callback is still empty, so
 * nothing is detected or returned yet.
 */
public function detect()
{
    $client = new Client();
    $crawler = $client->request($this->method, $this->uri);

    // NOTE(review): the selector variable was previously undefined when it
    // was passed to filter(). 'meta' is assumed from the original comment
    // ("filter by metas") — confirm the intended selector.
    $metas = 'meta';

    // Filter by meta tags.
    $crawler->filter($metas)->each(function (Crawler $node, $i) {
        // TODO: inspect each meta tag to identify the forum software.
    });
}
/**
 * Perform an HTTP request and return the response body.
 *
 * A URL that starts with "/" is treated as a path on the current server
 * and is prefixed with "http://" plus $_SERVER['SERVER_NAME'].
 *
 * @param string $url        absolute URL, or a path starting with "/"
 * @param string $method     HTTP method
 * @param array  $parameters request parameters handed to the client
 * @return string content of the response
 */
public function request($url, $method = 'GET', $parameters = array())
{
    $isLocalPath = 0 === strpos($url, '/');
    $target = $isLocalPath
        ? 'http://' . $_SERVER['SERVER_NAME'] . $url
        : $url;

    $browser = new Client();
    $browser->request($method, $target, $parameters);

    return $browser->getResponse()->getContent();
}
/**
 * Submitting the form behind the "OK" button yields HTTP 200 and the
 * success message.
 */
public function testSubmitFormOk()
{
    $browser = new Client();
    $startPage = $browser->request('GET', 'http://localhost');

    $okForm = $startPage->selectButton('OK')->form();
    $resultPage = $browser->submit($okForm);

    $status = $browser->getResponse()->getStatus();
    $this->assertEquals(200, $status);

    $message = $resultPage->filter('.message')->text();
    $this->assertEquals('Data successfully submitted', $message);
}
/**
 * Request a URL and return the crawler for the resulting page.
 *
 * @param string $url The url to crawl
 *
 * @return \Symfony\Component\DomCrawler\Crawler crawler returned by the Goutte client
 *
 * @throws EntityClientException when the response status is not 200
 */
protected function getBaseCrawler($url)
{
    $browser = new GoutteClient();
    $page = $browser->request('GET', $url);
    $status = $browser->getResponse()->getStatus();

    if ($status == 200) {
        return $page;
    }

    throw new EntityClientException(
        sprintf('Request for url: %s, returned with status code: %s', $url, $status)
    );
}
/**
 * Scrape the price table of every product from brickwat.ch and write the
 * collected prices to "prijzen-<date>.csv".
 *
 * Changes compared with the previous version:
 * - $prices and $companys are initialised up front, so writeResults() no
 *   longer receives undefined variables when no product page could be read;
 * - the leftover greeting code (its output was commented out) and the
 *   unused counter were removed;
 * - an unopenable output file now returns exit code 1 instead of calling
 *   die, which reported success to the shell.
 *
 * @param InputInterface  $input  unused
 * @param OutputInterface $output receives progress lines
 *
 * @return int 0 on success, 1 when the output file cannot be opened
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $file = fopen("prijzen-" . date('Y-m-d') . ".csv", "w");
    if (!$file) {
        $output->writeln("can not open file");

        return 1;
    }

    $client = new Client();
    $client->getClient()->setDefaultOption('config/curl/' . CURLOPT_TIMEOUT, 60);

    $prices = array();   // product => (company => price)
    $companys = array(); // company => company, used as a set

    foreach ($this->getProducts() as $product) {
        $crawler = $client->request('GET', 'http://brickwat.ch/' . $product);

        if ($client->getResponse()->getStatus() == 200) {
            $rows = $crawler->filter('#prices')->filter('tr.row-collapse');
            $rowCount = $rows->count();
            $output->writeln("Sites found for product " . $product . ": " . $rowCount);
            $prices[$product] = array();

            for ($i = 0; $i < $rowCount; $i++) {
                $cells = $rows->eq($i)->filter('td');

                // The company is given either as a logo (title attribute of
                // the image) or as plain text in the first cell.
                $logo = $cells->eq(0)->filter('a img');
                if ($logo->count()) {
                    $company = $logo->attr('title');
                    $price = $cells->eq(2)->filter('a')->text();
                } else {
                    $company = $cells->eq(0)->text();
                    $price = $cells->eq(2)->text();
                }

                $prices[$product][$company] = $price;
                $companys[$company] = $company;
            }
        }

        // Pause half a second between requests.
        usleep(500000);
    }

    $this->writeResults($output, $prices, $companys, $file);
    fclose($file);

    return 0;
}
/**
 * Load the content for this URL through the given client.
 *
 * Nothing happens when no provider is set. When the provider class
 * declares a static $endpoint, that endpoint is called with the
 * raw-url-encoded URL appended and the JSON-decoded body is stored;
 * otherwise the URL is requested directly and the value returned by the
 * client is stored.
 *
 * @param Client $client
 */
public function request(Client $client)
{
    if (!isset($this->provider)) {
        return;
    }

    $providerClass = static::$providers[$this->provider];

    if (!isset($providerClass::$endpoint)) {
        $this->content = $client->request('GET', $this->url);

        return;
    }

    $client->request('GET', $providerClass::$endpoint . rawurlencode($this->url));
    $body = $client->getResponse()->getContent();
    $this->content = json_decode($body);
}
/**
 * Search Twitter for a query and collect the GitHub bundles linked in the tweets.
 *
 * Every URL found in a tweet is examined once: a direct GitHub URL yields
 * the repository name immediately; any other URL is downloaded and the
 * name is taken from the page title when that is a GitHub repository page.
 * Valid names are added to $repos keyed by their lower-cased name.
 *
 * Compared with the previous version, a response without a "results" list
 * (invalid JSON or an error payload), tweets without text and failed
 * downloads are skipped instead of raising notices.
 *
 * @param string $query search terms
 * @param array  $repos bundles found so far, keyed by lower-cased name
 * @param mixed  $limit not used by this method
 *
 * @return array $repos extended with the newly found bundles
 */
protected function searchBundlesOnTwitter($query, array $repos, $limit)
{
    $this->output->write(sprintf('Search "%s" on Twitter', $query));

    $searchUrl = sprintf('http://search.twitter.com/search.json?q=%s&rpp=%d', urlencode($query), 100);
    $this->browser->request('GET', $searchUrl);
    $data = json_decode($this->browser->getResponse()->getContent(), true);

    $tweets = array();
    if (is_array($data) && isset($data['results']) && is_array($data['results'])) {
        $tweets = $data['results'];
    }

    $alreadyFound = array();

    foreach ($tweets as $result) {
        if (!isset($result['text'])) {
            continue;
        }

        // Search urls in the tweet
        if (!preg_match_all("#https?://([-\\w\\.]+)+(:\\d+)?(/([\\w/_\\.]*(\\?\\S+)?)?)?#i", $result['text'], $found)) {
            continue;
        }

        foreach ($found[0] as $link) {
            $link = rtrim($link, '.');

            if (isset($alreadyFound[$link])) {
                continue;
            }
            $alreadyFound[$link] = true;

            if (preg_match('#^https?://github.com/([^/]+/[^/]+)(/.*)?#', $link, $m)) {
                // The url is directly a github url
                $name = $m[1];
            } else {
                // A redirect/multi-redirect link => parse the resulting page
                try {
                    $html = file_get_contents($link);
                } catch (\ErrorException $e) {
                    continue;
                }

                // Without an error handler that throws, a failed download
                // is reported as false.
                if ($html === false) {
                    continue;
                }

                if (!preg_match('#<title>([a-z0-9-_]+/[^\'"/ ]+) - GitHub</title>#i', $html, $m)) {
                    continue;
                }
                $name = $m[1];
            }

            if (!$this->isValidBundleName($name)) {
                continue;
            }

            $repos[strtolower($name)] = new Bundle($name);
        }
    }

    $this->output->writeln('... DONE');

    return $repos;
}
/**
 * Fetch a random funny GIF from Giphy and store it for today's article.
 *
 * When the fetched image is already stored for some article, one more
 * random image is requested and used instead. The image is then written
 * to the Gifs row of today's article, creating the row if it does not
 * exist yet, and finally echoed as an <img> tag.
 *
 * Fix: previously, when the first image was a duplicate and today's
 * article had no Gifs row yet ("stage 3"), the code ran an UPDATE on the
 * missing row, so the replacement image was never stored. That case now
 * creates the row.
 */
public function getThisWeeksGifs()
{
    $url = 'http://api.giphy.com/v1/gifs/random?api_key=' . env('GIPHY_API_KEY') . '&tag=funny&fmt=html';

    $client = new Client();
    $crawler = $client->request('GET', $url);

    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    // Casting Carbon to string and keeping 10 characters leaves the date part.
    $date = substr(Carbon::today(), 0, 10);
    $imgUrl = $crawler->filter('img')->attr('src');
    $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

    $hasRow = Gifs::where('article_id', $id)->exists();
    $isDuplicate = Gifs::where('imgUrl', $imgUrl)->exists();

    if ($isDuplicate) {
        // Image already used: draw one more random image.
        $retryClient = new Client();
        $imgUrl = $retryClient->request('GET', $url)->filter('img')->attr('src');
    }

    if ($hasRow) {
        Gifs::where('article_id', $id)->update(['imgUrl' => $imgUrl]);
    } else {
        $gif = new Gifs();
        $gif->article_id = $id;
        $gif->imgUrl = $imgUrl;
        $gif->save();
    }

    // Progress output, kept identical to the former four "stages".
    echo $isDuplicate ? "found new image" : "Gif for " . $date . " stored!";
    if ($hasRow) {
        echo $isDuplicate ? "stage 1" : "stage 2";
    } else {
        echo $isDuplicate ? "stage 3" : "stage 4";
    }

    echo '<img src="' . $imgUrl . '">';
}
/**
 * Disconnect from the Orange wifi portal.
 *
 * A connection error is reported and ends the process with exit code 1.
 *
 * @param InputInterface  $input
 * @param OutputInterface $output
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    parent::execute($input, $output);
    $this->outputMessage('Logout from orange wifi ...');

    $browser = new Client();

    try {
        $browser->request('GET', 'https://hautdebitmobile.orange.fr:8443/home/disconnect');

        // Dump the raw response when running with -vv.
        $veryVerbose = $this->output->getVerbosity() > OutputInterface::VERBOSITY_VERBOSE;
        if ($veryVerbose) {
            echo $browser->getResponse();
        }
    } catch (\Exception $e) {
        $this->outputError('Connection error : ' . $e->getMessage(), true);
        exit(1);
    }

    $this->outputMessage('done');
}
/**
 * Fetch the session cookie and the captcha image needed for a CNPJ query.
 *
 * Opens the request page to obtain the first Set-Cookie header, then
 * downloads the captcha with that cookie.
 *
 * @throws Exception when the downloaded data is not a readable image
 * @return array 'cookie' (first Set-Cookie header value) and
 *               'captchaBase64' (the captcha as a PNG data URI)
 */
public static function getParams()
{
    $browser = new Client();
    $browser->request('GET', 'http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/Cnpjreva_Solicitacao2.asp');

    $responseHeaders = $browser->getResponse()->getHeaders();
    $cookie = $responseHeaders['Set-Cookie'][0];

    $requestHeaders = array(
        "Pragma: no-cache",
        "Origin: http://www.receita.fazenda.gov.br",
        "Host: www.receita.fazenda.gov.br",
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding: gzip, deflate",
        "Referer: http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/cnpjreva_solicitacao2.asp",
        "Cookie: flag=1; {$cookie}",
        "Connection: keep-alive",
    );

    $curlOptions = array(
        CURLOPT_COOKIEJAR => 'cookiejar',
        CURLOPT_HTTPHEADER => $requestHeaders,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_BINARYTRANSFER => true,
    );

    $handle = curl_init("http://www.receita.fazenda.gov.br/pessoajuridica/cnpj/cnpjreva/captcha/gerarCaptcha.asp");
    curl_setopt_array($handle, $curlOptions);
    $captcha = curl_exec($handle);
    curl_close($handle);

    // Anything that cannot be decoded as an image is not a captcha.
    if (!@imagecreatefromstring($captcha)) {
        throw new Exception('Não foi possível capturar o captcha');
    }

    return array(
        'cookie' => $cookie,
        'captchaBase64' => 'data:image/png;base64,' . base64_encode($captcha),
    );
}
/**
 * Walk through the complete hosted giropay flow against the live test bank:
 * login, mobile number confirmation, TAN entry, success page and the
 * redirect back to the merchant.
 *
 * Fix: the last two content assertions called __toString() on the string
 * returned by getContent(), which is a fatal error; they now assert on the
 * content string directly.
 *
 * @group liveTest
 */
public function testRequestHostedEndpointLive()
{
    $requestDTO = $this->generatePaymentRequestDTO();
    $response = $this->giropayHostedService->requestHostedEndpoint($requestDTO);
    $this->assertInstanceOf("PHPCommerce\\Payment\\Dto\\PaymentResponseDTO", $response);

    $redirectUrl = $response->getResponseMap()[GiropayConstants::HOSTED_REDIRECT_URL];
    $client = new Client();
    $crawler = $client->request('GET', $redirectUrl);

    // bank login screen, login with test data
    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertContains("Online-Banking: Anmelden", $client->getResponse()->__toString());
    $form = $crawler->selectButton('Sicher anmelden')->form();
    $crawler = $client->submit($form, array(
        'account/addition[@name=benutzerkennung]' => 'sepatest1',
        'ticket/pin' => '12345',
    ));

    // validation screen, just click next button
    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertContains(
        "Bitte wählen Sie eine Mobilfunknummer für den smsTAN-Versand",
        $client->getResponse()->__toString()
    );
    $form = $crawler->selectButton('weiterButton')->form();
    $crawler = $client->submit($form, array());

    // tan screen, enter tan
    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertContains(
        "Bitte kontrollieren Sie vor der Eingabe der TAN die per SMS versandten Auftragsdaten",
        $client->getResponse()->__toString()
    );
    $form = $crawler->selectButton('absendenButton')->form();
    $crawler = $client->submit($form, array('ticket/tan' => '123456'));

    // success screen
    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertContains("Der Auftrag wurde entgegengenommen", $client->getResponse()->getContent());
    $form = $crawler->selectButton('back2MerchantButton')->form();
    $crawler = $client->submit($form, array());

    // redirect screen
    $this->assertEquals(200, $client->getResponse()->getStatus());
    $this->assertContains(
        "Die Rücksprungadresse zum Händler wird ermittelt",
        $client->getResponse()->getContent()
    );

    // wait until the redirect address is populated
    sleep(3);

    // Do not follow the redirect, so the Location header can be read.
    $client->followRedirects(false);
    $form = $crawler->selectButton('go')->form();
    $crawler = $client->submit($form, array());

    // NOTE(review): the merchant redirect URL is read but not asserted on.
    $redirectUrl = $client->getResponse()->getHeader('Location');
}
/**
 * Request the configured URL and return the crawler for its HTML.
 *
 * Fix: the former catch-all wrapped the whole body, so the method's own
 * "not html" and "no content" exceptions were caught and replaced by
 * 'Invalid Url.'. Only the request itself is guarded now, each failure
 * keeps its own message, and the original request error is attached as
 * the previous exception. The "Could get content" typo is corrected.
 *
 * @return \Symfony\Component\DomCrawler\Crawler
 * @throws Exception 'Invalid Url.' when the request itself fails,
 *                   'Could not get content from the url.' on a non-200 status,
 *                   'Content is not html.' when Content-Type is not text/html
 */
private function scrapeData()
{
    try {
        $crawler = $this->client->request('GET', $this->url);
    } catch (Exception $ex) {
        throw new Exception('Invalid Url.', 0, $ex);
    }

    $response = $this->client->getResponse();

    if ($response->getStatus() != 200) {
        throw new Exception('Could not get content from the url.');
    }

    // A missing header is treated like a non-HTML content type.
    $contentType = (string) $response->getHeader('Content-Type');
    if (strpos($contentType, 'text/html') === false) {
        throw new Exception('Content is not html.');
    }

    return $crawler;
}
/**
 * Fetch the session cookie plus the image and audio captcha needed for a
 * CPF query.
 *
 * Opens the public query page to obtain the first Set-Cookie header, then
 * downloads both captcha variants with that cookie.
 *
 * @return array 'cookie' (first Set-Cookie header value), 'audio' (raw
 *               result of the audio captcha download) and 'captchaBase64'
 *               (the image captcha as a PNG data URI)
 */
public static function getParams()
{
    $browser = new Client();
    $browser->request('GET', 'http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublica.asp');

    $responseHeaders = $browser->getResponse()->getHeaders();
    $cookie = $responseHeaders['Set-Cookie'][0];

    $requestHeaders = array(
        "Pragma: no-cache",
        "Origin: http://www.receita.fazenda.gov.br",
        "Host: www.receita.fazenda.gov.br",
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding: gzip, deflate",
        "Referer: http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/ConsultaPublica.asp",
        "Cookie: {$cookie}",
        "Connection: keep-alive",
    );

    // The same options are used for both downloads.
    $curlOptions = array(
        CURLOPT_COOKIEJAR => 'cookiejar',
        CURLOPT_HTTPHEADER => $requestHeaders,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_BINARYTRANSFER => true,
    );

    $imageHandle = curl_init("http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/captcha/gerarCaptcha.asp");
    curl_setopt_array($imageHandle, $curlOptions);
    $image = curl_exec($imageHandle);
    curl_close($imageHandle);

    $audioHandle = curl_init('http://www.receita.fazenda.gov.br/aplicacoes/atcta/cpf/captcha/gerarSom.asp');
    curl_setopt_array($audioHandle, $curlOptions);
    $audio = curl_exec($audioHandle);
    curl_close($audioHandle);

    return array(
        'cookie' => $cookie,
        'audio' => $audio,
        'captchaBase64' => 'data:image/png;base64,' . base64_encode($image),
    );
}
/**
 * Build the page list from the sitemap found at the given URL.
 *
 * Every "urlset > url > loc" entry becomes a page, keyed by its own URL.
 * A response status above 399 is logged as an emergency; processing
 * continues with whatever the crawler returned.
 *
 * @param UrlBuilder      $u      sitemap address; cast to string for the request
 * @param LoggerInterface $logger receives the emergency message
 */
public function __construct(UrlBuilder $u, LoggerInterface $logger)
{
    // Disable `HTML` extension of CssSelector.
    CssSelector::disableHtmlExtension();

    $browser = new Client();
    $document = $browser->request('GET', (string) $u);

    $httpStatus = $browser->getResponse()->getStatus();
    if ($httpStatus > 399) {
        $logger->emergency('Status ' . $httpStatus . ' getting ' . (string) $u);
    }

    $found = array();
    foreach ($document->filter('urlset > url > loc') as $locNode) {
        $address = $locNode->nodeValue;
        $found[$address] = $address;
    }

    parent::__construct($found);
}
/**
 * Read the release feed at this object's URL and register the Linux
 * download of its first "PS" entry as a source.
 *
 * Nothing is added when the feed cannot be decoded, or when the build
 * version or the download link is empty.
 *
 * @param OutputInterface $output not used by this method
 * @param Client          $client client used for the request
 */
public function fillSources(OutputInterface $output, Client $client)
{
    $feedUrl = $this->url;
    $sourceName = $this->name;

    $client->request('GET', $feedUrl);

    /** @var Response $reply */
    $reply = $client->getResponse();
    $feed = json_decode($reply->getContent(), true, 64);

    if (!$feed) {
        return;
    }

    // Reference assignment: missing keys are created as null rather than
    // raising undefined-index notices.
    $build =& $feed['PS'][0]['build'];
    $download =& $feed['PS'][0]['downloads']['linux']['link'];

    if (strlen($build) && strlen($download)) {
        $this->sources[] = new HttpSource($sourceName, $build, $download);
    }
}
/**
 * Search Google for Symfony2 bundles hosted on GitHub and collect them.
 *
 * Up to 5 batches of 5 result pages are requested, with a short pause
 * after every page and a longer one after every batch. The search stops
 * as soon as a page yields no result links.
 *
 * Fix: Google's "start" parameter is the zero-based offset of the first
 * result, not a page number. The previous code sent the page number, so
 * consecutive requests overlapped almost entirely. The offset is now
 * (page - 1) * 10, which also matches the progress counter printed per
 * batch.
 *
 * @param array $repos repositories found so far
 * @param mixed $limit not used by this method
 *
 * @return array $repos extended with the newly found repositories
 */
protected function searchReposOnGoogle(array $repos, $limit)
{
    $this->output->write('Search on Google');

    $maxBatch = 5;
    $maxPage = 5;
    $resultsPerPage = 10;
    $pageNumber = 1;

    for ($batch = 1; $batch <= $maxBatch; $batch++) {
        for ($page = 1; $page <= $maxPage; $page++) {
            $searchUrl = sprintf(
                'http://www.google.com/search?q=%s&start=%d',
                urlencode('site:github.com Symfony2 Bundle'),
                $resultsPerPage * ($pageNumber - 1)
            );
            $crawler = $this->browser->request('GET', $searchUrl);
            $links = $crawler->filter('#center_col ol li h3 a');

            if (0 != $links->count()) {
                $this->output->write('.');
            } else {
                // No results at all: report the status and stop searching.
                $this->output->write(sprintf(' - No link - [%s]', $this->browser->getResponse()->getStatus()));
                break 2;
            }

            foreach ($links->extract('href') as $href) {
                if (!preg_match('#^http://github.com/([\\w-]+/[\\w-]+).*$#', $href, $match)) {
                    continue;
                }

                $repo = Repo::create($match[1]);

                $alreadyFound = false;
                foreach ($repos as $_repo) {
                    if ($repo->getName() == $_repo->getName()) {
                        $alreadyFound = true;
                        break;
                    }
                }

                if (!$alreadyFound) {
                    $repos[] = $repo;
                    $this->output->write(sprintf('!'));
                }
            }

            $pageNumber++;
            usleep(500 * 1000);
        }

        $this->output->write(sprintf(
            '%d/%d',
            $resultsPerPage * ($pageNumber - 1),
            $maxBatch * $maxPage * $resultsPerPage
        ));
        sleep(2);
    }

    $this->output->writeLn(' DONE');

    return $repos;
}
/**
 * Scrape today's dining hall menus and store every food item not yet saved.
 *
 * For every row of the last table on the page, each "td ul" list is one
 * meal of a dining hall. The position of the list within its row selects
 * the meal: 0 is breakfast, 1 is lunch, anything else is dinner. Each list
 * item is saved as a Menu entry for today's article unless an identical
 * entry already exists.
 *
 * Changes compared with the previous version:
 * - removed the "$count = 0" / "$count++" statements, which had no effect
 *   (the first was shadowed by the callback parameter, the second changed
 *   a by-value copy);
 * - the article id and store id lookups are done once per page and once
 *   per list instead of once per food item.
 */
public function scrape5CMenu()
{
    $client = new Client();
    $crawler = $client->request('GET', 'https://aspc.pomona.edu/menu/');

    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    // Casting Carbon to string and keeping 10 characters leaves the date part.
    $date = substr(Carbon::today(), 0, 10);
    $articleId = DB::table('email_articles')->where('post_date', $date)->value('article_id');

    $crawler->filter('table')->last()->filter('tr')->each(function ($row) use ($date, $articleId) {
        $row->filter('td ul')->each(function ($list, $position) use ($date, $articleId) {
            // The grandparent's id is the hall name followed by a
            // 5-character suffix, which is cut off here.
            $diningHall = substr($list->parents()->parents()->attr('id'), 0, -5);
            $storeId = DB::table('stores')->where('sh_name', $diningHall)->value('store_id');

            if ($position == 0) {
                $meal = 'breakfast';
            } elseif ($position == 1) {
                $meal = 'lunch';
            } else {
                $meal = 'dinner';
            }

            $list->filter('li')->each(function ($item) use ($meal, $diningHall, $date, $articleId, $storeId) {
                $food = $item->text();
                echo "For " . $meal . " we have " . $food . " at " . $diningHall;
                echo "<br>";

                $alreadyStored = Menu::where('foodName', $food)
                    ->where('meal', '=', $meal)
                    ->where('article_id', $articleId)
                    ->where('store_id', $storeId)
                    ->exists();

                if ($alreadyStored) {
                    echo $food . " already exists for " . $meal . " on" . $date;

                    return;
                }

                $entry = new Menu();
                $entry->article_id = $articleId;
                $entry->store_id = $storeId;
                $entry->foodName = $food;
                $entry->meal = $meal;
                $entry->save();
                echo $food . " saved for " . $diningHall . " id " . $storeId;
            });
        });
    });
}
/**
 * Open the Collins cafe page, follow its cafe-hours link and read today's
 * snack cell from the linked page.
 *
 * Fix: the second "200 OK" check re-tested the status code of the first
 * request; it now inspects the response of the second request. The
 * redundant empty initialisation of $link was removed.
 */
public function scrape()
{
    $client = new Client();
    $crawler = $client->request('GET', 'http://collins-cmc.cafebonappetit.com/cafe/collins/');

    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $link = $crawler->filter('div.cafe-hours span a')->link()->getUri();

    $client2 = new Client();
    $crawler2 = $client2->request('GET', $link);

    if ($client2->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    // The cell id ends in the day of the week: "td-2051-<dayOfWeek>".
    $dt = Carbon::today()->dayOfWeek;

    // NOTE(review): the snack text is read but not yet used or returned.
    $snack = $crawler2->filter("table td#td-2051-{$dt}")->text();
}