/**
 * Downloads the page at the URL given in the request input and walks the
 * child elements of every "play" link found in the video list.
 *
 * NOTE(review): the URL comes straight from request input and is fetched
 * server-side without any validation (SSRF risk) — restrict scheme/host
 * before exposing this action.
 * NOTE(review): the extracted name/src values are neither stored nor
 * returned; the method looks unfinished.
 *
 * @return void
 *
 * @throws \RuntimeException When the page cannot be downloaded.
 */
public function postCrawler()
{
    $input = $this->input();
    $url = $input['url'];

    $ch = curl_init();
    // Set URL to download
    curl_setopt($ch, CURLOPT_URL, $url);
    // Set a referer
    curl_setopt($ch, CURLOPT_REFERER, "http://www.example.org/yay.htm");
    // User agent
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    // Include response headers in the result? (0 = no, 1 = yes)
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Return the body from curl_exec() instead of printing it
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Timeout in seconds
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    $output = curl_exec($ch);
    // Read the error text before the handle is released.
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; fail with the transport error
    // instead of handing a boolean to the crawler.
    if ($output === false) {
        throw new \RuntimeException('Unable to download ' . $url . ': ' . $error);
    }

    $dom = new \Symfony\Component\DomCrawler\Crawler($output);

    // "> *" selects the element children of EVERY matched link. Calling
    // children() on the link list only returned the children of the first
    // link and threw when no link matched.
    $listVideo = $dom->filter('div.videoList > div.videoListItem > div.thumb a.play > *');

    foreach ($listVideo as $li) {
        $name = $li->getAttribute('alt');
        $src = $li->getAttribute('src');
    }
}
/**
 * Requests index.php and checks that the response contains exactly one
 * <html> element, one stylesheet <link> in the head and one TubePress
 * <script> tag in the body.
 */
public function testCssAndJs()
{
    $markup = $this->request('GET', 'index.php', array())->getContent();
    $this->assertNotEmpty($markup);

    $page = new \Symfony\Component\DomCrawler\Crawler($markup);

    // Each of these selectors must match exactly one node.
    $singletons = array(
        'html',
        'html > head > link[rel="stylesheet"]',
        'html > body > script[type="text/javascript"][src="/tubepress/web/js/tubepress.js"]',
    );

    foreach ($singletons as $selector) {
        $this->assertCount(1, $page->filter($selector));
    }
}
/**
 * Fetch this comic.
 *
 * Downloads $this->url, applies the CSS selector in $this->filter and, when
 * at least one node matches, stores $this->prefix followed by the value of
 * the $this->attr attribute in $this->imageUrl. Nothing is changed when the
 * download yields no content or the selector matches nothing.
 */
public function fetch()
{
    // Download errors are deliberately silenced; an empty result just aborts.
    $markup = @file_get_contents($this->url);
    if (!$markup) {
        return;
    }

    $document = new \Symfony\Component\DomCrawler\Crawler();
    $document->addContent($markup);

    $matches = $document->filter($this->filter);
    if (!$matches->count()) {
        return;
    }

    $this->imageUrl = $this->prefix . $matches->attr($this->attr);
}
/**
 * Returns the text of the first node matching a CSS selector.
 *
 * When self::$debug_mode is truthy the extracted text is also printed.
 *
 * @param mixed       $page_contents      Content handed to the DomCrawler constructor.
 * @param string|null $component_selector CSS selector of the component; must not be null.
 *
 * @return string Text of the first matching node.
 *
 * @throws Exception When no selector is given (code 1).
 */
public static function getComponentText($page_contents, $component_selector = null)
{
    if ($component_selector === null) {
        throw new Exception("Component selector cannot be null!", 1);
    }

    $document = new Symfony\Component\DomCrawler\Crawler($page_contents);
    $firstMatch = $document->filter($component_selector)->eq(0);
    $text = $firstMatch->text();

    if (self::$debug_mode) {
        echo "Component text:" . PHP_EOL;
        print_r($text);
    }

    return $text;
}
/**
 * The CrawlerNot constraint must fail when the forbidden text is present,
 * and its failure message must mention the selector without a page URL.
 */
public function testFailMessageResponseWithoutUrl()
{
    $this->constraint = new Codeception\PHPUnit\Constraint\CrawlerNot('warcraft');
    $nodes = new Symfony\Component\DomCrawler\Crawler('<p>Bye world</p><p>Bye warcraft</p>');

    $failure = null;
    try {
        $this->constraint->evaluate($nodes->filter('p'), 'selector');
    } catch (PHPUnit_Framework_AssertionFailedError $fail) {
        $failure = $fail;
    }

    // The constraint was expected to reject the nodes.
    if ($failure === null) {
        $this->fail("should have failed, but not");
    }

    $this->assertContains("There was 'selector' element", $failure->getMessage());
    $this->assertNotContains("There was 'selector' element on page <bold>/user</bold>", $failure->getMessage());
}
/**
 * Returns the given HTML unchanged.
 *
 * The function returned $html on its first statement; the table-row
 * filtering that followed was unreachable debug code ending in dd() calls
 * and has been removed. Behaviour for callers is unchanged.
 *
 * @param string $html HTML markup.
 *
 * @return string The same markup, untouched.
 */
function pulisciTabellaPerPapa($html)
{
    return $html;
}
/**
 * Collects the download link and description of every subtitle listed in
 * the given HTML.
 *
 * The result is keyed from 1 upwards. Each entry may hold 'sub_link' (href
 * of an anchor under the "menu_titulo_buscador" elements) and 'description'
 * (trimmed text of a "buscador_detalle_sub" element with the AdSense
 * snippet removed).
 *
 * @param string $html
 *
 * @return array
 */
private function getSubtitlesFromHtml($html)
{
    $page = new \Symfony\Component\DomCrawler\Crawler($html);
    $subs = array();

    // Download links
    $anchors = $page
        ->filterXPath('//*[@id="menu_titulo_buscador"]')
        ->filterXPath('//a');
    $anchors->each(function ($anchor, $position) use (&$subs) {
        $subs[$position + 1]['sub_link'] = $anchor->attr('href');
    });

    // Description of each subtitle
    $adSnippet = "(adsbygoogle = window.adsbygoogle || []).push({});";
    $details = $page
        ->filterXPath('//*[@id="buscador_detalle"]')
        ->filterXPath('//*[@id="buscador_detalle_sub"]');
    $details->each(function ($detail, $position) use (&$subs, $adSnippet) {
        $withoutAds = str_replace($adSnippet, "", $detail->text());
        $subs[$position + 1]['description'] = trim($withoutAds);
    });

    return $subs;
}
/**
 * Debug helper: narrows the table rows of the given HTML down to a fixed
 * set of product types, dumps the result and stops the script.
 *
 * A row is kept when the lower-cased text of its first <td> is one of the
 * listed types (Crawler::reduce() keeps the nodes for which the callback
 * does not return false). The function never returns to its caller: it
 * ends in dd() followed by die.
 *
 * @param string $html HTML markup containing the table.
 */
function pulisciTabellaPerPapa($html)
{
    $keptTypes = ['bibite', 'aggiunte', 'condimenti', 'panini imbottiti', 'frutta e dessert'];

    $document = new Symfony\Component\DomCrawler\Crawler($html);
    $rows = $document->filter('table tr');

    $res = $rows->reduce(function ($row) use ($keptTypes) {
        $firstCell = $row->filter('td')->eq(0)->text();

        return in_array(strtolower($firstCell), $keptTypes);
    });

    dd($res->html());
    die;
}
/**
 * Imports lunch menus from the canteen's "conferma menu" e-mails.
 *
 * For every e-mail handed to the callback by GmailClient::readEmails():
 *  - the first dd/mm/yyyy date found in the HTML body becomes the menu date;
 *  - if a lunch menu already exists for that date the e-mail is skipped;
 *  - otherwise a menu is created with Menu::write() and every row after the
 *    header of the first table is turned into a product (column 0 =
 *    category, column 1 = description, column 2 = price) and attached to it.
 *
 * E-mails without an HTML body or without a recognisable date are reported
 * and skipped instead of failing on an undefined offset.
 *
 * @param InputInterface  $input  Provides the "entries" and "extraString" arguments.
 * @param OutputInterface $output Receives progress messages.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $gmail = new \Jambon\GmailClient\GmailClient();

    $query = [
        'maxResults' => $input->getArgument('entries'),
        'labelIds' => 'INBOX',
        'includeSpamTrash' => true,
        'q' => 'from:serviziomensa@mosaicoon.com conferma menu ' . $input->getArgument('extraString'),
    ];

    $gmail->readEmails($query, function (array $email) use ($output) {
        $content = isset($email['body']['html'][0]) ? $email['body']['html'][0] : '';

        // --------------------------------------------------------------------
        // Without a date there is nothing to import for this e-mail.
        if (!preg_match("/[0-9]{1,2}\\/[0-9]{1,2}\\/[0-9]{4}/", $content, $matched)) {
            $output->writeLn("Data del menu non trovata: email ignorata");
            return;
        }
        list($d, $m, $y) = explode('/', $matched[0]);
        $menuDate = Carbon::parse("{$y}-{$m}-{$d} 00:00:00");
        $output->writeLn("Importando Menu del " . $matched[0]);

        // --------------------------------------------------------------------
        $alreadyImported = Menu::where(['start_date' => $menuDate, 'type' => Menu::TYPE_PRANZO])->count() > 0;
        if ($alreadyImported) {
            // menu already stored
            $output->writeLn("MENU del " . $matched[0] . " GIA SALVATO");
            return;
        }

        $menu = Menu::write($menuDate, $menuDate->copy()->addHours(14));

        $crawler = new \Symfony\Component\DomCrawler\Crawler();
        $crawler->addHTMLContent($content, 'UTF-8');

        $crawler->filter('table')->eq(0)->filter('tr')->each(function ($tr, $i) use ($menu) {
            // The first row is the table header.
            if ($i === 0) {
                return;
            }

            $prodData = [];
            $tr->filter('td')->each(function ($td, $i) use (&$prodData) {
                if ($i == 0) {
                    $prodData['category'] = str_replace(['<', '>', '[', ']'], '', $td->text());
                } elseif ($i == 1) {
                    $desc = trim(str_replace(['[NUOVO]', '(NUOVO)', '(Nuovo)'], '', $td->text()));
                    $prodData['description'] = strip_tags($desc);
                } elseif ($i == 2) {
                    // NOTE(review): floatval() stops at a comma, so "3,50"
                    // would be read as 3.0 — confirm the price format.
                    $prodData['price'] = floatval(ltrim($td->text(), '€'));
                }
            });

            // Skip rows without cells or with a junk category
            // (original intent: "prevent insert injection code").
            $category = isset($prodData['category']) ? $prodData['category'] : '';
            if (in_array($category, ['', 'code', 'c'], true)) {
                return;
            }

            $menu->addProduct(Product::firstOrCreate($prodData));
            echo '.';
        });

        $output->writeLn(" ");
    });
}
/**
 * Reads the <title> of the page at the given URL.
 *
 * Returns null when the reader is disabled, when fetching the URL or
 * parsing the document throws (both are logged at error level), or when
 * the document has no html/head/title node.
 *
 * @param string $url Address handed to $this->fetchUrl().
 *
 * @return string|null
 */
public function readTitle($url)
{
    if (!$this->enabled) {
        return null;
    }

    try {
        $content = $this->fetchUrl($url);
    } catch (\Exception $e) {
        \Yii::getLogger()->log("Crawler fetchUrl exception: {$e->getMessage()}", Logger::LEVEL_ERROR);
        return null;
    }

    $title = null;
    try {
        $document = new \Symfony\Component\DomCrawler\Crawler();
        $document->addHtmlContent($content);

        $titleNodes = $document->filterXPath('html/head/title');
        if ($titleNodes->count() > 0) {
            $title = $titleNodes->first()->text();
        }
    } catch (\Exception $e) {
        \Yii::getLogger()->log("Crawler DOM extraction exception: {$e->getMessage()}", Logger::LEVEL_ERROR);
    }

    return $title;
}
/**
 * Asserts that the first element matching the selector carries the given
 * CSS class (or every class, when several are given separated by spaces).
 *
 * Classes are compared as whole tokens, so "btn" no longer matches an
 * element whose only class is "btn-primary". A missing class attribute is
 * treated as an empty class list.
 *
 * @Then /^"([^"]*)" should have the class "([^"]*)"$/
 *
 * @param string $selector CSS selector, evaluated against $this->result.
 * @param string $arg2     Expected class name(s).
 *
 * @throws Exception When an expected class is missing or none was given.
 */
public function shouldHaveTheClass($selector, $arg2)
{
    $crawler = new \Symfony\Component\DomCrawler\Crawler($this->result);
    // attr() yields null when the attribute is absent.
    $class = (string) $crawler->filter($selector)->attr('class');

    $actual = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    $expected = preg_split('/\s+/', (string) $arg2, -1, PREG_SPLIT_NO_EMPTY);

    // Fail when nothing was asked for, or when any expected token is absent.
    if (!$expected || array_diff($expected, $actual)) {
        throw new Exception(sprintf('Expected class `%s` does not match `%s`', $arg2, $class));
    }
}
/**
 * Looks up, within $crawler, the input elements of type checkbox whose name
 * attribute equals $name and asserts that exactly one exists.
 *
 * @param Symfony\Component\DomCrawler\Crawler $crawler Scope to search in.
 * @param string                               $name    Value of the name attribute.
 * @param string                               $message Optional failure message; a default is used when empty.
 *
 * @return Symfony\Component\DomCrawler\Crawler The matched checkbox.
 */
public function assert_find_one_checkbox($crawler, $name, $message = '')
{
    $xpath = sprintf('//input[@type="checkbox" and @name="%s"]', $name);
    $matches = $crawler->filterXPath($xpath);

    if (!$message) {
        $message = 'Failed asserting that exactly one checkbox with name' . " {$name} exists in crawler scope.";
    }

    $this->assertEquals(1, count($matches), $message);

    return $matches;
}
/**
 * With the divider set to `null`, the rendered breadcrumbs must not contain
 * any `span.divider` element.
 *
 * @dataProvider crumbsWithCssClassesProvider
 */
public function testOutputWithoutDividers($crumbs, $classes)
{
    $breadcrumbs = new Breadcrumbs($crumbs, $classes);
    $breadcrumbs->setDivider(null);

    $markup = $breadcrumbs->render();
    $document = new Symfony\Component\DomCrawler\Crawler($markup);
    $dividers = $document->filter('span.divider');

    Assert::count(0, $dividers);
}
/**
 * Loads exchange rates from the cashex.com.ua XML API.
 *
 * Every <element> whose <currency> is a key of $this->payment_api->currencies
 * contributes two entries: currency -> UAH at the "buy" value and
 * UAH -> currency at the "sale" value.
 *
 * @param array|null $options Optional settings. NOTE(review): the extract()
 *                            call presumably exposes them as underscore-prefixed
 *                            locals (e.g. $_request_options) — confirm.
 *
 * @return array|null List of rate entries; the raw request result when its
 *                    status is empty; null when the response holds no <element>.
 */
public function load__cashex_xml($options = null)
{
    // import options
    is_array($options) && extract($options, EXTR_PREFIX_ALL | EXTR_REFS, '');

    $api = $this->api;
    $payment_api = $this->payment_api;

    // prepare request options
    $url = 'http://api.cashex.com.ua/XmlApi.ashx';
    $request_options = ['is_redirect' => true, 'is_response_raw' => true];
    if (@$_request_options) {
        $request_options = array_replace_recursive($request_options, $_request_options);
    }

    $result = $api->_request($url, null, $request_options);
    list($status, $response) = $result;
    if (empty($status)) {
        return $result;
    }

    require_php_lib('sf_dom_crawler');
    $document = new \Symfony\Component\DomCrawler\Crawler($response);
    $elements = $document->filter('element');
    if ($elements->count() < 1) {
        return null;
    }

    $currencies = $payment_api->currencies;
    $data = [];
    $elements->each(function ($element, $index) use (&$currencies, &$data) {
        $code = $element->filter('currency')->text();
        // Ignore currencies the payment API does not know.
        if (empty($currencies[$code])) {
            return;
        }

        $buy = $element->filter('buy')->text();
        $sale = $element->filter('sale')->text();

        $data[] = ['from' => $code, 'to' => 'UAH', 'from_value' => 1, 'to_value' => $buy];
        $data[] = ['from' => 'UAH', 'to' => $code, 'from_value' => $sale, 'to_value' => 1];
    });

    return $data;
}
/**
 * Produces a short, printable summary of a response body.
 *
 * - Bodies of at most 500 bytes are returned trimmed, with runs of
 *   whitespace collapsed to one space and remaining newlines removed.
 * - Longer bodies containing "<html" are summarised as
 *   "page [Title: ...\nH1: ...]" using the first <title> and <h1>, when present.
 * - Any other long body yields "page.".
 *
 * @param string $response Raw response body.
 *
 * @return string
 */
public static function formatResponse($response)
{
    if (strlen($response) > 500) {
        if (strpos($response, '<html') === false) {
            return "page.";
        }

        $summary = 'page [';
        $document = new \Symfony\Component\DomCrawler\Crawler($response);

        $titles = $document->filter('title');
        if (count($titles)) {
            $summary .= "Title: " . trim($titles->first()->text());
        }

        $headings = $document->filter('h1');
        if (count($headings)) {
            $summary .= "\nH1: " . trim($headings->first()->text());
        }

        return $summary . "]";
    }

    // Short body: normalise the whitespace and return it as-is.
    $collapsed = preg_replace('/\\s[\\s]+/', ' ', trim($response));

    return str_replace("\n", '', $collapsed);
}
/**
 * {@inheritdoc}
 *
 * Fetches the video page from videos.sapo.pt, reads the
 * link[itemprop="thumbnailUrl"] element and takes the "pic" query parameter
 * of its href as the thumbnail URL.
 *
 * @return array Metadata with a 'thumbnail_url' key.
 *
 * @throws \RuntimeException When the page cannot be fetched or no thumbnail
 *                           URL can be extracted from it.
 */
protected function getMetadataThumbnail(MediaInterface $media)
{
    $url = sprintf('http://videos.sapo.pt/%s', $media->getProviderReference());

    try {
        $html = $this->browser->get($url)->getContent();
    } catch (\RuntimeException $e) {
        throw new \RuntimeException('Unable to retrieve the thumbnail information for :' . $url, 0, $e);
    }

    $crawler = new \Symfony\Component\DomCrawler\Crawler($html);
    $metadata = [];

    $thumbnail_node = $crawler->filter('link[itemprop="thumbnailUrl"]');
    if ($thumbnail_node->count() === 1) {
        // Example href:
        // http://thumbs.web.sapo.io/?pic=http://cache04.stormap.sapo.pt/vidstore18/thumbnais/54/88/76/11128693_4b5Bb.jpg&crop=center&tv=2&W=1280&H=960&errorpic=http://assets.web.sapo.io/sapovideo/sv/20150903/imgs/playlist_default_thumb_error_pt.gif
        $thumbnail_url = $thumbnail_node->getNode(0)->getAttribute('href');

        // parse_url() gives null when there is no query string and false on
        // a malformed URL; only a real query string can carry "pic".
        $query = parse_url($thumbnail_url, PHP_URL_QUERY);
        if (is_string($query)) {
            $data = [];
            parse_str($query, $data);
            if (isset($data['pic'])) {
                $metadata['thumbnail_url'] = $data['pic'];
            }
        }
    }

    if (empty($metadata)) {
        throw new \RuntimeException('Unable to decode the video information for :' . $url);
    }

    return $metadata;
}
#!/usr/bin/php
<?php

// Install/autoload description for the Symfony DomCrawler component.
// When the including script sets $return_config to a truthy value the
// description is returned; otherwise it is passed to yf_autoloader.
$config = [
    'require_services' => ['sf_css_selector'],
    'git_urls' => ['https://github.com/yfix/DomCrawler.git' => 'sf_dom_crawler/'],
    'autoload_config' => ['sf_dom_crawler/' => 'Symfony\\Component\\DomCrawler'],
    // Smoke test: prints the paragraph text twice, via XPath and via CSS.
    'example' => function () {
        $crawler = new \Symfony\Component\DomCrawler\Crawler();
        $crawler->addContent('<html><body><p>Hello World!</p></body></html>');
        echo $crawler->filterXPath('descendant-or-self::body/p')->text();
        echo PHP_EOL;
        // filter() requires the css selector component
        echo $crawler->filter('body > p')->text();
        echo PHP_EOL;
    },
];

// $return_config is only defined when another script includes this file, so
// test it with empty() to avoid an undefined-variable warning when run directly.
if (!empty($return_config)) {
    return $config;
}

require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);
/**
 * Parse curse.com HTML for project properties
 *
 * The shared crawler is emptied before the new markup is added, so each
 * call parses only the document it was given. Without this, a second call
 * on the same instance would add a second document to the crawler.
 *
 * @param string $html
 *
 * @return array|null Project properties, or null when the page is not a
 *                    content page (no "ul.details-list .game" element).
 */
public function parse($html)
{
    // Drop nodes left over from a previous parse() call.
    $this->crawler->clear();
    $this->crawler->add($html);

    // Return null if this isn't a content page
    if (!$this->crawler->filter('ul.details-list .game')->exists()) {
        return null;
    }

    $properties = [
        'title' => $this->crawler->filter('meta[property="og:title"]')->attr('content'),
        'game' => $this->crawler->filter('ul.details-list .game')->text(),
        'category' => $this->crawler->filter('#breadcrumbs-wrapper ul.breadcrumbs li a')->eq(2)->text(),
        'url' => $this->crawler->filter('meta[property="og:url"]')->attr('content'),
        'thumbnail' => $this->crawler->filter('meta[property="og:image"]')->attr('content'),
        'authors' => $this->crawler->filter('ul.authors li a')->each(function ($node, $i) {
            return $node->text();
        }),
        'downloads' => [
            'monthly' => $this->crawler->filter('ul.details-list .average-downloads')->number(),
            'total' => $this->crawler->filter('ul.details-list .downloads')->number(),
        ],
        'favorites' => $this->crawler->filter('ul.details-list .favorited')->number(),
        'likes' => $this->crawler->filter('li.grats span.project-rater')->number(),
        // The first date element is read as "updated", the second as "created".
        'updated_at' => $this->crawler->filter('ul.details-list .updated .standard-date')->eq(0)->attrAsTime('data-epoch'),
        'created_at' => $this->crawler->filter('ul.details-list .updated .standard-date')->eq(1)->attrAsTime('data-epoch'),
        'project_url' => $this->crawler->filter('ul.details-list .curseforge a')->attr('href'),
        'release_type' => $this->crawler->filter('ul.details-list .release')->value(),
        'license' => $this->crawler->filter('ul.details-list .license')->value(),
        'files' => $this->crawler->filter('table.project-file-listing tr')->eachWithoutNull(function ($node, $i) {
            // skip the table heading
            if ($i === 0) {
                return;
            }

            $link = $node->filter('td a')->eq(0);
            $cells = $node->filter('td');

            return [
                'id' => (int) $link->finalUrlSegment('href'),
                'url' => 'http://curse.com' . $link->attr('href'),
                'name' => $link->text(),
                'type' => strtolower($cells->eq(1)->text()),
                'version' => $cells->eq(2)->text(),
                'downloads' => $cells->eq(3)->number(),
                'created_at' => $node->filter('td .standard-date')->attrAsTime('data-epoch'),
            ];
        }),
    ];

    return $properties;
}
/**
 * Builds a crawler for the given content and, when a CSS filter is passed,
 * narrows it to the matching nodes (asserting the result is not null).
 *
 * @param mixed       $content Content handed to the Crawler constructor.
 * @param string|null $filter  Optional CSS selector.
 *
 * @return \Symfony\Component\DomCrawler\Crawler
 */
public function createNode($content, $filter = null)
{
    $document = new \Symfony\Component\DomCrawler\Crawler($content);

    if ($filter === null) {
        return $document;
    }

    $matches = $document->filter($filter);
    $this->assertNotEquals(null, $matches);

    return $matches;
}
/**
 * Normalises a provider result in place.
 *
 * - 'color': when the auto_card_color option is on and favicon colours are
 *   present, the first colour whose channels differ by more than 10 is used
 *   (otherwise the last one, or the default card colour when the list is empty).
 * - 'author_name' / 'author_url': copied from the first author.
 * - 'type': set to 'photo' or 'video' when the media type says so.
 * - 'thumbnail_url' / 'thumbnail_width' / 'thumbnail_type': derived from the
 *   first image and $this->getMaxWidth().
 * - 'amazon': for the Amazon provider, product details scraped from the
 *   media HTML (or a fallback built from the first image); the colour is
 *   then forced to a fixed orange when auto colouring is on.
 *
 * A result without a 'provider_name' key is now treated as "not Amazon"
 * instead of reading an undefined index.
 *
 * @param array $result Provider data, modified by reference.
 */
protected function standardizeProps(&$result)
{
    $autoColor = (int) $this->getOption('auto_card_color', 1);

    if ($autoColor && isset($result['favicon_colors'])) {
        $color = $this->getOption('default_card_color', '#D71212');
        foreach ($result['favicon_colors'] as $colors) {
            $min = min($colors['color']);
            $max = max($colors['color']);
            $color = 'rgb(' . implode(',', $colors['color']) . ')';
            // Stop at the first colour whose channels differ by more than 10.
            if ($max - $min > 10) {
                break;
            }
        }
        $result['color'] = $color;
    }

    if (!empty($result['authors'])) {
        $result['author_name'] = $result['authors'][0]['name'];
        $result['author_url'] = $result['authors'][0]['url'];
    }

    if (isset($result['media']['type'])) {
        switch ($result['media']['type']) {
            case 'photo':
                $result['type'] = 'photo';
                break;
            case 'video':
                $result['type'] = 'video';
                break;
        }
    }

    if (isset($result['images'][0]['url'])) {
        $result['thumbnail_url'] = $result['images'][0]['url'];
        $width = $this->getMaxWidth();
        if (isset($result['images'][0]['width']) && $result['images'][0]['width'] < $width - 50) {
            $result['thumbnail_type'] = 'small';
        }
        $result['thumbnail_width'] = $width;
    }

    $isAmazon = isset($result['provider_name'])
        && strtolower($result['provider_name']) === 'amazon';
    if (!$isAmazon) {
        return;
    }

    if (isset($result['media']['html'])) {
        $crawler = new \Symfony\Component\DomCrawler\Crawler();
        $crawler->addContent($result['media']['html']);

        // Text of the first node matching the selector, or '' when the
        // lookup throws (e.g. nothing matched).
        $textOf = function ($selector) use ($crawler) {
            try {
                return $crawler->filter($selector)->text();
            } catch (\Exception $e) {
                return '';
            }
        };

        try {
            $img = $crawler->filter('tr > td')->first()->filter('img')->attr('src');
        } catch (\Exception $e) {
            $img = '';
        }

        $result['amazon'] = array(
            'img' => $img,
            'subHead' => $textOf('span.subhead'),
            'listPrice' => $textOf('td.listprice'),
            'price' => $textOf('td.price'),
            'saved' => $textOf('td.saved'),
        );
    } else {
        $img = '';
        if (isset($result['images'][0]['url'])) {
            $img = $result['images'][0]['url'];
        }
        $result['amazon'] = array('img' => $img, 'subHead' => '', 'listPrice' => '', 'price' => '', 'saved' => '');
    }

    if ($autoColor) {
        $result['color'] = 'rgb(254, 167, 7)';
    }
}
/**
 * Builds a DomCrawler for this object's raw contents.
 *
 * The crawler is created with the value of getUrl() and filled with
 * getRawContents() using getContentType(). When the DomCrawler class is not
 * available an error is triggered and null is returned.
 *
 * @return \Symfony\Component\DomCrawler\Crawler|null
 */
public function getCrawler()
{
    if (class_exists('Symfony\\Component\\DomCrawler\\Crawler')) {
        $document = new \Symfony\Component\DomCrawler\Crawler(null, $this->getUrl());
        $document->addContent($this->getRawContents(), $this->getContentType());

        return $document;
    }

    trigger_error('The Symfony\\Component\\DomCrawler\\Crawler object is not installed');

    return null;
}
/**
 * Returns whatever the wrapped crawler's html() method yields.
 *
 * @return mixed Result of $this->crawler->html().
 */
public function getBodyHtml()
{
    $markup = $this->crawler->html();

    return $markup;
}
/**
 * Imports lunch orders from the canteen's "invio ordine" e-mails.
 *
 * For every e-mail handed to the callback by GmailClient::readEmails():
 *  - the first dd/mm/yyyy date found in the HTML body becomes the order date;
 *  - the e-mail is skipped when an order for the "Mosaicoon" agency already
 *    exists at that date + 13h, or when no lunch menu exists for the date;
 *  - every row after the header of the second table is read as
 *    user (column 0), category (1), product name (2) and quantity (3), and
 *    the product is added to that user's order.
 *
 * Progress is echoed per row: "." added, "P" product not found in the menu,
 * "X" menu/product association failed.
 *
 * E-mails without an HTML body or without a recognisable date are reported
 * and skipped instead of failing on an undefined offset.
 *
 * @param InputInterface  $input  Provides the "entries" and "extraString" arguments.
 * @param OutputInterface $output Receives progress messages.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $gmail = new \Jambon\GmailClient\GmailClient();

    $query = [
        'maxResults' => $input->getArgument('entries'),
        'labelIds' => 'INBOX',
        'includeSpamTrash' => true,
        'q' => 'from:serviziomensa@mosaicoon.com invio ordine ' . $input->getArgument('extraString'),
    ];

    $gmail->readEmails($query, function (array $email) use ($output) {
        $content = isset($email['body']['html'][0]) ? $email['body']['html'][0] : '';

        // --------------------------------------------------------------------
        // Without a date there is nothing to import for this e-mail.
        if (!preg_match("/[0-9]{1,2}\\/[0-9]{1,2}\\/[0-9]{4}/", $content, $matched)) {
            $output->writeLn("Data dell'ordine non trovata: email ignorata");
            return;
        }
        list($d, $m, $y) = explode('/', $matched[0]);
        $menuDate = Carbon::parse("{$y}-{$m}-{$d} 00:00:00");
        $output->writeLn("Importando Ordini del " . $matched[0]);

        // --------------------------------------------------------------------
        $agency = Agency::firstOrCreate(['name' => 'Mosaicoon']);
        $c = Order::where(['delivery_time' => $menuDate->copy()->addHours(13), 'agency_id' => $agency->id])->count();
        if ($c !== 0) {
            // order already stored
            $output->writeLn("Ordine del " . $matched[0] . " GIA SALVATO");
            return;
        }

        // fetch the menu of the day
        try {
            $menu = Menu::where(['start_date' => $menuDate, 'type' => Menu::TYPE_PRANZO])->firstOrFail();
        } catch (\Exception $e) {
            $output->writeLn("Menu del " . $matched[0] . " NON PRESENTE");
            return;
        }

        $crawler = new \Symfony\Component\DomCrawler\Crawler();
        $crawler->addHTMLContent($content, 'UTF-8');

        $crawler->filter('table')->eq(1)->filter('tr')->each(function ($tr, $i) use ($menu, $agency, $menuDate) {
            // The first row is the table header.
            if ($i === 0) {
                return;
            }

            $ordData = [];
            $tr->filter('td')->each(function ($td, $i) use (&$ordData) {
                if ($i == 0) {
                    $ordData['user'] = $td->text();
                } elseif ($i == 1) {
                    $ordData['category'] = trim($td->text());
                } elseif ($i == 2) {
                    $ordData['product-name'] = trim(str_replace(['[NUOVO]', '(NUOVO)', '(Nuovo)'], '', $td->text()));
                } elseif ($i == 3) {
                    $ordData['qta'] = (int) $td->text();
                }
            });

            // Rows without cells carry no order line.
            if (!isset($ordData['user'])) {
                return;
            }

            // The totals row is not an order line.
            if ($ordData['user'] === 'TOTALE') {
                return;
            }

            // Skip rows with a missing or junk category
            // (original intent: "prevent insert injection code").
            $category = isset($ordData['category']) ? $ordData['category'] : '';
            if (in_array($category, ['', 'code', 'c'], true)) {
                return;
            }

            try {
                $pid = $menu->menuProducts()->get()->lists('product_id');
                $product = Product::whereIn('id', $pid)->where('description', '=', $ordData['product-name'])->firstOrFail();
            } catch (\Exception $e) {
                // product missing from the menu
                echo 'P';
                return;
            }

            // TODO (from the original author): to be reviewed...
            $user = User::firstOrCreate(['agency_id' => $agency->id, 'display_name' => $ordData['user'], 'role' => User::ROLE_USER]);
            $order = Order::firstOrCreate(['user_id' => $user->id, 'agency_id' => $agency->id, 'delivery_time' => $menuDate->copy()->addHours(13)]);

            try {
                $order->addMenuProduct($menu, $product, $ordData['qta']);
            } catch (\Exception $e) {
                // menu/product association not found
                echo 'X';
                return;
            }

            echo '.';
        });

        $output->writeLn(" ");
    });
}