Пример #1
0
 /**
  * Collects the download link and the description of each subtitle found
  * in the given HTML.
  *
  * Links are taken from the anchors below the "menu_titulo_buscador"
  * elements, descriptions from the "buscador_detalle_sub" elements below
  * "buscador_detalle". Both are stored under a 1-based position key, so a
  * link and a description end up in the same entry only when they occupy
  * the same position in their respective node lists.
  *
  * @param  string $html HTML to scan
  * @return array  Map of 1-based position to an array that may contain the
  *                keys 'sub_link' and 'description'
  */
 private function getSubtitlesFromHtml($html)
 {
     $page = new \Symfony\Component\DomCrawler\Crawler($html);
     $result = [];

     // Record the href of every anchor found in the title elements.
     $anchors = $page
         ->filterXPath('//*[@id="menu_titulo_buscador"]')
         ->filterXPath('//a');
     $anchors->each(function ($anchor, $index) use (&$result) {
         $position = $index + 1;
         $result[$position]['sub_link'] = $anchor->attr('href');
     });

     // Record the text of every detail element, with the inline adsbygoogle
     // script text removed and surrounding whitespace trimmed.
     $details = $page
         ->filterXPath('//*[@id="buscador_detalle"]')
         ->filterXPath('//*[@id="buscador_detalle_sub"]');
     $details->each(function ($detail, $index) use (&$result) {
         $cleaned = str_replace("(adsbygoogle = window.adsbygoogle || []).push({});", "", $detail->text());
         $position = $index + 1;
         $result[$position]['description'] = trim($cleaned);
     });

     return $result;
 }
Пример #2
0
 /**
  * Fetches the document at $url and returns the text of its <title> element.
  *
  * Exceptions raised while fetching or while extracting the title are
  * logged through the Yii logger at error level and reported as null.
  *
  * @param string $url Address handed to fetchUrl()
  *
  * @return string|null Title text, or null when the reader is disabled, an
  *                     exception occurred, or no title node was found
  */
 public function readTitle($url)
 {
     if (!$this->enabled) {
         return null;
     }

     try {
         $html = $this->fetchUrl($url);
     } catch (\Exception $fetchError) {
         $logLine = "Crawler fetchUrl exception: {$fetchError->getMessage()}";
         \Yii::getLogger()->log($logLine, Logger::LEVEL_ERROR);

         return null;
     }

     // Stays null unless a title node is found and read successfully.
     $title = null;
     try {
         $dom = new \Symfony\Component\DomCrawler\Crawler();
         $dom->addHtmlContent($html);
         $titleNodes = $dom->filterXPath('html/head/title');
         if ($titleNodes->count() > 0) {
             $title = $titleNodes->first()->text();
         }
     } catch (\Exception $domError) {
         $logLine = "Crawler DOM extraction exception: {$domError->getMessage()}";
         \Yii::getLogger()->log($logLine, Logger::LEVEL_ERROR);
     }

     return $title;
 }
 /**
  * Searches for an input element of type checkbox with the name $name using
  * $crawler and asserts that exactly one such checkbox exists within the
  * scope of $crawler.
  *
  * The name is embedded into the query as an escaped XPath literal, so names
  * containing quote characters still produce a valid expression.
  *
  * @param \Symfony\Component\DomCrawler\Crawler $crawler Scope to search in
  * @param string $name Value of the checkbox's name attribute
  * @param string $message Failure message; a default is used when empty
  *
  * @return \Symfony\Component\DomCrawler\Crawler The matched checkbox
  */
 public function assert_find_one_checkbox($crawler, $name, $message = '')
 {
     // xpathLiteral() picks a quoting style (or concat()) that is safe for
     // any combination of single and double quotes inside $name.
     $name_literal = \Symfony\Component\DomCrawler\Crawler::xpathLiteral($name);
     $query = sprintf('//input[@type="checkbox" and @name=%s]', $name_literal);
     $result = $crawler->filterXPath($query);
     $this->assertEquals(1, count($result), $message ?: 'Failed asserting that exactly one checkbox with name' . " {$name} exists in crawler scope.");
     return $result;
 }
 /**
  * {@inheritdoc}
  *
  * Downloads the video page from videos.sapo.pt for the media's provider
  * reference, locates the thumbnail <link> element and returns the image
  * URL carried in the "pic" query parameter of its href.
  *
  * @param MediaInterface $media Media whose provider reference identifies the video
  *
  * @return array Metadata containing the key 'thumbnail_url'
  *
  * @throws \RuntimeException When the page cannot be retrieved or no
  *                           thumbnail URL can be extracted from it
  */
 protected function getMetadataThumbnail(MediaInterface $media)
 {
     $url = sprintf('http://videos.sapo.pt/%s', $media->getProviderReference());
     try {
         $html = $this->browser->get($url)->getContent();
     } catch (\RuntimeException $e) {
         // The exception code must be an integer; 0 is the default value.
         throw new \RuntimeException('Unable to retrieve the thumbnail information for :' . $url, 0, $e);
     }
     $crawler = new \Symfony\Component\DomCrawler\Crawler($html);
     $metadata = [];
     // XPath is used because filter() requires the Symfony CSS selector
     // component; the CSS form considered by the original author was
     // link[itemprop="thumbnailUrl"].
     $thumbnail_node = $crawler->filterXPath('//*[@id="body_content"]/div/article/div[2]/link[2]');
     if ($thumbnail_node->count() === 1) {
         // Example href:
         // http://thumbs.web.sapo.io/?pic=http://cache04.stormap.sapo.pt/.../11128693_4b5Bb.jpg&crop=center&tv=2&W=1280&H=960&errorpic=...
         $thumbnail_url = $thumbnail_node->getNode(0)->getAttribute('href');
         // parse_url() yields null when there is no query string and false
         // for a malformed URL; only a string can be decoded further.
         $query = parse_url($thumbnail_url, PHP_URL_QUERY);
         if (is_string($query)) {
             $data = [];
             parse_str($query, $data);
             if (isset($data['pic'])) {
                 $metadata['thumbnail_url'] = $data['pic'];
             }
         }
     }
     if (empty($metadata)) {
         throw new \RuntimeException('Unable to decode the video information for :' . $url);
     }
     return $metadata;
 }
Пример #5
0
#!/usr/bin/php
<?php 
// Configuration for the Symfony DomCrawler package, handed to yf_autoloader
// below. NOTE(review): the exact meaning of each key is defined by
// yf_autoloader, which is not visible here - confirm against that class.
$config = [
    'require_services' => ['sf_css_selector'],
    'git_urls' => ['https://github.com/yfix/DomCrawler.git' => 'sf_dom_crawler/'],
    'autoload_config' => ['sf_dom_crawler/' => 'Symfony\\Component\\DomCrawler'],
    // Small demo: select the same paragraph via XPath and via a CSS selector.
    'example' => function () {
        $crawler = new \Symfony\Component\DomCrawler\Crawler();
        $crawler->addContent('<html><body><p>Hello World!</p></body></html>');
        echo $crawler->filterXPath('descendant-or-self::body/p')->text();
        echo PHP_EOL;
        echo $crawler->filter('body > p')->text();
        // require css selector
        echo PHP_EOL;
    },
];
// $return_config is not defined in this file, so it is only set when an
// including script provides it; empty() avoids an undefined-variable
// warning when the script is executed directly.
if (!empty($return_config)) {
    return $config;
}
require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);