Example #1
0
 /**
  * Ingest an oEmbed resource into the given media entity.
  *
  * The request content must carry an 'o:source' URL that matches at least one
  * regex from the configured oembed whitelist. That page is fetched, its
  * JSON oEmbed <link> is located and fetched in turn, and the decoded JSON
  * becomes the media data. When the oEmbed document advertises a
  * 'thumbnail_url', the thumbnail is downloaded and stored as well.
  *
  * Every failure is reported through the error store under the 'o:source'
  * key (or by makeRequest() itself) and the method returns early without
  * touching the media entity.
  *
  * @param Media      $media      Entity that receives data, source and thumbnail info
  * @param Request    $request    Request whose content holds 'o:source'
  * @param ErrorStore $errorStore Collector for validation/ingest errors
  */
 public function ingest(Media $media, Request $request, ErrorStore $errorStore)
 {
     $content = $request->getContent();
     if (!isset($content['o:source'])) {
         $errorStore->addError('o:source', 'No OEmbed URL specified');
         return;
     }
     $source = $content['o:source'];

     // The source must match at least one whitelist pattern from the config.
     $config = $this->getServiceLocator()->get('Config');
     $allowed = false;
     foreach ($config['oembed']['whitelist'] as $pattern) {
         if (1 === preg_match($pattern, $source)) {
             $allowed = true;
             break;
         }
     }
     if (false === $allowed) {
         $errorStore->addError('o:source', 'Invalid OEmbed URL');
         return;
     }

     // Step 1: fetch the page itself.
     $pageResponse = $this->makeRequest($source, 'OEmbed URL', $errorStore);
     if (!$pageResponse) {
         return;
     }

     // Step 2: discover the JSON oEmbed endpoint advertised by the page.
     $page = new Query($pageResponse->getBody());
     $candidates = $page->queryXpath('//link[@rel="alternate" or @rel="alternative"][@type="application/json+oembed"]');
     if (0 === count($candidates)) {
         $errorStore->addError('o:source', 'No OEmbed links were found at the given URI');
         return;
     }

     // Step 3: fetch and decode the oEmbed document (first link wins).
     $endpoint = $candidates[0]->getAttribute('href');
     $endpointResponse = $this->makeRequest($endpoint, 'OEmbed link URL', $errorStore);
     if (!$endpointResponse) {
         return;
     }
     $mediaData = json_decode($endpointResponse->getBody(), true);
     if (!$mediaData) {
         $errorStore->addError('o:source', 'Error decoding OEmbed JSON');
         return;
     }

     // Step 4 (optional): pull in the advertised thumbnail.
     if (isset($mediaData['thumbnail_url'])) {
         $fileManager = $this->getServiceLocator()->get('Omeka\\File\\Manager');
         $file = $this->getServiceLocator()->get('Omeka\\File');
         $this->downloadFile($mediaData['thumbnail_url'], $file->getTempPath());
         if ($fileManager->storeThumbnails($file)) {
             $media->setFilename($file->getStorageName());
             $media->setHasThumbnails(true);
         }
     }

     $media->setData($mediaData);
     $media->setSource($source);
 }
Example #2
0
 /**
  * Tries to discover an oEmbed endpoint for the given URL.
  *
  * $_feedUrl is reset to false up front. The page at $url is then fetched
  * and searched for a JSON oEmbed <link>, then an XML one. On the first hit
  * $_feedFormat and $_feedUrl are populated from it. If nothing is found,
  * the request is unsuccessful, or any exception is thrown, $_feedUrl stays
  * false. The method itself returns nothing.
  *
  * @param string|false $url
  */
 private function discover($url)
 {
     // Lookup order matters: JSON is preferred over XML.
     static $paths = array(Oembed::FEED_FORMAT_JSON => '//link[@href][@type=\'application/json+oembed\']', Oembed::FEED_FORMAT_XML => '//link[@href][@type=\'text/xml+oembed\']');

     $this->_feedUrl = false;

     try {
         $client = $this->getServiceLocator()->get('Zend\\Http\\Client');
         $response = $client->setUri($url)->send();
         if (!$response || !$response->isSuccess()) {
             return;
         }

         $dom = new Query($response->getBody());
         foreach ($paths as $format => $xpath) {
             $matches = $dom->queryXpath($xpath);
             foreach ($matches as $link) {
                 // Only the first matching <link> is used.
                 $this->_feedFormat = $format;
                 $this->_feedUrl = $link->getAttribute('href');
                 return;
             }
         }
     } catch (\Exception $ex) {
         // Best-effort discovery: failures leave $_feedUrl at false.
     }
 }
Example #3
0
 /**
  * Run a query against the content of the current response.
  *
  * With $useXpath the path is evaluated via queryXpath() after registering
  * $this->xpathNamespaces; otherwise it is handed to execute().
  *
  * @param  string  $path     Query expression
  * @param  boolean $useXpath Evaluate $path through queryXpath() instead of execute()
  * @return array   Declared as array here; the concrete type is whatever
  *                 Dom\Query returns (verify against that class)
  */
 private function query($path, $useXpath = false)
 {
     $document = new Dom\Query($this->getResponse()->getContent());

     if (!$useXpath) {
         return $document->execute($path);
     }

     $document->registerXpathNamespaces($this->xpathNamespaces);
     return $document->queryXpath($path);
 }
Example #4
0
 /**
  * Demo action: extracts movie name, year and thumbnail image from
  * $this->_html and dumps the collected rows.
  *
  * Output: the number of name nodes is echoed first, followed by a
  * <pre>-wrapped print_r() of the result array.
  *
  * @return false Always false (presumably to skip view rendering; confirm
  *               against the controller base class)
  */
 public function index04Action()
 {
     $dom = new Query($this->_html, 'UTF-8');

     // All three expressions are XPath, so each must go through queryXpath();
     // execute() expects a CSS selector and would try to translate the
     // expression instead of evaluating it as written.
     /** Get the name of each movie */
     $nameNodes = $dom->queryXpath('//div[@class="caption"]/h3');
     $imageNodes = $dom->queryXpath('//div[@class="thumbnail"]/img');
     $yearNodes = $dom->queryXpath('//div[@class="caption"]/p');

     $result = array();
     /** Determine the number of elements; the name list drives the loop */
     echo $totalItem = $nameNodes->count();
     for ($i = 0; $i < $totalItem; $i++) {
         // The year/image lists may be shorter than the name list; record
         // null instead of dereferencing a missing node.
         $yearNode = $yearNodes->current();
         $imageNode = $imageNodes->current();

         $result[$i]['name'] = $nameNodes->current()->nodeValue;
         $result[$i]['year'] = $yearNode ? $yearNode->nodeValue : null;
         $result[$i]['image'] = $imageNode ? $imageNode->getAttribute('src') : null;

         // Advance the three lists in lock-step.
         $nameNodes->next();
         $yearNodes->next();
         $imageNodes->next();
     }

     echo '<pre>';
     print_r($result);
     echo '</pre>';
     return false;
 }
Example #5
0
 /**
  * Looks up information about the navigation group that contains
  * $currentPage inside "<$path>index.html".
  *
  * With $getHref strictly true, returns the href of the first link in the
  * same list as the link pointing at $currentPage. Otherwise returns the
  * text of the enclosing section's h3 headline, falling back to its h2,
  * with any '¶' character removed.
  *
  * @param string $currentPage href value of the current page's link
  * @param string $path        Directory prefix; 'index.html' is appended as-is
  * @param bool   $getHref     true for the first link's href, anything else for the headline
  *
  * @return string|null null when nothing matches
  */
 protected function getV2SelectedPage($currentPage, $path, $getHref = true)
 {
     $doc = new DomQuery(file_get_contents($path . 'index.html'));

     if (true !== $getHref) {
         // Headline (component name): try h3 first, then h2.
         $headlineQueries = array(
             '//a[@href = "%s"]/parent::li/parent::ul/parent::div/parent::blockquote/parent::div/h3',
             '//a[@href = "%s"]/parent::li/parent::ul/parent::div/parent::blockquote/parent::div/h2',
         );
         foreach ($headlineQueries as $template) {
             $headline = $doc->queryXpath(sprintf($template, $currentPage));
             if ($headline->count()) {
                 return str_replace('¶', '', $headline->current()->nodeValue);
             }
         }

         return null;
     }

     // First link of the list that contains the current page.
     $links = $doc->queryXpath(sprintf('//a[@href = "%s"]/parent::li/parent::ul/li[1]/a', $currentPage));
     if (!$links->count()) {
         return null;
     }
     if (!$links->current()->hasAttribute('href')) {
         return null;
     }

     return $links->current()->getAttribute('href');
 }
Example #6
0
 /**
  * Builds a Response from raw HTML page source.
  *
  * Collected data:
  *  - meta tags: keyed by lower-cased "name" (or "property" when name is
  *    empty/absent) => "content"; a later tag with the same key overwrites
  *    an earlier one
  *  - heading tags h1..h5: text content of every heading, per level
  *  - title: text of the first <title> element, when present
  *  - images: unique "src" values of all <img> elements
  *  - links: unique "href" values of all <a> elements, skipping fragment
  *    links ("#...") and "javascript:" pseudo-links
  *
  * @param string $source HTML document source
  * @return Response
  */
 public static function fromPageSource($source)
 {
     $response = new Response();
     $domQuery = new DomQuery();
     $domQuery->setDocumentHtml($source);

     // Meta tags.
     $metas = array();
     foreach ($domQuery->queryXpath('//meta') as $node) {
         if (!$node->hasAttribute('name') && !$node->hasAttribute('property')) {
             continue;
         }
         $name = $node->getAttribute('name') ?: $node->getAttribute('property');
         $metas[strtolower($name)] = $node->getAttribute('content');
     }
     $response->getMeta()->exchangeArray($metas);

     // Heading tags, one list of texts per level.
     $tags = $response->getHeadingTags();
     foreach (array('h1', 'h2', 'h3', 'h4', 'h5') as $level) {
         $texts = array();
         foreach ($domQuery->queryXpath('//' . $level) as $node) {
             $texts[] = $node->textContent;
         }
         $tags->offsetSet($level, $texts);
     }

     // Page title (first <title> only).
     $node = $domQuery->queryXpath('//title')->current();
     if ($node) {
         $response->title = $node->textContent;
     }

     // Images.
     $img = array();
     foreach ($domQuery->queryXpath('//img') as $node) {
         $img[] = $node->getAttribute('src');
     }
     // array_unique() keeps the original keys; re-index so the result is a
     // gap-free list.
     $response->getImages()->exchangeArray(array_values(array_unique($img)));

     // Links.
     $links = array();
     foreach ($domQuery->queryXpath('//a') as $node) {
         if (!$node->hasAttribute('href')) {
             continue;
         }
         $href = $node->getAttribute('href');
         // Skip in-page anchors and javascript: pseudo-URLs. The scheme is
         // matched case-insensitively and with its colon, so ordinary
         // relative links such as "javascript-guide.html" are kept.
         if (preg_match('/^(?:#|javascript:)/i', $href)) {
             continue;
         }
         $links[] = $href;
     }
     $response->links = array_values(array_unique($links));

     return $response;
 }