/**
 * Ingest an oEmbed resource into the given media entity.
 *
 * Steps, each of which records an error on $errorStore and returns early on
 * failure:
 *  1. Read 'o:source' from the request content and check it against the
 *     regular expressions configured under ['oembed']['whitelist'].
 *  2. Fetch the source page and look for a JSON oEmbed <link> element.
 *  3. Fetch the first linked oEmbed document and decode it as JSON.
 *  4. If the document names a 'thumbnail_url', download it and let the file
 *     manager store thumbnails for it.
 *  5. Store the decoded data and the source URL on the media.
 *
 * @param Media $media Media entity that receives the data, source and thumbnail info
 * @param Request $request Request whose content carries 'o:source'
 * @param ErrorStore $errorStore Collector for validation/fetch errors
 * @return void
 */
public function ingest(Media $media, Request $request, ErrorStore $errorStore)
{
    $data = $request->getContent();
    if (!isset($data['o:source'])) {
        $errorStore->addError('o:source', 'No OEmbed URL specified');
        return;
    }
    $source = $data['o:source'];

    $config = $this->getServiceLocator()->get('Config');
    // A missing whitelist is treated as an empty one: nothing is accepted,
    // but no undefined-index notice / foreach warning is raised either.
    $whitelist = isset($config['oembed']['whitelist'])
        ? $config['oembed']['whitelist']
        : array();

    // The source comes from the request; anything that is not a string can
    // never match a URL pattern and must not be handed to preg_match().
    $whitelisted = false;
    if (is_string($source)) {
        foreach ($whitelist as $regex) {
            if (preg_match($regex, $source) === 1) {
                $whitelisted = true;
                break;
            }
        }
    }
    if (!$whitelisted) {
        $errorStore->addError('o:source', 'Invalid OEmbed URL');
        return;
    }

    $response = $this->makeRequest($source, 'OEmbed URL', $errorStore);
    if (!$response) {
        return;
    }

    $dom = new Query($response->getBody());
    $oEmbedLinks = $dom->queryXpath('//link[@rel="alternate" or @rel="alternative"][@type="application/json+oembed"]');
    if (!count($oEmbedLinks)) {
        $errorStore->addError('o:source', 'No OEmbed links were found at the given URI');
        return;
    }

    // Only the first advertised oEmbed endpoint is used.
    $oEmbedLink = $oEmbedLinks[0];
    $linkResponse = $this->makeRequest($oEmbedLink->getAttribute('href'), 'OEmbed link URL', $errorStore);
    if (!$linkResponse) {
        return;
    }

    $mediaData = json_decode($linkResponse->getBody(), true);
    // json_decode() returns scalars for valid non-object JSON such as "foo" or 5;
    // those are truthy but are not an oEmbed document, so reject them as well.
    if (!is_array($mediaData) || !$mediaData) {
        $errorStore->addError('o:source', 'Error decoding OEmbed JSON');
        return;
    }

    if (isset($mediaData['thumbnail_url'])) {
        $fileManager = $this->getServiceLocator()->get('Omeka\\File\\Manager');
        $file = $this->getServiceLocator()->get('Omeka\\File');
        $this->downloadFile($mediaData['thumbnail_url'], $file->getTempPath());
        $hasThumbnails = $fileManager->storeThumbnails($file);
        if ($hasThumbnails) {
            $media->setFilename($file->getStorageName());
            $media->setHasThumbnails(true);
        }
    }

    $media->setData($mediaData);
    $media->setSource($source);
}
/**
 * Try to discover an oEmbed endpoint advertised by the page at the given URL.
 *
 * $_feedUrl is reset to false before anything else happens. The page is then
 * fetched and searched for <link> elements carrying an href and an oEmbed
 * type, JSON first and XML second. On the first match $_feedFormat and
 * $_feedUrl are set and the method returns. If nothing matches, the response
 * is not successful, or any exception is thrown, $_feedUrl stays false and
 * $_feedFormat is left untouched.
 *
 * The method itself returns nothing; callers inspect $_feedUrl.
 *
 * @param string|false $url
 */
private function discover($url)
{
    static $paths = array(
        Oembed::FEED_FORMAT_JSON => '//link[@href][@type=\'application/json+oembed\']',
        Oembed::FEED_FORMAT_XML => '//link[@href][@type=\'text/xml+oembed\']',
    );

    $this->_feedUrl = false;

    try {
        $client = $this->getServiceLocator()->get('Zend\\Http\\Client');
        $pending = $client->setUri($url);
        $response = $pending->send();

        if (!$response || !$response->isSuccess()) {
            return;
        }

        $document = new Query($response->getBody());
        foreach ($paths as $format => $xpath) {
            $links = $document->queryXpath($xpath);
            foreach ($links as $link) {
                // First hit wins: record it and stop searching.
                $this->_feedFormat = $format;
                $this->_feedUrl = $link->getAttribute('href');
                return;
            }
        }
    } catch (\Exception $ex) {
        // Discovery is best-effort: failures simply leave $_feedUrl as false.
    }
}
/**
 * Run a query against the content of the current response.
 *
 * The response content is wrapped in a Dom\Query. When $useXpath is false the
 * path is handed to execute(); when it is true, the namespaces held in
 * $this->xpathNamespaces are registered first and the path is handed to
 * queryXpath().
 *
 * @param string $path Query expression
 * @param boolean $useXpath Whether $path is evaluated through queryXpath()
 * @return array Result of the Dom\Query call (NOTE(review): declared as array
 *               by the original annotation; confirm against the Dom\Query
 *               version in use)
 */
private function query($path, $useXpath = false)
{
    $content = $this->getResponse()->getContent();
    $document = new Dom\Query($content);

    if (!$useXpath) {
        return $document->execute($path);
    }

    $document->registerXpathNamespaces($this->xpathNamespaces);

    return $document->queryXpath($path);
}
/**
 * Extract movie names, years and thumbnail image sources from $this->_html
 * and print them.
 *
 * Three XPath queries are run against the document; the results are combined
 * position by position, using the number of name nodes as the item count. The
 * count is echoed, followed by a <pre>-wrapped print_r() dump of the result.
 *
 * @return bool Always false (presumably to skip view rendering - confirm
 *              against the controller base class)
 */
public function index04Action()
{
    $dom = new Query($this->_html, 'UTF-8');

    // Get the movie names
    $nameNodes = $dom->queryXpath('//div[@class="caption"]/h3');
    // These expressions are XPath, so they must go through queryXpath();
    // execute() is the CSS-selector entry point and would mangle them.
    $imageNodes = $dom->queryXpath('//div[@class="thumbnail"]/img');
    $yearNodes = $dom->queryXpath('//div[@class="caption"]/p');

    $result = array();

    // Determine the number of elements in the list
    echo $totalItem = $nameNodes->count();

    for ($i = 0; $i < $totalItem; $i++) {
        $result[$i]['name'] = $nameNodes->current()->nodeValue;
        // The year/image lists may be shorter than the name list; use null
        // instead of dereferencing a missing node.
        $result[$i]['year'] = $yearNodes->valid()
            ? $yearNodes->current()->nodeValue
            : null;
        $result[$i]['image'] = $imageNodes->valid()
            ? $imageNodes->current()->getAttribute('src')
            : null;

        $nameNodes->next();
        $yearNodes->next();
        $imageNodes->next();
    }

    echo '<pre>';
    print_r($result);
    echo '</pre>';

    return false;
}
/**
 * Look up navigation data for a page in the index.html found under $path.
 *
 * The anchor whose href equals $currentPage is located in the index. With
 * $getHref set to true, the href of the first link in the same list is
 * returned. Otherwise the text of the heading above that list is returned,
 * trying an h3 first and an h2 second, with any pilcrow character removed.
 *
 * @param string $currentPage Value matched against anchor href attributes
 * @param string $path Directory prefix; 'index.html' is appended to it
 * @param bool $getHref True to return a link href, anything else for a headline
 *
 * @return string|null Null when nothing matching is found
 */
protected function getV2SelectedPage($currentPage, $path, $getHref = true)
{
    $index = new DomQuery(file_get_contents($path . 'index.html'));

    if (true !== $getHref) {
        // Headline (component name): try the h3 variant before the h2 variant
        $headlineFormats = array(
            '//a[@href = "%s"]/parent::li/parent::ul/parent::div/parent::blockquote/parent::div/h3',
            '//a[@href = "%s"]/parent::li/parent::ul/parent::div/parent::blockquote/parent::div/h2',
        );

        foreach ($headlineFormats as $format) {
            $matches = $index->queryXpath(sprintf($format, $currentPage));
            if ($matches->count()) {
                return str_replace('¶', '', $matches->current()->nodeValue);
            }
        }

        return null;
    }

    // First link of the list that contains the current page
    $firstLinks = $index->queryXpath(
        sprintf('//a[@href = "%s"]/parent::li/parent::ul/li[1]/a', $currentPage)
    );
    if (!$firstLinks->count()) {
        return null;
    }

    $firstLink = $firstLinks->current();
    if (!$firstLink->hasAttribute('href')) {
        return null;
    }

    return $firstLink->getAttribute('href');
}
/**
 * Build a Response from the HTML source of a page.
 *
 * Collected from the document:
 *  - meta tags having a name or property attribute, keyed by the lower-cased
 *    name (falling back to property) with the content attribute as value;
 *  - the text of every h1 to h5 element, stored per level on the heading tags;
 *  - the text of the first title element, if there is one;
 *  - the unique src values of all img elements;
 *  - the unique href values of all anchors, skipping anchors without an href
 *    and hrefs starting with '#' or 'javascript'.
 *
 * @param string $source HTML passed to DomQuery::setDocumentHtml()
 * @return Response
 */
public static function fromPageSource($source)
{
    $response = new Response();
    $document = new DomQuery();
    $document->setDocumentHtml($source);

    // Meta tags
    $metaByName = array();
    foreach ($document->queryXpath('//meta') as $meta) {
        if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) {
            $key = $meta->getAttribute('name');
            if (!$key) {
                $key = $meta->getAttribute('property');
            }
            $metaByName[strtolower($key)] = $meta->getAttribute('content');
        }
    }
    $response->getMeta()->exchangeArray($metaByName);

    // Headings: gather every level first, then store them in level order
    $headingTags = $response->getHeadingTags();
    $headingQueries = array(
        'h1' => '//h1',
        'h2' => '//h2',
        'h3' => '//h3',
        'h4' => '//h4',
        'h5' => '//h5',
    );
    $headingTexts = array();
    foreach ($headingQueries as $level => $xpath) {
        $texts = array();
        foreach ($document->queryXpath($xpath) as $heading) {
            $texts[] = $heading->textContent;
        }
        $headingTexts[$level] = $texts;
    }
    foreach ($headingTexts as $level => $texts) {
        $headingTags->offsetSet($level, $texts);
    }

    // Title
    $titleNode = $document->queryXpath('//title')->current();
    if ($titleNode) {
        $response->title = $titleNode->textContent;
    }

    // Images
    $sources = array();
    foreach ($document->queryXpath('//img') as $image) {
        $sources[] = $image->getAttribute('src');
    }
    $response->getImages()->exchangeArray(array_unique($sources));

    // Links
    $hrefs = array();
    foreach ($document->queryXpath('//a') as $anchor) {
        if ($anchor->hasAttribute('href')) {
            $href = $anchor->getAttribute('href');
            $isFragment = preg_match('/^#/', $href);
            $isScript = preg_match('/^javascript/', $href);
            if (!$isFragment && !$isScript) {
                $hrefs[] = $href;
            }
        }
    }
    $response->links = array_unique($hrefs);

    return $response;
}