/**
 * Collects the OpenGraph properties exposed by the page at the given URL.
 *
 * The page is fetched through the HTTP client, and the DOM helper is asked
 * for the "property" and "content" attributes of its meta tags, using the
 * pattern #^og:.+#i for "property". When a property shows up several times,
 * only its first value is kept.
 *
 * @param string $url URL to fetch informations from.
 * @return array Trimmed "content" values, indexed by "property" name.
 * @throws Exception If the DOM helper yields no meta data for the page.
 */
protected function _extractInformations($url) {
    $html = $this->_Http->get($url);
    $filter = ['meta' => ['property' => '#^og:.+#i', 'content']];
    $attributes = $this->_Dom->extractAttributes($html, $filter);

    // Bail out early: without meta data there is nothing to collect.
    if (empty($attributes['meta'])) {
        throw new Exception("Unable to extract OpenGraph data from '{$url}'.");
    }

    $og = [];

    foreach ($attributes['meta'] as $meta) {
        $property = $meta['property'];

        // First occurrence wins; later duplicates are ignored.
        if (isset($og[$property])) {
            continue;
        }

        $og[$property] = trim($meta['content']);
    }

    return $og;
}
/**
 * Extracts an oEmbed endpoint from the given URL.
 *
 * The page is fetched through the HTTP client, and the DOM helper is asked
 * for the "rel", "type" and "href" attributes of its link tags, using the
 * pattern #alternate#i for "rel". The first link whose "type" mentions
 * "json" or "xml" provides the endpoint.
 *
 * @param string $url URL from which to extract an endpoint.
 * @param string $endpoint The extracted endpoint ("href" of the matching
 *     link). Left untouched when no endpoint is found.
 * @param string $format The extracted format, lowercased ("json" or "xml").
 *     Left untouched when no endpoint is found.
 * @return boolean If an endpoint was extracted.
 */
protected function _extractEndpoint($url, &$endpoint, &$format) {
    $attributes = $this->_Dom->extractAttributes(
        $this->_Http->get($url),
        ['link' => ['rel' => '#alternate#i', 'type', 'href']]
    );

    // A page without any matching link tag simply has no endpoint; avoid
    // iterating over a missing or empty entry.
    if (empty($attributes['link'])) {
        return false;
    }

    foreach ($attributes['link'] as $link) {
        if (preg_match('#(?<format>json|xml)#i', $link['type'], $matches)) {
            $endpoint = $link['href'];
            // The pattern is case-insensitive, so normalize the captured
            // value to get a stable format whatever the page's casing is.
            $format = strtolower($matches['format']);

            return true;
        }
    }

    return false;
}
/**
 * Collects the URLs referenced by an HTML source.
 *
 * The DOM helper is asked for the "href" attribute of "a" tags and the
 * "src" attribute of "embed" and "iframe" tags. Results are appended tag
 * type by tag type, in that order.
 *
 * Parsing is best-effort: if the DOM helper throws, the failure is logged
 * as a notice and an empty array is returned.
 *
 * @param string $html The HTML source to extract URLs from.
 * @return array Extracted URLs.
 */
protected function _extractUrls($html) {
    // Maps each tag name to the attribute holding its URL.
    $sources = ['a' => 'href', 'embed' => 'src', 'iframe' => 'src'];

    try {
        $found = $this->_Dom->extractAttributes($html, $sources);
    } catch (Exception $Failure) {
        $context = ['exception' => $Failure, 'html' => $html];
        $this->_Logger->log(Logger::notice, 'Error parsing HTML source', $context);

        return [];
    }

    $urls = [];

    foreach ($sources as $name => $attribute) {
        $tags = $found[$name];

        foreach ($tags as $tag) {
            $urls[] = $tag[$attribute];
        }
    }

    return $urls;
}