/**
 * Creates a WebpageAnalyzer object.
 *
 * Derives the base URL (scheme and host) and the local URL (the given URL
 * without its last path segment) from $url, loads the page through
 * HtmlExplorer::fromWeb() and, if the document contains a <base> tag with a
 * non-empty href, uses that href as base URL instead.
 *
 * @param string $url The URL of the webpage
 * @param \FlameCore\Webtools\HttpClient $http The HttpClient instance to use.
 *   A new HttpClient is created when none is given.
 * @throws \RuntimeException if the URL could not be loaded.
 */
public function __construct($url, HttpClient $http = null)
{
    $this->url = $url;

    // Keep only "scheme://host". The host ends at the first "/", "?" or "#",
    // and the remainder is optional: requiring at least one trailing character
    // would make the regex backtrack into the host for a path-less URL such as
    // "http://example.com" and cut off its last character.
    $this->baseUrl = preg_replace('~^(https?://[^/?#]+).*$~', '\\1', $url);

    // Strip the last path segment; URLs without one are left untouched.
    $this->localUrl = preg_replace('#^(https?://.+)/[^/]+$#', '\\1', $url);

    $http = $http ?: new HttpClient();
    $html = HtmlExplorer::fromWeb($url, $http);

    // A <base href="..."> in the document overrides the URL-derived base.
    $node = $html->findFirstTag('base');
    if ($node && ($href = $node->getAttribute('href'))) {
        $this->baseUrl = trim($href, ' /');
    }

    $this->http = $http;
    $this->html = $html;
}
/**
 * Tries to fetch data for the given content URL using discovery.
 *
 * Loads the page at $url and scans its <link> tags for one whose rel is
 * "alternate" or "alternative" and whose type is "<format>+oembed", where
 * <format> is one of the keys of $this->formats. The href of each matching
 * link is queried through queryProvider(); the first successful response is
 * passed to the parser registered for that format.
 *
 * @param string $url The content URL
 * @param array $args An array of optional extra arguments
 * @return object|false Returns the request data as an object, or FALSE on failure.
 */
public function discover($url, array $args = [])
{
    $html = HtmlExplorer::fromWeb(trim($url), $this->http);

    // Format names are inserted into a regex, so escape them (including the
    // "#" delimiter) to make sure they are matched literally.
    $formatNames = array_map(
        function ($name) {
            return preg_quote((string) $name, '#');
        },
        array_keys($this->formats)
    );
    $typePattern = '#^(' . implode('|', $formatNames) . ')\\+oembed$#';

    foreach ($html->findTags('link') as $node) {
        $rel = strtolower(trim($node->getAttribute('rel')));
        if (!in_array($rel, ['alternate', 'alternative'], true)) {
            continue;
        }

        $type = strtolower(trim($node->getAttribute('type')));
        if (!preg_match($typePattern, $type, $matches)) {
            continue;
        }

        // A discovery link without a target cannot be queried; skip it
        // instead of sending a request for an empty URL.
        $endpoint = trim($node->getAttribute('href'));
        if ($endpoint === '') {
            continue;
        }

        $result = $this->queryProvider($endpoint, null, null, $args);
        if ($result->success) {
            // $matches[1] is the format name captured from the type attribute.
            $parser = $this->formats[$matches[1]];

            return $parser($result->data);
        }
    }

    return false;
}