示例#1
0
 /**
  * Creates a WebpageAnalyzer object.
  *
  * Loads the page at $url through HtmlExplorer and stores three URLs on the instance:
  * the URL as given, the base URL (scheme and host, replaced by the page's <base href>
  * value when one is present) and the local URL (the URL without its last path segment).
  *
  * @param string $url The URL of the webpage
  * @param \FlameCore\Webtools\HttpClient $http The HttpClient instance to use; a new one is created when omitted
  * @throws \RuntimeException if the URL could not be loaded.
  */
 public function __construct($url, HttpClient $http = null)
 {
     $this->url = $url;

     // Scheme and host only. The remainder is matched with `.*` (not `.+`) so that a URL
     // without any path, e.g. "http://example.com", keeps its complete host name instead of
     // giving up its last character to regex backtracking.
     $this->baseUrl = preg_replace('#^(https?://[^/]+).*$#', '\\1', $url);

     // Strip a trailing "/segment"; URLs that do not end in such a segment are kept unchanged.
     $this->localUrl = preg_replace('#^(https?://.+)/[^/]+$#', '\\1', $url);

     $http = $http ?: new HttpClient();
     $html = HtmlExplorer::fromWeb($url, $http);

     // A non-empty <base href="..."> in the document overrides the URL-derived base.
     $node = $html->findFirstTag('base');
     if ($node && ($href = $node->getAttribute('href'))) {
         $this->baseUrl = trim($href, ' /');
     }

     $this->http = $http;
     $this->html = $html;
 }
示例#2
0
 /**
  * Tries to fetch data for the given content URL using discovery.
  *
  * Loads the page at $url and inspects its <link> tags. A link qualifies when its rel
  * attribute contains the token "alternate" (or "alternative") and its type is
  * "<format>+oembed" for one of the keys of $this->formats. Qualifying links are queried
  * in the order findTags() returns them; the first successful response is passed to the
  * parser registered for the matched format.
  *
  * @param string $url The content URL
  * @param array $args An array of optional extra arguments
  * @return object|false Returns the request data as an object, or FALSE on failure.
  */
 public function discover($url, array $args = [])
 {
     $url = trim($url);
     $html = HtmlExplorer::fromWeb($url, $this->http);

     // Format names are meant literally: quote regex metacharacters and the '#' delimiter.
     $quotedFormats = array_map(function ($format) {
         return preg_quote((string) $format, '#');
     }, array_keys($this->formats));
     $typePattern = '#^(' . implode('|', $quotedFormats) . ')\\+oembed$#';

     foreach ($html->findTags('link') as $node) {
         // rel holds a space-separated token list in HTML (e.g. rel="alternate nofollow"),
         // so test the individual tokens rather than the whole attribute value.
         $relTokens = preg_split('/\\s+/', strtolower($node->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
         if (!$relTokens || !array_intersect(['alternate', 'alternative'], $relTokens)) {
             continue;
         }

         $type = strtolower(trim($node->getAttribute('type')));
         if (!preg_match($typePattern, $type, $matches)) {
             continue;
         }

         // Kept in its own variable so the $url parameter is not overwritten.
         $endpoint = trim($node->getAttribute('href'));
         if ($endpoint === '') {
             // A discovery link without a target cannot be queried; try the next one.
             continue;
         }

         $result = $this->queryProvider($endpoint, null, null, $args);
         if ($result->success) {
             $parser = $this->formats[$matches[1]];
             return $parser($result->data);
         }
     }

     return false;
 }