Example #1
0
 /**
  * Resolves a display title for a URL by fetching it and reading the first
  * <title> element of the returned HTML document.
  *
  * The string form of the URL is returned as a fallback whenever a title
  * cannot be obtained: non-HTTP URL, unsuccessful request, non-HTML content
  * type, empty or unparsable body, or a missing/blank <title>.
  *
  * @param Url $url URL to resolve a title for.
  *
  * @return string The trimmed page title, or the URL as a string.
  */
 protected function _resolveTitle(Url $url)
 {
     if (!$url->isHttp()) {
         return $url->toString();
     }

     // NOTE(review): the constructor argument is presumably a timeout in seconds — confirm against Http.
     $http = new Http(5);
     $res = $http->get($url);
     if (!$res->isSuccess()) {
         return $url->toString();
     }

     // From here on, fall back to the URL reported by the response
     // (presumably the final URL after redirects) rather than the input.
     $url = $res->url();

     // Cast guards against a missing header being passed to preg_match() as null.
     if (!preg_match('/^text\\/html/i', (string) $res->header('content-type'))) {
         return $url->toString();
     }

     // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8,
     // warning on PHP 7), so bail out before parsing.
     $body = (string) $res->body();
     if ($body === '') {
         return $url->toString();
     }

     $doc = new DOMDocument();
     // Buffer parser diagnostics for sloppy real-world HTML, then discard them
     // and restore whatever error mode the caller had configured.
     $previous = libxml_use_internal_errors(true);
     $result = $doc->loadHTML($body);
     libxml_clear_errors();
     libxml_use_internal_errors($previous);
     if (!$result) {
         return $url->toString();
     }

     $xpath = new DOMXPath($doc);
     $els = $xpath->query('//title');
     if (!$els->length) {
         return $url->toString();
     }

     // Titles are frequently padded with newlines/indentation; a blank title
     // is treated the same as a missing one.
     $title = trim($els->item(0)->textContent);
     if ($title === '') {
         return $url->toString();
     }

     return $title;
 }
Example #2
0
 /**
  * Returns the canonical form of this URL as advertised by the page itself.
  *
  * The URL is fetched; if the response is an HTML document, the first
  * non-empty <link rel="canonical"> href is used, falling back to the first
  * non-empty <meta property="og:url"> content. When neither is present the
  * URL reported by the response is returned. For non-HTTP URLs or failed
  * requests an unmodified clone of this URL is returned.
  *
  * NOTE(review): a relative canonical href is passed to Url as-is and is not
  * resolved against the response URL — confirm whether Url handles that.
  *
  * @return Url A new Url instance; $this is never modified.
  */
 public function toCanonical()
 {
     $url = clone $this;
     if (!$url->isHttp()) {
         return $url;
     }

     // NOTE(review): the constructor argument is presumably a timeout in seconds — confirm against Http.
     $http = new Http(5);
     $res = $http->get($url);
     if (!$res->isSuccess()) {
         return $url;
     }

     // Prefer the URL reported by the response (presumably post-redirect).
     $url = $res->url();

     // Cast guards against a missing header being passed to preg_match() as null.
     if (!preg_match('/^text\\/html/i', (string) $res->header('content-type'))) {
         return $url;
     }

     // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8,
     // warning on PHP 7), so bail out before parsing.
     $body = (string) $res->body();
     if ($body === '') {
         return $url;
     }

     $doc = new DOMDocument();
     // Buffer parser diagnostics for sloppy real-world HTML, then discard them
     // and restore whatever error mode the caller had configured.
     $previous = libxml_use_internal_errors(true);
     $result = $doc->loadHTML($body);
     libxml_clear_errors();
     libxml_use_internal_errors($previous);
     if (!$result) {
         return $url;
     }

     // Candidate sources in priority order: [XPath query, attribute holding the URL].
     $types = [['//link[@rel="canonical"]', 'href'], ['//meta[@property="og:url"]', 'content']];
     $xpath = new DOMXPath($doc);
     foreach ($types as $type) {
         foreach ($xpath->query($type[0]) as $el) {
             // Skip elements whose attribute is missing or blank so that an
             // empty canonical link does not mask a usable og:url.
             $value = trim($el->getAttribute($type[1]));
             if ($value !== '') {
                 $url = new Url($value);
                 break 2;
             }
         }
     }

     return $url;
 }