/**
 * Resolve a display title for a URL by fetching it and reading its <title> element.
 *
 * The URL's string form is returned as a fallback whenever a title cannot be
 * obtained: the URL is not HTTP, the request is not successful, the response
 * is not text/html, the body is empty or unparsable, or no non-empty <title>
 * is present.
 *
 * @param Url $url URL to look up.
 *
 * @return string The trimmed text of the first <title> element, or the URL string.
 */
protected function _resolveTitle(Url $url)
{
    if (!$url->isHttp()) {
        return $url->toString();
    }

    // NOTE(review): 5 is presumably a timeout in seconds — confirm against Http.
    $http = new Http(5);
    $res = $http->get($url);
    if (!$res->isSuccess()) {
        return $url->toString();
    }

    // From here on use the URL reported by the response (presumably the final
    // location after redirects — confirm against Http) for every fallback.
    $url = $res->url();

    // Cast guards against a missing header: passing null to preg_match is deprecated.
    if (!preg_match('/^text\\/html/i', (string) $res->header('content-type'))) {
        return $url->toString();
    }

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
    // warning on PHP 7), so treat an empty body as "no title".
    $body = (string) $res->body();
    if ($body === '') {
        return $url->toString();
    }

    $doc = new DOMDocument();

    // Silence markup warnings while parsing, then put libxml back exactly as
    // we found it instead of unconditionally switching internal errors off.
    $previous = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML($body);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
    if (!$loaded) {
        return $url->toString();
    }

    $xpath = new DOMXPath($doc);
    $els = $xpath->query('//title');
    if (!$els || !$els->length) {
        return $url->toString();
    }

    // Titles are commonly padded with newlines/indentation; an empty or
    // whitespace-only title is no more useful than a missing one.
    $title = trim($els->item(0)->textContent);
    if ($title === '') {
        return $url->toString();
    }

    return $title;
}
/**
 * Determine the canonical form of this URL by fetching it and inspecting the page.
 *
 * Works on a clone, so this instance is not modified. For a non-HTTP URL or an
 * unsuccessful request the clone is returned unchanged. Otherwise the URL
 * reported by the response is used, and, when the response is a parsable
 * text/html document, it is replaced by the first non-empty value found in
 * <link rel="canonical" href="..."> or, failing that,
 * <meta property="og:url" content="...">.
 *
 * @return Url The canonical URL, or the best available fallback described above.
 */
public function toCanonical()
{
    $url = clone $this;
    if (!$url->isHttp()) {
        return $url;
    }

    // NOTE(review): 5 is presumably a timeout in seconds — confirm against Http.
    $http = new Http(5);
    $res = $http->get($url);
    if (!$res->isSuccess()) {
        return $url;
    }

    // Prefer the URL reported by the response (presumably the final location
    // after redirects — confirm against Http) over the one we started with.
    $url = $res->url();

    // Cast guards against a missing header: passing null to preg_match is deprecated.
    if (!preg_match('/^text\\/html/i', (string) $res->header('content-type'))) {
        return $url;
    }

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
    // warning on PHP 7), so there is nothing to inspect in an empty body.
    $body = (string) $res->body();
    if ($body === '') {
        return $url;
    }

    $doc = new DOMDocument();

    // Silence markup warnings while parsing, then put libxml back exactly as
    // we found it instead of unconditionally switching internal errors off.
    $previous = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML($body);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }
    if (!$loaded) {
        return $url;
    }

    // Candidate sources in priority order: [XPath query, attribute holding the URL].
    $types = [
        ['//link[@rel="canonical"]', 'href'],
        ['//meta[@property="og:url"]', 'content'],
    ];

    $xpath = new DOMXPath($doc);
    foreach ($types as $type) {
        $els = $xpath->query($type[0]);
        if (!$els || !$els->length) {
            continue;
        }

        // A tag with a missing or blank attribute carries no URL; fall through
        // to the next candidate rather than building a Url from an empty string.
        $value = trim($els->item(0)->getAttribute($type[1]));
        if ($value === '') {
            continue;
        }

        $url = new Url($value);
        break;
    }

    return $url;
}