/**
 * Extracts the page title from an HTML document.
 *
 * The "title" meta content is preferred; when it is missing or empty the
 * contents of the first <title> element in the raw HTML are used instead.
 *
 * @param HtmlDocument $document HTML document to scan for title.
 * @return string|null The title, or null when neither source provides one.
 */
protected function getTitle(HtmlDocument $document)
{
    $title = $document->getMetaContent('title');

    // Guard against non-string results (e.g. null) before treating it as text.
    if (is_string($title) && $title !== '') {
        return $title;
    }

    // Tolerate attributes on the opening tag (<title id="x">) and whitespace
    // before the closing bracket (</title >); "s" lets the title span lines.
    if (preg_match('/<title(?:\s[^>]*)?>(.+?)<\\/title\s*>/is', $document->getHtml(), $matches) === 1) {
        return $matches[1];
    }

    return null;
}
/**
 * Looks for an oEmbed discovery <link> element in the document.
 *
 * Every <link> tag containing the word "alternate" is inspected. The first
 * one that mentions the JSON or XML oEmbed type and carries a quoted href
 * attribute wins: its format is stored in $this->metadataType ('json' or
 * 'xml') and its href in $this->metadataUrl.
 *
 * @param HtmlDocument $document
 * @return bool True when a discovery link was found and recorded, false otherwise.
 */
protected function detectOembedLink(HtmlDocument $document)
{
    // [^>]* keeps each match inside a single tag. A dot-based pattern could
    // start at an earlier <link> (e.g. a stylesheet) on the same line and
    // swallow the following tags, yielding the wrong href. It also lets the
    // tag span several lines; "i" accepts upper-case markup.
    preg_match_all('/<link\b[^>]*\balternate\b[^>]*>/i', $document->getHtml(), $linkElements);

    foreach ($linkElements[0] as $linkElement) {
        $typeJson = strpos($linkElement, static::OEMBED_TYPE_JSON) !== false;
        $typeXml = strpos($linkElement, static::OEMBED_TYPE_XML) !== false;

        if (!$typeJson && !$typeXml) {
            continue;
        }

        // The backreference requires the closing quote to match the opening
        // one; the lookbehind avoids matching attributes such as data-href.
        if (preg_match('/(?<![\w-])href\s*=\s*([\'"])(.+?)\1/i', $linkElement, $attributes)) {
            // JSON takes precedence when a tag advertises both types.
            $this->metadataType = $typeJson ? 'json' : 'xml';
            $this->metadataUrl = $attributes[2];

            return true;
        }
    }

    return false;
}
/**
 * Builds a DOMDocument from the given HTML document and stores it in $this->dom.
 *
 * The markup is prefixed with an XML encoding declaration built from the
 * document's encoding before being handed to DOMDocument::loadHTML().
 *
 * @param HtmlDocument $document
 * @return bool False when the DOM extension is unavailable, otherwise the
 *              result of DOMDocument::loadHTML().
 */
protected function initializeDom(HtmlDocument $document)
{
    if (!class_exists('DOMDocument')) {
        return false;
    }

    $this->dom = new \DOMDocument();

    // Prevents parsing errors bubbling up as PHP warnings while loading.
    $previousSetting = libxml_use_internal_errors(true);

    $result = $this->dom->loadHTML(
        '<?xml encoding="' . $document->getEncoding() . '">' . $document->getHtml()
    );

    // Drop the buffered parse errors so they do not pile up across calls, and
    // hand the process-wide libxml setting back the way we found it.
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    return $result;
}