/**
 * Fills in missing document fields from the document's OpenGraph metadata.
 *
 * Title, description, image and the SITE_NAME extra field are only set when
 * the document has no value for them yet, so existing data is never
 * overwritten. The FAVICON extra field is filled from the "icon" link in the
 * same pass.
 *
 * @param HtmlDocument $document HTML document to be parsed.
 * @return void
 */
public function handle(HtmlDocument $document)
{
    // Values are cast to string before the emptiness checks instead of being
    // handed to strlen(): should a getter return null, strlen(null) is
    // deprecated since PHP 8.1 and a TypeError under strict_types. The cast
    // gives the same answer for every string/scalar value.
    if ((string)$document->getTitle() === '') {
        $ogTitle = $document->getMetaContent('og:title');
        if ((string)$ogTitle !== '') {
            $document->setTitle($ogTitle);
        }
    }

    if ((string)$document->getDescription() === '') {
        $ogDescription = $document->getMetaContent('og:description');
        if ((string)$ogDescription !== '') {
            $document->setDescription($ogDescription);
        }
    }

    if ((string)$document->getImage() === '') {
        // Prefer the secure image URL and fall back to the plain one.
        $ogImage = $document->getMetaContent('og:image:secure_url')
            ?: $document->getMetaContent('og:image');
        if ((string)$ogImage !== '') {
            $document->setImage($ogImage);
        }
    }

    if (!$document->getExtraField('SITE_NAME')) {
        $ogSiteName = $document->getMetaContent('og:site_name');
        if ((string)$ogSiteName !== '') {
            $document->setExtraField('SITE_NAME', $ogSiteName);
        }
    }

    /*
     * Not really an OpenGraph property, but it is handled in the OpenGraph
     * parser to avoid executing the full parser chain just to get the favicon.
     */
    if (!$document->getExtraField('FAVICON')) {
        $favicon = $document->getLinkHref('icon');
        if ($favicon) {
            $document->setExtraField('FAVICON', $favicon);
        }
    }
}
/**
 * Extracts the page title from the document.
 *
 * The "title" meta content is preferred; when it is empty, the contents of
 * the first <title> element found in the raw HTML are returned instead.
 *
 * @param HtmlDocument $document HTML document to scan for title.
 * @return string|null Title text, or null when no title could be found.
 */
protected function getTitle(HtmlDocument $document)
{
    $title = $document->getMetaContent('title');
    // Cast instead of strlen(): should the getter return null, strlen(null)
    // is deprecated since PHP 8.1 and a TypeError under strict_types.
    if ((string)$title !== '') {
        return $title;
    }

    // "[^>]*" accepts attributes on the opening tag (e.g. <title id="t">),
    // and "\b" keeps longer tag names such as <titlebar> from matching.
    // The subject is cast so a null HTML body is not passed to preg_match().
    $found = preg_match(
        '/<title\b[^>]*>(.+?)<\/title>/mis',
        (string)$document->getHtml(),
        $matches
    );

    return ($found === 1 && isset($matches[1])) ? $matches[1] : null;
}