Esempio n. 1
0
 /**
  * Looks up the preview image URL published under the "image_src" relation.
  *
  * The href of the document's "image_src" link is preferred. When that is
  * empty, the entries of $this->imgElements are scanned for the first one
  * whose "rel" attribute equals "image_src" and which carries a "src".
  *
  * @param HtmlDocument $document
  * @return string|null Image URL, or null when no candidate was found.
  */
 protected function getImage(HtmlDocument $document)
 {
     $result = $document->getLinkHref('image_src');
     if (strlen($result) > 0) {
         return $result;
     }
     foreach ($this->imgElements as $imgElement) {
         // Only elements explicitly tagged with rel="image_src" qualify.
         if (!isset($imgElement['rel']) || $imgElement['rel'] !== 'image_src') {
             continue;
         }
         // A tagged element without a src attribute cannot provide a URL;
         // keep scanning instead of reading an undefined array key.
         if (isset($imgElement['src'])) {
             return $imgElement['src'];
         }
     }
     return null;
 }
Esempio n. 2
0
 /**
  * Runs the parser chain selected for the document's URI against $document.
  *
  * Each class name in the chain that exists is instantiated; instances of
  * \Bitrix\Main\UrlPreview\Parser get to handle the document. The walk ends
  * as soon as $document->checkMetadata() reports true.
  *
  * @param HtmlDocument $document
  */
 public static function extractMetadata(HtmlDocument $document)
 {
     $chain = static::getParserChain($document->getUri());
     foreach ($chain as $parserClassName) {
         if (class_exists($parserClassName)) {
             /** @var \Bitrix\Main\UrlPreview\Parser $parser */
             $parser = new $parserClassName();
             if ($parser instanceof \Bitrix\Main\UrlPreview\Parser) {
                 $parser->handle($document);
             }
         }
         // Checked after every chain entry, whether or not a parser ran.
         if ($document->checkMetadata()) {
             return;
         }
     }
 }
Esempio n. 3
0
 /**
  * Searches the document's HTML for an oEmbed discovery <link> tag.
  *
  * On success the endpoint format ('json' or 'xml') is stored in
  * $this->metadataType and the tag's href in $this->metadataUrl.
  *
  * @param HtmlDocument $document
  * @return bool True when a link with an oEmbed type and an href was found.
  */
 protected function detectOembedLink(HtmlDocument $document)
 {
     // [^>] keeps each match inside a single tag. With "." a match could run
     // from an unrelated <link> into a later tag on the same line (yielding the
     // wrong href), and tags split across several lines were never matched.
     // Tag and attribute names are case-insensitive in HTML, hence the "i" flag.
     preg_match_all('/<link\b[^>]+?alternate[^>]*>/i', $document->getHtml(), $linkElements);
     foreach ($linkElements[0] as $linkElement) {
         $typeJson = stripos($linkElement, static::OEMBED_TYPE_JSON) !== false;
         $typeXml = stripos($linkElement, static::OEMBED_TYPE_XML) !== false;
         if (!$typeJson && !$typeXml) {
             continue;
         }
         if (preg_match('/href=[\'"](.+?)[\'"]/i', $linkElement, $attributes)) {
             // JSON takes precedence when a tag mentions both types.
             $this->metadataType = $typeJson ? 'json' : 'xml';
             $this->metadataUrl = $attributes[1];
             return true;
         }
     }
     return false;
 }
Esempio n. 4
0
 /**
  * Builds $this->dom from the document's HTML.
  *
  * An XML encoding declaration built from $document->getEncoding() is
  * prepended to the markup before it is passed to DOMDocument::loadHTML().
  *
  * @param HtmlDocument $document
  * @return bool False when the DOM extension is unavailable or loading failed.
  */
 protected function initializeDom(HtmlDocument $document)
 {
     if (!class_exists('DOMDocument')) {
         return false;
     }
     $this->dom = new \DOMDocument();
     // Collect markup errors internally so they do not surface as PHP warnings.
     $previousErrorMode = libxml_use_internal_errors(true);
     $result = $this->dom->loadHTML('<?xml encoding="' . $document->getEncoding() . '">' . $document->getHtml());
     // Drop the collected errors so the libxml buffer does not grow with every
     // parsed page, and give the process-wide setting back to the caller.
     libxml_clear_errors();
     libxml_use_internal_errors($previousErrorMode);
     return $result;
 }
Esempio n. 5
0
 /**
  * Downloads the given URL and builds preview metadata from the response.
  *
  * Returns false when the request fails or the status is not 200. Responses
  * whose content type is not "text/html" are delegated to getFileMetadata().
  * HTML responses go through ParserChain::extractMetadata(); the resulting
  * metadata is returned only if it is an array accepted by
  * validateRemoteMetadata(). When image saving is enabled, the 'IMAGE' entry
  * is replaced by 'IMAGE_ID' holding the result of saveImage().
  *
  * @param Uri $uri Absolute URL to get metadata for.
  * @return array|false
  */
 protected static function getRemoteUrlMetadata(Uri $uri)
 {
     $client = new HttpClient();
     $client->setTimeout(5);
     $client->setStreamTimeout(5);

     $requestSucceeded = $client->query('GET', $uri->getUri());
     if (!$requestSucceeded || $client->getStatus() !== 200) {
         return false;
     }

     $contentType = strtolower($client->getHeaders()->getContentType());
     if ($contentType !== 'text/html') {
         // Not an HTML page: describe the resource from its URL and headers.
         return static::getFileMetadata($client->getEffectiveUrl(), $client->getHeaders());
     }

     $page = new HtmlDocument($client->getResult(), $uri);
     $page->setEncoding($client->getCharset());
     ParserChain::extractMetadata($page);

     $metadata = $page->getMetadata();
     if (!is_array($metadata) || !static::validateRemoteMetadata($metadata)) {
         return false;
     }

     if (isset($metadata['IMAGE']) && static::getOptionSaveImages()) {
         $metadata['IMAGE_ID'] = static::saveImage($metadata['IMAGE']);
         unset($metadata['IMAGE']);
     }

     return $metadata;
 }
Esempio n. 6
0
 /**
  * Fills missing document fields from OpenGraph meta tags.
  *
  * Title, description, image and the SITE_NAME extra field are only set
  * when the document does not have them yet and the matching og:* tag is
  * non-empty. The FAVICON extra field is filled from the "icon" link.
  *
  * @param HtmlDocument $document HTML document to be parsed.
  * @return void
  */
 public function handle(HtmlDocument $document)
 {
     if (!strlen($document->getTitle())) {
         $title = $document->getMetaContent('og:title');
         if (strlen($title) !== 0) {
             $document->setTitle($title);
         }
     }

     if (!strlen($document->getDescription())) {
         $description = $document->getMetaContent('og:description');
         if (strlen($description) !== 0) {
             $document->setDescription($description);
         }
     }

     if (!strlen($document->getImage())) {
         // Prefer the secure image URL; fall back to the plain og:image.
         $imageUrl = $document->getMetaContent('og:image:secure_url');
         if (!$imageUrl) {
             $imageUrl = $document->getMetaContent('og:image');
         }
         if (strlen($imageUrl) !== 0) {
             $document->setImage($imageUrl);
         }
     }

     if (!$document->getExtraField('SITE_NAME')) {
         $siteName = $document->getMetaContent('og:site_name');
         if (strlen($siteName) !== 0) {
             $document->setExtraField('SITE_NAME', $siteName);
         }
     }

     // The favicon is not an OpenGraph property; it is picked up here so the
     // full parser chain does not have to run just to obtain it.
     if (!$document->getExtraField('FAVICON')) {
         $faviconHref = $document->getLinkHref('icon');
         if ($faviconHref) {
             $document->setExtraField('FAVICON', $faviconHref);
         }
     }
 }