Example #1
0
/**
 * Recursive wrapper around html_entity_decode().
 *
 * A non-array value is decoded directly; an array is walked element by element (nested arrays
 * included) and returned with the same keys in the same order.
 *
 * @param $string   string|array the value, or array of values, to decode
 * @param $flags    integer html_entity_decode() flags; null means ENT_COMPAT | ENT_HTML401
 * @param $encoding string target character set; null means ini_get('default_charset')
 * @return string|array the decoded value, shaped like the input
 */
function htmlEntityDecode($string, $flags = null, $encoding = null)
{
    // Resolve the defaults once; the recursive calls below receive the resolved values.
    $flags = ($flags === null) ? (ENT_COMPAT | ENT_HTML401) : $flags;
    $encoding = ($encoding === null) ? ini_get('default_charset') : $encoding;

    // Scalar case: decode and leave.
    if (!is_array($string)) {
        return html_entity_decode($string, $flags, $encoding);
    }

    // Array case: decode every element, keeping the original keys.
    $decoded = array();
    foreach ($string as $key => $value) {
        $decoded[$key] = htmlEntityDecode($value, $flags, $encoding);
    }
    return $decoded;
}
Example #2
0
 /**
  * Examine the page at $url for title, description & images.
  *
  * The page is fetched with ProxyRequest and parsed with simple_html_dom. Open graph meta tags are
  * tried first; the <title> element, the meta description and finally the page's <p> elements are
  * used as fallbacks.
  *
  * Be sure to check the resultant array for any Exceptions that occurred while retrieving the page.
  *
  * @param string $url The url to examine. Only http and https urls are accepted.
  * @param integer $timeout How long to allow for this request (passed to ProxyRequest as 'Timeout').
  * Default is 3.
  * @param bool $sendCookies Whether or not to send browser cookies with the request.
  * @return array Returns an array containing Url, Title, Description, Images (array) and Exception
  * (false, or the exception message if there were problems retrieving the page).
  */
 function fetchPageInfo($url, $timeout = 3, $sendCookies = false)
 {
     $PageInfo = array('Url' => $url, 'Title' => '', 'Description' => '', 'Images' => array(), 'Exception' => false);
     try {
         // Make sure the URL is valid.
         $urlParts = parse_url($url);
         if ($urlParts === false || !in_array(val('scheme', $urlParts), array('http', 'https'), true)) {
             throw new Exception('Invalid URL.', 400);
         }
         // Load the parser only if it has not been loaded yet.
         if (!defined('HDOM_TYPE_ELEMENT')) {
             require_once PATH_LIBRARY . '/vendors/simplehtmldom/simple_html_dom.php';
         }
         $Request = new ProxyRequest();
         $PageHtml = $Request->Request(array('URL' => $url, 'Timeout' => $timeout, 'Cookies' => $sendCookies));
         if (!$Request->status()) {
             throw new Exception('Couldn\'t connect to host.', 400);
         }
         $Dom = str_get_html($PageHtml);
         if (!$Dom) {
             throw new Exception('Failed to load page for parsing.');
         }
         // FIRST PASS: Look for open graph title, desc, images
         $PageInfo['Title'] = domGetContent($Dom, 'meta[property=og:title]');
         Trace('Getting og:description');
         $PageInfo['Description'] = domGetContent($Dom, 'meta[property=og:description]');
         foreach ($Dom->find('meta[property=og:image]') as $Image) {
             if (isset($Image->content)) {
                 $PageInfo['Images'][] = $Image->content;
             }
         }
         // SECOND PASS: Look in the page for title, desc, images
         if ($PageInfo['Title'] == '') {
             // A page without a <title> yields no element here; reading ->plaintext on it would
             // raise a warning, so fall back to an empty title instead.
             $TitleElement = $Dom->find('title', 0);
             $PageInfo['Title'] = $TitleElement ? $TitleElement->plaintext : '';
         }
         if ($PageInfo['Description'] == '') {
             Trace('Getting meta description');
             $PageInfo['Description'] = domGetContent($Dom, 'meta[name=description]');
         }
         // THIRD PASS: Look in the page contents
         if ($PageInfo['Description'] == '') {
             // Take the first paragraph that is longer than 150 bytes.
             foreach ($Dom->find('p') as $element) {
                 Trace('Looking at p for description.');
                 if (strlen($element->plaintext) > 150) {
                     $PageInfo['Description'] = $element->plaintext;
                     break;
                 }
             }
             // Keep the paragraph-derived description to at most 400 bytes.
             if (strlen($PageInfo['Description']) > 400) {
                 $PageInfo['Description'] = SliceParagraph($PageInfo['Description'], 400);
             }
         }
         // Final: Still nothing? remove limitations
         if ($PageInfo['Description'] == '') {
             foreach ($Dom->find('p') as $element) {
                 Trace('Looking at p for description (no restrictions)');
                 if (trim($element->plaintext) != '') {
                     $PageInfo['Description'] = $element->plaintext;
                     break;
                 }
             }
         }
         // Page Images: only scan the page when no open graph image was found.
         if (count($PageInfo['Images']) == 0) {
             $Images = domGetImages($Dom, $url);
             $PageInfo['Images'] = array_values($Images);
         }
         $PageInfo['Title'] = htmlEntityDecode($PageInfo['Title']);
         $PageInfo['Description'] = htmlEntityDecode($PageInfo['Description']);
     } catch (Exception $ex) {
         // Report the failure through the result array instead of propagating it.
         $PageInfo['Exception'] = $ex->getMessage();
     }
     return $PageInfo;
 }
Example #3
0
 /**
  * Render the entire head module.
  *
  * Adds the canonical link and the open graph / twitter meta tags derived from the sender, fires
  * the BeforeToString event, sorts the collected tags and serializes them after the <title>
  * element, followed by the raw strings held in $this->_Strings.
  *
  * @return string The markup built from the title, the tags and the raw strings.
  */
 public function toString()
 {
     // Add the canonical Url if necessary.
     if (method_exists($this->_Sender, 'CanonicalUrl') && !c('Garden.Modules.NoCanonicalUrl', false)) {
         $CanonicalUrl = $this->_Sender->canonicalUrl();
         if (!isUrl($CanonicalUrl)) {
             $CanonicalUrl = Gdn::router()->ReverseRoute($CanonicalUrl);
         }
         $this->_Sender->canonicalUrl($CanonicalUrl);
         // The canonical link is emitted unconditionally, even if it matches the current url.
         $this->addTag('link', array('rel' => 'canonical', 'href' => $CanonicalUrl));
     }
     // Include facebook open-graph meta information.
     if ($FbAppID = c('Plugins.Facebook.ApplicationID')) {
         $this->addTag('meta', array('property' => 'fb:app_id', 'content' => $FbAppID));
     }
     $SiteName = c('Garden.Title', '');
     if ($SiteName != '') {
         $this->addTag('meta', array('property' => 'og:site_name', 'content' => $SiteName));
     }
     $Title = htmlEntityDecode(Gdn_Format::text($this->title('', true)));
     if ($Title != '') {
         $this->addTag('meta', array('name' => 'twitter:title', 'property' => 'og:title', 'content' => $Title));
     }
     // $CanonicalUrl only exists when the canonical branch above ran.
     if (isset($CanonicalUrl)) {
         $this->addTag('meta', array('property' => 'og:url', 'content' => $CanonicalUrl));
     }
     if ($Description = trim(Gdn_Format::reduceWhiteSpaces($this->_Sender->Description()))) {
         $this->addTag('meta', array('name' => 'description', 'property' => 'og:description', 'content' => $Description));
     }
     // Becomes true only when the sender supplied its own image(s), not for the fallback logo.
     $hasRelevantImage = false;
     // Default to the site logo if there were no images provided by the controller.
     if (count($this->_Sender->Image()) == 0) {
         $Logo = c('Garden.ShareImage', c('Garden.Logo', ''));
         if ($Logo != '') {
             // Fix the logo path.
             if (stringBeginsWith($Logo, 'uploads/')) {
                 $Logo = substr($Logo, strlen('uploads/'));
             }
             $Logo = Gdn_Upload::url($Logo);
             $this->addTag('meta', array('property' => 'og:image', 'content' => $Logo));
         }
     } else {
         foreach ($this->_Sender->Image() as $Img) {
             $this->addTag('meta', array('name' => 'twitter:image', 'property' => 'og:image', 'content' => $Img));
             $hasRelevantImage = true;
         }
     }
     // For the moment at least, only discussions are supported.
     if ($Title && val('DiscussionID', $this->_Sender)) {
         if ($hasRelevantImage) {
             $twitterCardType = 'summary_large_image';
         } else {
             $twitterCardType = 'summary';
         }
         // Let's force a description for the image card since it makes sense to see a card with only an image and a title.
         if (!$Description && $twitterCardType === 'summary_large_image') {
             $Description = '...';
         }
         // Card && Title && Description are required
         if ($twitterCardType && $Description) {
             $this->addTag('meta', array('name' => 'twitter:description', 'content' => $Description));
             $this->addTag('meta', array('name' => 'twitter:card', 'content' => $twitterCardType));
         }
     }
     $this->fireEvent('BeforeToString');
     // Make sure that css loads before js (for jquery)
     // "link" comes before "script"
     usort($this->_Tags, array('HeadModule', 'TagCmp'));
     // Start with the title.
     $Head = '<title>' . Gdn_Format::text($this->title()) . "</title>\n";
     $TagStrings = array();
     // Loop through each tag.
     foreach ($this->_Tags as $Attributes) {
         $Tag = $Attributes[self::TAG_KEY];
         // Inline the content of the tag, if necessary.
         if (val('_hint', $Attributes) == 'inline') {
             $Path = val('_path', $Attributes);
             if ($Path && !stringBeginsWith($Path, 'http')) {
                 $Attributes[self::CONTENT_KEY] = file_get_contents($Path);
                 // Move src/href to underscore-prefixed keys so the inlined tag no longer carries them
                 // under their original names.
                 if (isset($Attributes['src'])) {
                     $Attributes['_src'] = $Attributes['src'];
                     unset($Attributes['src']);
                 }
                 if (isset($Attributes['href'])) {
                     $Attributes['_href'] = $Attributes['href'];
                     unset($Attributes['href']);
                 }
             }
         }
         // If we set an IE conditional AND a "Not IE" condition, we will need to make a second pass.
         do {
             // Reset tag string
             $TagString = '';
             // IE conditional? Validates condition.
             $IESpecific = isset($Attributes['_ie']) && preg_match('/((l|g)t(e)? )?IE [0-9\\.]/', $Attributes['_ie']);
             // Only allow $NotIE if we're not doing a conditional this loop.
             $NotIE = !$IESpecific && isset($Attributes['_notie']);
             // Open IE conditional tag
             if ($IESpecific) {
                 $TagString .= '<!--[if ' . $Attributes['_ie'] . ']>';
             }
             if ($NotIE) {
                 $TagString .= '<!--[if !IE]> -->';
             }
             // Build tag
             $TagString .= '  <' . $Tag . Attribute($Attributes, '_');
             if (array_key_exists(self::CONTENT_KEY, $Attributes)) {
                 $TagString .= '>' . $Attributes[self::CONTENT_KEY] . '</' . $Tag . '>';
             } elseif ($Tag == 'script') {
                 $TagString .= '></script>';
             } else {
                 $TagString .= ' />';
             }
             // Close IE conditional tag
             if ($IESpecific) {
                 $TagString .= '<![endif]-->';
             }
             if ($NotIE) {
                 $TagString .= '<!-- <![endif]-->';
             }
             // Cleanup (prevent infinite loop)
             if ($IESpecific) {
                 unset($Attributes['_ie']);
             }
             $TagStrings[] = $TagString;
             // Loop again only when an IE-specific tag also carries a "_notie" flag.
         } while ($IESpecific && isset($Attributes['_notie']));
     }
     // Identical tag strings are emitted only once.
     $Head .= implode("\n", array_unique($TagStrings));
     foreach ($this->_Strings as $String) {
         $Head .= $String;
         $Head .= "\n";
     }
     return $Head;
 }
Example #4
0
 /**
  * Examine the page at $url for title, description & images.
  *
  * The page is fetched with ProxyRequest (with 'Redirects' enabled) and parsed with pQuery. Open
  * graph meta tags are tried first; the <title> element, the meta description and finally the
  * page's <p> elements are used as fallbacks.
  *
  * Be sure to check the resultant array for any Exceptions that occurred while retrieving the page.
  *
  * @param string $url The url to examine. Only http and https urls are accepted.
  * @param integer $timeout How long to allow for this request (passed to ProxyRequest as 'Timeout').
  * Default is 3.
  * @param bool $sendCookies Whether or not to send browser cookies with the request.
  * @return array Returns an array containing Url, Title, Description, Images (array) and Exception
  * (false, or the exception message if there were problems retrieving the page).
  */
 function fetchPageInfo($url, $timeout = 3, $sendCookies = false)
 {
     $PageInfo = array('Url' => $url, 'Title' => '', 'Description' => '', 'Images' => array(), 'Exception' => false);
     try {
         // Make sure the URL is valid.
         $urlParts = parse_url($url);
         if ($urlParts === false || !in_array(val('scheme', $urlParts), array('http', 'https'), true)) {
             throw new Exception('Invalid URL.', 400);
         }
         $Request = new ProxyRequest();
         $PageHtml = $Request->Request(array('URL' => $url, 'Timeout' => $timeout, 'Cookies' => $sendCookies, 'Redirects' => true));
         if (!$Request->status()) {
             throw new Exception('Couldn\'t connect to host.', 400);
         }
         $Dom = pQuery::parseStr($PageHtml);
         if (!$Dom) {
             throw new Exception('Failed to load page for parsing.');
         }
         // FIRST PASS: Look for open graph title, desc, images
         $PageInfo['Title'] = domGetContent($Dom, 'meta[property="og:title"]');
         Trace('Getting og:description');
         $PageInfo['Description'] = domGetContent($Dom, 'meta[property="og:description"]');
         foreach ($Dom->query('meta[property="og:image"]') as $Image) {
             if ($Image->attr('content')) {
                 $PageInfo['Images'][] = $Image->attr('content');
             }
         }
         // SECOND PASS: Look in the page for title, desc, images
         if ($PageInfo['Title'] == '') {
             $PageInfo['Title'] = $Dom->query('title')->text();
         }
         if ($PageInfo['Description'] == '') {
             Trace('Getting meta description');
             $PageInfo['Description'] = domGetContent($Dom, 'meta[name="description"]');
         }
         // THIRD PASS: Look in the page contents
         if ($PageInfo['Description'] == '') {
             // Take the first paragraph that is longer than 150 bytes.
             foreach ($Dom->query('p') as $element) {
                 Trace('Looking at p for description.');
                 // Measure the same text() value that gets stored; the rest of this function reads
                 // node text through text(), not through a "plaintext" property.
                 $ParagraphText = $element->text();
                 if (strlen($ParagraphText) > 150) {
                     $PageInfo['Description'] = $ParagraphText;
                     break;
                 }
             }
             // Keep the paragraph-derived description to at most 400 bytes.
             if (strlen($PageInfo['Description']) > 400) {
                 $PageInfo['Description'] = SliceParagraph($PageInfo['Description'], 400);
             }
         }
         // Final: Still nothing? remove limitations
         if ($PageInfo['Description'] == '') {
             foreach ($Dom->query('p') as $element) {
                 Trace('Looking at p for description (no restrictions)');
                 if (trim($element->text()) != '') {
                     $PageInfo['Description'] = $element->text();
                     break;
                 }
             }
         }
         // Page Images: only scan the page when no open graph image was found.
         if (count($PageInfo['Images']) == 0) {
             $Images = domGetImages($Dom, $url);
             $PageInfo['Images'] = array_values($Images);
         }
         $PageInfo['Title'] = htmlEntityDecode($PageInfo['Title']);
         $PageInfo['Description'] = htmlEntityDecode($PageInfo['Description']);
     } catch (Exception $ex) {
         // Report the failure through the result array instead of propagating it.
         $PageInfo['Exception'] = $ex->getMessage();
     }
     return $PageInfo;
 }