/**
 * Recursive wrapper around html_entity_decode().
 *
 * A non-array value is decoded directly. For an array, every element is decoded
 * (descending into nested arrays) and the keys are kept as they are.
 *
 * @param string|array $string The value, or array of values, to decode.
 * @param integer $flags Flags for html_entity_decode(). Null means ENT_COMPAT | ENT_HTML401.
 * @param string $encoding Target character set. Null means ini_get('default_charset').
 * @return string|array The decoded string, or an array of decoded values.
 */
function htmlEntityDecode($string, $flags = null, $encoding = null) {
    if ($flags === null) {
        $flags = ENT_COMPAT | ENT_HTML401;
    }
    if ($encoding === null) {
        $encoding = ini_get('default_charset');
    }

    // Scalars are the base case of the recursion.
    if (!is_array($string)) {
        return html_entity_decode($string, $flags, $encoding);
    }

    // The defaults are already resolved, so nested calls reuse the same flags and encoding.
    $decoded = array();
    foreach ($string as $index => $item) {
        $decoded[$index] = htmlEntityDecode($item, $flags, $encoding);
    }

    return $decoded;
}
/**
 * Examine a page at {@link $url} for title, description & images.
 *
 * The page is fetched through ProxyRequest and parsed with simple_html_dom. Title and
 * description are looked up in order of preference: open graph meta tags, then the
 * <title> element / meta description, then the first paragraph longer than 150 bytes
 * (cut down via SliceParagraph() when longer than 400), and finally the first non-blank paragraph.
 *
 * Be sure to check the resultant array for any Exceptions that occurred while retrieving the page.
 *
 * @param string $url The url to examine. Only the http and https schemes are accepted.
 * @param integer $timeout How long to allow for this request; handed to ProxyRequest as 'Timeout'.
 * Default is 3. (0 presumably means "never time out" -- confirm against ProxyRequest.)
 * @param bool $sendCookies Whether or not to send browser cookies with the request.
 * @return array Returns an array containing Url, Title, Description, Images (array) and Exception
 * (false, or the exception message if there were problems retrieving the page).
 */
function fetchPageInfo($url, $timeout = 3, $sendCookies = false) {
    $PageInfo = array(
        'Url' => $url,
        'Title' => '',
        'Description' => '',
        'Images' => array(),
        'Exception' => false
    );

    try {
        // Make sure the URL is valid.
        $urlParts = parse_url($url);
        if ($urlParts === false || !in_array(val('scheme', $urlParts), array('http', 'https'))) {
            throw new Exception('Invalid URL.', 400);
        }

        // Load the parser only if it has not been loaded yet.
        if (!defined('HDOM_TYPE_ELEMENT')) {
            require_once PATH_LIBRARY . '/vendors/simplehtmldom/simple_html_dom.php';
        }

        $Request = new ProxyRequest();
        $PageHtml = $Request->Request(array('URL' => $url, 'Timeout' => $timeout, 'Cookies' => $sendCookies));

        if (!$Request->status()) {
            throw new Exception('Couldn\'t connect to host.', 400);
        }

        $Dom = str_get_html($PageHtml);
        if (!$Dom) {
            throw new Exception('Failed to load page for parsing.');
        }

        // FIRST PASS: Look for open graph title, desc, images
        $PageInfo['Title'] = domGetContent($Dom, 'meta[property=og:title]');

        Trace('Getting og:description');
        $PageInfo['Description'] = domGetContent($Dom, 'meta[property=og:description]');
        foreach ($Dom->find('meta[property=og:image]') as $Image) {
            if (isset($Image->content)) {
                $PageInfo['Images'][] = $Image->content;
            }
        }

        // SECOND PASS: Look in the page for title, desc, images
        if ($PageInfo['Title'] == '') {
            // find() with an index returns null when the page has no <title>; reading
            // ->plaintext from null would raise a notice/warning, so fall back to ''.
            $titleNode = $Dom->find('title', 0);
            $PageInfo['Title'] = $titleNode ? $titleNode->plaintext : '';
        }

        if ($PageInfo['Description'] == '') {
            Trace('Getting meta description');
            $PageInfo['Description'] = domGetContent($Dom, 'meta[name=description]');
        }

        // THIRD PASS: Look in the page contents
        if ($PageInfo['Description'] == '') {
            foreach ($Dom->find('p') as $element) {
                Trace('Looking at p for description.');

                if (strlen($element->plaintext) > 150) {
                    $PageInfo['Description'] = $element->plaintext;
                    break;
                }
            }
            if (strlen($PageInfo['Description']) > 400) {
                $PageInfo['Description'] = SliceParagraph($PageInfo['Description'], 400);
            }
        }

        // Final: Still nothing? remove limitations
        if ($PageInfo['Description'] == '') {
            foreach ($Dom->find('p') as $element) {
                Trace('Looking at p for description (no restrictions)');
                if (trim($element->plaintext) != '') {
                    $PageInfo['Description'] = $element->plaintext;
                    break;
                }
            }
        }

        // Page Images: only scan the document when no og:image tags were found.
        if (count($PageInfo['Images']) == 0) {
            $Images = domGetImages($Dom, $url);
            $PageInfo['Images'] = array_values($Images);
        }

        $PageInfo['Title'] = htmlEntityDecode($PageInfo['Title']);
        $PageInfo['Description'] = htmlEntityDecode($PageInfo['Description']);
    } catch (Exception $ex) {
        // Failures are reported through the result array instead of being rethrown.
        $PageInfo['Exception'] = $ex->getMessage();
    }

    return $PageInfo;
}
/**
 * Render the entire head module.
 *
 * Registers the canonical link and the open graph / twitter meta tags derived from the
 * sender (controller) and site configuration, fires the BeforeToString event, sorts the
 * registered tags and then serializes the <title>, every tag and every raw string.
 *
 * @return string The head markup: the <title> line, the de-duplicated tags joined by
 * newlines, then each raw string followed by a newline.
 */
public function toString() {
    // Add the canonical Url if necessary.
    if (method_exists($this->_Sender, 'CanonicalUrl') && !c('Garden.Modules.NoCanonicalUrl', false)) {
        $CanonicalUrl = $this->_Sender->canonicalUrl();

        if (!isUrl($CanonicalUrl)) {
            $CanonicalUrl = Gdn::router()->ReverseRoute($CanonicalUrl);
        }

        $this->_Sender->canonicalUrl($CanonicalUrl);
        $this->addTag('link', array('rel' => 'canonical', 'href' => $CanonicalUrl));
    }

    // Include facebook open-graph meta information.
    if ($FbAppID = c('Plugins.Facebook.ApplicationID')) {
        $this->addTag('meta', array('property' => 'fb:app_id', 'content' => $FbAppID));
    }

    $SiteName = c('Garden.Title', '');
    if ($SiteName != '') {
        $this->addTag('meta', array('property' => 'og:site_name', 'content' => $SiteName));
    }

    $Title = htmlEntityDecode(Gdn_Format::text($this->title('', true)));
    if ($Title != '') {
        $this->addTag('meta', array('name' => 'twitter:title', 'property' => 'og:title', 'content' => $Title));
    }

    // $CanonicalUrl only exists when the canonical branch above ran.
    if (isset($CanonicalUrl)) {
        $this->addTag('meta', array('property' => 'og:url', 'content' => $CanonicalUrl));
    }

    // $Description is assigned here and reused below for the twitter card.
    if ($Description = trim(Gdn_Format::reduceWhiteSpaces($this->_Sender->Description()))) {
        $this->addTag('meta', array('name' => 'description', 'property' => 'og:description', 'content' => $Description));
    }

    $hasRelevantImage = false;

    // Default to the site logo if there were no images provided by the controller.
    if (count($this->_Sender->Image()) == 0) {
        $Logo = c('Garden.ShareImage', c('Garden.Logo', ''));
        if ($Logo != '') {
            // Fix the logo path.
            if (stringBeginsWith($Logo, 'uploads/')) {
                $Logo = substr($Logo, strlen('uploads/'));
            }

            $Logo = Gdn_Upload::url($Logo);
            $this->addTag('meta', array('property' => 'og:image', 'content' => $Logo));
        }
    } else {
        foreach ($this->_Sender->Image() as $Img) {
            $this->addTag('meta', array('name' => 'twitter:image', 'property' => 'og:image', 'content' => $Img));
            $hasRelevantImage = true;
        }
    }

    // For the moment at least, only discussions are supported.
    if ($Title && val('DiscussionID', $this->_Sender)) {
        $twitterCardType = $hasRelevantImage ? 'summary_large_image' : 'summary';

        // Let's force a description for the image card since it makes sense to see a card with only an image and a title.
        if (!$Description && $twitterCardType === 'summary_large_image') {
            $Description = '...';
        }

        // Card && Title && Description are required. The card type is always set and
        // the title was checked above, so only the description is left to test.
        if ($Description) {
            $this->addTag('meta', array('name' => 'twitter:description', 'content' => $Description));
            $this->addTag('meta', array('name' => 'twitter:card', 'content' => $twitterCardType));
        }
    }

    $this->fireEvent('BeforeToString');

    // Make sure that css loads before js (for jquery)
    usort($this->_Tags, array('HeadModule', 'TagCmp')); // "link" comes before "script"

    // Start with the title.
    $Head = '<title>' . Gdn_Format::text($this->title()) . "</title>\n";

    $TagStrings = array();
    // Loop through each tag.
    foreach ($this->_Tags as $Attributes) {
        $Tag = $Attributes[self::TAG_KEY];

        // Inline the content of the tag, if necessary.
        if (val('_hint', $Attributes) == 'inline') {
            $Path = val('_path', $Attributes);
            if ($Path && !stringBeginsWith($Path, 'http')) {
                $InlineContent = file_get_contents($Path);

                // Only switch to an inline tag when the file could actually be read.
                // Otherwise keep src/href so the asset is still emitted as a normal
                // linked tag instead of an empty element.
                if ($InlineContent !== false) {
                    $Attributes[self::CONTENT_KEY] = $InlineContent;

                    // Move src/href to underscore-prefixed keys; presumably Attribute()
                    // skips keys starting with '_' -- confirm against its definition.
                    if (isset($Attributes['src'])) {
                        $Attributes['_src'] = $Attributes['src'];
                        unset($Attributes['src']);
                    }
                    if (isset($Attributes['href'])) {
                        $Attributes['_href'] = $Attributes['href'];
                        unset($Attributes['href']);
                    }
                }
            }
        }

        // If we set an IE conditional AND a "Not IE" condition, we will need to make a second pass.
        do {
            // Reset tag string
            $TagString = '';

            // IE conditional? Validates condition.
            $IESpecific = isset($Attributes['_ie']) && preg_match('/((l|g)t(e)? )?IE [0-9\\.]/', $Attributes['_ie']);

            // Only allow $NotIE if we're not doing a conditional this loop.
            $NotIE = !$IESpecific && isset($Attributes['_notie']);

            // Open IE conditional tag
            if ($IESpecific) {
                $TagString .= '<!--[if ' . $Attributes['_ie'] . ']>';
            }
            if ($NotIE) {
                $TagString .= '<!--[if !IE]> -->';
            }

            // Build tag
            $TagString .= ' <' . $Tag . Attribute($Attributes, '_');
            if (array_key_exists(self::CONTENT_KEY, $Attributes)) {
                $TagString .= '>' . $Attributes[self::CONTENT_KEY] . '</' . $Tag . '>';
            } elseif ($Tag == 'script') {
                $TagString .= '></script>';
            } else {
                $TagString .= ' />';
            }

            // Close IE conditional tag
            if ($IESpecific) {
                $TagString .= '<![endif]-->';
            }
            if ($NotIE) {
                $TagString .= '<!-- <![endif]-->';
            }

            // Cleanup (prevent infinite loop)
            if ($IESpecific) {
                unset($Attributes['_ie']);
            }

            $TagStrings[] = $TagString;
        } while ($IESpecific && isset($Attributes['_notie'])); // We need a second pass
    } //endforeach

    // Identical tag strings are emitted only once.
    $Head .= implode("\n", array_unique($TagStrings));

    foreach ($this->_Strings as $String) {
        $Head .= $String;
        $Head .= "\n";
    }

    return $Head;
}
/**
 * Examine a page at {@link $url} for title, description & images.
 *
 * The page is fetched through ProxyRequest (following redirects) and parsed with pQuery.
 * Title and description are looked up in order of preference: open graph meta tags, then
 * the <title> element / meta description, then the first paragraph longer than 150 bytes
 * (cut down via SliceParagraph() when longer than 400), and finally the first non-blank paragraph.
 *
 * Be sure to check the resultant array for any Exceptions that occurred while retrieving the page.
 *
 * @param string $url The url to examine. Only the http and https schemes are accepted.
 * @param integer $timeout How long to allow for this request; handed to ProxyRequest as 'Timeout'.
 * Default is 3. (0 presumably means "never time out" -- confirm against ProxyRequest.)
 * @param bool $sendCookies Whether or not to send browser cookies with the request.
 * @return array Returns an array containing Url, Title, Description, Images (array) and Exception
 * (false, or the exception message if there were problems retrieving the page).
 */
function fetchPageInfo($url, $timeout = 3, $sendCookies = false) {
    $PageInfo = array(
        'Url' => $url,
        'Title' => '',
        'Description' => '',
        'Images' => array(),
        'Exception' => false
    );

    try {
        // Make sure the URL is valid.
        $urlParts = parse_url($url);
        if ($urlParts === false || !in_array(val('scheme', $urlParts), array('http', 'https'))) {
            throw new Exception('Invalid URL.', 400);
        }

        $Request = new ProxyRequest();
        $PageHtml = $Request->Request(array(
            'URL' => $url,
            'Timeout' => $timeout,
            'Cookies' => $sendCookies,
            'Redirects' => true
        ));

        if (!$Request->status()) {
            throw new Exception('Couldn\'t connect to host.', 400);
        }

        $Dom = pQuery::parseStr($PageHtml);
        if (!$Dom) {
            throw new Exception('Failed to load page for parsing.');
        }

        // FIRST PASS: Look for open graph title, desc, images
        $PageInfo['Title'] = domGetContent($Dom, 'meta[property="og:title"]');

        Trace('Getting og:description');
        $PageInfo['Description'] = domGetContent($Dom, 'meta[property="og:description"]');
        foreach ($Dom->query('meta[property="og:image"]') as $Image) {
            $imageUrl = $Image->attr('content');
            if ($imageUrl) {
                $PageInfo['Images'][] = $imageUrl;
            }
        }

        // SECOND PASS: Look in the page for title, desc, images
        if ($PageInfo['Title'] == '') {
            $PageInfo['Title'] = $Dom->query('title')->text();
        }

        if ($PageInfo['Description'] == '') {
            Trace('Getting meta description');
            $PageInfo['Description'] = domGetContent($Dom, 'meta[name="description"]');
        }

        // THIRD PASS: Look in the page contents
        if ($PageInfo['Description'] == '') {
            foreach ($Dom->query('p') as $element) {
                Trace('Looking at p for description.');

                // Measure the same text() value that gets stored. The previous check read
                // ->plaintext, a simple_html_dom property, while every other access in
                // this function goes through pQuery's text().
                $paragraphText = $element->text();
                if (strlen($paragraphText) > 150) {
                    $PageInfo['Description'] = $paragraphText;
                    break;
                }
            }
            if (strlen($PageInfo['Description']) > 400) {
                $PageInfo['Description'] = SliceParagraph($PageInfo['Description'], 400);
            }
        }

        // Final: Still nothing? remove limitations
        if ($PageInfo['Description'] == '') {
            foreach ($Dom->query('p') as $element) {
                Trace('Looking at p for description (no restrictions)');
                $paragraphText = $element->text();
                if (trim($paragraphText) != '') {
                    $PageInfo['Description'] = $paragraphText;
                    break;
                }
            }
        }

        // Page Images: only scan the document when no og:image tags were found.
        if (count($PageInfo['Images']) == 0) {
            $Images = domGetImages($Dom, $url);
            $PageInfo['Images'] = array_values($Images);
        }

        $PageInfo['Title'] = htmlEntityDecode($PageInfo['Title']);
        $PageInfo['Description'] = htmlEntityDecode($PageInfo['Description']);
    } catch (Exception $ex) {
        // Failures are reported through the result array instead of being rethrown.
        $PageInfo['Exception'] = $ex->getMessage();
    }

    return $PageInfo;
}